novel-downloader 2.0.0__py3-none-any.whl → 2.0.2__py3-none-any.whl

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Files changed (137)
  1. novel_downloader/__init__.py +1 -1
  2. novel_downloader/cli/download.py +14 -11
  3. novel_downloader/cli/export.py +19 -19
  4. novel_downloader/cli/ui.py +35 -8
  5. novel_downloader/config/adapter.py +216 -153
  6. novel_downloader/core/__init__.py +5 -6
  7. novel_downloader/core/archived/deqixs/fetcher.py +1 -28
  8. novel_downloader/core/downloaders/__init__.py +2 -0
  9. novel_downloader/core/downloaders/base.py +34 -85
  10. novel_downloader/core/downloaders/common.py +147 -171
  11. novel_downloader/core/downloaders/qianbi.py +30 -64
  12. novel_downloader/core/downloaders/qidian.py +157 -184
  13. novel_downloader/core/downloaders/qqbook.py +292 -0
  14. novel_downloader/core/downloaders/registry.py +2 -2
  15. novel_downloader/core/exporters/__init__.py +2 -0
  16. novel_downloader/core/exporters/base.py +37 -59
  17. novel_downloader/core/exporters/common.py +620 -0
  18. novel_downloader/core/exporters/linovelib.py +47 -0
  19. novel_downloader/core/exporters/qidian.py +41 -12
  20. novel_downloader/core/exporters/qqbook.py +28 -0
  21. novel_downloader/core/exporters/registry.py +2 -2
  22. novel_downloader/core/fetchers/__init__.py +4 -2
  23. novel_downloader/core/fetchers/aaatxt.py +2 -22
  24. novel_downloader/core/fetchers/b520.py +3 -23
  25. novel_downloader/core/fetchers/base.py +80 -105
  26. novel_downloader/core/fetchers/biquyuedu.py +2 -22
  27. novel_downloader/core/fetchers/dxmwx.py +10 -22
  28. novel_downloader/core/fetchers/esjzone.py +6 -29
  29. novel_downloader/core/fetchers/guidaye.py +2 -22
  30. novel_downloader/core/fetchers/hetushu.py +9 -29
  31. novel_downloader/core/fetchers/i25zw.py +2 -16
  32. novel_downloader/core/fetchers/ixdzs8.py +2 -16
  33. novel_downloader/core/fetchers/jpxs123.py +2 -16
  34. novel_downloader/core/fetchers/lewenn.py +2 -22
  35. novel_downloader/core/fetchers/linovelib.py +4 -20
  36. novel_downloader/core/fetchers/{eightnovel.py → n8novel.py} +12 -40
  37. novel_downloader/core/fetchers/piaotia.py +2 -16
  38. novel_downloader/core/fetchers/qbtr.py +2 -16
  39. novel_downloader/core/fetchers/qianbi.py +1 -20
  40. novel_downloader/core/fetchers/qidian.py +27 -68
  41. novel_downloader/core/fetchers/qqbook.py +177 -0
  42. novel_downloader/core/fetchers/quanben5.py +9 -29
  43. novel_downloader/core/fetchers/rate_limiter.py +22 -53
  44. novel_downloader/core/fetchers/sfacg.py +3 -16
  45. novel_downloader/core/fetchers/shencou.py +2 -16
  46. novel_downloader/core/fetchers/shuhaige.py +2 -22
  47. novel_downloader/core/fetchers/tongrenquan.py +2 -22
  48. novel_downloader/core/fetchers/ttkan.py +3 -14
  49. novel_downloader/core/fetchers/wanbengo.py +2 -22
  50. novel_downloader/core/fetchers/xiaoshuowu.py +2 -16
  51. novel_downloader/core/fetchers/xiguashuwu.py +4 -20
  52. novel_downloader/core/fetchers/xs63b.py +3 -15
  53. novel_downloader/core/fetchers/xshbook.py +2 -22
  54. novel_downloader/core/fetchers/yamibo.py +4 -28
  55. novel_downloader/core/fetchers/yibige.py +13 -26
  56. novel_downloader/core/interfaces/exporter.py +19 -7
  57. novel_downloader/core/interfaces/fetcher.py +23 -49
  58. novel_downloader/core/interfaces/parser.py +2 -2
  59. novel_downloader/core/parsers/__init__.py +4 -2
  60. novel_downloader/core/parsers/b520.py +2 -2
  61. novel_downloader/core/parsers/base.py +5 -39
  62. novel_downloader/core/parsers/esjzone.py +3 -3
  63. novel_downloader/core/parsers/{eightnovel.py → n8novel.py} +7 -7
  64. novel_downloader/core/parsers/qidian.py +717 -0
  65. novel_downloader/core/parsers/qqbook.py +709 -0
  66. novel_downloader/core/parsers/xiguashuwu.py +8 -15
  67. novel_downloader/core/searchers/__init__.py +2 -2
  68. novel_downloader/core/searchers/b520.py +1 -1
  69. novel_downloader/core/searchers/base.py +2 -2
  70. novel_downloader/core/searchers/{eightnovel.py → n8novel.py} +5 -5
  71. novel_downloader/locales/en.json +3 -3
  72. novel_downloader/locales/zh.json +3 -3
  73. novel_downloader/models/__init__.py +2 -0
  74. novel_downloader/models/book.py +1 -0
  75. novel_downloader/models/config.py +12 -0
  76. novel_downloader/resources/config/settings.toml +23 -5
  77. novel_downloader/resources/js_scripts/expr_to_json.js +14 -0
  78. novel_downloader/resources/js_scripts/qidian_decrypt_node.js +21 -16
  79. novel_downloader/resources/js_scripts/qq_decrypt_node.js +92 -0
  80. novel_downloader/utils/__init__.py +0 -2
  81. novel_downloader/utils/chapter_storage.py +2 -3
  82. novel_downloader/utils/constants.py +7 -3
  83. novel_downloader/utils/cookies.py +32 -17
  84. novel_downloader/utils/crypto_utils/__init__.py +0 -6
  85. novel_downloader/utils/crypto_utils/aes_util.py +1 -1
  86. novel_downloader/utils/crypto_utils/rc4.py +40 -50
  87. novel_downloader/utils/epub/__init__.py +2 -3
  88. novel_downloader/utils/epub/builder.py +6 -6
  89. novel_downloader/utils/epub/constants.py +1 -6
  90. novel_downloader/utils/epub/documents.py +7 -7
  91. novel_downloader/utils/epub/models.py +8 -8
  92. novel_downloader/utils/epub/utils.py +10 -10
  93. novel_downloader/utils/file_utils/io.py +48 -73
  94. novel_downloader/utils/file_utils/normalize.py +1 -7
  95. novel_downloader/utils/file_utils/sanitize.py +4 -11
  96. novel_downloader/utils/fontocr/__init__.py +13 -0
  97. novel_downloader/utils/{fontocr.py → fontocr/core.py} +72 -61
  98. novel_downloader/utils/fontocr/loader.py +52 -0
  99. novel_downloader/utils/logger.py +80 -56
  100. novel_downloader/utils/network.py +16 -40
  101. novel_downloader/utils/node_decryptor/__init__.py +13 -0
  102. novel_downloader/utils/node_decryptor/decryptor.py +342 -0
  103. novel_downloader/{core/parsers/qidian/utils → utils/node_decryptor}/decryptor_fetcher.py +5 -6
  104. novel_downloader/utils/text_utils/text_cleaner.py +39 -30
  105. novel_downloader/utils/text_utils/truncate_utils.py +3 -14
  106. novel_downloader/utils/time_utils/sleep_utils.py +53 -43
  107. novel_downloader/web/main.py +1 -1
  108. novel_downloader/web/pages/download.py +1 -1
  109. novel_downloader/web/pages/search.py +4 -4
  110. novel_downloader/web/services/task_manager.py +2 -0
  111. {novel_downloader-2.0.0.dist-info → novel_downloader-2.0.2.dist-info}/METADATA +5 -1
  112. novel_downloader-2.0.2.dist-info/RECORD +203 -0
  113. novel_downloader/core/exporters/common/__init__.py +0 -11
  114. novel_downloader/core/exporters/common/epub.py +0 -198
  115. novel_downloader/core/exporters/common/main_exporter.py +0 -64
  116. novel_downloader/core/exporters/common/txt.py +0 -146
  117. novel_downloader/core/exporters/epub_util.py +0 -215
  118. novel_downloader/core/exporters/linovelib/__init__.py +0 -11
  119. novel_downloader/core/exporters/linovelib/epub.py +0 -349
  120. novel_downloader/core/exporters/linovelib/main_exporter.py +0 -66
  121. novel_downloader/core/exporters/linovelib/txt.py +0 -139
  122. novel_downloader/core/exporters/txt_util.py +0 -67
  123. novel_downloader/core/parsers/qidian/__init__.py +0 -10
  124. novel_downloader/core/parsers/qidian/book_info_parser.py +0 -89
  125. novel_downloader/core/parsers/qidian/chapter_encrypted.py +0 -470
  126. novel_downloader/core/parsers/qidian/chapter_normal.py +0 -126
  127. novel_downloader/core/parsers/qidian/chapter_router.py +0 -68
  128. novel_downloader/core/parsers/qidian/main_parser.py +0 -101
  129. novel_downloader/core/parsers/qidian/utils/__init__.py +0 -30
  130. novel_downloader/core/parsers/qidian/utils/fontmap_recover.py +0 -143
  131. novel_downloader/core/parsers/qidian/utils/helpers.py +0 -110
  132. novel_downloader/core/parsers/qidian/utils/node_decryptor.py +0 -175
  133. novel_downloader-2.0.0.dist-info/RECORD +0 -210
  134. {novel_downloader-2.0.0.dist-info → novel_downloader-2.0.2.dist-info}/WHEEL +0 -0
  135. {novel_downloader-2.0.0.dist-info → novel_downloader-2.0.2.dist-info}/entry_points.txt +0 -0
  136. {novel_downloader-2.0.0.dist-info → novel_downloader-2.0.2.dist-info}/licenses/LICENSE +0 -0
  137. {novel_downloader-2.0.0.dist-info → novel_downloader-2.0.2.dist-info}/top_level.txt +0 -0
@@ -4,17 +4,20 @@ novel_downloader.config.adapter
4
4
  -------------------------------
5
5
 
6
6
  Defines ConfigAdapter, which maps a raw configuration dictionary and
7
- site name into structured dataclass-based config models.
7
+ site into structured dataclass-based config models.
8
8
  """
9
9
 
10
+ import contextlib
10
11
  import json
11
- from typing import Any, TypeVar, cast
12
+ from collections.abc import Mapping
13
+ from typing import Any, TypeVar
12
14
 
13
15
  from novel_downloader.models import (
14
16
  BookConfig,
15
17
  DownloaderConfig,
16
18
  ExporterConfig,
17
19
  FetcherConfig,
20
+ FontOCRConfig,
18
21
  ParserConfig,
19
22
  TextCleanerConfig,
20
23
  )
@@ -26,96 +29,109 @@ class ConfigAdapter:
26
29
  """
27
30
  Adapter to map a raw configuration dictionary and site name
28
31
  into structured dataclass configuration models.
32
+
33
+ Resolution order for each field:
34
+ 1. ``config["sites"][<site>]`` (if present)
35
+ 2. ``config["general"]`` (if present)
36
+ 3. Hard-coded default passed by the caller
29
37
  """
30
38
 
31
- def __init__(self, config: dict[str, Any], site: str):
39
+ def __init__(self, config: Mapping[str, Any], site: str):
32
40
  """
33
- Initialize the adapter.
41
+ Initialize the adapter with a configuration mapping and a site key.
34
42
 
35
- :param config: The fully loaded configuration dictionary.
36
- :param site: The current site name (e.g. "qidian").
43
+ :param config: Fully loaded configuration mapping.
44
+ :param site: Current site key (e.g., ``"qidian"``).
37
45
  """
38
- self._config = config
39
- self._site = site
40
- self._site_cfg: dict[str, Any] = self._get_site_cfg()
41
- self._gen_cfg: dict[str, Any] = config.get("general") or {}
46
+ self._config: dict[str, Any] = dict(config)
47
+ self._site: str = site
42
48
 
43
49
  def get_fetcher_config(self) -> FetcherConfig:
44
50
  """
45
- Build a FetcherConfig from the raw configuration.
51
+ Build a :class:`novel_downloader.models.FetcherConfig` by resolving fields
52
+ from site-specific and general settings.
46
53
 
47
- :return: A FetcherConfig instance with all fields populated.
54
+ :return: Fully populated configuration for the network fetcher.
48
55
  """
56
+ s, g = self._site_cfg, self._gen_cfg
49
57
  return FetcherConfig(
50
- request_interval=self._get_gen_cfg("request_interval", 2.0),
51
- retry_times=self._get_gen_cfg("retry_times", 3),
52
- backoff_factor=self._get_gen_cfg("backoff_factor", 2.0),
53
- timeout=self._get_gen_cfg("timeout", 30.0),
54
- max_connections=self._get_gen_cfg("max_connections", 10),
55
- max_rps=self._get_gen_cfg("max_rps", 1000.0),
56
- user_agent=self._get_gen_cfg("user_agent", None),
57
- headers=self._get_gen_cfg("headers", None),
58
- verify_ssl=self._get_gen_cfg("verify_ssl", True),
59
- locale_style=self._get_gen_cfg("locale_style", "simplified"),
58
+ request_interval=self._pick("request_interval", 2.0, s, g),
59
+ retry_times=self._pick("retry_times", 3, s, g),
60
+ backoff_factor=self._pick("backoff_factor", 2.0, s, g),
61
+ timeout=self._pick("timeout", 30.0, s, g),
62
+ max_connections=self._pick("max_connections", 10, s, g),
63
+ max_rps=self._pick("max_rps", 1000.0, s, g),
64
+ user_agent=self._pick("user_agent", None, s, g),
65
+ headers=self._pick("headers", None, s, g),
66
+ verify_ssl=self._pick("verify_ssl", True, s, g),
67
+ locale_style=self._pick("locale_style", "simplified", s, g),
60
68
  )
61
69
 
62
70
  def get_downloader_config(self) -> DownloaderConfig:
63
71
  """
64
- Build a DownloaderConfig using both general and site-specific settings.
72
+ Build a :class:`novel_downloader.models.DownloaderConfig` using both
73
+ general and site-specific settings.
65
74
 
66
- :return: A DownloaderConfig instance with all fields populated.
75
+ :return: Fully populated configuration for the chapter/page downloader.
67
76
  """
68
- gen = self._config.get("general", {})
69
- debug = gen.get("debug", {})
77
+ s, g = self._site_cfg, self._gen_cfg
78
+ debug = g.get("debug") or {}
70
79
  return DownloaderConfig(
71
- request_interval=self._get_gen_cfg("request_interval", 2.0),
72
- retry_times=self._get_gen_cfg("retry_times", 3),
73
- backoff_factor=self._get_gen_cfg("backoff_factor", 2.0),
74
- workers=self._get_gen_cfg("workers", 2),
75
- skip_existing=self._get_gen_cfg("skip_existing", True),
76
- login_required=self._site_cfg.get("login_required", False),
77
- save_html=debug.get("save_html", False),
78
- raw_data_dir=gen.get("raw_data_dir", "./raw_data"),
79
- cache_dir=gen.get("cache_dir", "./novel_cache"),
80
- storage_batch_size=gen.get("storage_batch_size", 1),
80
+ request_interval=self._pick("request_interval", 2.0, s, g),
81
+ retry_times=self._pick("retry_times", 3, s, g),
82
+ backoff_factor=self._pick("backoff_factor", 2.0, s, g),
83
+ workers=self._pick("workers", 2, s, g),
84
+ skip_existing=self._pick("skip_existing", True, s, g),
85
+ login_required=bool(s.get("login_required", False)),
86
+ save_html=bool(debug.get("save_html", False)),
87
+ raw_data_dir=g.get("raw_data_dir", "./raw_data"),
88
+ cache_dir=g.get("cache_dir", "./novel_cache"),
89
+ storage_batch_size=g.get("storage_batch_size", 1),
81
90
  )
82
91
 
83
92
  def get_parser_config(self) -> ParserConfig:
84
93
  """
85
- Build a ParserConfig from general, OCR, and site-specific settings.
94
+ Build a :class:`novel_downloader.models.ParserConfig` from general,
95
+ OCR-related, and site-specific settings.
86
96
 
87
- :return: A ParserConfig instance with all fields populated.
97
+ :return: Fully populated configuration for the parser stage.
88
98
  """
89
- gen = self._config.get("general", {})
90
- font_ocr = gen.get("font_ocr", {})
99
+ g = self._gen_cfg
100
+ s = self._site_cfg
101
+ g_font = g.get("font_ocr") or {}
102
+ s_font = s.get("font_ocr") or {}
103
+ font_ocr: dict[str, Any] = {**g_font, **s_font}
91
104
  return ParserConfig(
92
- cache_dir=gen.get("cache_dir", "./novel_cache"),
93
- use_truncation=self._site_cfg.get("use_truncation", True),
94
- decode_font=font_ocr.get("decode_font", False),
95
- save_font_debug=font_ocr.get("save_font_debug", False),
96
- batch_size=font_ocr.get("batch_size", 32),
105
+ cache_dir=g.get("cache_dir", "./novel_cache"),
106
+ use_truncation=bool(s.get("use_truncation", True)),
107
+ decode_font=bool(font_ocr.get("decode_font", False)),
108
+ save_font_debug=bool(font_ocr.get("save_font_debug", False)),
109
+ batch_size=int(font_ocr.get("batch_size", 32)),
110
+ fontocr_cfg=self._dict_to_fontocr_cfg(font_ocr),
97
111
  )
98
112
 
99
113
  def get_exporter_config(self) -> ExporterConfig:
100
114
  """
101
- Build an ExporterConfig from output and general settings.
115
+ Build an :class:`novel_downloader.models.ExporterConfig` from the
116
+ ``output`` and ``cleaner`` sections plus general settings.
102
117
 
103
- :return: An ExporterConfig instance with all fields populated.
118
+ :return: Fully populated configuration for text/ebook export.
104
119
  """
105
- gen = self._config.get("general", {})
106
- out = self._config.get("output", {})
107
- cln = self._config.get("cleaner", {})
108
- fmt = out.get("formats", {})
109
- naming = out.get("naming", {})
110
- epub_opts = out.get("epub", {})
120
+ g = self._gen_cfg
121
+ out = self._config.get("output") or {}
122
+ cln = self._config.get("cleaner") or {}
123
+ fmt = out.get("formats") or {}
124
+ naming = out.get("naming") or {}
125
+ epub_opts = out.get("epub") or {}
126
+
111
127
  cleaner_cfg = self._dict_to_cleaner_cfg(cln)
112
128
  return ExporterConfig(
113
- cache_dir=gen.get("cache_dir", "./novel_cache"),
114
- raw_data_dir=gen.get("raw_data_dir", "./raw_data"),
115
- output_dir=gen.get("output_dir", "./downloads"),
116
- clean_text=cln.get("clean_text", True),
129
+ cache_dir=g.get("cache_dir", "./novel_cache"),
130
+ raw_data_dir=g.get("raw_data_dir", "./raw_data"),
131
+ output_dir=g.get("output_dir", "./downloads"),
132
+ clean_text=cln.get("clean_text", False),
117
133
  make_txt=fmt.get("make_txt", True),
118
- make_epub=fmt.get("make_epub", False),
134
+ make_epub=fmt.get("make_epub", True),
119
135
  make_md=fmt.get("make_md", False),
120
136
  make_pdf=fmt.get("make_pdf", False),
121
137
  append_timestamp=naming.get("append_timestamp", True),
@@ -128,35 +144,36 @@ class ConfigAdapter:
128
144
 
129
145
  def get_login_config(self) -> dict[str, str]:
130
146
  """
131
- Return the subset of login fields present in current site config:
132
- * `username`
133
- * `password`
134
- * `cookies`
147
+ Extract login-related fields from the current site configuration.
148
+ Only non-empty string values are returned; values are stripped.
149
+
150
+ :return: A subset of ``{"username","password","cookies"}`` that are non-empty
135
151
  """
136
152
  out: dict[str, str] = {}
137
153
  for key in ("username", "password", "cookies"):
138
154
  val = self._site_cfg.get(key, "")
139
- val = val.strip()
140
- if val:
141
- out[key] = val
155
+ if isinstance(val, str):
156
+ s = val.strip()
157
+ if s:
158
+ out[key] = s
142
159
  return out
143
160
 
144
161
  def get_book_ids(self) -> list[BookConfig]:
145
162
  """
146
- Extract the list of target books from the site configuration.
163
+ Extract and normalize the list of target books for the current site.
147
164
 
148
- The site config may specify book_ids as:
149
- * a single string or integer
150
- * a dict with book_id and optional start_id, end_id, ignore_ids
151
- * a list of the above types
165
+ Accepted shapes for ``site.book_ids``:
166
+ * a single ``str`` or ``int`` (book id)
167
+ * a dict with fields: book_id and optional start_id, end_id, ignore_ids
168
+ * a ``list`` containing any mix of the above
152
169
 
153
- :return: A list of BookConfig dicts.
154
- :raises ValueError: if the raw book_ids is neither a str/int, dict, nor list.
170
+ :return: Normalized list of :class:`BookConfig`-compatible dictionaries.
171
+ :raises ValueError: If ``book_ids`` is neither a scalar ``str|int``, ``dict``,
172
+ nor ``list``.
155
173
  """
156
- site_cfg = self._get_site_cfg()
157
- raw = site_cfg.get("book_ids", [])
174
+ raw = self._site_cfg.get("book_ids", [])
158
175
 
159
- if isinstance(raw, str | int):
176
+ if isinstance(raw, (str | int)):
160
177
  return [{"book_id": str(raw)}]
161
178
 
162
179
  if isinstance(raw, dict):
@@ -170,151 +187,197 @@ class ConfigAdapter:
170
187
  result: list[BookConfig] = []
171
188
  for item in raw:
172
189
  try:
173
- if isinstance(item, str | int):
190
+ if isinstance(item, (str | int)):
174
191
  result.append({"book_id": str(item)})
175
192
  elif isinstance(item, dict):
176
193
  result.append(self._dict_to_book_cfg(item))
177
194
  except ValueError:
178
195
  continue
179
-
180
196
  return result
181
197
 
182
198
  def get_log_level(self) -> str:
183
199
  """
184
- Retrieve the logging level from [general.debug].
200
+ Retrieve the logging level from ``general.debug``.
185
201
 
186
- :return: The configured log level ("DEBUG", "INFO", "WARNING", "ERROR").
202
+ :return: One of ``"DEBUG"``, ``"INFO"``, ``"WARNING"``, ``"ERROR"``
187
203
  """
188
- debug_cfg = self._config.get("general", {}).get("debug", {})
204
+ debug_cfg = self._gen_cfg.get("debug", {})
189
205
  return debug_cfg.get("log_level") or "INFO"
190
206
 
191
207
  @property
192
208
  def site(self) -> str:
193
- """
194
- Get the current site name.
195
- """
196
209
  return self._site
197
210
 
198
211
  @site.setter
199
212
  def site(self, value: str) -> None:
200
- """
201
- Set a new site name for configuration lookups.
213
+ self._site = value
202
214
 
203
- :param value: The new site key in config["sites"] to use.
215
+ @property
216
+ def _gen_cfg(self) -> dict[str, Any]:
204
217
  """
205
- self._site = value
206
- self._site_cfg = self._get_site_cfg()
218
+ A read-only view of the global ``general`` settings.
207
219
 
208
- def _get_gen_cfg(self, key: str, default: T) -> T:
209
- return self._site_cfg.get(key) or self._gen_cfg.get(key) or default
220
+ :return: ``config["general"]`` if present, else ``{}``.
221
+ """
222
+ return self._config.get("general") or {}
210
223
 
211
- def _get_site_cfg(self) -> dict[str, Any]:
224
+ @property
225
+ def _site_cfg(self) -> dict[str, Any]:
212
226
  """
213
- Retrieve the configuration for a specific site.
227
+ Retrieve the configuration block for the current site.
214
228
 
215
229
  Lookup order:
216
- 1. If there is a site-specific entry under config["sites"], return that.
217
- 2. Otherwise, if a "common" entry exists under config["sites"], return that.
218
- 3. If neither is present, return an empty dict.
230
+ 1. If a site-specific entry exists under ``config["sites"]``, return it.
231
+ 2. Otherwise, if ``config["sites"]["common"]`` exists, return it.
232
+ 3. Else return an empty dict.
219
233
 
220
- :param site: Optional override of the site name; defaults to self._site.
221
- :return: The site-specific or common configuration dict.
234
+ :return: Site-specific mapping, common mapping, or ``{}``.
222
235
  """
223
236
  sites_cfg = self._config.get("sites") or {}
224
-
225
- if self._site in sites_cfg:
237
+ if self._site in sites_cfg and isinstance(sites_cfg[self._site], dict):
226
238
  return sites_cfg[self._site] or {}
227
-
228
239
  return sites_cfg.get("common") or {}
229
240
 
241
+ @staticmethod
242
+ def _has_key(d: Mapping[str, Any] | None, key: str) -> bool:
243
+ """
244
+ Check whether a mapping contains a key.
245
+
246
+ :param d: Mapping to inspect.
247
+ :param key: Key to look up.
248
+ :return: ``True`` if ``d`` is a Mapping and contains key; otherwise ``False``.
249
+ """
250
+ return isinstance(d, Mapping) and (key in d)
251
+
252
+ def _pick(self, key: str, default: T, *sources: Mapping[str, Any]) -> T:
253
+ """
254
+ Resolve ``key`` from the provided ``sources`` in order of precedence.
255
+
256
+ :param key: Configuration key to resolve.
257
+ :param default: Fallback value if ``key`` is absent in all sources.
258
+ :param sources: One or more mappings to check, in order of precedence.
259
+ :return: The first present value for ``key``, otherwise ``default``.
260
+ """
261
+ for src in sources:
262
+ if self._has_key(src, key):
263
+ return src[key] # type: ignore[no-any-return]
264
+ return default
265
+
230
266
  @staticmethod
231
267
  def _dict_to_book_cfg(data: dict[str, Any]) -> BookConfig:
232
268
  """
233
- Convert a dictionary to a BookConfig with normalized types.
269
+ Convert a raw dict into a :class:`novel_downloader.models.BookConfig`
270
+ with normalized types (all IDs coerced to strings).
234
271
 
235
272
  :param data: A dict that must contain at least "book_id".
236
- :return: A BookConfig dict with all values cast to strings or lists of strings.
237
- :raises ValueError: if the "book_id" field is missing.
273
+ :return: Normalized :class:`BookConfig` mapping.
274
+ :raises ValueError: If ``"book_id"`` is missing.
238
275
  """
239
276
  if "book_id" not in data:
240
277
  raise ValueError("Missing required field 'book_id'")
241
278
 
242
- result: BookConfig = {"book_id": str(data["book_id"])}
279
+ out: BookConfig = {"book_id": str(data["book_id"])}
243
280
 
244
281
  if "start_id" in data:
245
- result["start_id"] = str(data["start_id"])
246
-
282
+ out["start_id"] = str(data["start_id"])
247
283
  if "end_id" in data:
248
- result["end_id"] = str(data["end_id"])
249
-
284
+ out["end_id"] = str(data["end_id"])
250
285
  if "ignore_ids" in data:
251
- result["ignore_ids"] = [str(x) for x in data["ignore_ids"]]
286
+ with contextlib.suppress(Exception):
287
+ out["ignore_ids"] = [str(x) for x in data["ignore_ids"]]
288
+ return out
252
289
 
253
- return result
290
+ @staticmethod
291
+ def _dict_to_fontocr_cfg(data: dict[str, Any]) -> FontOCRConfig:
292
+ """
293
+ Convert a raw ``font_ocr`` dict into a :class:`FontOCRConfig`.
294
+ """
295
+ if not isinstance(data, dict):
296
+ return FontOCRConfig()
297
+
298
+ ishape = data.get("input_shape")
299
+ if isinstance(ishape, list):
300
+ ishape = tuple(ishape) # [C, H, W] -> (C, H, W)
301
+
302
+ return FontOCRConfig(
303
+ model_name=data.get("model_name"),
304
+ model_dir=data.get("model_dir"),
305
+ input_shape=ishape,
306
+ device=data.get("device"),
307
+ precision=data.get("precision", "fp32"),
308
+ cpu_threads=data.get("cpu_threads", 10),
309
+ enable_hpi=data.get("enable_hpi", False),
310
+ )
254
311
 
255
312
  @classmethod
256
313
  def _dict_to_cleaner_cfg(cls, cfg: dict[str, Any]) -> TextCleanerConfig:
257
314
  """
258
- Convert a nested dict of title/content rules into a TextCleanerConfig.
315
+ Convert a nested ``cleaner`` block into a
316
+ :class:`novel_downloader.models.TextCleanerConfig`.
259
317
 
260
318
  :param cfg: configuration dictionary
261
- :return: fully constructed TextCleanerConfig
319
+ :return: Aggregated title/content rules with external file contents merged
262
320
  """
263
- # Title rules
264
- title_section = cfg.get("title", {})
265
- title_remove = title_section.get("remove_patterns", [])
266
- title_repl = title_section.get("replace", {})
267
-
268
- title_ext = title_section.get("external", {})
269
- if title_ext.get("enabled", False):
270
- title_ext_rm_p = title_ext.get("remove_patterns", "")
271
- title_ext_rp_p = title_ext.get("replace", "")
272
-
273
- title_remove_ext = cls._load_str_list(title_ext_rm_p)
274
- title_remove += title_remove_ext
275
-
276
- title_repl_ext = cls._load_str_dict(title_ext_rp_p)
277
- title_repl = {**title_repl, **title_repl_ext}
278
-
279
- # Content rules
280
- content_section = cfg.get("content", {})
281
- content_remove = content_section.get("remove_patterns", [])
282
- content_repl = content_section.get("replace", {})
283
-
284
- content_ext = content_section.get("external", {})
285
-
286
- if content_ext.get("enabled", False):
287
- content_ext_rm_p = content_ext.get("remove_patterns", "")
288
- content_ext_rp_p = content_ext.get("replace", "")
289
-
290
- content_remove_ext = cls._load_str_list(content_ext_rm_p)
291
- content_remove += content_remove_ext
292
-
293
- content_repl_ext = cls._load_str_dict(content_ext_rp_p)
294
- content_repl = {**content_repl, **content_repl_ext}
295
-
321
+ t_remove, t_replace = cls._merge_rules(cfg.get("title", {}) or {})
322
+ c_remove, c_replace = cls._merge_rules(cfg.get("content", {}) or {})
296
323
  return TextCleanerConfig(
297
324
  remove_invisible=cfg.get("remove_invisible", True),
298
- title_remove_patterns=title_remove,
299
- title_replacements=title_repl,
300
- content_remove_patterns=content_remove,
301
- content_replacements=content_repl,
325
+ title_remove_patterns=t_remove,
326
+ title_replacements=t_replace,
327
+ content_remove_patterns=c_remove,
328
+ content_replacements=c_replace,
302
329
  )
303
330
 
331
+ @classmethod
332
+ def _merge_rules(cls, section: dict[str, Any]) -> tuple[list[str], dict[str, str]]:
333
+ """
334
+ Merge inline patterns/replacements with any enabled external files.
335
+
336
+ :param section: Mapping describing either the ``title`` or ``content`` rules.
337
+ :return: Tuple ``(remove_patterns, replace)`` after merging.
338
+ """
339
+ remove = list(section.get("remove_patterns") or [])
340
+ replace = dict(section.get("replace") or {})
341
+ ext = section.get("external") or {}
342
+ if ext.get("enabled", False):
343
+ rm_path = ext.get("remove_patterns") or ""
344
+ rp_path = ext.get("replace") or ""
345
+ remove += cls._load_str_list(rm_path)
346
+ replace.update(cls._load_str_dict(rp_path))
347
+ return remove, replace
348
+
304
349
  @staticmethod
305
350
  def _load_str_list(path: str) -> list[str]:
351
+ """
352
+ Load a JSON file containing a list of strings.
353
+
354
+ :param path: File path to a JSON array (e.g., ``["a", "b"]``).
355
+ :return: Parsed list on success; empty list if ``path`` is empty, file is
356
+ missing, or content is invalid.
357
+ """
358
+ if not path:
359
+ return []
306
360
  try:
307
361
  with open(path, encoding="utf-8") as f:
308
- parsed = json.load(f)
309
- return cast(list[str], parsed)
362
+ data = json.load(f)
363
+ return list(data) if isinstance(data, list) else []
310
364
  except Exception:
311
365
  return []
312
366
 
313
367
  @staticmethod
314
368
  def _load_str_dict(path: str) -> dict[str, str]:
369
+ """
370
+ Load a JSON file containing a dict of string-to-string mappings.
371
+
372
+ :param path: File path to a JSON object (e.g., ``{"old":"new"}``).
373
+ :return: Parsed dict on success; empty dict if ``path`` is empty, file is
374
+ missing, or content is invalid.
375
+ """
376
+ if not path:
377
+ return {}
315
378
  try:
316
379
  with open(path, encoding="utf-8") as f:
317
- parsed = json.load(f)
318
- return cast(dict[str, str], parsed)
380
+ data = json.load(f)
381
+ return dict(data) if isinstance(data, dict) else {}
319
382
  except Exception:
320
383
  return {}
@@ -7,12 +7,11 @@ This package serves as the core layer of the novel_downloader system.
7
7
 
8
8
  It provides factory methods for constructing key components required for
9
9
  downloading and processing online novel content, including:
10
-
11
- - Downloader: Handles the full download lifecycle of a book or a batch of books.
12
- - Parser: Extracts structured data from HTML or SSR content.
13
- - Fetcher: Sends HTTP requests and manages sessions, including login if required.
14
- - Exporter: Responsible for exporting downloaded data into various output formats.
15
- - search: Provides unified search functionality across supported novel sites.
10
+ * Downloader: Handles the full download lifecycle of a book or a batch of books.
11
+ * Parser: Extracts structured data from HTML or SSR content.
12
+ * Fetcher: Sends HTTP requests and manages sessions, including login if required.
13
+ * Exporter: Responsible for exporting downloaded data into various output formats.
14
+ * search: Provides unified search functionality across supported novel sites.
16
15
  """
17
16
 
18
17
  __all__ = [
@@ -8,8 +8,6 @@ novel_downloader.core.archived.deqixs.fetcher
8
8
  from typing import Any
9
9
 
10
10
  from novel_downloader.core.fetchers.base import BaseSession
11
- from novel_downloader.models import FetcherConfig
12
- from novel_downloader.utils import async_jitter_sleep
13
11
 
14
12
  # from novel_downloader.core.fetchers.registry import register_fetcher
15
13
 
@@ -26,25 +24,11 @@ class DeqixsSession(BaseSession):
26
24
  BOOK_INFO_URL = "https://www.deqixs.com/xiaoshuo/{book_id}/"
27
25
  CHAPTER_URL = "https://www.deqixs.com/xiaoshuo/{book_id}/{chapter_id}.html"
28
26
 
29
- def __init__(
30
- self,
31
- config: FetcherConfig,
32
- cookies: dict[str, str] | None = None,
33
- **kwargs: Any,
34
- ) -> None:
35
- super().__init__("deqixs", config, cookies, **kwargs)
36
-
37
27
  async def get_book_info(
38
28
  self,
39
29
  book_id: str,
40
30
  **kwargs: Any,
41
31
  ) -> list[str]:
42
- """
43
- Fetch the raw HTML of the book info page asynchronously.
44
-
45
- :param book_id: The book identifier.
46
- :return: The page content as a string.
47
- """
48
32
  url = self.book_info_url(book_id=book_id)
49
33
  return [await self.fetch(url, **kwargs)]
50
34
 
@@ -54,13 +38,6 @@ class DeqixsSession(BaseSession):
54
38
  chapter_id: str,
55
39
  **kwargs: Any,
56
40
  ) -> list[str]:
57
- """
58
- Fetch the raw HTML of a single chapter asynchronously.
59
-
60
- :param book_id: The book identifier.
61
- :param chapter_id: The chapter identifier.
62
- :return: The chapter content as a string.
63
- """
64
41
  html_pages: list[str] = []
65
42
  idx = 1
66
43
 
@@ -85,11 +62,7 @@ class DeqixsSession(BaseSession):
85
62
 
86
63
  html_pages.append(html)
87
64
  idx += 1
88
- await async_jitter_sleep(
89
- self.request_interval,
90
- mul_spread=1.1,
91
- max_sleep=self.request_interval + 2,
92
- )
65
+ await self._sleep()
93
66
 
94
67
  return html_pages
95
68
 
@@ -11,9 +11,11 @@ __all__ = [
11
11
  "CommonDownloader",
12
12
  "QianbiDownloader",
13
13
  "QidianDownloader",
14
+ "QqbookDownloader",
14
15
  ]
15
16
 
16
17
  from .common import CommonDownloader
17
18
  from .qianbi import QianbiDownloader
18
19
  from .qidian import QidianDownloader
20
+ from .qqbook import QqbookDownloader
19
21
  from .registry import get_downloader