novel-downloader 1.5.0__py3-none-any.whl → 2.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (248)
  1. novel_downloader/__init__.py +1 -1
  2. novel_downloader/cli/__init__.py +1 -3
  3. novel_downloader/cli/clean.py +21 -88
  4. novel_downloader/cli/config.py +26 -21
  5. novel_downloader/cli/download.py +79 -66
  6. novel_downloader/cli/export.py +17 -21
  7. novel_downloader/cli/main.py +1 -1
  8. novel_downloader/cli/search.py +62 -65
  9. novel_downloader/cli/ui.py +156 -0
  10. novel_downloader/config/__init__.py +8 -5
  11. novel_downloader/config/adapter.py +206 -209
  12. novel_downloader/config/{loader.py → file_io.py} +53 -26
  13. novel_downloader/core/__init__.py +5 -5
  14. novel_downloader/core/archived/deqixs/fetcher.py +115 -0
  15. novel_downloader/core/archived/deqixs/parser.py +132 -0
  16. novel_downloader/core/archived/deqixs/searcher.py +89 -0
  17. novel_downloader/core/{searchers/qidian.py → archived/qidian/searcher.py} +12 -20
  18. novel_downloader/core/archived/wanbengo/searcher.py +98 -0
  19. novel_downloader/core/archived/xshbook/searcher.py +93 -0
  20. novel_downloader/core/downloaders/__init__.py +3 -24
  21. novel_downloader/core/downloaders/base.py +49 -23
  22. novel_downloader/core/downloaders/common.py +191 -137
  23. novel_downloader/core/downloaders/qianbi.py +187 -146
  24. novel_downloader/core/downloaders/qidian.py +187 -141
  25. novel_downloader/core/downloaders/registry.py +4 -2
  26. novel_downloader/core/downloaders/signals.py +46 -0
  27. novel_downloader/core/exporters/__init__.py +3 -20
  28. novel_downloader/core/exporters/base.py +33 -37
  29. novel_downloader/core/exporters/common/__init__.py +1 -2
  30. novel_downloader/core/exporters/common/epub.py +15 -10
  31. novel_downloader/core/exporters/common/main_exporter.py +19 -12
  32. novel_downloader/core/exporters/common/txt.py +17 -12
  33. novel_downloader/core/exporters/epub_util.py +59 -29
  34. novel_downloader/core/exporters/linovelib/__init__.py +1 -0
  35. novel_downloader/core/exporters/linovelib/epub.py +23 -25
  36. novel_downloader/core/exporters/linovelib/main_exporter.py +8 -12
  37. novel_downloader/core/exporters/linovelib/txt.py +20 -14
  38. novel_downloader/core/exporters/qidian.py +2 -8
  39. novel_downloader/core/exporters/registry.py +4 -2
  40. novel_downloader/core/exporters/txt_util.py +7 -7
  41. novel_downloader/core/fetchers/__init__.py +54 -48
  42. novel_downloader/core/fetchers/aaatxt.py +83 -0
  43. novel_downloader/core/fetchers/{biquge/session.py → b520.py} +6 -11
  44. novel_downloader/core/fetchers/{base/session.py → base.py} +37 -46
  45. novel_downloader/core/fetchers/{biquge/browser.py → biquyuedu.py} +12 -17
  46. novel_downloader/core/fetchers/dxmwx.py +110 -0
  47. novel_downloader/core/fetchers/eightnovel.py +139 -0
  48. novel_downloader/core/fetchers/{esjzone/session.py → esjzone.py} +19 -12
  49. novel_downloader/core/fetchers/guidaye.py +85 -0
  50. novel_downloader/core/fetchers/hetushu.py +92 -0
  51. novel_downloader/core/fetchers/{qianbi/browser.py → i25zw.py} +19 -28
  52. novel_downloader/core/fetchers/ixdzs8.py +113 -0
  53. novel_downloader/core/fetchers/jpxs123.py +101 -0
  54. novel_downloader/core/fetchers/lewenn.py +83 -0
  55. novel_downloader/core/fetchers/{linovelib/session.py → linovelib.py} +12 -13
  56. novel_downloader/core/fetchers/piaotia.py +105 -0
  57. novel_downloader/core/fetchers/qbtr.py +101 -0
  58. novel_downloader/core/fetchers/{qianbi/session.py → qianbi.py} +5 -10
  59. novel_downloader/core/fetchers/{qidian/session.py → qidian.py} +56 -64
  60. novel_downloader/core/fetchers/quanben5.py +92 -0
  61. novel_downloader/core/fetchers/{base/rate_limiter.py → rate_limiter.py} +2 -2
  62. novel_downloader/core/fetchers/registry.py +5 -16
  63. novel_downloader/core/fetchers/{sfacg/session.py → sfacg.py} +7 -10
  64. novel_downloader/core/fetchers/shencou.py +106 -0
  65. novel_downloader/core/fetchers/shuhaige.py +84 -0
  66. novel_downloader/core/fetchers/tongrenquan.py +84 -0
  67. novel_downloader/core/fetchers/ttkan.py +95 -0
  68. novel_downloader/core/fetchers/wanbengo.py +83 -0
  69. novel_downloader/core/fetchers/xiaoshuowu.py +106 -0
  70. novel_downloader/core/fetchers/xiguashuwu.py +177 -0
  71. novel_downloader/core/fetchers/xs63b.py +171 -0
  72. novel_downloader/core/fetchers/xshbook.py +85 -0
  73. novel_downloader/core/fetchers/{yamibo/session.py → yamibo.py} +19 -12
  74. novel_downloader/core/fetchers/yibige.py +114 -0
  75. novel_downloader/core/interfaces/__init__.py +1 -9
  76. novel_downloader/core/interfaces/downloader.py +6 -2
  77. novel_downloader/core/interfaces/exporter.py +7 -7
  78. novel_downloader/core/interfaces/fetcher.py +6 -19
  79. novel_downloader/core/interfaces/parser.py +7 -8
  80. novel_downloader/core/interfaces/searcher.py +9 -1
  81. novel_downloader/core/parsers/__init__.py +49 -12
  82. novel_downloader/core/parsers/aaatxt.py +132 -0
  83. novel_downloader/core/parsers/b520.py +116 -0
  84. novel_downloader/core/parsers/base.py +64 -12
  85. novel_downloader/core/parsers/biquyuedu.py +133 -0
  86. novel_downloader/core/parsers/dxmwx.py +162 -0
  87. novel_downloader/core/parsers/eightnovel.py +224 -0
  88. novel_downloader/core/parsers/esjzone.py +64 -69
  89. novel_downloader/core/parsers/guidaye.py +128 -0
  90. novel_downloader/core/parsers/hetushu.py +139 -0
  91. novel_downloader/core/parsers/i25zw.py +137 -0
  92. novel_downloader/core/parsers/ixdzs8.py +186 -0
  93. novel_downloader/core/parsers/jpxs123.py +137 -0
  94. novel_downloader/core/parsers/lewenn.py +142 -0
  95. novel_downloader/core/parsers/linovelib.py +48 -64
  96. novel_downloader/core/parsers/piaotia.py +189 -0
  97. novel_downloader/core/parsers/qbtr.py +136 -0
  98. novel_downloader/core/parsers/qianbi.py +48 -50
  99. novel_downloader/core/parsers/qidian/main_parser.py +756 -48
  100. novel_downloader/core/parsers/qidian/utils/__init__.py +3 -21
  101. novel_downloader/core/parsers/qidian/utils/decryptor_fetcher.py +1 -1
  102. novel_downloader/core/parsers/qidian/utils/node_decryptor.py +4 -4
  103. novel_downloader/core/parsers/quanben5.py +103 -0
  104. novel_downloader/core/parsers/registry.py +5 -16
  105. novel_downloader/core/parsers/sfacg.py +38 -45
  106. novel_downloader/core/parsers/shencou.py +215 -0
  107. novel_downloader/core/parsers/shuhaige.py +111 -0
  108. novel_downloader/core/parsers/tongrenquan.py +116 -0
  109. novel_downloader/core/parsers/ttkan.py +132 -0
  110. novel_downloader/core/parsers/wanbengo.py +191 -0
  111. novel_downloader/core/parsers/xiaoshuowu.py +173 -0
  112. novel_downloader/core/parsers/xiguashuwu.py +429 -0
  113. novel_downloader/core/parsers/xs63b.py +161 -0
  114. novel_downloader/core/parsers/xshbook.py +134 -0
  115. novel_downloader/core/parsers/yamibo.py +87 -131
  116. novel_downloader/core/parsers/yibige.py +166 -0
  117. novel_downloader/core/searchers/__init__.py +34 -3
  118. novel_downloader/core/searchers/aaatxt.py +107 -0
  119. novel_downloader/core/searchers/{biquge.py → b520.py} +29 -28
  120. novel_downloader/core/searchers/base.py +112 -36
  121. novel_downloader/core/searchers/dxmwx.py +105 -0
  122. novel_downloader/core/searchers/eightnovel.py +84 -0
  123. novel_downloader/core/searchers/esjzone.py +43 -25
  124. novel_downloader/core/searchers/hetushu.py +92 -0
  125. novel_downloader/core/searchers/i25zw.py +93 -0
  126. novel_downloader/core/searchers/ixdzs8.py +107 -0
  127. novel_downloader/core/searchers/jpxs123.py +107 -0
  128. novel_downloader/core/searchers/piaotia.py +100 -0
  129. novel_downloader/core/searchers/qbtr.py +106 -0
  130. novel_downloader/core/searchers/qianbi.py +74 -40
  131. novel_downloader/core/searchers/quanben5.py +144 -0
  132. novel_downloader/core/searchers/registry.py +24 -8
  133. novel_downloader/core/searchers/shuhaige.py +124 -0
  134. novel_downloader/core/searchers/tongrenquan.py +110 -0
  135. novel_downloader/core/searchers/ttkan.py +92 -0
  136. novel_downloader/core/searchers/xiaoshuowu.py +122 -0
  137. novel_downloader/core/searchers/xiguashuwu.py +95 -0
  138. novel_downloader/core/searchers/xs63b.py +104 -0
  139. novel_downloader/locales/en.json +34 -85
  140. novel_downloader/locales/zh.json +35 -86
  141. novel_downloader/models/__init__.py +21 -22
  142. novel_downloader/models/book.py +44 -0
  143. novel_downloader/models/config.py +4 -37
  144. novel_downloader/models/login.py +1 -1
  145. novel_downloader/models/search.py +5 -0
  146. novel_downloader/resources/config/settings.toml +8 -70
  147. novel_downloader/resources/json/xiguashuwu.json +718 -0
  148. novel_downloader/utils/__init__.py +13 -24
  149. novel_downloader/utils/chapter_storage.py +5 -5
  150. novel_downloader/utils/constants.py +4 -31
  151. novel_downloader/utils/cookies.py +38 -35
  152. novel_downloader/utils/crypto_utils/__init__.py +7 -0
  153. novel_downloader/utils/crypto_utils/aes_util.py +90 -0
  154. novel_downloader/utils/crypto_utils/aes_v1.py +619 -0
  155. novel_downloader/utils/crypto_utils/aes_v2.py +1143 -0
  156. novel_downloader/utils/crypto_utils/rc4.py +54 -0
  157. novel_downloader/utils/epub/__init__.py +3 -4
  158. novel_downloader/utils/epub/builder.py +6 -6
  159. novel_downloader/utils/epub/constants.py +62 -21
  160. novel_downloader/utils/epub/documents.py +95 -201
  161. novel_downloader/utils/epub/models.py +8 -22
  162. novel_downloader/utils/epub/utils.py +73 -106
  163. novel_downloader/utils/file_utils/__init__.py +2 -23
  164. novel_downloader/utils/file_utils/io.py +53 -188
  165. novel_downloader/utils/file_utils/normalize.py +1 -7
  166. novel_downloader/utils/file_utils/sanitize.py +4 -15
  167. novel_downloader/utils/fontocr/__init__.py +5 -14
  168. novel_downloader/utils/fontocr/core.py +216 -0
  169. novel_downloader/utils/fontocr/loader.py +50 -0
  170. novel_downloader/utils/logger.py +81 -65
  171. novel_downloader/utils/network.py +17 -41
  172. novel_downloader/utils/state.py +4 -90
  173. novel_downloader/utils/text_utils/__init__.py +1 -7
  174. novel_downloader/utils/text_utils/diff_display.py +5 -7
  175. novel_downloader/utils/text_utils/text_cleaner.py +39 -30
  176. novel_downloader/utils/text_utils/truncate_utils.py +3 -14
  177. novel_downloader/utils/time_utils/__init__.py +5 -11
  178. novel_downloader/utils/time_utils/datetime_utils.py +20 -29
  179. novel_downloader/utils/time_utils/sleep_utils.py +55 -49
  180. novel_downloader/web/__init__.py +13 -0
  181. novel_downloader/web/components/__init__.py +11 -0
  182. novel_downloader/web/components/navigation.py +35 -0
  183. novel_downloader/web/main.py +66 -0
  184. novel_downloader/web/pages/__init__.py +17 -0
  185. novel_downloader/web/pages/download.py +78 -0
  186. novel_downloader/web/pages/progress.py +147 -0
  187. novel_downloader/web/pages/search.py +329 -0
  188. novel_downloader/web/services/__init__.py +17 -0
  189. novel_downloader/web/services/client_dialog.py +164 -0
  190. novel_downloader/web/services/cred_broker.py +113 -0
  191. novel_downloader/web/services/cred_models.py +35 -0
  192. novel_downloader/web/services/task_manager.py +264 -0
  193. novel_downloader-2.0.1.dist-info/METADATA +172 -0
  194. novel_downloader-2.0.1.dist-info/RECORD +206 -0
  195. {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.1.dist-info}/entry_points.txt +1 -1
  196. novel_downloader/core/downloaders/biquge.py +0 -29
  197. novel_downloader/core/downloaders/esjzone.py +0 -29
  198. novel_downloader/core/downloaders/linovelib.py +0 -29
  199. novel_downloader/core/downloaders/sfacg.py +0 -29
  200. novel_downloader/core/downloaders/yamibo.py +0 -29
  201. novel_downloader/core/exporters/biquge.py +0 -22
  202. novel_downloader/core/exporters/esjzone.py +0 -22
  203. novel_downloader/core/exporters/qianbi.py +0 -22
  204. novel_downloader/core/exporters/sfacg.py +0 -22
  205. novel_downloader/core/exporters/yamibo.py +0 -22
  206. novel_downloader/core/fetchers/base/__init__.py +0 -14
  207. novel_downloader/core/fetchers/base/browser.py +0 -422
  208. novel_downloader/core/fetchers/biquge/__init__.py +0 -14
  209. novel_downloader/core/fetchers/esjzone/__init__.py +0 -14
  210. novel_downloader/core/fetchers/esjzone/browser.py +0 -209
  211. novel_downloader/core/fetchers/linovelib/__init__.py +0 -14
  212. novel_downloader/core/fetchers/linovelib/browser.py +0 -198
  213. novel_downloader/core/fetchers/qianbi/__init__.py +0 -14
  214. novel_downloader/core/fetchers/qidian/__init__.py +0 -14
  215. novel_downloader/core/fetchers/qidian/browser.py +0 -326
  216. novel_downloader/core/fetchers/sfacg/__init__.py +0 -14
  217. novel_downloader/core/fetchers/sfacg/browser.py +0 -194
  218. novel_downloader/core/fetchers/yamibo/__init__.py +0 -14
  219. novel_downloader/core/fetchers/yamibo/browser.py +0 -234
  220. novel_downloader/core/parsers/biquge.py +0 -139
  221. novel_downloader/core/parsers/qidian/book_info_parser.py +0 -90
  222. novel_downloader/core/parsers/qidian/chapter_encrypted.py +0 -528
  223. novel_downloader/core/parsers/qidian/chapter_normal.py +0 -157
  224. novel_downloader/core/parsers/qidian/chapter_router.py +0 -68
  225. novel_downloader/core/parsers/qidian/utils/helpers.py +0 -114
  226. novel_downloader/models/chapter.py +0 -25
  227. novel_downloader/models/types.py +0 -13
  228. novel_downloader/tui/__init__.py +0 -7
  229. novel_downloader/tui/app.py +0 -32
  230. novel_downloader/tui/main.py +0 -17
  231. novel_downloader/tui/screens/__init__.py +0 -14
  232. novel_downloader/tui/screens/home.py +0 -198
  233. novel_downloader/tui/screens/login.py +0 -74
  234. novel_downloader/tui/styles/home_layout.tcss +0 -79
  235. novel_downloader/tui/widgets/richlog_handler.py +0 -24
  236. novel_downloader/utils/cache.py +0 -24
  237. novel_downloader/utils/crypto_utils.py +0 -71
  238. novel_downloader/utils/fontocr/hash_store.py +0 -280
  239. novel_downloader/utils/fontocr/hash_utils.py +0 -103
  240. novel_downloader/utils/fontocr/model_loader.py +0 -69
  241. novel_downloader/utils/fontocr/ocr_v1.py +0 -315
  242. novel_downloader/utils/fontocr/ocr_v2.py +0 -764
  243. novel_downloader/utils/fontocr/ocr_v3.py +0 -744
  244. novel_downloader-1.5.0.dist-info/METADATA +0 -196
  245. novel_downloader-1.5.0.dist-info/RECORD +0 -164
  246. {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.1.dist-info}/WHEEL +0 -0
  247. {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.1.dist-info}/licenses/LICENSE +0 -0
  248. {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.1.dist-info}/top_level.txt +0 -0
@@ -4,196 +4,172 @@ novel_downloader.config.adapter
4
4
  -------------------------------
5
5
 
6
6
  Defines ConfigAdapter, which maps a raw configuration dictionary and
7
- site name into structured dataclass-based config models.
7
+ site into structured dataclass-based config models.
8
8
  """
9
9
 
10
+ import contextlib
10
11
  import json
11
- from typing import Any, cast
12
+ from collections.abc import Mapping
13
+ from typing import Any, TypeVar
12
14
 
13
15
  from novel_downloader.models import (
14
16
  BookConfig,
15
17
  DownloaderConfig,
16
18
  ExporterConfig,
17
19
  FetcherConfig,
18
- LogLevel,
19
20
  ParserConfig,
20
21
  TextCleanerConfig,
21
22
  )
22
23
 
24
+ T = TypeVar("T")
25
+
23
26
 
24
27
  class ConfigAdapter:
25
28
  """
26
29
  Adapter to map a raw configuration dictionary and site name
27
30
  into structured dataclass configuration models.
28
- """
29
31
 
30
- _ALLOWED_LOG_LEVELS: tuple[LogLevel, ...] = (
31
- "DEBUG",
32
- "INFO",
33
- "WARNING",
34
- "ERROR",
35
- )
32
+ Resolution order for each field:
33
+ 1. ``config["sites"][<site>]`` (if present)
34
+ 2. ``config["general"]`` (if present)
35
+ 3. Hard-coded default passed by the caller
36
+ """
36
37
 
37
- def __init__(self, config: dict[str, Any], site: str):
38
+ def __init__(self, config: Mapping[str, Any], site: str):
38
39
  """
39
- Initialize the adapter.
40
+ Initialize the adapter with a configuration mapping and a site key.
40
41
 
41
- :param config: The fully loaded configuration dictionary.
42
- :param site: The current site name (e.g. "qidian").
42
+ :param config: Fully loaded configuration mapping.
43
+ :param site: Current site key (e.g., ``"qidian"``).
43
44
  """
44
- self._config = config
45
- self._site = site
45
+ self._config: dict[str, Any] = dict(config)
46
+ self._site: str = site
46
47
 
47
48
  def get_fetcher_config(self) -> FetcherConfig:
48
49
  """
49
- Build a FetcherConfig from the raw configuration.
50
-
51
- Reads from:
52
- - config["general"] for global defaults (e.g. request_interval)
53
- - config["requests"] for HTTP-specific settings (timeouts, retries, etc.)
54
- - site-specific overrides under config["sites"][site]
50
+ Build a :class:`novel_downloader.models.FetcherConfig` by resolving fields
51
+ from site-specific and general settings.
55
52
 
56
- :return: A FetcherConfig instance with all fields populated.
53
+ :return: Fully populated configuration for the network fetcher.
57
54
  """
58
- gen = self._config.get("general", {})
59
- req = self._config.get("requests", {})
60
- site_cfg = self._get_site_cfg()
55
+ s, g = self._site_cfg, self._gen_cfg
61
56
  return FetcherConfig(
62
- request_interval=gen.get("request_interval", 2.0),
63
- retry_times=req.get("retry_times", 3),
64
- backoff_factor=req.get("backoff_factor", 2.0),
65
- timeout=req.get("timeout", 30.0),
66
- max_connections=req.get("max_connections", 10),
67
- max_rps=req.get("max_rps", None),
68
- headless=req.get("headless", False),
69
- disable_images=req.get("disable_images", False),
70
- mode=site_cfg.get("mode", "session"),
71
- proxy=req.get("proxy", None),
72
- user_agent=req.get("user_agent", None),
73
- headers=req.get("headers", None),
74
- browser_type=req.get("browser_type", "chromium"),
75
- verify_ssl=req.get("verify_ssl", True),
57
+ request_interval=self._pick("request_interval", 2.0, s, g),
58
+ retry_times=self._pick("retry_times", 3, s, g),
59
+ backoff_factor=self._pick("backoff_factor", 2.0, s, g),
60
+ timeout=self._pick("timeout", 30.0, s, g),
61
+ max_connections=self._pick("max_connections", 10, s, g),
62
+ max_rps=self._pick("max_rps", 1000.0, s, g),
63
+ user_agent=self._pick("user_agent", None, s, g),
64
+ headers=self._pick("headers", None, s, g),
65
+ verify_ssl=self._pick("verify_ssl", True, s, g),
66
+ locale_style=self._pick("locale_style", "simplified", s, g),
76
67
  )
77
68
 
78
69
  def get_downloader_config(self) -> DownloaderConfig:
79
70
  """
80
- Build a DownloaderConfig using both general and site-specific settings.
71
+ Build a :class:`novel_downloader.models.DownloaderConfig` using both
72
+ general and site-specific settings.
81
73
 
82
- Reads from:
83
- - config["general"] for download directories, worker counts, etc.
84
- - config["requests"] for retry and backoff settings
85
- - config["general"]["debug"] for debug toggles (e.g. save_html)
86
- - config["sites"][site] for login credentials and mode
87
-
88
- :return: A DownloaderConfig instance with all fields populated.
74
+ :return: Fully populated configuration for the chapter/page downloader.
89
75
  """
90
- gen = self._config.get("general", {})
91
- req = self._config.get("requests", {})
92
- debug = gen.get("debug", {})
93
- site_cfg = self._get_site_cfg()
76
+ s, g = self._site_cfg, self._gen_cfg
77
+ debug = g.get("debug") or {}
94
78
  return DownloaderConfig(
95
- request_interval=gen.get("request_interval", 2.0),
96
- retry_times=req.get("retry_times", 3),
97
- backoff_factor=req.get("backoff_factor", 2.0),
98
- raw_data_dir=gen.get("raw_data_dir", "./raw_data"),
99
- cache_dir=gen.get("cache_dir", "./novel_cache"),
100
- workers=gen.get("workers", 2),
101
- skip_existing=gen.get("skip_existing", True),
102
- login_required=site_cfg.get("login_required", False),
103
- save_html=debug.get("save_html", False),
104
- mode=site_cfg.get("mode", "session"),
105
- storage_batch_size=gen.get("storage_batch_size", 1),
106
- username=site_cfg.get("username", ""),
107
- password=site_cfg.get("password", ""),
108
- cookies=site_cfg.get("cookies", ""),
79
+ request_interval=self._pick("request_interval", 2.0, s, g),
80
+ retry_times=self._pick("retry_times", 3, s, g),
81
+ backoff_factor=self._pick("backoff_factor", 2.0, s, g),
82
+ workers=self._pick("workers", 2, s, g),
83
+ skip_existing=self._pick("skip_existing", True, s, g),
84
+ login_required=bool(s.get("login_required", False)),
85
+ save_html=bool(debug.get("save_html", False)),
86
+ raw_data_dir=g.get("raw_data_dir", "./raw_data"),
87
+ cache_dir=g.get("cache_dir", "./novel_cache"),
88
+ storage_batch_size=g.get("storage_batch_size", 1),
109
89
  )
110
90
 
111
91
  def get_parser_config(self) -> ParserConfig:
112
92
  """
113
- Build a ParserConfig from general, OCR, and site-specific settings.
114
-
115
- Reads from:
116
- - config["general"]["cache_dir"] for where to cache intermediate parses
117
- - config["general"]["font_ocr"] for font-decoding and OCR options
118
- - config["sites"][site] for parsing mode and truncation behavior
93
+ Build a :class:`novel_downloader.models.ParserConfig` from general,
94
+ OCR-related, and site-specific settings.
119
95
 
120
- :return: A ParserConfig instance with all fields populated.
96
+ :return: Fully populated configuration for the parser stage.
121
97
  """
122
- gen = self._config.get("general", {})
123
- font_ocr = gen.get("font_ocr", {})
124
- site_cfg = self._get_site_cfg()
98
+ g = self._gen_cfg
99
+ s = self._site_cfg
100
+ font_ocr = g.get("font_ocr") or {}
125
101
  return ParserConfig(
126
- cache_dir=gen.get("cache_dir", "./novel_cache"),
127
- use_truncation=site_cfg.get("use_truncation", True),
128
- decode_font=font_ocr.get("decode_font", False),
129
- use_freq=font_ocr.get("use_freq", False),
130
- use_ocr=font_ocr.get("use_ocr", True),
131
- use_vec=font_ocr.get("use_vec", False),
132
- ocr_version=font_ocr.get("ocr_version", "v1.0"),
133
- save_font_debug=font_ocr.get("save_font_debug", False),
134
- batch_size=font_ocr.get("batch_size", 32),
135
- gpu_mem=font_ocr.get("gpu_mem", 500),
136
- gpu_id=font_ocr.get("gpu_id", None),
137
- ocr_weight=font_ocr.get("ocr_weight", 0.6),
138
- vec_weight=font_ocr.get("vec_weight", 0.4),
139
- mode=site_cfg.get("mode", "session"),
102
+ cache_dir=g.get("cache_dir", "./novel_cache"),
103
+ use_truncation=bool(s.get("use_truncation", True)),
104
+ decode_font=bool(font_ocr.get("decode_font", False)),
105
+ save_font_debug=bool(font_ocr.get("save_font_debug", False)),
106
+ batch_size=int(font_ocr.get("batch_size", 32)),
140
107
  )
141
108
 
142
109
  def get_exporter_config(self) -> ExporterConfig:
143
110
  """
144
- Build an ExporterConfig from output and general settings.
145
-
146
- Reads from:
147
- - config["general"] for cache and raw data directories
148
- - config["output"]["formats"] for which formats to generate
149
- - config["output"]["naming"] for filename templates
150
- - config["output"]["epub"] for EPUB-specific options
151
- - config["sites"][site] for export split mode
111
+ Build an :class:`novel_downloader.models.ExporterConfig` from the
112
+ ``output`` and ``cleaner`` sections plus general settings.
152
113
 
153
- :return: An ExporterConfig instance with all fields populated.
114
+ :return: Fully populated configuration for text/ebook export.
154
115
  """
155
- gen = self._config.get("general", {})
156
- out = self._config.get("output", {})
157
- cln = self._config.get("cleaner", {})
158
- fmt = out.get("formats", {})
159
- naming = out.get("naming", {})
160
- epub_opts = out.get("epub", {})
161
- site_cfg = self._get_site_cfg()
116
+ g = self._gen_cfg
117
+ out = self._config.get("output") or {}
118
+ cln = self._config.get("cleaner") or {}
119
+ fmt = out.get("formats") or {}
120
+ naming = out.get("naming") or {}
121
+ epub_opts = out.get("epub") or {}
122
+
162
123
  cleaner_cfg = self._dict_to_cleaner_cfg(cln)
163
124
  return ExporterConfig(
164
- cache_dir=gen.get("cache_dir", "./novel_cache"),
165
- raw_data_dir=gen.get("raw_data_dir", "./raw_data"),
166
- output_dir=gen.get("output_dir", "./downloads"),
167
- clean_text=out.get("clean_text", True),
125
+ cache_dir=g.get("cache_dir", "./novel_cache"),
126
+ raw_data_dir=g.get("raw_data_dir", "./raw_data"),
127
+ output_dir=g.get("output_dir", "./downloads"),
128
+ clean_text=cln.get("clean_text", False),
168
129
  make_txt=fmt.get("make_txt", True),
169
- make_epub=fmt.get("make_epub", False),
130
+ make_epub=fmt.get("make_epub", True),
170
131
  make_md=fmt.get("make_md", False),
171
132
  make_pdf=fmt.get("make_pdf", False),
172
133
  append_timestamp=naming.get("append_timestamp", True),
173
134
  filename_template=naming.get("filename_template", "{title}_{author}"),
174
135
  include_cover=epub_opts.get("include_cover", True),
175
- include_toc=epub_opts.get("include_toc", False),
176
- include_picture=epub_opts.get("include_picture", False),
177
- split_mode=site_cfg.get("split_mode", "book"),
136
+ include_picture=epub_opts.get("include_picture", True),
137
+ split_mode=self._site_cfg.get("split_mode", "book"),
178
138
  cleaner_cfg=cleaner_cfg,
179
139
  )
180
140
 
141
+ def get_login_config(self) -> dict[str, str]:
142
+ """
143
+ Extract login-related fields from the current site configuration.
144
+ Only non-empty string values are returned; values are stripped.
145
+
146
+ :return: A subset of ``{"username","password","cookies"}`` that are non-empty
147
+ """
148
+ out: dict[str, str] = {}
149
+ for key in ("username", "password", "cookies"):
150
+ val = self._site_cfg.get(key, "")
151
+ if isinstance(val, str):
152
+ s = val.strip()
153
+ if s:
154
+ out[key] = s
155
+ return out
156
+
181
157
  def get_book_ids(self) -> list[BookConfig]:
182
158
  """
183
- Extract the list of target books from the site configuration.
159
+ Extract and normalize the list of target books for the current site.
184
160
 
185
- The site config may specify book_ids as:
186
- - a single string or integer
187
- - a dict with book_id and optional start_id, end_id, ignore_ids
188
- - a list of the above types
161
+ Accepted shapes for ``site.book_ids``:
162
+ * a single ``str`` or ``int`` (book id)
163
+ * a dict with fields: book_id and optional start_id, end_id, ignore_ids
164
+ * a ``list`` containing any mix of the above
189
165
 
190
- :return: A list of BookConfig dicts.
191
- :raises ValueError: if the raw book_ids is neither a str/int, dict, nor list.
166
+ :return: Normalized list of :class:`BookConfig`-compatible dictionaries.
167
+ :raises ValueError: If ``book_ids`` is neither a scalar ``str|int``, ``dict``,
168
+ nor ``list``.
192
169
  """
193
- site_cfg = self._get_site_cfg()
194
- raw = site_cfg.get("book_ids", [])
170
+ raw = self._site_cfg.get("book_ids", [])
195
171
 
196
- if isinstance(raw, str | int):
172
+ if isinstance(raw, (str | int)):
197
173
  return [{"book_id": str(raw)}]
198
174
 
199
175
  if isinstance(raw, dict):
@@ -207,154 +183,175 @@ class ConfigAdapter:
207
183
  result: list[BookConfig] = []
208
184
  for item in raw:
209
185
  try:
210
- if isinstance(item, str | int):
186
+ if isinstance(item, (str | int)):
211
187
  result.append({"book_id": str(item)})
212
188
  elif isinstance(item, dict):
213
189
  result.append(self._dict_to_book_cfg(item))
214
190
  except ValueError:
215
191
  continue
216
-
217
192
  return result
218
193
 
219
- def get_log_level(self) -> LogLevel:
194
+ def get_log_level(self) -> str:
220
195
  """
221
- Retrieve the logging level from [general.debug].
196
+ Retrieve the logging level from ``general.debug``.
222
197
 
223
- Reads from config["general"]["debug"]["log_level"], defaulting to "INFO"
224
- if not set or invalid.
225
-
226
- :return: The configured LogLevel literal ("DEBUG", "INFO", "WARNING", "ERROR").
198
+ :return: One of ``"DEBUG"``, ``"INFO"``, ``"WARNING"``, ``"ERROR"``
227
199
  """
228
- debug_cfg = self._config.get("general", {}).get("debug", {})
229
- raw = debug_cfg.get("log_level") or "INFO"
230
- if raw in self._ALLOWED_LOG_LEVELS:
231
- return cast(LogLevel, raw)
232
- return "INFO"
200
+ debug_cfg = self._gen_cfg.get("debug", {})
201
+ return debug_cfg.get("log_level") or "INFO"
233
202
 
234
203
  @property
235
204
  def site(self) -> str:
236
- """
237
- Get the current site name.
238
- """
239
205
  return self._site
240
206
 
241
207
  @site.setter
242
208
  def site(self, value: str) -> None:
209
+ self._site = value
210
+
211
+ @property
212
+ def _gen_cfg(self) -> dict[str, Any]:
243
213
  """
244
- Set a new site name for configuration lookups.
214
+ A read-only view of the global ``general`` settings.
245
215
 
246
- :param value: The new site key in config["sites"] to use.
216
+ :return: ``config["general"]`` if present, else ``{}``.
247
217
  """
248
- self._site = value
218
+ return self._config.get("general") or {}
249
219
 
250
- def _get_site_cfg(self, site: str | None = None) -> dict[str, Any]:
220
+ @property
221
+ def _site_cfg(self) -> dict[str, Any]:
251
222
  """
252
- Retrieve the configuration for a specific site.
223
+ Retrieve the configuration block for the current site.
253
224
 
254
225
  Lookup order:
255
- 1. If there is a site-specific entry under config["sites"], return that.
256
- 2. Otherwise, if a "common" entry exists under config["sites"], return that.
257
- 3. If neither is present, return an empty dict.
226
+ 1. If a site-specific entry exists under ``config["sites"]``, return it.
227
+ 2. Otherwise, if ``config["sites"]["common"]`` exists, return it.
228
+ 3. Else return an empty dict.
229
+
230
+ :return: Site-specific mapping, common mapping, or ``{}``.
231
+ """
232
+ sites_cfg = self._config.get("sites") or {}
233
+ if self._site in sites_cfg and isinstance(sites_cfg[self._site], dict):
234
+ return sites_cfg[self._site] or {}
235
+ return sites_cfg.get("common") or {}
236
+
237
+ @staticmethod
238
+ def _has_key(d: Mapping[str, Any] | None, key: str) -> bool:
239
+ """
240
+ Check whether a mapping contains a key.
258
241
 
259
- :param site: Optional override of the site name; defaults to self._site.
260
- :return: The site-specific or common configuration dict.
242
+ :param d: Mapping to inspect.
243
+ :param key: Key to look up.
244
+ :return: ``True`` if ``d`` is a Mapping and contains key; otherwise ``False``.
261
245
  """
262
- site = site or self._site
263
- sites_cfg = self._config.get("sites", {}) or {}
246
+ return isinstance(d, Mapping) and (key in d)
264
247
 
265
- if site in sites_cfg:
266
- return sites_cfg[site] or {}
248
+ def _pick(self, key: str, default: T, *sources: Mapping[str, Any]) -> T:
249
+ """
250
+ Resolve ``key`` from the provided ``sources`` in order of precedence.
267
251
 
268
- return sites_cfg.get("common", {}) or {}
252
+ :param key: Configuration key to resolve.
253
+ :param default: Fallback value if ``key`` is absent in all sources.
254
+ :param sources: One or more mappings to check, in order of precedence.
255
+ :return: The first present value for ``key``, otherwise ``default``.
256
+ """
257
+ for src in sources:
258
+ if self._has_key(src, key):
259
+ return src[key] # type: ignore[no-any-return]
260
+ return default
269
261
 
270
262
  @staticmethod
271
263
  def _dict_to_book_cfg(data: dict[str, Any]) -> BookConfig:
272
264
  """
273
- Convert a dictionary to a BookConfig with normalized types.
265
+ Convert a raw dict into a :class:`novel_downloader.models.BookConfig`
266
+ with normalized types (all IDs coerced to strings).
274
267
 
275
268
  :param data: A dict that must contain at least "book_id".
276
- :return: A BookConfig dict with all values cast to strings or lists of strings.
277
- :raises ValueError: if the "book_id" field is missing.
269
+ :return: Normalized :class:`BookConfig` mapping.
270
+ :raises ValueError: If ``"book_id"`` is missing.
278
271
  """
279
272
  if "book_id" not in data:
280
273
  raise ValueError("Missing required field 'book_id'")
281
274
 
282
- result: BookConfig = {"book_id": str(data["book_id"])}
275
+ out: BookConfig = {"book_id": str(data["book_id"])}
283
276
 
284
277
  if "start_id" in data:
285
- result["start_id"] = str(data["start_id"])
286
-
278
+ out["start_id"] = str(data["start_id"])
287
279
  if "end_id" in data:
288
- result["end_id"] = str(data["end_id"])
289
-
280
+ out["end_id"] = str(data["end_id"])
290
281
  if "ignore_ids" in data:
291
- result["ignore_ids"] = [str(x) for x in data["ignore_ids"]]
292
-
293
- return result
282
+ with contextlib.suppress(Exception):
283
+ out["ignore_ids"] = [str(x) for x in data["ignore_ids"]]
284
+ return out
294
285
 
295
286
  @classmethod
296
287
  def _dict_to_cleaner_cfg(cls, cfg: dict[str, Any]) -> TextCleanerConfig:
297
288
  """
298
- Convert a nested dict of title/content rules into a TextCleanerConfig.
289
+ Convert a nested ``cleaner`` block into a
290
+ :class:`novel_downloader.models.TextCleanerConfig`.
299
291
 
300
292
  :param cfg: configuration dictionary
301
- :return: fully constructed TextCleanerConfig
293
+ :return: Aggregated title/content rules with external file contents merged
302
294
  """
303
- # Title rules
304
- title_section = cfg.get("title", {})
305
- title_remove = title_section.get("remove_patterns", [])
306
- title_repl = title_section.get("replace", {})
307
-
308
- title_ext = title_section.get("external", {})
309
- title_ext_en = title_ext.get("enabled", False)
310
- title_ext_rm_p = title_ext.get("remove_patterns", "")
311
- title_ext_rp_p = title_ext.get("replace", "")
312
- if title_ext_en:
313
- title_remove_ext = cls._load_str_list(title_ext_rm_p)
314
- title_remove += title_remove_ext
315
-
316
- title_repl_ext = cls._load_str_dict(title_ext_rp_p)
317
- title_repl = {**title_repl, **title_repl_ext}
318
-
319
- # Content rules
320
- content_section = cfg.get("content", {})
321
- content_remove = content_section.get("remove_patterns", [])
322
- content_repl = content_section.get("replace", {})
323
-
324
- content_ext = content_section.get("external", {})
325
- content_ext_en = content_ext.get("enabled", False)
326
- content_ext_rm_p = content_ext.get("remove_patterns", "")
327
- content_ext_rp_p = content_ext.get("replace", "")
328
-
329
- if content_ext_en:
330
- content_remove_ext = cls._load_str_list(content_ext_rm_p)
331
- content_remove += content_remove_ext
332
-
333
- content_repl_ext = cls._load_str_dict(content_ext_rp_p)
334
- content_repl = {**content_repl, **content_repl_ext}
335
-
295
+ t_remove, t_replace = cls._merge_rules(cfg.get("title", {}) or {})
296
+ c_remove, c_replace = cls._merge_rules(cfg.get("content", {}) or {})
336
297
  return TextCleanerConfig(
337
298
  remove_invisible=cfg.get("remove_invisible", True),
338
- title_remove_patterns=title_remove,
339
- title_replacements=title_repl,
340
- content_remove_patterns=content_remove,
341
- content_replacements=content_repl,
299
+ title_remove_patterns=t_remove,
300
+ title_replacements=t_replace,
301
+ content_remove_patterns=c_remove,
302
+ content_replacements=c_replace,
342
303
  )
343
304
 
305
+ @classmethod
306
+ def _merge_rules(cls, section: dict[str, Any]) -> tuple[list[str], dict[str, str]]:
307
+ """
308
+ Merge inline patterns/replacements with any enabled external files.
309
+
310
+ :param section: Mapping describing either the ``title`` or ``content`` rules.
311
+ :return: Tuple ``(remove_patterns, replace)`` after merging.
312
+ """
313
+ remove = list(section.get("remove_patterns") or [])
314
+ replace = dict(section.get("replace") or {})
315
+ ext = section.get("external") or {}
316
+ if ext.get("enabled", False):
317
+ rm_path = ext.get("remove_patterns") or ""
318
+ rp_path = ext.get("replace") or ""
319
+ remove += cls._load_str_list(rm_path)
320
+ replace.update(cls._load_str_dict(rp_path))
321
+ return remove, replace
322
+
344
323
  @staticmethod
345
324
  def _load_str_list(path: str) -> list[str]:
325
+ """
326
+ Load a JSON file containing a list of strings.
327
+
328
+ :param path: File path to a JSON array (e.g., ``["a", "b"]``).
329
+ :return: Parsed list on success; empty list if ``path`` is empty, file is
330
+ missing, or content is invalid.
331
+ """
332
+ if not path:
333
+ return []
346
334
  try:
347
335
  with open(path, encoding="utf-8") as f:
348
- parsed = json.load(f)
349
- return cast(list[str], parsed)
336
+ data = json.load(f)
337
+ return list(data) if isinstance(data, list) else []
350
338
  except Exception:
351
339
  return []
352
340
 
353
341
  @staticmethod
354
342
  def _load_str_dict(path: str) -> dict[str, str]:
343
+ """
344
+ Load a JSON file containing a dict of string-to-string mappings.
345
+
346
+ :param path: File path to a JSON object (e.g., ``{"old":"new"}``).
347
+ :return: Parsed dict on success; empty dict if ``path`` is empty, file is
348
+ missing, or content is invalid.
349
+ """
350
+ if not path:
351
+ return {}
355
352
  try:
356
353
  with open(path, encoding="utf-8") as f:
357
- parsed = json.load(f)
358
- return cast(dict[str, str], parsed)
354
+ data = json.load(f)
355
+ return dict(data) if isinstance(data, dict) else {}
359
356
  except Exception:
360
357
  return {}