novel-downloader 1.5.0__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (241) hide show
  1. novel_downloader/__init__.py +1 -1
  2. novel_downloader/cli/__init__.py +1 -3
  3. novel_downloader/cli/clean.py +21 -88
  4. novel_downloader/cli/config.py +26 -21
  5. novel_downloader/cli/download.py +77 -64
  6. novel_downloader/cli/export.py +16 -20
  7. novel_downloader/cli/main.py +1 -1
  8. novel_downloader/cli/search.py +62 -65
  9. novel_downloader/cli/ui.py +156 -0
  10. novel_downloader/config/__init__.py +8 -5
  11. novel_downloader/config/adapter.py +65 -105
  12. novel_downloader/config/{loader.py → file_io.py} +53 -26
  13. novel_downloader/core/__init__.py +1 -0
  14. novel_downloader/core/archived/deqixs/fetcher.py +115 -0
  15. novel_downloader/core/archived/deqixs/parser.py +132 -0
  16. novel_downloader/core/archived/deqixs/searcher.py +89 -0
  17. novel_downloader/core/{searchers/qidian.py → archived/qidian/searcher.py} +12 -20
  18. novel_downloader/core/archived/wanbengo/searcher.py +98 -0
  19. novel_downloader/core/archived/xshbook/searcher.py +93 -0
  20. novel_downloader/core/downloaders/__init__.py +3 -24
  21. novel_downloader/core/downloaders/base.py +49 -23
  22. novel_downloader/core/downloaders/common.py +191 -137
  23. novel_downloader/core/downloaders/qianbi.py +187 -146
  24. novel_downloader/core/downloaders/qidian.py +187 -141
  25. novel_downloader/core/downloaders/registry.py +4 -2
  26. novel_downloader/core/downloaders/signals.py +46 -0
  27. novel_downloader/core/exporters/__init__.py +3 -20
  28. novel_downloader/core/exporters/base.py +33 -37
  29. novel_downloader/core/exporters/common/__init__.py +1 -2
  30. novel_downloader/core/exporters/common/epub.py +15 -10
  31. novel_downloader/core/exporters/common/main_exporter.py +19 -12
  32. novel_downloader/core/exporters/common/txt.py +14 -9
  33. novel_downloader/core/exporters/epub_util.py +59 -29
  34. novel_downloader/core/exporters/linovelib/__init__.py +1 -0
  35. novel_downloader/core/exporters/linovelib/epub.py +23 -25
  36. novel_downloader/core/exporters/linovelib/main_exporter.py +8 -12
  37. novel_downloader/core/exporters/linovelib/txt.py +17 -11
  38. novel_downloader/core/exporters/qidian.py +2 -8
  39. novel_downloader/core/exporters/registry.py +4 -2
  40. novel_downloader/core/exporters/txt_util.py +7 -7
  41. novel_downloader/core/fetchers/__init__.py +54 -48
  42. novel_downloader/core/fetchers/aaatxt.py +83 -0
  43. novel_downloader/core/fetchers/{biquge/session.py → b520.py} +6 -11
  44. novel_downloader/core/fetchers/{base/session.py → base.py} +37 -46
  45. novel_downloader/core/fetchers/{biquge/browser.py → biquyuedu.py} +12 -17
  46. novel_downloader/core/fetchers/dxmwx.py +110 -0
  47. novel_downloader/core/fetchers/eightnovel.py +139 -0
  48. novel_downloader/core/fetchers/{esjzone/session.py → esjzone.py} +19 -12
  49. novel_downloader/core/fetchers/guidaye.py +85 -0
  50. novel_downloader/core/fetchers/hetushu.py +92 -0
  51. novel_downloader/core/fetchers/{qianbi/browser.py → i25zw.py} +19 -28
  52. novel_downloader/core/fetchers/ixdzs8.py +113 -0
  53. novel_downloader/core/fetchers/jpxs123.py +101 -0
  54. novel_downloader/core/fetchers/lewenn.py +83 -0
  55. novel_downloader/core/fetchers/{linovelib/session.py → linovelib.py} +12 -13
  56. novel_downloader/core/fetchers/piaotia.py +105 -0
  57. novel_downloader/core/fetchers/qbtr.py +101 -0
  58. novel_downloader/core/fetchers/{qianbi/session.py → qianbi.py} +5 -10
  59. novel_downloader/core/fetchers/{qidian/session.py → qidian.py} +46 -39
  60. novel_downloader/core/fetchers/quanben5.py +92 -0
  61. novel_downloader/core/fetchers/{base/rate_limiter.py → rate_limiter.py} +2 -2
  62. novel_downloader/core/fetchers/registry.py +5 -16
  63. novel_downloader/core/fetchers/{sfacg/session.py → sfacg.py} +7 -10
  64. novel_downloader/core/fetchers/shencou.py +106 -0
  65. novel_downloader/core/fetchers/shuhaige.py +84 -0
  66. novel_downloader/core/fetchers/tongrenquan.py +84 -0
  67. novel_downloader/core/fetchers/ttkan.py +95 -0
  68. novel_downloader/core/fetchers/wanbengo.py +83 -0
  69. novel_downloader/core/fetchers/xiaoshuowu.py +106 -0
  70. novel_downloader/core/fetchers/xiguashuwu.py +177 -0
  71. novel_downloader/core/fetchers/xs63b.py +171 -0
  72. novel_downloader/core/fetchers/xshbook.py +85 -0
  73. novel_downloader/core/fetchers/{yamibo/session.py → yamibo.py} +19 -12
  74. novel_downloader/core/fetchers/yibige.py +114 -0
  75. novel_downloader/core/interfaces/__init__.py +1 -9
  76. novel_downloader/core/interfaces/downloader.py +6 -2
  77. novel_downloader/core/interfaces/exporter.py +7 -7
  78. novel_downloader/core/interfaces/fetcher.py +4 -17
  79. novel_downloader/core/interfaces/parser.py +5 -6
  80. novel_downloader/core/interfaces/searcher.py +9 -1
  81. novel_downloader/core/parsers/__init__.py +49 -12
  82. novel_downloader/core/parsers/aaatxt.py +132 -0
  83. novel_downloader/core/parsers/b520.py +116 -0
  84. novel_downloader/core/parsers/base.py +63 -12
  85. novel_downloader/core/parsers/biquyuedu.py +133 -0
  86. novel_downloader/core/parsers/dxmwx.py +162 -0
  87. novel_downloader/core/parsers/eightnovel.py +224 -0
  88. novel_downloader/core/parsers/esjzone.py +61 -66
  89. novel_downloader/core/parsers/guidaye.py +128 -0
  90. novel_downloader/core/parsers/hetushu.py +139 -0
  91. novel_downloader/core/parsers/i25zw.py +137 -0
  92. novel_downloader/core/parsers/ixdzs8.py +186 -0
  93. novel_downloader/core/parsers/jpxs123.py +137 -0
  94. novel_downloader/core/parsers/lewenn.py +142 -0
  95. novel_downloader/core/parsers/linovelib.py +48 -64
  96. novel_downloader/core/parsers/piaotia.py +189 -0
  97. novel_downloader/core/parsers/qbtr.py +136 -0
  98. novel_downloader/core/parsers/qianbi.py +48 -50
  99. novel_downloader/core/parsers/qidian/book_info_parser.py +58 -59
  100. novel_downloader/core/parsers/qidian/chapter_encrypted.py +272 -330
  101. novel_downloader/core/parsers/qidian/chapter_normal.py +24 -55
  102. novel_downloader/core/parsers/qidian/main_parser.py +11 -38
  103. novel_downloader/core/parsers/qidian/utils/__init__.py +1 -0
  104. novel_downloader/core/parsers/qidian/utils/decryptor_fetcher.py +1 -1
  105. novel_downloader/core/parsers/qidian/utils/fontmap_recover.py +143 -0
  106. novel_downloader/core/parsers/qidian/utils/helpers.py +0 -4
  107. novel_downloader/core/parsers/quanben5.py +103 -0
  108. novel_downloader/core/parsers/registry.py +5 -16
  109. novel_downloader/core/parsers/sfacg.py +38 -45
  110. novel_downloader/core/parsers/shencou.py +215 -0
  111. novel_downloader/core/parsers/shuhaige.py +111 -0
  112. novel_downloader/core/parsers/tongrenquan.py +116 -0
  113. novel_downloader/core/parsers/ttkan.py +132 -0
  114. novel_downloader/core/parsers/wanbengo.py +191 -0
  115. novel_downloader/core/parsers/xiaoshuowu.py +173 -0
  116. novel_downloader/core/parsers/xiguashuwu.py +435 -0
  117. novel_downloader/core/parsers/xs63b.py +161 -0
  118. novel_downloader/core/parsers/xshbook.py +134 -0
  119. novel_downloader/core/parsers/yamibo.py +87 -131
  120. novel_downloader/core/parsers/yibige.py +166 -0
  121. novel_downloader/core/searchers/__init__.py +34 -3
  122. novel_downloader/core/searchers/aaatxt.py +107 -0
  123. novel_downloader/core/searchers/{biquge.py → b520.py} +29 -28
  124. novel_downloader/core/searchers/base.py +112 -36
  125. novel_downloader/core/searchers/dxmwx.py +105 -0
  126. novel_downloader/core/searchers/eightnovel.py +84 -0
  127. novel_downloader/core/searchers/esjzone.py +43 -25
  128. novel_downloader/core/searchers/hetushu.py +92 -0
  129. novel_downloader/core/searchers/i25zw.py +93 -0
  130. novel_downloader/core/searchers/ixdzs8.py +107 -0
  131. novel_downloader/core/searchers/jpxs123.py +107 -0
  132. novel_downloader/core/searchers/piaotia.py +100 -0
  133. novel_downloader/core/searchers/qbtr.py +106 -0
  134. novel_downloader/core/searchers/qianbi.py +74 -40
  135. novel_downloader/core/searchers/quanben5.py +144 -0
  136. novel_downloader/core/searchers/registry.py +24 -8
  137. novel_downloader/core/searchers/shuhaige.py +124 -0
  138. novel_downloader/core/searchers/tongrenquan.py +110 -0
  139. novel_downloader/core/searchers/ttkan.py +92 -0
  140. novel_downloader/core/searchers/xiaoshuowu.py +122 -0
  141. novel_downloader/core/searchers/xiguashuwu.py +95 -0
  142. novel_downloader/core/searchers/xs63b.py +104 -0
  143. novel_downloader/locales/en.json +31 -82
  144. novel_downloader/locales/zh.json +32 -83
  145. novel_downloader/models/__init__.py +21 -22
  146. novel_downloader/models/book.py +44 -0
  147. novel_downloader/models/config.py +4 -37
  148. novel_downloader/models/login.py +1 -1
  149. novel_downloader/models/search.py +5 -0
  150. novel_downloader/resources/config/settings.toml +8 -70
  151. novel_downloader/resources/json/xiguashuwu.json +718 -0
  152. novel_downloader/utils/__init__.py +13 -22
  153. novel_downloader/utils/chapter_storage.py +3 -2
  154. novel_downloader/utils/constants.py +4 -29
  155. novel_downloader/utils/cookies.py +6 -18
  156. novel_downloader/utils/crypto_utils/__init__.py +13 -0
  157. novel_downloader/utils/crypto_utils/aes_util.py +90 -0
  158. novel_downloader/utils/crypto_utils/aes_v1.py +619 -0
  159. novel_downloader/utils/crypto_utils/aes_v2.py +1143 -0
  160. novel_downloader/utils/{crypto_utils.py → crypto_utils/rc4.py} +3 -10
  161. novel_downloader/utils/epub/__init__.py +1 -1
  162. novel_downloader/utils/epub/constants.py +57 -16
  163. novel_downloader/utils/epub/documents.py +88 -194
  164. novel_downloader/utils/epub/models.py +0 -14
  165. novel_downloader/utils/epub/utils.py +63 -96
  166. novel_downloader/utils/file_utils/__init__.py +2 -23
  167. novel_downloader/utils/file_utils/io.py +3 -113
  168. novel_downloader/utils/file_utils/sanitize.py +0 -4
  169. novel_downloader/utils/fontocr.py +207 -0
  170. novel_downloader/utils/logger.py +8 -16
  171. novel_downloader/utils/network.py +2 -2
  172. novel_downloader/utils/state.py +4 -90
  173. novel_downloader/utils/text_utils/__init__.py +1 -7
  174. novel_downloader/utils/text_utils/diff_display.py +5 -7
  175. novel_downloader/utils/time_utils/__init__.py +5 -11
  176. novel_downloader/utils/time_utils/datetime_utils.py +20 -29
  177. novel_downloader/utils/time_utils/sleep_utils.py +4 -8
  178. novel_downloader/web/__init__.py +13 -0
  179. novel_downloader/web/components/__init__.py +11 -0
  180. novel_downloader/web/components/navigation.py +35 -0
  181. novel_downloader/web/main.py +66 -0
  182. novel_downloader/web/pages/__init__.py +17 -0
  183. novel_downloader/web/pages/download.py +78 -0
  184. novel_downloader/web/pages/progress.py +147 -0
  185. novel_downloader/web/pages/search.py +329 -0
  186. novel_downloader/web/services/__init__.py +17 -0
  187. novel_downloader/web/services/client_dialog.py +164 -0
  188. novel_downloader/web/services/cred_broker.py +113 -0
  189. novel_downloader/web/services/cred_models.py +35 -0
  190. novel_downloader/web/services/task_manager.py +264 -0
  191. novel_downloader-2.0.0.dist-info/METADATA +171 -0
  192. novel_downloader-2.0.0.dist-info/RECORD +210 -0
  193. {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.0.dist-info}/entry_points.txt +1 -1
  194. novel_downloader/core/downloaders/biquge.py +0 -29
  195. novel_downloader/core/downloaders/esjzone.py +0 -29
  196. novel_downloader/core/downloaders/linovelib.py +0 -29
  197. novel_downloader/core/downloaders/sfacg.py +0 -29
  198. novel_downloader/core/downloaders/yamibo.py +0 -29
  199. novel_downloader/core/exporters/biquge.py +0 -22
  200. novel_downloader/core/exporters/esjzone.py +0 -22
  201. novel_downloader/core/exporters/qianbi.py +0 -22
  202. novel_downloader/core/exporters/sfacg.py +0 -22
  203. novel_downloader/core/exporters/yamibo.py +0 -22
  204. novel_downloader/core/fetchers/base/__init__.py +0 -14
  205. novel_downloader/core/fetchers/base/browser.py +0 -422
  206. novel_downloader/core/fetchers/biquge/__init__.py +0 -14
  207. novel_downloader/core/fetchers/esjzone/__init__.py +0 -14
  208. novel_downloader/core/fetchers/esjzone/browser.py +0 -209
  209. novel_downloader/core/fetchers/linovelib/__init__.py +0 -14
  210. novel_downloader/core/fetchers/linovelib/browser.py +0 -198
  211. novel_downloader/core/fetchers/qianbi/__init__.py +0 -14
  212. novel_downloader/core/fetchers/qidian/__init__.py +0 -14
  213. novel_downloader/core/fetchers/qidian/browser.py +0 -326
  214. novel_downloader/core/fetchers/sfacg/__init__.py +0 -14
  215. novel_downloader/core/fetchers/sfacg/browser.py +0 -194
  216. novel_downloader/core/fetchers/yamibo/__init__.py +0 -14
  217. novel_downloader/core/fetchers/yamibo/browser.py +0 -234
  218. novel_downloader/core/parsers/biquge.py +0 -139
  219. novel_downloader/models/chapter.py +0 -25
  220. novel_downloader/models/types.py +0 -13
  221. novel_downloader/tui/__init__.py +0 -7
  222. novel_downloader/tui/app.py +0 -32
  223. novel_downloader/tui/main.py +0 -17
  224. novel_downloader/tui/screens/__init__.py +0 -14
  225. novel_downloader/tui/screens/home.py +0 -198
  226. novel_downloader/tui/screens/login.py +0 -74
  227. novel_downloader/tui/styles/home_layout.tcss +0 -79
  228. novel_downloader/tui/widgets/richlog_handler.py +0 -24
  229. novel_downloader/utils/cache.py +0 -24
  230. novel_downloader/utils/fontocr/__init__.py +0 -22
  231. novel_downloader/utils/fontocr/hash_store.py +0 -280
  232. novel_downloader/utils/fontocr/hash_utils.py +0 -103
  233. novel_downloader/utils/fontocr/model_loader.py +0 -69
  234. novel_downloader/utils/fontocr/ocr_v1.py +0 -315
  235. novel_downloader/utils/fontocr/ocr_v2.py +0 -764
  236. novel_downloader/utils/fontocr/ocr_v3.py +0 -744
  237. novel_downloader-1.5.0.dist-info/METADATA +0 -196
  238. novel_downloader-1.5.0.dist-info/RECORD +0 -164
  239. {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.0.dist-info}/WHEEL +0 -0
  240. {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.0.dist-info}/licenses/LICENSE +0 -0
  241. {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.0.dist-info}/top_level.txt +0 -0
@@ -1,26 +1,23 @@
1
1
  #!/usr/bin/env python3
2
2
  """
3
- novel_downloader.config.loader
4
- ------------------------------
3
+ novel_downloader.config.file_io
4
+ -------------------------------
5
5
 
6
- Provides functionality to load Toml configuration files into Python
7
- dictionaries, with robust error handling and fallback support.
6
+ Provides functionality to load TOML configuration files into Python dicts.
8
7
  """
9
8
 
10
- __all__ = ["load_config"]
11
-
12
9
  import json
13
10
  import logging
14
11
  from pathlib import Path
15
- from typing import Any
12
+ from typing import Any, TypeVar
16
13
 
17
- from novel_downloader.utils.cache import cached_load_config
18
14
  from novel_downloader.utils.constants import SETTING_FILE
19
15
 
16
+ T = TypeVar("T")
20
17
  logger = logging.getLogger(__name__)
21
18
 
22
19
 
23
- def resolve_file_path(
20
+ def _resolve_file_path(
24
21
  user_path: str | Path | None,
25
22
  local_filename: str | list[str],
26
23
  fallback_path: Path,
@@ -29,9 +26,9 @@ def resolve_file_path(
29
26
  Resolve the file path to use based on a prioritized lookup order.
30
27
 
31
28
  Priority:
32
- 1. A user-specified path (if provided and exists)
33
- 2. A file in the current working directory with the given name
34
- 3. A globally registered fallback path
29
+ 1. A user-specified path (if provided and exists)
30
+ 2. A file in the current working directory with the given name
31
+ 3. A globally registered fallback path
35
32
 
36
33
  :param user_path: Optional user-specified file path.
37
34
  :param local_filename: File name to check in the current working directory.
@@ -117,7 +114,6 @@ def _load_by_extension(path: Path) -> dict[str, Any]:
117
114
  raise ValueError(f"Unsupported config file extension: {ext}")
118
115
 
119
116
 
120
- @cached_load_config
121
117
  def load_config(
122
118
  config_path: str | Path | None = None,
123
119
  ) -> dict[str, Any]:
@@ -125,9 +121,9 @@ def load_config(
125
121
  Load configuration data from a Toml file.
126
122
 
127
123
  :param config_path: Optional path to the Toml configuration file.
128
- :return: Parsed configuration as a dict.
124
+ :return: Parsed configuration as a dict.
129
125
  """
130
- path = resolve_file_path(
126
+ path = _resolve_file_path(
131
127
  user_path=config_path,
132
128
  local_filename=[
133
129
  "settings.toml",
@@ -148,6 +144,46 @@ def load_config(
148
144
  return {}
149
145
 
150
146
 
147
def get_config_value(keys: list[str], default: T) -> T:
    """
    Look up a nested value in the loaded configuration.

    The mapping returned by ``load_config()`` is walked one key at a time.
    ``default`` is returned when ``keys`` is empty, when a non-dict is met
    while descending, when the final key is absent, or when the value found
    is not an instance of ``type(default)``.

    :param keys: Path of keys leading to the wanted value, outermost first.
    :param default: Fallback value; its type also decides which values are accepted.
    :return: The configured value, or ``default``.
    """
    node = load_config()
    if not keys:
        return default

    *parents, leaf = keys
    for name in parents:
        if not isinstance(node, dict):
            return default
        # A missing intermediate key yields an empty dict, so the final
        # lookup below simply falls back to ``default``.
        node = node.get(name, {})

    if not isinstance(node, dict):
        return default

    found = node.get(leaf, default)
    if isinstance(found, type(default)):
        return found
    return default
160
+
161
+
162
def save_config(
    config: dict[str, Any],
    output_path: str | Path = SETTING_FILE,
) -> None:
    """
    Save configuration data to disk in JSON format.

    The data is serialized in memory before the destination file is opened,
    so a config that cannot be encoded as JSON never truncates an existing
    settings file.

    :param config: Dictionary containing configuration data to save.
    :param output_path: Destination path to save the config (default: SETTING_FILE).
    :raises Exception: If serializing the data or writing the file fails.
    """
    output = Path(output_path).expanduser().resolve()
    output.parent.mkdir(parents=True, exist_ok=True)

    try:
        # Serialize first: opening with "w" truncates the file immediately,
        # and a failure while streaming would leave it empty or partial.
        payload = json.dumps(config, indent=2, ensure_ascii=False)
        with output.open("w", encoding="utf-8") as f:
            f.write(payload)
    except Exception as e:
        logger.error("[config] Failed to write config JSON '%s': %s", output, e)
        raise

    logger.info("[config] Configuration successfully saved to JSON: %s", output)
185
+
186
+
151
187
  def save_config_file(
152
188
  source_path: str | Path,
153
189
  output_path: str | Path = SETTING_FILE,
@@ -158,9 +194,9 @@ def save_config_file(
158
194
 
159
195
  :param source_path: The user-provided TOML file path.
160
196
  :param output_path: Destination path to save the config (default: SETTING_FILE).
197
+ :raises Exception: If writing to the file fails.
161
198
  """
162
199
  source = Path(source_path).expanduser().resolve()
163
- output = Path(output_path).expanduser().resolve()
164
200
 
165
201
  if not source.is_file():
166
202
  raise FileNotFoundError(f"Source file not found: {source}")
@@ -171,14 +207,5 @@ def save_config_file(
171
207
  logger.error("[config] Failed to load config file: %s", e)
172
208
  raise ValueError(f"Invalid config file: {source}") from e
173
209
 
174
- output.parent.mkdir(parents=True, exist_ok=True)
175
-
176
- try:
177
- with output.open("w", encoding="utf-8") as f:
178
- json.dump(data, f, indent=2, ensure_ascii=False)
179
- except Exception as e:
180
- logger.error("[config] Failed to write config JSON '%s': %s", output, e)
181
- raise
182
-
183
- logger.info("[config] Configuration successfully saved to JSON: %s", output)
210
+ save_config(data, output_path)
184
211
  return
@@ -12,6 +12,7 @@ downloading and processing online novel content, including:
12
12
  - Parser: Extracts structured data from HTML or SSR content.
13
13
  - Fetcher: Sends HTTP requests and manages sessions, including login if required.
14
14
  - Exporter: Responsible for exporting downloaded data into various output formats.
15
+ - Searcher: Provides unified search functionality across supported novel sites.
15
16
  """
16
17
 
17
18
  __all__ = [
@@ -0,0 +1,115 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ novel_downloader.core.archived.deqixs.fetcher
4
+ ---------------------------------------------
5
+
6
+ """
7
+
8
+ from typing import Any
9
+
10
+ from novel_downloader.core.fetchers.base import BaseSession
11
+ from novel_downloader.models import FetcherConfig
12
+ from novel_downloader.utils import async_jitter_sleep
13
+
14
+ # from novel_downloader.core.fetchers.registry import register_fetcher
15
+
16
+
17
+ # @register_fetcher(
18
+ # site_keys=["deqixs"],
19
+ # )
20
class DeqixsSession(BaseSession):
    """
    Session used to download pages from 得奇小说网 (www.deqixs.com).
    """

    BASE_URL = "https://www.deqixs.com"
    BOOK_INFO_URL = "https://www.deqixs.com/xiaoshuo/{book_id}/"
    CHAPTER_URL = "https://www.deqixs.com/xiaoshuo/{book_id}/{chapter_id}.html"

    def __init__(
        self,
        config: FetcherConfig,
        cookies: dict[str, str] | None = None,
        **kwargs: Any,
    ) -> None:
        """
        Initialise the base session under the site key ``"deqixs"``.

        :param config: Fetcher configuration forwarded to the base session.
        :param cookies: Optional cookies forwarded to the base session.
        """
        super().__init__("deqixs", config, cookies, **kwargs)

    async def get_book_info(
        self,
        book_id: str,
        **kwargs: Any,
    ) -> list[str]:
        """
        Download the book info page.

        :param book_id: The book identifier.
        :return: A one-element list holding the page HTML.
        """
        info_html = await self.fetch(self.book_info_url(book_id=book_id), **kwargs)
        return [info_html]

    async def get_book_chapter(
        self,
        book_id: str,
        chapter_id: str,
        **kwargs: Any,
    ) -> list[str]:
        """
        Download every page of a (possibly multi-page) chapter.

        The first page is ``{chapter_id}.html``; follow-up pages are
        ``{chapter_id}-{n}.html`` and are requested only while the previously
        fetched page contains the next page's relative path.

        :param book_id: The book identifier.
        :param chapter_id: The chapter identifier.
        :return: HTML of each fetched page, in order (empty if the first fetch fails).
        """
        pages: list[str] = []

        while True:
            page_no = len(pages) + 1
            suffix = chapter_id if page_no == 1 else f"{chapter_id}-{page_no}"
            path = f"/xiaoshuo/{book_id}/{suffix}.html"

            # Stop as soon as the last page no longer links to a next one.
            if pages and path not in pages[-1]:
                break

            try:
                page_html = await self.fetch(self.BASE_URL + path, **kwargs)
            except Exception as err:
                self.logger.warning(
                    "[async] get_book_chapter(%s page %d) failed: %s",
                    chapter_id,
                    page_no,
                    err,
                )
                break

            pages.append(page_html)
            # Pause between page requests, with jitter around the interval.
            await async_jitter_sleep(
                self.request_interval,
                mul_spread=1.1,
                max_sleep=self.request_interval + 2,
            )

        return pages

    @classmethod
    def book_info_url(cls, book_id: str) -> str:
        """
        Build the URL of a book's info page.

        :param book_id: The identifier of the book.
        :return: ``BOOK_INFO_URL`` with ``book_id`` filled in.
        """
        template = cls.BOOK_INFO_URL
        return template.format(book_id=book_id)

    @classmethod
    def chapter_url(cls, book_id: str, chapter_id: str) -> str:
        """
        Build the URL of a chapter's first page.

        :param book_id: The identifier of the book.
        :param chapter_id: The identifier of the chapter.
        :return: ``CHAPTER_URL`` with both identifiers filled in.
        """
        template = cls.CHAPTER_URL
        return template.format(book_id=book_id, chapter_id=chapter_id)
@@ -0,0 +1,132 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ novel_downloader.core.archived.deqixs.parser
4
+ --------------------------------------------
5
+
6
+ """
7
+
8
+ from typing import Any
9
+
10
+ from lxml import html
11
+ from novel_downloader.core.parsers.base import BaseParser
12
+ from novel_downloader.models import (
13
+ BookInfoDict,
14
+ ChapterDict,
15
+ ChapterInfoDict,
16
+ VolumeInfoDict,
17
+ )
18
+
19
+ # from novel_downloader.core.parsers.registry import register_parser
20
+
21
+
22
+ # @register_parser(
23
+ # site_keys=["deqixs"],
24
+ # )
25
class DeqixsParser(BaseParser):
    """
    Parser for 得奇小说网 book pages.
    """

    # Paragraphs containing any of these markers are dropped from chapter text.
    ADS: set[str] = {
        "更新不易",
        "记得分享",
        "(本章完)",
    }

    def parse_book_info(
        self,
        html_list: list[str],
        **kwargs: Any,
    ) -> BookInfoDict | None:
        """
        Parse the book info page into a ``BookInfoDict``.

        Only the first entry of ``html_list`` is used. Fields whose node is
        missing from the page fall back to an empty string instead of raising.

        :param html_list: Raw HTML pages; the first one is the info page.
        :return: Parsed book info, or ``None`` if there is no usable page
                 or no book title could be found.
        """
        if not html_list or not html_list[0].strip():
            return None

        tree = html.fromstring(html_list[0])

        def first(expr: str) -> str:
            """Return the stripped first XPath hit, or "" when nothing matches."""
            hits = tree.xpath(expr)
            return hits[0].strip() if hits else ""

        # Book title and word count
        book_name = first("//div[@class='itemtxt']/h1/a/text()")
        if not book_name:
            # Without a title this is not a recognisable book page.
            return None
        word_count = first("//div[@class='itemtxt']/h1/i/text()")

        # Serialization status (first span) and genre tags (spans in between)
        spans = tree.xpath("//div[@class='itemtxt']/p[1]/span/text()")
        serial_status = spans[0].strip() if spans else ""
        tags = [s.strip() for s in spans[1:-1]] if len(spans) > 2 else []

        # Author
        author_text = first("//div[@class='itemtxt']/p[2]/a/text()")
        author = author_text.replace("作者:", "").strip()

        # Cover URL (protocol-relative sources are given an https scheme)
        cover_src = first("//div[@class='item']//a/img/@src")
        cover_url = "https:" + cover_src if cover_src.startswith("//") else cover_src

        # Last update time
        update_raw = first("//h2[@id='dir']/span/text()")
        update_time = update_raw.replace("更新时间:", "").strip()

        # Summary paragraphs (first description block)
        paras = tree.xpath("(//div[@class='des bb'])[1]/p/text()")
        summary = "\n".join(p.strip() for p in paras if p.strip())

        # Chapter list
        chapters: list[ChapterInfoDict] = []
        for a in tree.xpath("//div[@id='list']//ul/li/a"):
            href = a.get("href")
            if not href:
                # An anchor without a target cannot be downloaded.
                continue
            chapter_id = href.split("/")[-1].replace(".html", "")
            title = a.text_content().strip()
            chapters.append({"title": title, "url": href, "chapterId": chapter_id})

        volumes: list[VolumeInfoDict] = [{"volume_name": "正文", "chapters": chapters}]

        return {
            "book_name": book_name,
            "author": author,
            "cover_url": cover_url,
            "update_time": update_time,
            "serial_status": serial_status,
            "word_count": word_count,
            "summary": summary,
            "tags": tags,
            "volumes": volumes,
            "extra": {},
        }

    def parse_chapter(
        self,
        html_list: list[str],
        chapter_id: str,
        **kwargs: Any,
    ) -> ChapterDict | None:
        """
        Merge the pages of one chapter into a single ``ChapterDict``.

        The title is taken from the first page that provides one; paragraphs
        from all pages are concatenated, skipping empty ones and those that
        contain an ``ADS`` marker.

        :param html_list: Raw HTML of every page of the chapter, in order.
        :param chapter_id: Identifier stored in the result.
        :return: The parsed chapter, or ``None`` if no content was extracted.
        """
        if not html_list:
            return None

        title_text = ""
        contents: list[str] = []
        for curr_html in html_list:
            if not curr_html.strip():
                # lxml cannot parse an empty document; skip blank pages.
                continue
            tree = html.fromstring(curr_html)
            # Extract title once
            if not title_text:
                full_title = tree.xpath("string(//div[@class='submenu']/h1)")
                if ">" in full_title:
                    # Keep only the part after the first ">" separator.
                    title_text = full_title.split(">", 1)[1].strip()
                else:
                    title_text = full_title.strip()
            # Extract paragraphs
            for p in tree.xpath("//div[@class='con']/p"):
                text = p.text_content().strip()
                # Filter out ads or empty paragraphs
                if not text or any(ad in text for ad in self.ADS):
                    continue
                contents.append(text)

        content = "\n".join(contents)
        if not content:
            return None

        return {
            "id": chapter_id,
            "title": title_text,
            "content": content,
            "extra": {"site": "deqixs"},
        }
@@ -0,0 +1,89 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ novel_downloader.core.archived.deqixs.searcher
4
+ ----------------------------------------------
5
+
6
+ """
7
+
8
+ import logging
9
+
10
+ from lxml import html
11
+ from novel_downloader.core.searchers.base import BaseSearcher
12
+ from novel_downloader.models import SearchResult
13
+
14
+ # from novel_downloader.core.searchers.registry import register_searcher
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
+ # @register_searcher(
20
+ # site_keys=["deqixs"],
21
+ # )
22
class DeqixsSearcher(BaseSearcher):
    """
    Searcher for 得奇小说网 (www.deqixs.com).
    """

    site_name = "deqixs"
    priority = 20
    BASE_URL = "https://www.deqixs.com"
    SEARCH_URL = "https://www.deqixs.com/tag/"

    @classmethod
    async def _fetch_html(cls, keyword: str) -> str:
        """
        Fetch the raw HTML of the search results page.

        :param keyword: The search term, sent as the ``key`` query parameter.
        :return: HTML text of the results page, or an empty string on failure.
        """
        params = {"key": keyword}
        try:
            async with (await cls._http_get(cls.SEARCH_URL, params=params)) as resp:
                return await cls._response_to_str(resp)
        except Exception:
            logger.error(
                "Failed to fetch HTML for keyword '%s' from '%s'",
                keyword,
                cls.SEARCH_URL,
            )
            return ""

    @classmethod
    def _parse_html(cls, html_str: str, limit: int | None = None) -> list[SearchResult]:
        """
        Parse the search results page into ``SearchResult`` entries.

        Rows without a usable book link are skipped; other missing fields
        fall back to an empty string so that one malformed row does not
        discard the remaining results.

        :param html_str: Raw HTML of the search results page.
        :param limit: Maximum number of rows to inspect, or None for all.
        :return: List of parsed search results.
        """
        doc = html.fromstring(html_str)
        rows = doc.xpath("//div[@class='container']/div[@class='item']")
        results: list[SearchResult] = []

        def first(node: html.HtmlElement, expr: str) -> str:
            """Return the first XPath hit under *node*, or "" when nothing matches."""
            hits = node.xpath(expr)
            return hits[0] if hits else ""

        for idx, row in enumerate(rows):
            if limit is not None and idx >= limit:
                break

            href = first(row, ".//h3/a/@href")
            book_id = href.strip("/ ").split("/")[-1]
            if not book_id:
                continue
            book_url = cls.BASE_URL + href

            img_src = first(row, ".//a/img/@src")
            cover_url = "https:" + img_src if img_src.startswith("//") else img_src
            title = first(row, ".//h3/a/text()").strip()

            author_text = first(row, ".//p[2]/a/text()")
            author = author_text.replace("作者:", "").strip()

            spans = row.xpath(".//p[1]/span/text()")
            word_count = spans[2].strip() if len(spans) > 2 else ""

            # Latest chapter and update date come from the first list item
            li_nodes = row.xpath(".//ul/li")
            if li_nodes:
                update_date = first(li_nodes[0], "./i/text()").strip()
                latest_chapter = first(li_nodes[0], "./a/text()").strip()
            else:
                update_date = ""
                latest_chapter = ""

            # Later rows get a larger priority value than earlier ones
            prio = cls.priority + idx

            results.append(
                SearchResult(
                    site=cls.site_name,
                    book_id=book_id,
                    book_url=book_url,
                    cover_url=cover_url,
                    title=title,
                    author=author,
                    latest_chapter=latest_chapter,
                    update_date=update_date,
                    word_count=word_count,
                    priority=prio,
                )
            )
        return results
@@ -1,14 +1,13 @@
1
1
  #!/usr/bin/env python3
2
2
  """
3
- novel_downloader.core.searchers.qidian
4
- --------------------------------------
3
+ novel_downloader.core.archived.qidian.searcher
4
+ ----------------------------------------------
5
5
 
6
6
  """
7
7
 
8
8
  import logging
9
9
 
10
10
  from lxml import html
11
-
12
11
  from novel_downloader.core.searchers.base import BaseSearcher
13
12
  from novel_downloader.models import SearchResult
14
13
 
@@ -28,35 +27,21 @@ class QidianSearcher(BaseSearcher):
28
27
  SEARCH_URL = "https://www.qidian.com/so/{query}.html"
29
28
 
30
29
  @classmethod
31
- def _fetch_html(cls, keyword: str) -> str:
32
- """
33
- Fetch raw HTML from Qidian's search page.
34
-
35
- :param keyword: The search term to query on Qidian.
36
- :return: HTML text of the search results page, or an empty string on fail.
37
- """
30
+ async def _fetch_html(cls, keyword: str) -> str:
38
31
  url = cls.SEARCH_URL.format(query=cls._quote(keyword))
39
32
  try:
40
- response = cls._http_get(url)
41
- return response.text
33
+ async with (await cls._http_get(url)) as resp:
34
+ return await cls._response_to_str(resp)
42
35
  except Exception:
43
36
  logger.error(
44
37
  "Failed to fetch HTML for keyword '%s' from '%s'",
45
38
  keyword,
46
39
  url,
47
- exc_info=True,
48
40
  )
49
41
  return ""
50
42
 
51
43
  @classmethod
52
44
  def _parse_html(cls, html_str: str, limit: int | None = None) -> list[SearchResult]:
53
- """
54
- Parse raw HTML from Qidian search results into list of SearchResult.
55
-
56
- :param html_str: Raw HTML string from Qidian search results page.
57
- :param limit: Maximum number of results to return, or None for all.
58
- :return: List of SearchResult dicts.
59
- """
60
45
  doc = html.fromstring(html_str)
61
46
  items = doc.xpath(
62
47
  '//div[@id="result-list"]//li[contains(@class, "res-book-item")]'
@@ -68,6 +53,8 @@ class QidianSearcher(BaseSearcher):
68
53
  if limit is not None and idx >= limit:
69
54
  break
70
55
  book_id = item.get("data-bid")
56
+ if not book_id:
57
+ continue
71
58
  title_elem = item.xpath('.//h3[@class="book-info-title"]/a')[0]
72
59
  title = title_elem.text_content().strip()
73
60
  author_nodes = item.xpath(
@@ -79,8 +66,13 @@ class QidianSearcher(BaseSearcher):
79
66
  SearchResult(
80
67
  site=cls.site_name,
81
68
  book_id=book_id,
69
+ book_url="",
70
+ cover_url="",
82
71
  title=title,
83
72
  author=author,
73
+ latest_chapter="-",
74
+ update_date="-",
75
+ word_count="-",
84
76
  priority=prio,
85
77
  )
86
78
  )
@@ -0,0 +1,98 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ novel_downloader.core.archived.wanbengo.searcher
4
+ ------------------------------------------------
5
+
6
+ """
7
+
8
+ import logging
9
+
10
+ from lxml import html
11
+ from novel_downloader.core.searchers.base import BaseSearcher
12
+ from novel_downloader.models import SearchResult
13
+
14
+ # from novel_downloader.core.searchers.registry import register_searcher
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
+ # @register_searcher(
20
+ # site_keys=["wanbengo"],
21
+ # )
22
class WanbengoSearcher(BaseSearcher):
    """
    Searcher for wanbengo.com, querying www.sososhu.com with ``site=wbsz``.
    """

    site_name = "wanbengo"
    priority = 30
    BASE_URL = "https://www.wanbengo.com"
    SEARCH_URL = "https://www.sososhu.com/"

    @classmethod
    async def _fetch_html(cls, keyword: str) -> str:
        """
        Fetch the raw HTML of the search results page.

        :param keyword: The search term, sent as the ``q`` query parameter.
        :return: HTML text of the results page, or an empty string on failure.
        """
        params = {
            "q": keyword,
            "site": "wbsz",
        }
        try:
            async with (await cls._http_get(cls.SEARCH_URL, params=params)) as resp:
                return await cls._response_to_str(resp)
        except Exception:
            logger.error(
                "Failed to fetch HTML for keyword '%s' from '%s'",
                keyword,
                cls.SEARCH_URL,
            )
            return ""

    @classmethod
    def _parse_html(cls, html_str: str, limit: int | None = None) -> list[SearchResult]:
        """
        Parse the search results page into ``SearchResult`` entries.

        Rows without a link, or whose link does not yield a book id, are skipped.

        :param html_str: Raw HTML of the search results page.
        :param limit: Maximum number of rows to inspect, or None for all.
        :return: List of parsed search results.
        """
        doc = html.fromstring(html_str)
        rows = doc.xpath(
            "//div[contains(@class,'so_list')]"
            "//div[contains(@class,'hot')]"
            "//div[contains(@class,'item')]"
        )
        results: list[SearchResult] = []

        for idx, row in enumerate(rows):
            if limit is not None and idx >= limit:
                break

            a_nodes = row.xpath(".//dl/dt/a[1]")
            if not a_nodes:
                continue
            a = a_nodes[0]
            href = a.get("href")
            if not href:
                continue

            book_url = cls._restore_url(cls._abs_url(href))
            book_id = cls._book_id_from_url(book_url)
            if not book_id:
                # The link does not point at a wanbengo.com book page.
                continue

            title = a.text_content().strip()
            author = cls._first_str(row.xpath(".//dl/dt/span[1]/text()"))
            cover_url = cls._first_str(
                row.xpath(".//div[contains(@class,'image')]//img/@src")
            )

            # Later rows get a larger priority value than earlier ones
            prio = cls.priority + idx

            results.append(
                SearchResult(
                    site=cls.site_name,
                    book_id=book_id,
                    book_url=book_url,
                    cover_url=cover_url,
                    title=title,
                    author=author,
                    latest_chapter="-",
                    update_date="-",
                    word_count="-",
                    priority=prio,
                )
            )
        return results

    @staticmethod
    def _restore_url(url: str) -> str:
        """
        Rewrite the ``www.wbsz.org`` host in *url* to ``www.wanbengo.com``.

        :param url: URL as found in the search results.
        :return: The URL with the host substituted (unchanged if not present).
        """
        return url.replace("www.wbsz.org", "www.wanbengo.com")

    @staticmethod
    def _book_id_from_url(url: str) -> str:
        """
        Derive a book id from the path that follows ``wanbengo.com``.

        Surrounding slashes are removed and inner ones become ``-``.

        :param url: Book URL on wanbengo.com.
        :return: The book id, or "" if *url* does not contain ``wanbengo.com``
                 or has no path after it.
        """
        _, marker, tail = url.partition("wanbengo.com")
        if not marker:
            # Not a wanbengo.com URL: do not turn the whole URL into an id.
            return ""
        return tail.strip("/").replace("/", "-")