novel-downloader 1.4.5__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (276)
  1. novel_downloader/__init__.py +1 -1
  2. novel_downloader/cli/__init__.py +2 -4
  3. novel_downloader/cli/clean.py +21 -88
  4. novel_downloader/cli/config.py +27 -104
  5. novel_downloader/cli/download.py +78 -66
  6. novel_downloader/cli/export.py +20 -21
  7. novel_downloader/cli/main.py +3 -1
  8. novel_downloader/cli/search.py +120 -0
  9. novel_downloader/cli/ui.py +156 -0
  10. novel_downloader/config/__init__.py +10 -14
  11. novel_downloader/config/adapter.py +195 -99
  12. novel_downloader/config/{loader.py → file_io.py} +53 -27
  13. novel_downloader/core/__init__.py +14 -13
  14. novel_downloader/core/archived/deqixs/fetcher.py +115 -0
  15. novel_downloader/core/archived/deqixs/parser.py +132 -0
  16. novel_downloader/core/archived/deqixs/searcher.py +89 -0
  17. novel_downloader/core/archived/qidian/searcher.py +79 -0
  18. novel_downloader/core/archived/wanbengo/searcher.py +98 -0
  19. novel_downloader/core/archived/xshbook/searcher.py +93 -0
  20. novel_downloader/core/downloaders/__init__.py +8 -30
  21. novel_downloader/core/downloaders/base.py +182 -30
  22. novel_downloader/core/downloaders/common.py +217 -384
  23. novel_downloader/core/downloaders/qianbi.py +332 -4
  24. novel_downloader/core/downloaders/qidian.py +250 -290
  25. novel_downloader/core/downloaders/registry.py +69 -0
  26. novel_downloader/core/downloaders/signals.py +46 -0
  27. novel_downloader/core/exporters/__init__.py +8 -26
  28. novel_downloader/core/exporters/base.py +107 -31
  29. novel_downloader/core/exporters/common/__init__.py +3 -4
  30. novel_downloader/core/exporters/common/epub.py +92 -171
  31. novel_downloader/core/exporters/common/main_exporter.py +14 -67
  32. novel_downloader/core/exporters/common/txt.py +90 -86
  33. novel_downloader/core/exporters/epub_util.py +184 -1327
  34. novel_downloader/core/exporters/linovelib/__init__.py +3 -2
  35. novel_downloader/core/exporters/linovelib/epub.py +165 -222
  36. novel_downloader/core/exporters/linovelib/main_exporter.py +10 -71
  37. novel_downloader/core/exporters/linovelib/txt.py +76 -66
  38. novel_downloader/core/exporters/qidian.py +15 -11
  39. novel_downloader/core/exporters/registry.py +55 -0
  40. novel_downloader/core/exporters/txt_util.py +67 -0
  41. novel_downloader/core/fetchers/__init__.py +57 -56
  42. novel_downloader/core/fetchers/aaatxt.py +83 -0
  43. novel_downloader/core/fetchers/{biquge/session.py → b520.py} +10 -10
  44. novel_downloader/core/fetchers/{base/session.py → base.py} +63 -47
  45. novel_downloader/core/fetchers/biquyuedu.py +83 -0
  46. novel_downloader/core/fetchers/dxmwx.py +110 -0
  47. novel_downloader/core/fetchers/eightnovel.py +139 -0
  48. novel_downloader/core/fetchers/{esjzone/session.py → esjzone.py} +23 -11
  49. novel_downloader/core/fetchers/guidaye.py +85 -0
  50. novel_downloader/core/fetchers/hetushu.py +92 -0
  51. novel_downloader/core/fetchers/{qianbi/browser.py → i25zw.py} +22 -26
  52. novel_downloader/core/fetchers/ixdzs8.py +113 -0
  53. novel_downloader/core/fetchers/jpxs123.py +101 -0
  54. novel_downloader/core/fetchers/{biquge/browser.py → lewenn.py} +15 -15
  55. novel_downloader/core/fetchers/{linovelib/session.py → linovelib.py} +16 -12
  56. novel_downloader/core/fetchers/piaotia.py +105 -0
  57. novel_downloader/core/fetchers/qbtr.py +101 -0
  58. novel_downloader/core/fetchers/{qianbi/session.py → qianbi.py} +9 -9
  59. novel_downloader/core/fetchers/{qidian/session.py → qidian.py} +55 -40
  60. novel_downloader/core/fetchers/quanben5.py +92 -0
  61. novel_downloader/core/fetchers/{base/rate_limiter.py → rate_limiter.py} +2 -2
  62. novel_downloader/core/fetchers/registry.py +60 -0
  63. novel_downloader/core/fetchers/{sfacg/session.py → sfacg.py} +11 -9
  64. novel_downloader/core/fetchers/shencou.py +106 -0
  65. novel_downloader/core/fetchers/{common/browser.py → shuhaige.py} +24 -19
  66. novel_downloader/core/fetchers/tongrenquan.py +84 -0
  67. novel_downloader/core/fetchers/ttkan.py +95 -0
  68. novel_downloader/core/fetchers/{common/session.py → wanbengo.py} +21 -17
  69. novel_downloader/core/fetchers/xiaoshuowu.py +106 -0
  70. novel_downloader/core/fetchers/xiguashuwu.py +177 -0
  71. novel_downloader/core/fetchers/xs63b.py +171 -0
  72. novel_downloader/core/fetchers/xshbook.py +85 -0
  73. novel_downloader/core/fetchers/{yamibo/session.py → yamibo.py} +23 -11
  74. novel_downloader/core/fetchers/yibige.py +114 -0
  75. novel_downloader/core/interfaces/__init__.py +8 -14
  76. novel_downloader/core/interfaces/downloader.py +6 -2
  77. novel_downloader/core/interfaces/exporter.py +7 -7
  78. novel_downloader/core/interfaces/fetcher.py +4 -17
  79. novel_downloader/core/interfaces/parser.py +5 -6
  80. novel_downloader/core/interfaces/searcher.py +26 -0
  81. novel_downloader/core/parsers/__init__.py +58 -22
  82. novel_downloader/core/parsers/aaatxt.py +132 -0
  83. novel_downloader/core/parsers/b520.py +116 -0
  84. novel_downloader/core/parsers/base.py +63 -12
  85. novel_downloader/core/parsers/biquyuedu.py +133 -0
  86. novel_downloader/core/parsers/dxmwx.py +162 -0
  87. novel_downloader/core/parsers/eightnovel.py +224 -0
  88. novel_downloader/core/parsers/{esjzone/main_parser.py → esjzone.py} +67 -67
  89. novel_downloader/core/parsers/guidaye.py +128 -0
  90. novel_downloader/core/parsers/hetushu.py +139 -0
  91. novel_downloader/core/parsers/i25zw.py +137 -0
  92. novel_downloader/core/parsers/ixdzs8.py +186 -0
  93. novel_downloader/core/parsers/jpxs123.py +137 -0
  94. novel_downloader/core/parsers/lewenn.py +142 -0
  95. novel_downloader/core/parsers/{linovelib/main_parser.py → linovelib.py} +54 -65
  96. novel_downloader/core/parsers/piaotia.py +189 -0
  97. novel_downloader/core/parsers/qbtr.py +136 -0
  98. novel_downloader/core/parsers/{qianbi/main_parser.py → qianbi.py} +54 -51
  99. novel_downloader/core/parsers/qidian/__init__.py +2 -2
  100. novel_downloader/core/parsers/qidian/book_info_parser.py +58 -59
  101. novel_downloader/core/parsers/qidian/chapter_encrypted.py +290 -346
  102. novel_downloader/core/parsers/qidian/chapter_normal.py +25 -56
  103. novel_downloader/core/parsers/qidian/main_parser.py +19 -57
  104. novel_downloader/core/parsers/qidian/utils/__init__.py +12 -11
  105. novel_downloader/core/parsers/qidian/utils/decryptor_fetcher.py +6 -7
  106. novel_downloader/core/parsers/qidian/utils/fontmap_recover.py +143 -0
  107. novel_downloader/core/parsers/qidian/utils/helpers.py +0 -4
  108. novel_downloader/core/parsers/qidian/utils/node_decryptor.py +2 -2
  109. novel_downloader/core/parsers/quanben5.py +103 -0
  110. novel_downloader/core/parsers/registry.py +57 -0
  111. novel_downloader/core/parsers/{sfacg/main_parser.py → sfacg.py} +46 -48
  112. novel_downloader/core/parsers/shencou.py +215 -0
  113. novel_downloader/core/parsers/shuhaige.py +111 -0
  114. novel_downloader/core/parsers/tongrenquan.py +116 -0
  115. novel_downloader/core/parsers/ttkan.py +132 -0
  116. novel_downloader/core/parsers/wanbengo.py +191 -0
  117. novel_downloader/core/parsers/xiaoshuowu.py +173 -0
  118. novel_downloader/core/parsers/xiguashuwu.py +435 -0
  119. novel_downloader/core/parsers/xs63b.py +161 -0
  120. novel_downloader/core/parsers/xshbook.py +134 -0
  121. novel_downloader/core/parsers/yamibo.py +155 -0
  122. novel_downloader/core/parsers/yibige.py +166 -0
  123. novel_downloader/core/searchers/__init__.py +51 -0
  124. novel_downloader/core/searchers/aaatxt.py +107 -0
  125. novel_downloader/core/searchers/b520.py +84 -0
  126. novel_downloader/core/searchers/base.py +168 -0
  127. novel_downloader/core/searchers/dxmwx.py +105 -0
  128. novel_downloader/core/searchers/eightnovel.py +84 -0
  129. novel_downloader/core/searchers/esjzone.py +102 -0
  130. novel_downloader/core/searchers/hetushu.py +92 -0
  131. novel_downloader/core/searchers/i25zw.py +93 -0
  132. novel_downloader/core/searchers/ixdzs8.py +107 -0
  133. novel_downloader/core/searchers/jpxs123.py +107 -0
  134. novel_downloader/core/searchers/piaotia.py +100 -0
  135. novel_downloader/core/searchers/qbtr.py +106 -0
  136. novel_downloader/core/searchers/qianbi.py +165 -0
  137. novel_downloader/core/searchers/quanben5.py +144 -0
  138. novel_downloader/core/searchers/registry.py +79 -0
  139. novel_downloader/core/searchers/shuhaige.py +124 -0
  140. novel_downloader/core/searchers/tongrenquan.py +110 -0
  141. novel_downloader/core/searchers/ttkan.py +92 -0
  142. novel_downloader/core/searchers/xiaoshuowu.py +122 -0
  143. novel_downloader/core/searchers/xiguashuwu.py +95 -0
  144. novel_downloader/core/searchers/xs63b.py +104 -0
  145. novel_downloader/locales/en.json +36 -79
  146. novel_downloader/locales/zh.json +37 -80
  147. novel_downloader/models/__init__.py +23 -50
  148. novel_downloader/models/book.py +44 -0
  149. novel_downloader/models/config.py +16 -43
  150. novel_downloader/models/login.py +1 -1
  151. novel_downloader/models/search.py +21 -0
  152. novel_downloader/resources/config/settings.toml +39 -74
  153. novel_downloader/resources/css_styles/intro.css +83 -0
  154. novel_downloader/resources/css_styles/main.css +30 -89
  155. novel_downloader/resources/json/xiguashuwu.json +718 -0
  156. novel_downloader/utils/__init__.py +43 -0
  157. novel_downloader/utils/chapter_storage.py +247 -226
  158. novel_downloader/utils/constants.py +5 -50
  159. novel_downloader/utils/cookies.py +6 -18
  160. novel_downloader/utils/crypto_utils/__init__.py +13 -0
  161. novel_downloader/utils/crypto_utils/aes_util.py +90 -0
  162. novel_downloader/utils/crypto_utils/aes_v1.py +619 -0
  163. novel_downloader/utils/crypto_utils/aes_v2.py +1143 -0
  164. novel_downloader/utils/{crypto_utils.py → crypto_utils/rc4.py} +3 -10
  165. novel_downloader/utils/epub/__init__.py +34 -0
  166. novel_downloader/utils/epub/builder.py +377 -0
  167. novel_downloader/utils/epub/constants.py +118 -0
  168. novel_downloader/utils/epub/documents.py +297 -0
  169. novel_downloader/utils/epub/models.py +120 -0
  170. novel_downloader/utils/epub/utils.py +179 -0
  171. novel_downloader/utils/file_utils/__init__.py +5 -30
  172. novel_downloader/utils/file_utils/io.py +9 -150
  173. novel_downloader/utils/file_utils/normalize.py +2 -2
  174. novel_downloader/utils/file_utils/sanitize.py +2 -7
  175. novel_downloader/utils/fontocr.py +207 -0
  176. novel_downloader/utils/i18n.py +2 -0
  177. novel_downloader/utils/logger.py +10 -16
  178. novel_downloader/utils/network.py +111 -252
  179. novel_downloader/utils/state.py +5 -90
  180. novel_downloader/utils/text_utils/__init__.py +16 -21
  181. novel_downloader/utils/text_utils/diff_display.py +6 -9
  182. novel_downloader/utils/text_utils/numeric_conversion.py +253 -0
  183. novel_downloader/utils/text_utils/text_cleaner.py +179 -0
  184. novel_downloader/utils/text_utils/truncate_utils.py +62 -0
  185. novel_downloader/utils/time_utils/__init__.py +6 -12
  186. novel_downloader/utils/time_utils/datetime_utils.py +23 -33
  187. novel_downloader/utils/time_utils/sleep_utils.py +5 -10
  188. novel_downloader/web/__init__.py +13 -0
  189. novel_downloader/web/components/__init__.py +11 -0
  190. novel_downloader/web/components/navigation.py +35 -0
  191. novel_downloader/web/main.py +66 -0
  192. novel_downloader/web/pages/__init__.py +17 -0
  193. novel_downloader/web/pages/download.py +78 -0
  194. novel_downloader/web/pages/progress.py +147 -0
  195. novel_downloader/web/pages/search.py +329 -0
  196. novel_downloader/web/services/__init__.py +17 -0
  197. novel_downloader/web/services/client_dialog.py +164 -0
  198. novel_downloader/web/services/cred_broker.py +113 -0
  199. novel_downloader/web/services/cred_models.py +35 -0
  200. novel_downloader/web/services/task_manager.py +264 -0
  201. novel_downloader-2.0.0.dist-info/METADATA +171 -0
  202. novel_downloader-2.0.0.dist-info/RECORD +210 -0
  203. {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/entry_points.txt +1 -1
  204. novel_downloader/config/site_rules.py +0 -94
  205. novel_downloader/core/downloaders/biquge.py +0 -25
  206. novel_downloader/core/downloaders/esjzone.py +0 -25
  207. novel_downloader/core/downloaders/linovelib.py +0 -25
  208. novel_downloader/core/downloaders/sfacg.py +0 -25
  209. novel_downloader/core/downloaders/yamibo.py +0 -25
  210. novel_downloader/core/exporters/biquge.py +0 -25
  211. novel_downloader/core/exporters/esjzone.py +0 -25
  212. novel_downloader/core/exporters/qianbi.py +0 -25
  213. novel_downloader/core/exporters/sfacg.py +0 -25
  214. novel_downloader/core/exporters/yamibo.py +0 -25
  215. novel_downloader/core/factory/__init__.py +0 -20
  216. novel_downloader/core/factory/downloader.py +0 -73
  217. novel_downloader/core/factory/exporter.py +0 -58
  218. novel_downloader/core/factory/fetcher.py +0 -96
  219. novel_downloader/core/factory/parser.py +0 -86
  220. novel_downloader/core/fetchers/base/__init__.py +0 -14
  221. novel_downloader/core/fetchers/base/browser.py +0 -403
  222. novel_downloader/core/fetchers/biquge/__init__.py +0 -14
  223. novel_downloader/core/fetchers/common/__init__.py +0 -14
  224. novel_downloader/core/fetchers/esjzone/__init__.py +0 -14
  225. novel_downloader/core/fetchers/esjzone/browser.py +0 -204
  226. novel_downloader/core/fetchers/linovelib/__init__.py +0 -14
  227. novel_downloader/core/fetchers/linovelib/browser.py +0 -193
  228. novel_downloader/core/fetchers/qianbi/__init__.py +0 -14
  229. novel_downloader/core/fetchers/qidian/__init__.py +0 -14
  230. novel_downloader/core/fetchers/qidian/browser.py +0 -318
  231. novel_downloader/core/fetchers/sfacg/__init__.py +0 -14
  232. novel_downloader/core/fetchers/sfacg/browser.py +0 -189
  233. novel_downloader/core/fetchers/yamibo/__init__.py +0 -14
  234. novel_downloader/core/fetchers/yamibo/browser.py +0 -229
  235. novel_downloader/core/parsers/biquge/__init__.py +0 -10
  236. novel_downloader/core/parsers/biquge/main_parser.py +0 -134
  237. novel_downloader/core/parsers/common/__init__.py +0 -13
  238. novel_downloader/core/parsers/common/helper.py +0 -323
  239. novel_downloader/core/parsers/common/main_parser.py +0 -106
  240. novel_downloader/core/parsers/esjzone/__init__.py +0 -10
  241. novel_downloader/core/parsers/linovelib/__init__.py +0 -10
  242. novel_downloader/core/parsers/qianbi/__init__.py +0 -10
  243. novel_downloader/core/parsers/sfacg/__init__.py +0 -10
  244. novel_downloader/core/parsers/yamibo/__init__.py +0 -10
  245. novel_downloader/core/parsers/yamibo/main_parser.py +0 -194
  246. novel_downloader/models/browser.py +0 -21
  247. novel_downloader/models/chapter.py +0 -25
  248. novel_downloader/models/site_rules.py +0 -99
  249. novel_downloader/models/tasks.py +0 -33
  250. novel_downloader/models/types.py +0 -15
  251. novel_downloader/resources/css_styles/volume-intro.css +0 -56
  252. novel_downloader/resources/json/replace_word_map.json +0 -4
  253. novel_downloader/resources/text/blacklist.txt +0 -22
  254. novel_downloader/tui/__init__.py +0 -7
  255. novel_downloader/tui/app.py +0 -32
  256. novel_downloader/tui/main.py +0 -17
  257. novel_downloader/tui/screens/__init__.py +0 -14
  258. novel_downloader/tui/screens/home.py +0 -198
  259. novel_downloader/tui/screens/login.py +0 -74
  260. novel_downloader/tui/styles/home_layout.tcss +0 -79
  261. novel_downloader/tui/widgets/richlog_handler.py +0 -24
  262. novel_downloader/utils/cache.py +0 -24
  263. novel_downloader/utils/fontocr/__init__.py +0 -22
  264. novel_downloader/utils/fontocr/model_loader.py +0 -69
  265. novel_downloader/utils/fontocr/ocr_v1.py +0 -303
  266. novel_downloader/utils/fontocr/ocr_v2.py +0 -752
  267. novel_downloader/utils/hash_store.py +0 -279
  268. novel_downloader/utils/hash_utils.py +0 -103
  269. novel_downloader/utils/text_utils/chapter_formatting.py +0 -46
  270. novel_downloader/utils/text_utils/font_mapping.py +0 -28
  271. novel_downloader/utils/text_utils/text_cleaning.py +0 -107
  272. novel_downloader-1.4.5.dist-info/METADATA +0 -196
  273. novel_downloader-1.4.5.dist-info/RECORD +0 -165
  274. {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/WHEEL +0 -0
  275. {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/licenses/LICENSE +0 -0
  276. {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/top_level.txt +0 -0
@@ -3,10 +3,10 @@
3
3
  novel_downloader.core.interfaces.downloader
4
4
  -------------------------------------------
5
5
 
6
- This module defines the DownloaderProtocol, a structural interface
7
- that outlines the expected behavior of any downloader class.
6
+ Protocol defining the interface for asynchronous book downloaders.
8
7
  """
9
8
 
9
+ import asyncio
10
10
  from collections.abc import Awaitable, Callable
11
11
  from typing import Any, Protocol, runtime_checkable
12
12
 
@@ -27,6 +27,7 @@ class DownloaderProtocol(Protocol):
27
27
  book: BookConfig,
28
28
  *,
29
29
  progress_hook: Callable[[int, int], Awaitable[None]] | None = None,
30
+ cancel_event: asyncio.Event | None = None,
30
31
  **kwargs: Any,
31
32
  ) -> None:
32
33
  """
@@ -35,6 +36,7 @@ class DownloaderProtocol(Protocol):
35
36
  :param book: BookConfig with at least 'book_id'.
36
37
  :param progress_hook: Optional async callback after each chapter.
37
38
  args: completed_count, total_count.
39
+ :param cancel_event: Optional asyncio.Event to allow cancellation.
38
40
  """
39
41
  ...
40
42
 
@@ -43,6 +45,7 @@ class DownloaderProtocol(Protocol):
43
45
  books: list[BookConfig],
44
46
  *,
45
47
  progress_hook: Callable[[int, int], Awaitable[None]] | None = None,
48
+ cancel_event: asyncio.Event | None = None,
46
49
  **kwargs: Any,
47
50
  ) -> None:
48
51
  """
@@ -51,5 +54,6 @@ class DownloaderProtocol(Protocol):
51
54
  :param books: List of BookConfig entries.
52
55
  :param progress_hook: Optional async callback after each chapter.
53
56
  args: completed_count, total_count.
57
+ :param cancel_event: Optional asyncio.Event to allow cancellation.
54
58
  """
55
59
  ...
@@ -3,10 +3,10 @@
3
3
  novel_downloader.core.interfaces.exporter
4
4
  -----------------------------------------
5
5
 
6
- Defines the ExporterProtocol interface for persisting completed books in
7
- TXT, EPUB, Markdown, and PDF formats.
6
+ Protocol defining the interface for exporting books to text, EPUB, and other formats.
8
7
  """
9
8
 
9
+ from pathlib import Path
10
10
  from typing import Protocol, runtime_checkable
11
11
 
12
12
 
@@ -18,7 +18,7 @@ class ExporterProtocol(Protocol):
18
18
  It may also optionally implement an EPUB (or other format) exporter.
19
19
  """
20
20
 
21
- def export(self, book_id: str) -> None:
21
+ def export(self, book_id: str) -> dict[str, Path]:
22
22
  """
23
23
  Export the book in the formats specified in config.
24
24
  If a method is not implemented or fails, log the error and continue.
@@ -27,7 +27,7 @@ class ExporterProtocol(Protocol):
27
27
  """
28
28
  ...
29
29
 
30
- def export_as_txt(self, book_id: str) -> None:
30
+ def export_as_txt(self, book_id: str) -> Path | None:
31
31
  """
32
32
  Persist the assembled book as a .txt file.
33
33
 
@@ -35,7 +35,7 @@ class ExporterProtocol(Protocol):
35
35
  """
36
36
  ...
37
37
 
38
- def export_as_epub(self, book_id: str) -> None:
38
+ def export_as_epub(self, book_id: str) -> Path | None:
39
39
  """
40
40
  Optional: Persist the assembled book as an .epub file.
41
41
 
@@ -43,7 +43,7 @@ class ExporterProtocol(Protocol):
43
43
  """
44
44
  ...
45
45
 
46
- def export_as_md(self, book_id: str) -> None:
46
+ def export_as_md(self, book_id: str) -> Path | None:
47
47
  """
48
48
  Optional: Persist the assembled book as a Markdown (.md) file.
49
49
 
@@ -51,7 +51,7 @@ class ExporterProtocol(Protocol):
51
51
  """
52
52
  ...
53
53
 
54
- def export_as_pdf(self, book_id: str) -> None:
54
+ def export_as_pdf(self, book_id: str) -> Path | None:
55
55
  """
56
56
  Optional: Persist the assembled book as a PDF file.
57
57
 
@@ -3,8 +3,7 @@
3
3
  novel_downloader.core.interfaces.fetcher
4
4
  ----------------------------------------
5
5
 
6
- Defines the Async FetcherProtocol interface for fetching raw HTML or JSON
7
- for book info pages, individual chapters, managing request lifecycle
6
+ Protocol defining the interface for asynchronous fetching, login, and session management
8
7
  """
9
8
 
10
9
  import types
@@ -32,6 +31,7 @@ class FetcherProtocol(Protocol):
32
31
  ) -> bool:
33
32
  """
34
33
  Attempt to log in asynchronously.
34
+
35
35
  :returns: True if login succeeded.
36
36
  """
37
37
  ...
@@ -45,7 +45,7 @@ class FetcherProtocol(Protocol):
45
45
  Fetch the raw HTML (or JSON) of the book info page asynchronously.
46
46
 
47
47
  :param book_id: The book identifier.
48
- :return: The page content as a string.
48
+ :return: The page content as string list.
49
49
  """
50
50
  ...
51
51
 
@@ -60,7 +60,7 @@ class FetcherProtocol(Protocol):
60
60
 
61
61
  :param book_id: The book identifier.
62
62
  :param chapter_id: The chapter identifier.
63
- :return: The chapter content as string.
63
+ :return: The page content as string list.
64
64
  """
65
65
  ...
66
66
 
@@ -126,19 +126,6 @@ class FetcherProtocol(Protocol):
126
126
  """
127
127
  ...
128
128
 
129
- async def set_interactive_mode(self, enable: bool) -> bool:
130
- """
131
- Enable or disable interactive mode for manual login.
132
-
133
- :param enable: True to enable, False to disable interactive mode.
134
- :return: True if operation or login check succeeded, False otherwise.
135
- """
136
- ...
137
-
138
- @property
139
- def requester_type(self) -> str:
140
- ...
141
-
142
129
  @property
143
130
  def is_logged_in(self) -> bool:
144
131
  """
@@ -3,13 +3,12 @@
3
3
  novel_downloader.core.interfaces.parser
4
4
  ---------------------------------------
5
5
 
6
- Defines the ParserProtocol interface for extracting book metadata,
7
- parsing individual chapter content, and setting parser context via book_id.
6
+ Protocol defining the interface for parsing book metadata and chapter content.
8
7
  """
9
8
 
10
9
  from typing import Any, Protocol, runtime_checkable
11
10
 
12
- from novel_downloader.models import ChapterDict
11
+ from novel_downloader.models import BookInfoDict, ChapterDict
13
12
 
14
13
 
15
14
  @runtime_checkable
@@ -24,7 +23,7 @@ class ParserProtocol(Protocol):
24
23
  self,
25
24
  html_list: list[str],
26
25
  **kwargs: Any,
27
- ) -> dict[str, Any]:
26
+ ) -> BookInfoDict | None:
28
27
  """
29
28
  Parse and return a dictionary of book information from the raw HTML.
30
29
 
@@ -40,10 +39,10 @@ class ParserProtocol(Protocol):
40
39
  **kwargs: Any,
41
40
  ) -> ChapterDict | None:
42
41
  """
43
- Parse and return the text content of one chapter.
42
+ Parse chapter page and extract the content of one chapter.
44
43
 
45
44
  :param html_list: The HTML list of the chapter pages.
46
45
  :param chapter_id: Identifier of the chapter being parsed.
47
- :return: The chapter's text.
46
+ :return: The chapter's data.
48
47
  """
49
48
  ...
@@ -0,0 +1,26 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ novel_downloader.core.interfaces.searcher
4
+ -----------------------------------------
5
+
6
+ Protocol defining the interface for site search implementations.
7
+ """
8
+
9
+ from typing import Protocol
10
+
11
+ import aiohttp
12
+
13
+ from novel_downloader.models import SearchResult
14
+
15
+
16
class SearcherProtocol(Protocol):
    """
    Structural interface for site search implementations.

    Both operations are declared as classmethods, so a conforming searcher
    is configured and queried at class level rather than per instance.
    """

    # presumably the identifier of the site this searcher targets
    # -- confirm against the concrete searchers
    site_name: str

    @classmethod
    def configure(cls, session: aiohttp.ClientSession) -> None:
        """
        Configure the shared session.

        :param session: The aiohttp client session to use.
        """
        ...

    @classmethod
    async def search(cls, keyword: str, limit: int | None = None) -> list[SearchResult]:
        """
        Search for ``keyword`` and return the matching results.

        :param keyword: The search term.
        :param limit: Optional limit on the results; None is the default.
                      NOTE(review): presumably None means "no cap" -- confirm
                      against the implementations.
        :return: A list of SearchResult entries.
        """
        ...
@@ -3,36 +3,72 @@
3
3
  novel_downloader.core.parsers
4
4
  -----------------------------
5
5
 
6
- This package defines all site-specific parsing modules
7
- for the novel_downloader framework.
8
-
9
- Modules:
10
- - biquge (笔趣阁)
11
- - esjzone (ESJ Zone)
12
- - linovelib (哔哩轻小说)
13
- - qianbi (铅笔小说)
14
- - qidian (起点中文网)
15
- - sfacg (SF轻小说)
16
- - yamibo (百合会)
17
- - common (通用架构)
6
+ Parser implementations for extracting book metadata and
7
+ chapter content from various sources
18
8
  """
19
9
 
20
- from .biquge import BiqugeParser
21
- from .common import CommonParser
22
- from .esjzone import EsjzoneParser
23
- from .linovelib import LinovelibParser
24
- from .qianbi import QianbiParser
25
- from .qidian import QidianParser
26
- from .sfacg import SfacgParser
27
- from .yamibo import YamiboParser
28
-
29
10
  __all__ = [
11
+ "get_parser",
12
+ "AaatxtParser",
30
13
  "BiqugeParser",
31
- "CommonParser",
14
+ "BiquyueduParser",
15
+ "DxmwxParser",
16
+ "EightnovelParser",
32
17
  "EsjzoneParser",
18
+ "GuidayeParser",
19
+ "HetushuParser",
20
+ "I25zwParser",
21
+ "Ixdzs8Parser",
22
+ "Jpxs123Parser",
23
+ "LewennParser",
33
24
  "LinovelibParser",
25
+ "PiaotiaParser",
26
+ "QbtrParser",
34
27
  "QianbiParser",
35
28
  "QidianParser",
29
+ "Quanben5Parser",
36
30
  "SfacgParser",
31
+ "ShencouParser",
32
+ "ShuhaigeParser",
33
+ "TongrenquanParser",
34
+ "TtkanParser",
35
+ "WanbengoParser",
36
+ "XiaoshuowuParser",
37
+ "XiguashuwuParser",
38
+ "Xs63bParser",
39
+ "XshbookParser",
37
40
  "YamiboParser",
41
+ "YibigeParser",
38
42
  ]
43
+
44
+ from .aaatxt import AaatxtParser
45
+ from .b520 import BiqugeParser
46
+ from .biquyuedu import BiquyueduParser
47
+ from .dxmwx import DxmwxParser
48
+ from .eightnovel import EightnovelParser
49
+ from .esjzone import EsjzoneParser
50
+ from .guidaye import GuidayeParser
51
+ from .hetushu import HetushuParser
52
+ from .i25zw import I25zwParser
53
+ from .ixdzs8 import Ixdzs8Parser
54
+ from .jpxs123 import Jpxs123Parser
55
+ from .lewenn import LewennParser
56
+ from .linovelib import LinovelibParser
57
+ from .piaotia import PiaotiaParser
58
+ from .qbtr import QbtrParser
59
+ from .qianbi import QianbiParser
60
+ from .qidian import QidianParser
61
+ from .quanben5 import Quanben5Parser
62
+ from .registry import get_parser
63
+ from .sfacg import SfacgParser
64
+ from .shencou import ShencouParser
65
+ from .shuhaige import ShuhaigeParser
66
+ from .tongrenquan import TongrenquanParser
67
+ from .ttkan import TtkanParser
68
+ from .wanbengo import WanbengoParser
69
+ from .xiaoshuowu import XiaoshuowuParser
70
+ from .xiguashuwu import XiguashuwuParser
71
+ from .xs63b import Xs63bParser
72
+ from .xshbook import XshbookParser
73
+ from .yamibo import YamiboParser
74
+ from .yibige import YibigeParser
@@ -0,0 +1,132 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ novel_downloader.core.parsers.aaatxt
4
+ ------------------------------------
5
+
6
+ """
7
+
8
+ from typing import Any
9
+
10
+ from lxml import html
11
+
12
+ from novel_downloader.core.parsers.base import BaseParser
13
+ from novel_downloader.core.parsers.registry import register_parser
14
+ from novel_downloader.models import (
15
+ BookInfoDict,
16
+ ChapterDict,
17
+ ChapterInfoDict,
18
+ VolumeInfoDict,
19
+ )
20
+
21
+
22
@register_parser(
    site_keys=["aaatxt"],
)
class AaatxtParser(BaseParser):
    """
    Parser for 3A电子书 book-info and chapter pages.

    Decorated with ``register_parser`` using the site key ``"aaatxt"``.
    """

    # Phrases marking reader hints / advertisements. Presumably consumed by
    # the base class's ad filtering (``self._is_ad_line``) -- that helper is
    # defined outside this class.
    ADS: set[str] = {
        "按键盘上方向键",
        "未阅读完",
        "加入书签",
        "已便下次继续阅读",
        "更多原创手机电子书",
        "免费TXT小说下载",
    }

    def parse_book_info(
        self,
        html_list: list[str],
        **kwargs: Any,
    ) -> BookInfoDict | None:
        """
        Extract book metadata and the chapter list from the info page.

        Only the first entry of ``html_list`` is parsed.

        :param html_list: Raw HTML pages; the first one is the book info page.
        :return: The book info dictionary, or None when ``html_list`` is empty.
        """
        if not html_list:
            return None

        doc = html.fromstring(html_list[0])
        first = self._first_str

        # Build the chapter list first; every anchor under the "ml" list
        # becomes one entry, with the id taken from the last URL segment.
        chapter_entries: list[ChapterInfoDict] = []
        for link in doc.xpath("//div[@id='ml']//ol/li/a"):
            href = link.get("href", "").strip()
            chapter_entries.append(
                {
                    "title": link.text_content().strip(),
                    "url": href,
                    "chapterId": href.rsplit("/", 1)[-1].replace(".html", ""),
                }
            )
        volumes: list[VolumeInfoDict] = [
            {"volume_name": "正文", "chapters": chapter_entries}
        ]

        summary_nodes = doc.xpath("//div[@id='jj']//p")
        if summary_nodes:
            summary = summary_nodes[0].text_content().strip()
        else:
            summary = ""

        # The site category doubles as the single tag, when present.
        genre = first(doc.xpath("//div[@id='submenu']/h2/a[@class='lan']/text()"))

        return {
            "book_name": first(doc.xpath("//div[@class='xiazai']/h1/text()")),
            "author": first(doc.xpath("//span[@id='author']/a/text()")),
            "cover_url": first(
                doc.xpath("//div[@id='txtbook']//div[@class='fm']//img/@src")
            ),
            "update_time": first(
                doc.xpath(
                    "//div[@id='txtbook']//li[contains(text(), '上传日期')]/text()"
                ),
                replaces=[("上传日期:", "")],
            ),
            "tags": [genre] if genre else [],
            "summary": summary,
            "volumes": volumes,
            "extra": {
                "download_url": first(
                    doc.xpath("//div[@id='down']//li[@class='bd']//a/@href")
                )
            },
        }

    def parse_chapter(
        self,
        html_list: list[str],
        chapter_id: str,
        **kwargs: Any,
    ) -> ChapterDict | None:
        """
        Extract the title and text of one chapter page.

        Only the first entry of ``html_list`` is parsed.

        :param html_list: Raw HTML pages of the chapter.
        :param chapter_id: Identifier stored in the result's ``id`` field.
        :return: The chapter dictionary, or None when ``html_list`` is empty
                 or no text lines survive filtering.
        """
        if not html_list:
            return None

        doc = html.fromstring(html_list[0])

        # Keep only what follows the first "-" in the heading; a heading
        # without "-" is used whole.
        heading = self._first_str(doc.xpath("//div[@id='content']//h1/text()"))
        _, dash, after_dash = heading.partition("-")
        title = (after_dash if dash else heading).strip()

        # Drop blank lines and lines flagged as ads; the ad check receives
        # the unstripped text node, the stripped form is what gets kept.
        kept_lines = [
            stripped
            for raw in doc.xpath("//div[@class='chapter']//text()")
            if (stripped := raw.strip()) and not self._is_ad_line(raw)
        ]
        if not kept_lines:
            return None

        return {
            "id": chapter_id,
            "title": title,
            "content": "\n".join(kept_lines),
            "extra": {"site": "aaatxt"},
        }
@@ -0,0 +1,116 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ novel_downloader.core.parsers.b520
4
+ ----------------------------------
5
+
6
+ """
7
+
8
+ from typing import Any
9
+
10
+ from lxml import html
11
+
12
+ from novel_downloader.core.parsers.base import BaseParser
13
+ from novel_downloader.core.parsers.registry import register_parser
14
+ from novel_downloader.models import (
15
+ BookInfoDict,
16
+ ChapterDict,
17
+ ChapterInfoDict,
18
+ VolumeInfoDict,
19
+ )
20
+
21
+
22
@register_parser(
    site_keys=["biquge", "bqg", "b520"],
)
class BiqugeParser(BaseParser):
    """
    Parser for 笔趣阁 book-info and chapter pages.

    Decorated with ``register_parser`` using the site keys ``"biquge"``,
    ``"bqg"`` and ``"b520"``.
    """

    def parse_book_info(
        self,
        html_list: list[str],
        **kwargs: Any,
    ) -> BookInfoDict | None:
        """
        Extract book metadata and the chapter list from the info page.

        Only the first entry of ``html_list`` is parsed.

        :param html_list: Raw HTML pages; the first one is the book info page.
        :return: The book info dictionary, or None when ``html_list`` is empty.
        """
        if not html_list:
            return None

        doc = html.fromstring(html_list[0])
        first = self._first_str

        # Chapters are the <dd> links that follow the <dt> containing "正文".
        chapter_entries: list[ChapterInfoDict] = []
        for link in doc.xpath(
            '//div[@id="list"]/dl/dt[contains(., "正文")]/following-sibling::dd/a'
        ):
            href = link.get("href") or ""
            chapter_entries.append(
                {
                    "title": (link.text or "").strip(),
                    "url": href.strip(),
                    # file name of the link without its extension
                    "chapterId": href.rsplit("/", 1)[-1].split(".", 1)[0],
                }
            )
        volumes: list[VolumeInfoDict] = [
            {"volume_name": "正文", "chapters": chapter_entries}
        ]

        intro_nodes = doc.xpath('//div[@id="intro"]')
        if intro_nodes:
            summary = "".join(intro_nodes[0].itertext()).strip()
        else:
            summary = ""

        # The second breadcrumb link is used as the single tag, when present.
        category = first(doc.xpath('//div[@class="con_top"]/a[2]/text()'))

        return {
            "book_name": first(doc.xpath('//div[@id="info"]/h1/text()')),
            "author": first(
                doc.xpath('//div[@id="info"]/p[1]/text()'),
                replaces=[("\xa0", ""), ("作者:", "")],
            ),
            "cover_url": first(doc.xpath('//div[@id="fmimg"]/img/@src')),
            "update_time": first(
                doc.xpath('//div[@id="info"]/p[3]/text()'),
                replaces=[("最后更新:", "")],
            ),
            "tags": [category] if category else [],
            "summary": summary,
            "volumes": volumes,
            "extra": {},
        }

    def parse_chapter(
        self,
        html_list: list[str],
        chapter_id: str,
        **kwargs: Any,
    ) -> ChapterDict | None:
        """
        Extract the title and paragraphs of one chapter page.

        Only the first entry of ``html_list`` is parsed.

        :param html_list: Raw HTML pages of the chapter.
        :param chapter_id: Identifier stored in the result's ``id`` field and
                           used to build a fallback title.
        :return: The chapter dictionary, or None when ``html_list`` is empty,
                 the content container is missing, or the text is blank.
        """
        if not html_list:
            return None

        doc = html.fromstring(html_list[0])

        containers = doc.xpath('//div[@id="content"]')
        if not containers:
            return None

        # Fall back to a generated title when the page heading is empty.
        title = (
            self._first_str(doc.xpath('//div[@class="bookname"]/h1/text()'))
            or f"第 {chapter_id} 章"
        )

        paragraphs = []
        for node in containers[0].xpath(".//p"):
            paragraphs.append("".join(node.itertext()).strip())

        # A trailing paragraph mentioning this domain is discarded.
        # NOTE(review): the domain belongs to a different site than this
        # parser's keys -- confirm it is intentional.
        if paragraphs and "www.shuhaige.net" in paragraphs[-1]:
            del paragraphs[-1]

        content = "\n".join(paragraphs)
        if not content.strip():
            return None

        return {
            "id": chapter_id,
            "title": title,
            "content": content,
            "extra": {"site": "biquge"},
        }
@@ -3,22 +3,17 @@
3
3
  novel_downloader.core.parsers.base
4
4
  ----------------------------------
5
5
 
6
- This module defines the BaseParser abstract class, which implements the
7
- ParserProtocol interface and provides a structured foundation for
8
- site-specific parsers.
9
-
10
- BaseParser manages internal parser state and enforces
11
- a standard parsing interface for:
12
- - Book info pages (e.g. metadata, chapter list)
13
- - Chapter pages (e.g. textual content)
6
+ Abstract base class providing common behavior for site-specific parsers.
14
7
  """
15
8
 
16
9
  import abc
10
+ import re
11
+ from collections.abc import Iterable
17
12
  from pathlib import Path
18
13
  from typing import Any
19
14
 
20
15
  from novel_downloader.core.interfaces import ParserProtocol
21
- from novel_downloader.models import ChapterDict, ParserConfig
16
+ from novel_downloader.models import BookInfoDict, ChapterDict, ParserConfig
22
17
 
23
18
 
24
19
  class BaseParser(ParserProtocol, abc.ABC):
@@ -32,6 +27,10 @@ class BaseParser(ParserProtocol, abc.ABC):
32
27
  Subclasses must implement actual parsing logic for specific sites.
33
28
  """
34
29
 
30
+ ADS: set[str] = set()
31
+
32
+ _SPACE_RE = re.compile(r"\s+")
33
+
35
34
  def __init__(
36
35
  self,
37
36
  config: ParserConfig,
@@ -44,15 +43,19 @@ class BaseParser(ParserProtocol, abc.ABC):
44
43
  self._config = config
45
44
  self._book_id: str | None = None
46
45
 
46
+ self._decode_font: bool = config.decode_font
47
+ self._use_truncation = config.use_truncation
47
48
  self._base_cache_dir = Path(config.cache_dir)
48
49
  self._cache_dir = self._base_cache_dir
49
50
 
51
+ self._ad_pattern = self._compile_ads_pattern()
52
+
50
53
  @abc.abstractmethod
51
54
  def parse_book_info(
52
55
  self,
53
56
  html_list: list[str],
54
57
  **kwargs: Any,
55
- ) -> dict[str, Any]:
58
+ ) -> BookInfoDict | None:
56
59
  """
57
60
  Parse and return a dictionary of book information from the raw HTML.
58
61
 
@@ -69,11 +72,11 @@ class BaseParser(ParserProtocol, abc.ABC):
69
72
  **kwargs: Any,
70
73
  ) -> ChapterDict | None:
71
74
  """
72
- Parse and return the text content of one chapter.
75
+ Parse chapter page and extract the content of one chapter.
73
76
 
74
77
  :param html_list: The HTML list of the chapter pages.
75
78
  :param chapter_id: Identifier of the chapter being parsed.
76
- :return: The chapter's text.
79
+ :return: The chapter's data.
77
80
  """
78
81
  ...
79
82
 
@@ -104,3 +107,51 @@ class BaseParser(ParserProtocol, abc.ABC):
104
107
  book-related folders or states.
105
108
  """
106
109
  pass
110
+
111
+ def _compile_ads_pattern(self) -> re.Pattern[str] | None:
112
+ """
113
+ Compile a regex pattern from the ADS list, or return None if no ADS.
114
+ """
115
+ if not self.ADS:
116
+ return None
117
+
118
+ return re.compile("|".join(map(re.escape, self.ADS)))
119
+
120
def _is_ad_line(self, line: str) -> bool:
    """
    Tell whether a single line contains ad text.

    :param line: Single text line.
    :return: True if the ad pattern is set and found in the line, else False.
    """
    pattern = self._ad_pattern
    if not pattern:
        # No ad pattern was compiled, so nothing counts as an ad.
        return False
    return pattern.search(line) is not None
128
+
129
def _filter_ads(self, lines: Iterable[str]) -> list[str]:
    """
    Return the given lines without those that contain ad text.

    :param lines: Iterable of text lines (e.g. chapter content).
    :return: New list of the lines in which the ad pattern is not found;
        all lines when no ad pattern is set.
    """
    pattern = self._ad_pattern
    if pattern:
        find_ad = pattern.search
        return [text for text in lines if find_ad(text) is None]
    return list(lines)
139
+
140
@classmethod
def _norm_space(cls, s: str, c: str = " ") -> str:
    """
    Collapse every run of whitespace in ``s`` into ``c``.

    Runs are matched with ``cls._SPACE_RE``. Leading and trailing
    whitespace is removed rather than replaced, so the result never
    starts or ends with ``c``, even when ``c`` is not whitespace.

    :param s: Input string to normalize.
    :param c: Replacement text to use for each collapsed whitespace run.
    :return: The normalized string.
    """
    # Trim first so edge whitespace is dropped instead of turned into ``c``.
    # The final strip keeps the result identical to the previous
    # implementation whenever ``c`` is itself whitespace.
    return cls._SPACE_RE.sub(c, s.strip()).strip()
149
+
150
+ @staticmethod
151
+ def _first_str(xs: list[str], replaces: list[tuple[str, str]] | None = None) -> str:
152
+ replaces = replaces or []
153
+ value: str = xs[0].strip() if xs else ""
154
+ for replace in replaces:
155
+ old, new = replace
156
+ value = value.replace(old, new)
157
+ return value