novel-downloader 1.5.0__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (241)
  1. novel_downloader/__init__.py +1 -1
  2. novel_downloader/cli/__init__.py +1 -3
  3. novel_downloader/cli/clean.py +21 -88
  4. novel_downloader/cli/config.py +26 -21
  5. novel_downloader/cli/download.py +77 -64
  6. novel_downloader/cli/export.py +16 -20
  7. novel_downloader/cli/main.py +1 -1
  8. novel_downloader/cli/search.py +62 -65
  9. novel_downloader/cli/ui.py +156 -0
  10. novel_downloader/config/__init__.py +8 -5
  11. novel_downloader/config/adapter.py +65 -105
  12. novel_downloader/config/{loader.py → file_io.py} +53 -26
  13. novel_downloader/core/__init__.py +1 -0
  14. novel_downloader/core/archived/deqixs/fetcher.py +115 -0
  15. novel_downloader/core/archived/deqixs/parser.py +132 -0
  16. novel_downloader/core/archived/deqixs/searcher.py +89 -0
  17. novel_downloader/core/{searchers/qidian.py → archived/qidian/searcher.py} +12 -20
  18. novel_downloader/core/archived/wanbengo/searcher.py +98 -0
  19. novel_downloader/core/archived/xshbook/searcher.py +93 -0
  20. novel_downloader/core/downloaders/__init__.py +3 -24
  21. novel_downloader/core/downloaders/base.py +49 -23
  22. novel_downloader/core/downloaders/common.py +191 -137
  23. novel_downloader/core/downloaders/qianbi.py +187 -146
  24. novel_downloader/core/downloaders/qidian.py +187 -141
  25. novel_downloader/core/downloaders/registry.py +4 -2
  26. novel_downloader/core/downloaders/signals.py +46 -0
  27. novel_downloader/core/exporters/__init__.py +3 -20
  28. novel_downloader/core/exporters/base.py +33 -37
  29. novel_downloader/core/exporters/common/__init__.py +1 -2
  30. novel_downloader/core/exporters/common/epub.py +15 -10
  31. novel_downloader/core/exporters/common/main_exporter.py +19 -12
  32. novel_downloader/core/exporters/common/txt.py +14 -9
  33. novel_downloader/core/exporters/epub_util.py +59 -29
  34. novel_downloader/core/exporters/linovelib/__init__.py +1 -0
  35. novel_downloader/core/exporters/linovelib/epub.py +23 -25
  36. novel_downloader/core/exporters/linovelib/main_exporter.py +8 -12
  37. novel_downloader/core/exporters/linovelib/txt.py +17 -11
  38. novel_downloader/core/exporters/qidian.py +2 -8
  39. novel_downloader/core/exporters/registry.py +4 -2
  40. novel_downloader/core/exporters/txt_util.py +7 -7
  41. novel_downloader/core/fetchers/__init__.py +54 -48
  42. novel_downloader/core/fetchers/aaatxt.py +83 -0
  43. novel_downloader/core/fetchers/{biquge/session.py → b520.py} +6 -11
  44. novel_downloader/core/fetchers/{base/session.py → base.py} +37 -46
  45. novel_downloader/core/fetchers/{biquge/browser.py → biquyuedu.py} +12 -17
  46. novel_downloader/core/fetchers/dxmwx.py +110 -0
  47. novel_downloader/core/fetchers/eightnovel.py +139 -0
  48. novel_downloader/core/fetchers/{esjzone/session.py → esjzone.py} +19 -12
  49. novel_downloader/core/fetchers/guidaye.py +85 -0
  50. novel_downloader/core/fetchers/hetushu.py +92 -0
  51. novel_downloader/core/fetchers/{qianbi/browser.py → i25zw.py} +19 -28
  52. novel_downloader/core/fetchers/ixdzs8.py +113 -0
  53. novel_downloader/core/fetchers/jpxs123.py +101 -0
  54. novel_downloader/core/fetchers/lewenn.py +83 -0
  55. novel_downloader/core/fetchers/{linovelib/session.py → linovelib.py} +12 -13
  56. novel_downloader/core/fetchers/piaotia.py +105 -0
  57. novel_downloader/core/fetchers/qbtr.py +101 -0
  58. novel_downloader/core/fetchers/{qianbi/session.py → qianbi.py} +5 -10
  59. novel_downloader/core/fetchers/{qidian/session.py → qidian.py} +46 -39
  60. novel_downloader/core/fetchers/quanben5.py +92 -0
  61. novel_downloader/core/fetchers/{base/rate_limiter.py → rate_limiter.py} +2 -2
  62. novel_downloader/core/fetchers/registry.py +5 -16
  63. novel_downloader/core/fetchers/{sfacg/session.py → sfacg.py} +7 -10
  64. novel_downloader/core/fetchers/shencou.py +106 -0
  65. novel_downloader/core/fetchers/shuhaige.py +84 -0
  66. novel_downloader/core/fetchers/tongrenquan.py +84 -0
  67. novel_downloader/core/fetchers/ttkan.py +95 -0
  68. novel_downloader/core/fetchers/wanbengo.py +83 -0
  69. novel_downloader/core/fetchers/xiaoshuowu.py +106 -0
  70. novel_downloader/core/fetchers/xiguashuwu.py +177 -0
  71. novel_downloader/core/fetchers/xs63b.py +171 -0
  72. novel_downloader/core/fetchers/xshbook.py +85 -0
  73. novel_downloader/core/fetchers/{yamibo/session.py → yamibo.py} +19 -12
  74. novel_downloader/core/fetchers/yibige.py +114 -0
  75. novel_downloader/core/interfaces/__init__.py +1 -9
  76. novel_downloader/core/interfaces/downloader.py +6 -2
  77. novel_downloader/core/interfaces/exporter.py +7 -7
  78. novel_downloader/core/interfaces/fetcher.py +4 -17
  79. novel_downloader/core/interfaces/parser.py +5 -6
  80. novel_downloader/core/interfaces/searcher.py +9 -1
  81. novel_downloader/core/parsers/__init__.py +49 -12
  82. novel_downloader/core/parsers/aaatxt.py +132 -0
  83. novel_downloader/core/parsers/b520.py +116 -0
  84. novel_downloader/core/parsers/base.py +63 -12
  85. novel_downloader/core/parsers/biquyuedu.py +133 -0
  86. novel_downloader/core/parsers/dxmwx.py +162 -0
  87. novel_downloader/core/parsers/eightnovel.py +224 -0
  88. novel_downloader/core/parsers/esjzone.py +61 -66
  89. novel_downloader/core/parsers/guidaye.py +128 -0
  90. novel_downloader/core/parsers/hetushu.py +139 -0
  91. novel_downloader/core/parsers/i25zw.py +137 -0
  92. novel_downloader/core/parsers/ixdzs8.py +186 -0
  93. novel_downloader/core/parsers/jpxs123.py +137 -0
  94. novel_downloader/core/parsers/lewenn.py +142 -0
  95. novel_downloader/core/parsers/linovelib.py +48 -64
  96. novel_downloader/core/parsers/piaotia.py +189 -0
  97. novel_downloader/core/parsers/qbtr.py +136 -0
  98. novel_downloader/core/parsers/qianbi.py +48 -50
  99. novel_downloader/core/parsers/qidian/book_info_parser.py +58 -59
  100. novel_downloader/core/parsers/qidian/chapter_encrypted.py +272 -330
  101. novel_downloader/core/parsers/qidian/chapter_normal.py +24 -55
  102. novel_downloader/core/parsers/qidian/main_parser.py +11 -38
  103. novel_downloader/core/parsers/qidian/utils/__init__.py +1 -0
  104. novel_downloader/core/parsers/qidian/utils/decryptor_fetcher.py +1 -1
  105. novel_downloader/core/parsers/qidian/utils/fontmap_recover.py +143 -0
  106. novel_downloader/core/parsers/qidian/utils/helpers.py +0 -4
  107. novel_downloader/core/parsers/quanben5.py +103 -0
  108. novel_downloader/core/parsers/registry.py +5 -16
  109. novel_downloader/core/parsers/sfacg.py +38 -45
  110. novel_downloader/core/parsers/shencou.py +215 -0
  111. novel_downloader/core/parsers/shuhaige.py +111 -0
  112. novel_downloader/core/parsers/tongrenquan.py +116 -0
  113. novel_downloader/core/parsers/ttkan.py +132 -0
  114. novel_downloader/core/parsers/wanbengo.py +191 -0
  115. novel_downloader/core/parsers/xiaoshuowu.py +173 -0
  116. novel_downloader/core/parsers/xiguashuwu.py +435 -0
  117. novel_downloader/core/parsers/xs63b.py +161 -0
  118. novel_downloader/core/parsers/xshbook.py +134 -0
  119. novel_downloader/core/parsers/yamibo.py +87 -131
  120. novel_downloader/core/parsers/yibige.py +166 -0
  121. novel_downloader/core/searchers/__init__.py +34 -3
  122. novel_downloader/core/searchers/aaatxt.py +107 -0
  123. novel_downloader/core/searchers/{biquge.py → b520.py} +29 -28
  124. novel_downloader/core/searchers/base.py +112 -36
  125. novel_downloader/core/searchers/dxmwx.py +105 -0
  126. novel_downloader/core/searchers/eightnovel.py +84 -0
  127. novel_downloader/core/searchers/esjzone.py +43 -25
  128. novel_downloader/core/searchers/hetushu.py +92 -0
  129. novel_downloader/core/searchers/i25zw.py +93 -0
  130. novel_downloader/core/searchers/ixdzs8.py +107 -0
  131. novel_downloader/core/searchers/jpxs123.py +107 -0
  132. novel_downloader/core/searchers/piaotia.py +100 -0
  133. novel_downloader/core/searchers/qbtr.py +106 -0
  134. novel_downloader/core/searchers/qianbi.py +74 -40
  135. novel_downloader/core/searchers/quanben5.py +144 -0
  136. novel_downloader/core/searchers/registry.py +24 -8
  137. novel_downloader/core/searchers/shuhaige.py +124 -0
  138. novel_downloader/core/searchers/tongrenquan.py +110 -0
  139. novel_downloader/core/searchers/ttkan.py +92 -0
  140. novel_downloader/core/searchers/xiaoshuowu.py +122 -0
  141. novel_downloader/core/searchers/xiguashuwu.py +95 -0
  142. novel_downloader/core/searchers/xs63b.py +104 -0
  143. novel_downloader/locales/en.json +31 -82
  144. novel_downloader/locales/zh.json +32 -83
  145. novel_downloader/models/__init__.py +21 -22
  146. novel_downloader/models/book.py +44 -0
  147. novel_downloader/models/config.py +4 -37
  148. novel_downloader/models/login.py +1 -1
  149. novel_downloader/models/search.py +5 -0
  150. novel_downloader/resources/config/settings.toml +8 -70
  151. novel_downloader/resources/json/xiguashuwu.json +718 -0
  152. novel_downloader/utils/__init__.py +13 -22
  153. novel_downloader/utils/chapter_storage.py +3 -2
  154. novel_downloader/utils/constants.py +4 -29
  155. novel_downloader/utils/cookies.py +6 -18
  156. novel_downloader/utils/crypto_utils/__init__.py +13 -0
  157. novel_downloader/utils/crypto_utils/aes_util.py +90 -0
  158. novel_downloader/utils/crypto_utils/aes_v1.py +619 -0
  159. novel_downloader/utils/crypto_utils/aes_v2.py +1143 -0
  160. novel_downloader/utils/{crypto_utils.py → crypto_utils/rc4.py} +3 -10
  161. novel_downloader/utils/epub/__init__.py +1 -1
  162. novel_downloader/utils/epub/constants.py +57 -16
  163. novel_downloader/utils/epub/documents.py +88 -194
  164. novel_downloader/utils/epub/models.py +0 -14
  165. novel_downloader/utils/epub/utils.py +63 -96
  166. novel_downloader/utils/file_utils/__init__.py +2 -23
  167. novel_downloader/utils/file_utils/io.py +3 -113
  168. novel_downloader/utils/file_utils/sanitize.py +0 -4
  169. novel_downloader/utils/fontocr.py +207 -0
  170. novel_downloader/utils/logger.py +8 -16
  171. novel_downloader/utils/network.py +2 -2
  172. novel_downloader/utils/state.py +4 -90
  173. novel_downloader/utils/text_utils/__init__.py +1 -7
  174. novel_downloader/utils/text_utils/diff_display.py +5 -7
  175. novel_downloader/utils/time_utils/__init__.py +5 -11
  176. novel_downloader/utils/time_utils/datetime_utils.py +20 -29
  177. novel_downloader/utils/time_utils/sleep_utils.py +4 -8
  178. novel_downloader/web/__init__.py +13 -0
  179. novel_downloader/web/components/__init__.py +11 -0
  180. novel_downloader/web/components/navigation.py +35 -0
  181. novel_downloader/web/main.py +66 -0
  182. novel_downloader/web/pages/__init__.py +17 -0
  183. novel_downloader/web/pages/download.py +78 -0
  184. novel_downloader/web/pages/progress.py +147 -0
  185. novel_downloader/web/pages/search.py +329 -0
  186. novel_downloader/web/services/__init__.py +17 -0
  187. novel_downloader/web/services/client_dialog.py +164 -0
  188. novel_downloader/web/services/cred_broker.py +113 -0
  189. novel_downloader/web/services/cred_models.py +35 -0
  190. novel_downloader/web/services/task_manager.py +264 -0
  191. novel_downloader-2.0.0.dist-info/METADATA +171 -0
  192. novel_downloader-2.0.0.dist-info/RECORD +210 -0
  193. {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.0.dist-info}/entry_points.txt +1 -1
  194. novel_downloader/core/downloaders/biquge.py +0 -29
  195. novel_downloader/core/downloaders/esjzone.py +0 -29
  196. novel_downloader/core/downloaders/linovelib.py +0 -29
  197. novel_downloader/core/downloaders/sfacg.py +0 -29
  198. novel_downloader/core/downloaders/yamibo.py +0 -29
  199. novel_downloader/core/exporters/biquge.py +0 -22
  200. novel_downloader/core/exporters/esjzone.py +0 -22
  201. novel_downloader/core/exporters/qianbi.py +0 -22
  202. novel_downloader/core/exporters/sfacg.py +0 -22
  203. novel_downloader/core/exporters/yamibo.py +0 -22
  204. novel_downloader/core/fetchers/base/__init__.py +0 -14
  205. novel_downloader/core/fetchers/base/browser.py +0 -422
  206. novel_downloader/core/fetchers/biquge/__init__.py +0 -14
  207. novel_downloader/core/fetchers/esjzone/__init__.py +0 -14
  208. novel_downloader/core/fetchers/esjzone/browser.py +0 -209
  209. novel_downloader/core/fetchers/linovelib/__init__.py +0 -14
  210. novel_downloader/core/fetchers/linovelib/browser.py +0 -198
  211. novel_downloader/core/fetchers/qianbi/__init__.py +0 -14
  212. novel_downloader/core/fetchers/qidian/__init__.py +0 -14
  213. novel_downloader/core/fetchers/qidian/browser.py +0 -326
  214. novel_downloader/core/fetchers/sfacg/__init__.py +0 -14
  215. novel_downloader/core/fetchers/sfacg/browser.py +0 -194
  216. novel_downloader/core/fetchers/yamibo/__init__.py +0 -14
  217. novel_downloader/core/fetchers/yamibo/browser.py +0 -234
  218. novel_downloader/core/parsers/biquge.py +0 -139
  219. novel_downloader/models/chapter.py +0 -25
  220. novel_downloader/models/types.py +0 -13
  221. novel_downloader/tui/__init__.py +0 -7
  222. novel_downloader/tui/app.py +0 -32
  223. novel_downloader/tui/main.py +0 -17
  224. novel_downloader/tui/screens/__init__.py +0 -14
  225. novel_downloader/tui/screens/home.py +0 -198
  226. novel_downloader/tui/screens/login.py +0 -74
  227. novel_downloader/tui/styles/home_layout.tcss +0 -79
  228. novel_downloader/tui/widgets/richlog_handler.py +0 -24
  229. novel_downloader/utils/cache.py +0 -24
  230. novel_downloader/utils/fontocr/__init__.py +0 -22
  231. novel_downloader/utils/fontocr/hash_store.py +0 -280
  232. novel_downloader/utils/fontocr/hash_utils.py +0 -103
  233. novel_downloader/utils/fontocr/model_loader.py +0 -69
  234. novel_downloader/utils/fontocr/ocr_v1.py +0 -315
  235. novel_downloader/utils/fontocr/ocr_v2.py +0 -764
  236. novel_downloader/utils/fontocr/ocr_v3.py +0 -744
  237. novel_downloader-1.5.0.dist-info/METADATA +0 -196
  238. novel_downloader-1.5.0.dist-info/RECORD +0 -164
  239. {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.0.dist-info}/WHEEL +0 -0
  240. {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.0.dist-info}/licenses/LICENSE +0 -0
  241. {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.0.dist-info}/top_level.txt +0 -0
@@ -65,17 +65,15 @@ def parse_normal_chapter(
65
65
  seq = chapter_info.get("seq", None)
66
66
  volume = chapter_info.get("extra", {}).get("volumeName", "")
67
67
 
68
- chapter_text = _parse_browser_paragraph(html_str)
68
+ chapter_text = _parse_paragraph(
69
+ html_str=raw_html,
70
+ is_vip=vip_status(ssr_data),
71
+ chapter_id=chapter_id,
72
+ fkp=fkp,
73
+ fuid=parser._fuid,
74
+ )
69
75
  if not chapter_text:
70
- chapter_text = _parse_session_paragraph(
71
- html_str=raw_html,
72
- is_vip=vip_status(ssr_data),
73
- chapter_id=chapter_id,
74
- fkp=fkp,
75
- fuid=parser._fuid,
76
- )
77
- if not chapter_text:
78
- return None
76
+ return None
79
77
 
80
78
  if parser._use_truncation and duplicated:
81
79
  chapter_text = truncate_half_lines(chapter_text)
@@ -103,55 +101,26 @@ def parse_normal_chapter(
103
101
  return None
104
102
 
105
103
 
106
- def _parse_browser_paragraph(html_str: str) -> str:
107
- try:
108
- tree = html.fromstring(html_str)
109
- main = tree.xpath('//div[@id="app"]//div[@id="reader-content"]//main')
110
- if not main:
111
- return ""
112
- main = main[0]
113
-
114
- content_spans = main.xpath('.//span[contains(@class, "content-text")]')
115
-
116
- paragraph_texts = [
117
- span.text_content().strip()
118
- for span in content_spans
119
- if span.text_content().strip()
120
- ]
121
-
122
- chapter_text = "\n\n".join(paragraph_texts)
123
- return chapter_text
124
-
125
- except Exception as e:
126
- logger.error("[Parser] _parse_paragraph failed: %s", e)
127
- return ""
128
-
129
-
130
- def _parse_session_paragraph(
104
+ def _parse_paragraph(
131
105
  html_str: str,
132
106
  is_vip: bool,
133
107
  chapter_id: str,
134
108
  fkp: str,
135
109
  fuid: str,
136
110
  ) -> str:
137
- try:
138
- raw_html = html_str
139
-
140
- if is_vip:
141
- try:
142
- decryptor = get_decryptor()
143
- raw_html = decryptor.decrypt(raw_html, chapter_id, fkp, fuid)
144
- except Exception as e:
145
- logger.error("[Parser] decryption failed for '%s': %s", chapter_id, e)
146
- return ""
147
-
148
- tree = html.fromstring(raw_html)
149
- paras = tree.xpath(".//p")
150
- paragraph_texts = [
151
- p.text_content().strip() for p in paras if p.text_content().strip()
152
- ]
153
- return "\n\n".join(paragraph_texts)
111
+ raw_html = html_str
112
+
113
+ if is_vip:
114
+ try:
115
+ decryptor = get_decryptor()
116
+ raw_html = decryptor.decrypt(raw_html, chapter_id, fkp, fuid)
117
+ except Exception as e:
118
+ logger.error("[Parser] decryption failed for '%s': %s", chapter_id, e)
119
+ return ""
154
120
 
155
- except Exception as e:
156
- logger.error("[Parser] _parse_paragraph failed: %s", e)
157
- return ""
121
+ tree = html.fromstring(raw_html)
122
+ paras = tree.xpath(".//p")
123
+ paragraph_texts = [
124
+ p.text_content().strip() for p in paras if p.text_content().strip()
125
+ ]
126
+ return "\n".join(paragraph_texts)
@@ -10,13 +10,17 @@ from __future__ import annotations
10
10
 
11
11
  import logging
12
12
  from pathlib import Path
13
- from typing import TYPE_CHECKING, Any
13
+ from typing import Any
14
14
 
15
15
  from novel_downloader.core.parsers.base import BaseParser
16
16
  from novel_downloader.core.parsers.registry import register_parser
17
- from novel_downloader.models import ChapterDict, ParserConfig
18
- from novel_downloader.utils import find_cookie_value
17
+ from novel_downloader.models import (
18
+ BookInfoDict,
19
+ ChapterDict,
20
+ ParserConfig,
21
+ )
19
22
  from novel_downloader.utils.constants import DATA_DIR
23
+ from novel_downloader.utils.cookies import get_cookie_value
20
24
 
21
25
  from .book_info_parser import parse_book_info
22
26
  from .chapter_router import parse_chapter
@@ -24,17 +28,13 @@ from .utils import is_encrypted
24
28
 
25
29
  logger = logging.getLogger(__name__)
26
30
 
27
- if TYPE_CHECKING:
28
- from novel_downloader.utils.fontocr import FontOCR
29
-
30
31
 
31
32
  @register_parser(
32
33
  site_keys=["qidian", "qd"],
33
- backends=["session", "browser"],
34
34
  )
35
35
  class QidianParser(BaseParser):
36
36
  """
37
- Parser for Qidian site.
37
+ Parser for 起点中文网 site.
38
38
  """
39
39
 
40
40
  def __init__(
@@ -49,47 +49,20 @@ class QidianParser(BaseParser):
49
49
  """
50
50
  super().__init__(config)
51
51
 
52
- # Extract and store parser flags from config
53
- self._use_truncation = config.use_truncation
54
- self._decode_font: bool = config.decode_font
55
-
56
52
  self._fixed_font_dir: Path = self._base_cache_dir / "fixed_fonts"
57
53
  self._fixed_font_dir.mkdir(parents=True, exist_ok=True)
58
54
  self._debug_dir: Path = Path.cwd() / "debug"
59
55
 
60
56
  state_files = [
61
- DATA_DIR / "qidian" / "browser_state.cookies",
62
57
  DATA_DIR / "qidian" / "session_state.cookies",
63
58
  ]
64
- self._fuid: str = fuid or find_cookie_value(state_files, "ywguid")
65
-
66
- self._font_ocr: FontOCR | None = None
67
- if self._decode_font:
68
- try:
69
- from novel_downloader.utils.fontocr import FontOCR
70
- except ImportError:
71
- logger.warning(
72
- "[QidianParser] FontOCR not available, font decoding will skip"
73
- )
74
- else:
75
- self._font_ocr = FontOCR(
76
- cache_dir=self._base_cache_dir,
77
- use_freq=config.use_freq,
78
- use_ocr=config.use_ocr,
79
- use_vec=config.use_vec,
80
- batch_size=config.batch_size,
81
- gpu_mem=config.gpu_mem,
82
- gpu_id=config.gpu_id,
83
- ocr_weight=config.ocr_weight,
84
- vec_weight=config.vec_weight,
85
- font_debug=config.save_font_debug,
86
- )
59
+ self._fuid: str = fuid or get_cookie_value(state_files, "ywguid")
87
60
 
88
61
  def parse_book_info(
89
62
  self,
90
63
  html_list: list[str],
91
64
  **kwargs: Any,
92
- ) -> dict[str, Any]:
65
+ ) -> BookInfoDict | None:
93
66
  """
94
67
  Parse a book info page and extract metadata and chapter structure.
95
68
 
@@ -97,7 +70,7 @@ class QidianParser(BaseParser):
97
70
  :return: Parsed metadata and chapter structure as a dictionary.
98
71
  """
99
72
  if not html_list:
100
- return {}
73
+ return None
101
74
  return parse_book_info(html_list[0])
102
75
 
103
76
  def parse_chapter(
@@ -3,6 +3,7 @@
3
3
  novel_downloader.core.parsers.qidian.utils
4
4
  ------------------------------------------
5
5
 
6
+ Utility functions and helpers for parsing and decrypting Qidian novel pages
6
7
  """
7
8
 
8
9
  __all__ = [
@@ -25,7 +25,7 @@ import requests
25
25
  from novel_downloader.utils.constants import JS_SCRIPT_DIR
26
26
 
27
27
  DEST_ROOT: Final[Path] = JS_SCRIPT_DIR
28
- GITHUB_OWNER: Final = "BowenZ217"
28
+ GITHUB_OWNER: Final = "saudadez21"
29
29
  GITHUB_REPO: Final = "qidian-decryptor"
30
30
  RELEASE_VERSION: Final = "v1.0.1"
31
31
  BASE_URL: Final = f"https://github.com/{GITHUB_OWNER}/{GITHUB_REPO}/releases/download/{RELEASE_VERSION}"
@@ -0,0 +1,143 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ novel_downloader.core.parsers.qidian.utils.fontmap_recover
4
+ ----------------------------------------------------------
5
+
6
+ Tools for generating and applying font character mappings
7
+ to recover obfuscated Qidian text.
8
+ """
9
+
10
+ __all__ = [
11
+ "generate_font_map",
12
+ "apply_font_mapping",
13
+ ]
14
+
15
+ import json
16
+ import logging
17
+ from pathlib import Path
18
+
19
+ import numpy as np
20
+ from fontTools.ttLib import TTFont
21
+ from PIL import ImageFont
22
+
23
+ logger = logging.getLogger(__name__)
24
+ CHAR_FONT_SIZE = 52
25
+
26
+
27
+ def generate_font_map(
28
+ fixed_font_path: Path,
29
+ random_font_path: Path,
30
+ char_set: set[str],
31
+ refl_set: set[str],
32
+ cache_dir: Path,
33
+ batch_size: int = 32,
34
+ ) -> dict[str, str]:
35
+ """
36
+ Build a mapping from scrambled font chars to real chars.
37
+
38
+ Uses OCR to compare rendered glyphs from a known (fixed) font and an
39
+ obfuscated (random) font. Results are cached in JSON so repeated runs
40
+ are faster.
41
+
42
+ :param fixed_font_path: fixed font file.
43
+ :param random_font_path: random font file.
44
+ :param char_set: Characters to match directly.
45
+ :param refl_set: Characters to match in flipped form.
46
+ :param cache_dir: Directory to save/load cached results.
47
+ :param batch_size: How many chars to OCR per batch.
48
+
49
+ :return: { obf_char: real_char, ... }
50
+ """
51
+ try:
52
+ from novel_downloader.utils.fontocr import get_font_ocr
53
+
54
+ font_ocr = get_font_ocr(batch_size=batch_size)
55
+ except ImportError:
56
+ logger.warning("[QidianParser] FontOCR not available, font decoding will skip")
57
+ return {}
58
+
59
+ mapping_result: dict[str, str] = {}
60
+ fixed_map_file = cache_dir / "fixed_font_map" / f"{Path(fixed_font_path).stem}.json"
61
+ fixed_map_file.parent.mkdir(parents=True, exist_ok=True)
62
+
63
+ # load existing cache
64
+ try:
65
+ with open(fixed_map_file, encoding="utf-8") as f:
66
+ fixed_map = json.load(f)
67
+ cached_chars = set(fixed_map.keys())
68
+ mapping_result.update({ch: fixed_map[ch] for ch in char_set if ch in fixed_map})
69
+ mapping_result.update({ch: fixed_map[ch] for ch in refl_set if ch in fixed_map})
70
+ char_set = set(char_set) - cached_chars
71
+ refl_set = set(refl_set) - cached_chars
72
+ except Exception:
73
+ fixed_map = {}
74
+ cached_chars = set()
75
+
76
+ # prepare font renderers and cmap sets
77
+ try:
78
+ fixed_ttf = TTFont(fixed_font_path)
79
+ fixed_chars = {chr(c) for c in fixed_ttf.getBestCmap()}
80
+ fixed_font = ImageFont.truetype(str(fixed_font_path), CHAR_FONT_SIZE)
81
+
82
+ random_ttf = TTFont(random_font_path)
83
+ random_chars = {chr(c) for c in random_ttf.getBestCmap()}
84
+ random_font = ImageFont.truetype(str(random_font_path), CHAR_FONT_SIZE)
85
+ except Exception as e:
86
+ logger.error("[FontOCR] Failed to load TTF fonts: %s", e)
87
+ return mapping_result
88
+
89
+ def _render_batch(chars: list[tuple[str, bool]]) -> list[tuple[str, np.ndarray]]:
90
+ out = []
91
+ for ch, reflect in chars:
92
+ if ch in fixed_chars:
93
+ font = fixed_font
94
+ elif ch in random_chars:
95
+ font = random_font
96
+ else:
97
+ continue
98
+ img = font_ocr.render_char_image_array(ch, font, reflect)
99
+ if img is not None:
100
+ out.append((ch, img))
101
+ return out
102
+
103
+ # process the normal set first, then the reflected set, using the same batching loop
104
+ for chars, reflect in [(list(char_set), False), (list(refl_set), True)]:
105
+ for batch_chars in font_ocr._chunked(chars, font_ocr._batch_size):
106
+ # render all images in this batch
107
+ to_render = [(ch, reflect) for ch in batch_chars]
108
+ rendered = _render_batch(to_render)
109
+ if not rendered:
110
+ continue
111
+
112
+ # query OCR+vec simultaneously
113
+ imgs_to_query = [img for (ch, img) in rendered]
114
+ fused = font_ocr.predict(imgs_to_query, top_k=1)
115
+
116
+ # take the top prediction for each char and record it in both the result and the cache map
117
+ for (ch, _), preds in zip(rendered, fused, strict=False):
118
+ if not preds:
119
+ continue
120
+ real_char, _ = preds[0]
121
+ mapping_result[ch] = real_char
122
+ fixed_map[ch] = real_char
123
+
124
+ # persist updated fixed_map
125
+ try:
126
+ with open(fixed_map_file, "w", encoding="utf-8") as f:
127
+ json.dump(fixed_map, f, ensure_ascii=False, indent=2)
128
+ except Exception as e:
129
+ logger.error("[FontOCR] Failed to save fixed map: %s", e)
130
+
131
+ return mapping_result
132
+
133
+
134
+ def apply_font_mapping(text: str, font_map: dict[str, str]) -> str:
135
+ """
136
+ Replace each character in `text` using `font_map`,
137
+ leaving unmapped characters unchanged.
138
+
139
+ :param text: The input string, possibly containing obfuscated font chars.
140
+ :param font_map: A dict mapping obfuscated chars to real chars.
141
+ :return: The de-obfuscated text.
142
+ """
143
+ return "".join(font_map.get(ch, ch) for ch in text)
@@ -4,10 +4,6 @@ novel_downloader.core.parsers.qidian.utils.helpers
4
4
  --------------------------------------------------
5
5
 
6
6
  Shared utility functions for parsing Qidian pages.
7
-
8
- This module provides reusable helpers to:
9
- - Extract SSR-rendered JSON page context and structured chapter metadata.
10
- - Identify VIP chapters, encrypted content, and viewability conditions.
11
7
  """
12
8
 
13
9
  import json
@@ -0,0 +1,103 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ novel_downloader.core.parsers.quanben5
4
+ --------------------------------------
5
+
6
+ """
7
+
8
+ from datetime import datetime
9
+ from typing import Any
10
+
11
+ from lxml import html
12
+
13
+ from novel_downloader.core.parsers.base import BaseParser
14
+ from novel_downloader.core.parsers.registry import register_parser
15
+ from novel_downloader.models import (
16
+ BookInfoDict,
17
+ ChapterDict,
18
+ ChapterInfoDict,
19
+ VolumeInfoDict,
20
+ )
21
+
22
+
23
+ @register_parser(
24
+ site_keys=["quanben5"],
25
+ )
26
+ class Quanben5Parser(BaseParser):
27
+ """
28
+ Parser for 全本小说网 book pages.
29
+ """
30
+
31
+ def parse_book_info(
32
+ self,
33
+ html_list: list[str],
34
+ **kwargs: Any,
35
+ ) -> BookInfoDict | None:
36
+ if not html_list:
37
+ return None
38
+
39
+ tree = html.fromstring(html_list[0])
40
+ book_name = self._first_str(tree.xpath("//h3/span/text()"))
41
+ author = self._first_str(
42
+ tree.xpath(
43
+ '//p[@class="info"][contains(., "作者")]/span[@class="author"]/text()'
44
+ )
45
+ )
46
+ cover_url = self._first_str(tree.xpath('//div[@class="pic"]/img/@src'))
47
+ category = self._first_str(
48
+ tree.xpath('//p[@class="info"][contains(., "类别")]/span/text()')
49
+ )
50
+ tags = [category] if category else []
51
+ update_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
52
+ summary = self._first_str(tree.xpath('//p[@class="description"]/text()'))
53
+
54
+ chapters: list[ChapterInfoDict] = []
55
+ for li in tree.xpath('//ul[@class="list"]/li'):
56
+ link = li.xpath(".//a")[0]
57
+ href = link.get("href", "").strip()
58
+ title = self._first_str(link.xpath(".//span/text()"))
59
+ # '/n/toutian/83840.html' -> '83840'
60
+ chapter_id = href.rstrip(".html").split("/")[-1]
61
+ chapters.append({"title": title, "url": href, "chapterId": chapter_id})
62
+
63
+ volumes: list[VolumeInfoDict] = [{"volume_name": "正文", "chapters": chapters}]
64
+
65
+ return {
66
+ "book_name": book_name,
67
+ "author": author,
68
+ "cover_url": cover_url,
69
+ "update_time": update_time,
70
+ "tags": tags,
71
+ "summary": summary,
72
+ "volumes": volumes,
73
+ "extra": {},
74
+ }
75
+
76
+ def parse_chapter(
77
+ self,
78
+ html_list: list[str],
79
+ chapter_id: str,
80
+ **kwargs: Any,
81
+ ) -> ChapterDict | None:
82
+ if not html_list:
83
+ return None
84
+
85
+ tree = html.fromstring(html_list[0])
86
+
87
+ # Extract the chapter title
88
+ title = self._first_str(tree.xpath('//h1[@class="title1"]/text()'))
89
+
90
+ # Extract all <p> text within the content container
91
+ paragraphs = tree.xpath('//div[@id="content"]/p/text()')
92
+ # Clean whitespace and join with single newlines
93
+ content = "\n".join(p.strip() for p in paragraphs if p.strip())
94
+
95
+ if not content:
96
+ return None
97
+
98
+ return {
99
+ "id": chapter_id,
100
+ "title": title,
101
+ "content": content,
102
+ "extra": {"site": "quanben5"},
103
+ }
@@ -3,6 +3,7 @@
3
3
  novel_downloader.core.parsers.registry
4
4
  --------------------------------------
5
5
 
6
+ Registry and factory helpers for creating site-specific parsers.
6
7
  """
7
8
 
8
9
  __all__ = ["register_parser", "get_parser"]
@@ -16,27 +17,24 @@ from novel_downloader.models import ParserConfig
16
17
  ParserBuilder = Callable[[ParserConfig], ParserProtocol]
17
18
 
18
19
  P = TypeVar("P", bound=ParserProtocol)
19
- _PARSER_MAP: dict[str, dict[str, ParserBuilder]] = {}
20
+ _PARSER_MAP: dict[str, ParserBuilder] = {}
20
21
 
21
22
 
22
23
  def register_parser(
23
24
  site_keys: Sequence[str],
24
- backends: Sequence[str],
25
25
  ) -> Callable[[type[P]], type[P]]:
26
26
  """
27
27
  Decorator to register a parser class under given keys.
28
28
 
29
29
  :param site_keys: Sequence of site identifiers
30
- :param backends: Sequence of backend types
30
+ :param backends: Sequence of backend types
31
31
  :return: A class decorator that populates _PARSER_MAP.
32
32
  """
33
33
 
34
34
  def decorator(cls: type[P]) -> type[P]:
35
35
  for site in site_keys:
36
36
  site_lower = site.lower()
37
- bucket = _PARSER_MAP.setdefault(site_lower, {})
38
- for backend in backends:
39
- bucket[backend] = cls
37
+ _PARSER_MAP[site_lower] = cls
40
38
  return cls
41
39
 
42
40
  return decorator
@@ -52,17 +50,8 @@ def get_parser(site: str, config: ParserConfig) -> ParserProtocol:
52
50
  """
53
51
  site_key = site.lower()
54
52
  try:
55
- backend_map = _PARSER_MAP[site_key]
53
+ parser_cls = _PARSER_MAP[site_key]
56
54
  except KeyError as err:
57
55
  raise ValueError(f"Unsupported site: {site!r}") from err
58
56
 
59
- mode = config.mode
60
- try:
61
- parser_cls = backend_map[mode]
62
- except KeyError as err:
63
- raise ValueError(
64
- f"Unsupported parser mode {mode!r} for site {site!r}. "
65
- f"Available modes: {list(backend_map)}"
66
- ) from err
67
-
68
57
  return parser_cls(config)
@@ -11,22 +11,32 @@ from lxml import html
11
11
 
12
12
  from novel_downloader.core.parsers.base import BaseParser
13
13
  from novel_downloader.core.parsers.registry import register_parser
14
- from novel_downloader.models import ChapterDict
14
+ from novel_downloader.models import (
15
+ BookInfoDict,
16
+ ChapterDict,
17
+ ChapterInfoDict,
18
+ VolumeInfoDict,
19
+ )
15
20
 
16
21
 
17
22
  @register_parser(
18
23
  site_keys=["sfacg"],
19
- backends=["session", "browser"],
20
24
  )
21
25
  class SfacgParser(BaseParser):
22
- """ """
26
+ """
27
+ Parser for sfacg book pages.
28
+ """
23
29
 
24
30
  # Book info XPaths
25
31
  _BOOK_NAME_XPATH = '//ul[@class="book_info"]//span[@class="book_newtitle"]/text()'
26
32
  _AUTHOR_INFO_XPATH = '//ul[@class="book_info"]//span[@class="book_info3"]/text()'
27
33
  _UPDATE_TIME_XPATH = '//ul[@class="book_info"]//span[@class="book_info3"]/br/following-sibling::text()' # noqa: E501
28
34
  _COVER_URL_XPATH = '//ul[@class="book_info"]//li/img/@src'
29
- _STATUS_XPATH = '//ul[@class="book_info"]//div[@class="book_info2"]/span/text()'
35
+ # _STATUS_XPATH = '//ul[@class="book_info"]//div[@class="book_info2"]/span/text()'
36
+ _STATUS_XPATH = (
37
+ '//ul[@class="book_info"]//div[@class="book_info2"]/span/text()'
38
+ ' and (contains(., "完结") or contains(., "连载"))]/text()'
39
+ )
30
40
  _SUMMARY_XPATH = '//ul[@class="book_profile"]/li[@class="book_bk_qs1"]/text()'
31
41
 
32
42
  # Catalog XPaths
@@ -47,54 +57,35 @@ class SfacgParser(BaseParser):
47
57
  self,
48
58
  html_list: list[str],
49
59
  **kwargs: Any,
50
- ) -> dict[str, Any]:
51
- """
52
- Parse a book info page and extract metadata and chapter structure.
53
-
54
- :param html_list: Raw HTML of the book info page.
55
- :return: Parsed metadata and chapter structure as a dictionary.
56
- """
60
+ ) -> BookInfoDict | None:
57
61
  if len(html_list) < 2:
58
- return {}
62
+ return None
59
63
 
60
64
  info_tree = html.fromstring(html_list[0])
61
65
  catalog_tree = html.fromstring(html_list[1])
62
66
 
63
- result: dict[str, Any] = {}
64
-
65
67
  # Book metadata
66
- book_name = info_tree.xpath(self._BOOK_NAME_XPATH)
67
- result["book_name"] = book_name[0].strip() if book_name else ""
68
+ book_name = self._first_str(info_tree.xpath(self._BOOK_NAME_XPATH))
68
69
 
69
- book_info3 = info_tree.xpath(self._AUTHOR_INFO_XPATH)
70
- result["author"] = book_info3[0].split("/")[0].strip() if book_info3 else ""
71
- result["word_count"] = (
72
- book_info3[0].split("/")[1].strip()
73
- if book_info3 and len(book_info3[0].split("/")) > 1
74
- else ""
75
- )
70
+ book_info3_str = self._first_str(info_tree.xpath(self._AUTHOR_INFO_XPATH))
71
+ author, _, word_count = (p.strip() for p in book_info3_str.partition("/"))
76
72
 
77
- book_info3_br = info_tree.xpath(self._UPDATE_TIME_XPATH)
78
- result["update_time"] = book_info3_br[0].strip() if book_info3_br else ""
73
+ update_time = self._first_str(info_tree.xpath(self._UPDATE_TIME_XPATH))
79
74
 
80
- cover_url = info_tree.xpath(self._COVER_URL_XPATH)
81
- result["cover_url"] = "https:" + cover_url[0] if cover_url else ""
75
+ cover_url = "https:" + self._first_str(info_tree.xpath(self._COVER_URL_XPATH))
82
76
 
83
- serial_status = info_tree.xpath(self._STATUS_XPATH)
84
- result["serial_status"] = next(
85
- (s for s in serial_status if "完结" in s or "连载" in s), ""
86
- )
77
+ serial_status = self._first_str(info_tree.xpath(self._STATUS_XPATH))
87
78
 
88
- summary = info_tree.xpath(self._SUMMARY_XPATH)
89
- result["summary"] = "".join(summary).strip()
79
+ summary_elem = info_tree.xpath(self._SUMMARY_XPATH)
80
+ summary = "".join(summary_elem).strip()
90
81
 
91
82
  # Chapter structure
92
83
  volume_titles = catalog_tree.xpath(self._VOLUME_TITLE_XPATH)
93
84
  volume_blocks = catalog_tree.xpath(self._VOLUME_CONTENT_XPATH)
94
85
 
95
- volumes = []
86
+ volumes: list[VolumeInfoDict] = []
96
87
  for vol_title, vol_block in zip(volume_titles, volume_blocks, strict=False):
97
- chapters = []
88
+ chapters: list[ChapterInfoDict] = []
98
89
  for a in vol_block.xpath(self._CHAPTER_LIST_XPATH):
99
90
  href = a.xpath("./@href")[0] if a.xpath("./@href") else ""
100
91
  title = "".join(a.xpath(".//li//text()")).strip()
@@ -112,9 +103,18 @@ class SfacgParser(BaseParser):
112
103
  "chapters": chapters,
113
104
  }
114
105
  )
115
- result["volumes"] = volumes
116
106
 
117
- return result
107
+ return {
108
+ "book_name": book_name,
109
+ "author": author,
110
+ "cover_url": cover_url,
111
+ "update_time": update_time,
112
+ "word_count": word_count,
113
+ "serial_status": serial_status,
114
+ "summary": summary,
115
+ "volumes": volumes,
116
+ "extra": {},
117
+ }
118
118
 
119
119
  def parse_chapter(
120
120
  self,
@@ -122,13 +122,6 @@ class SfacgParser(BaseParser):
122
122
  chapter_id: str,
123
123
  **kwargs: Any,
124
124
  ) -> ChapterDict | None:
125
- """
126
- Parse a single chapter page and extract clean text or simplified HTML.
127
-
128
- :param html_list: Raw HTML of the chapter page.
129
- :param chapter_id: Identifier of the chapter being parsed.
130
- :return: Cleaned chapter content as plain text or minimal HTML.
131
- """
132
125
  if not html_list:
133
126
  return None
134
127
  keywords = [
@@ -156,7 +149,7 @@ class SfacgParser(BaseParser):
156
149
  raw_text_parts = tree.xpath(self._CHAPTER_TEXT_XPATH)
157
150
  content_lines = [txt.strip() for txt in raw_text_parts if txt.strip()]
158
151
 
159
- content = "\n\n".join(content_lines).strip()
152
+ content = "\n".join(content_lines).strip()
160
153
  if not content:
161
154
  return None
162
155