novel-downloader 1.4.5__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (276)
  1. novel_downloader/__init__.py +1 -1
  2. novel_downloader/cli/__init__.py +2 -4
  3. novel_downloader/cli/clean.py +21 -88
  4. novel_downloader/cli/config.py +27 -104
  5. novel_downloader/cli/download.py +78 -66
  6. novel_downloader/cli/export.py +20 -21
  7. novel_downloader/cli/main.py +3 -1
  8. novel_downloader/cli/search.py +120 -0
  9. novel_downloader/cli/ui.py +156 -0
  10. novel_downloader/config/__init__.py +10 -14
  11. novel_downloader/config/adapter.py +195 -99
  12. novel_downloader/config/{loader.py → file_io.py} +53 -27
  13. novel_downloader/core/__init__.py +14 -13
  14. novel_downloader/core/archived/deqixs/fetcher.py +115 -0
  15. novel_downloader/core/archived/deqixs/parser.py +132 -0
  16. novel_downloader/core/archived/deqixs/searcher.py +89 -0
  17. novel_downloader/core/archived/qidian/searcher.py +79 -0
  18. novel_downloader/core/archived/wanbengo/searcher.py +98 -0
  19. novel_downloader/core/archived/xshbook/searcher.py +93 -0
  20. novel_downloader/core/downloaders/__init__.py +8 -30
  21. novel_downloader/core/downloaders/base.py +182 -30
  22. novel_downloader/core/downloaders/common.py +217 -384
  23. novel_downloader/core/downloaders/qianbi.py +332 -4
  24. novel_downloader/core/downloaders/qidian.py +250 -290
  25. novel_downloader/core/downloaders/registry.py +69 -0
  26. novel_downloader/core/downloaders/signals.py +46 -0
  27. novel_downloader/core/exporters/__init__.py +8 -26
  28. novel_downloader/core/exporters/base.py +107 -31
  29. novel_downloader/core/exporters/common/__init__.py +3 -4
  30. novel_downloader/core/exporters/common/epub.py +92 -171
  31. novel_downloader/core/exporters/common/main_exporter.py +14 -67
  32. novel_downloader/core/exporters/common/txt.py +90 -86
  33. novel_downloader/core/exporters/epub_util.py +184 -1327
  34. novel_downloader/core/exporters/linovelib/__init__.py +3 -2
  35. novel_downloader/core/exporters/linovelib/epub.py +165 -222
  36. novel_downloader/core/exporters/linovelib/main_exporter.py +10 -71
  37. novel_downloader/core/exporters/linovelib/txt.py +76 -66
  38. novel_downloader/core/exporters/qidian.py +15 -11
  39. novel_downloader/core/exporters/registry.py +55 -0
  40. novel_downloader/core/exporters/txt_util.py +67 -0
  41. novel_downloader/core/fetchers/__init__.py +57 -56
  42. novel_downloader/core/fetchers/aaatxt.py +83 -0
  43. novel_downloader/core/fetchers/{biquge/session.py → b520.py} +10 -10
  44. novel_downloader/core/fetchers/{base/session.py → base.py} +63 -47
  45. novel_downloader/core/fetchers/biquyuedu.py +83 -0
  46. novel_downloader/core/fetchers/dxmwx.py +110 -0
  47. novel_downloader/core/fetchers/eightnovel.py +139 -0
  48. novel_downloader/core/fetchers/{esjzone/session.py → esjzone.py} +23 -11
  49. novel_downloader/core/fetchers/guidaye.py +85 -0
  50. novel_downloader/core/fetchers/hetushu.py +92 -0
  51. novel_downloader/core/fetchers/{qianbi/browser.py → i25zw.py} +22 -26
  52. novel_downloader/core/fetchers/ixdzs8.py +113 -0
  53. novel_downloader/core/fetchers/jpxs123.py +101 -0
  54. novel_downloader/core/fetchers/{biquge/browser.py → lewenn.py} +15 -15
  55. novel_downloader/core/fetchers/{linovelib/session.py → linovelib.py} +16 -12
  56. novel_downloader/core/fetchers/piaotia.py +105 -0
  57. novel_downloader/core/fetchers/qbtr.py +101 -0
  58. novel_downloader/core/fetchers/{qianbi/session.py → qianbi.py} +9 -9
  59. novel_downloader/core/fetchers/{qidian/session.py → qidian.py} +55 -40
  60. novel_downloader/core/fetchers/quanben5.py +92 -0
  61. novel_downloader/core/fetchers/{base/rate_limiter.py → rate_limiter.py} +2 -2
  62. novel_downloader/core/fetchers/registry.py +60 -0
  63. novel_downloader/core/fetchers/{sfacg/session.py → sfacg.py} +11 -9
  64. novel_downloader/core/fetchers/shencou.py +106 -0
  65. novel_downloader/core/fetchers/{common/browser.py → shuhaige.py} +24 -19
  66. novel_downloader/core/fetchers/tongrenquan.py +84 -0
  67. novel_downloader/core/fetchers/ttkan.py +95 -0
  68. novel_downloader/core/fetchers/{common/session.py → wanbengo.py} +21 -17
  69. novel_downloader/core/fetchers/xiaoshuowu.py +106 -0
  70. novel_downloader/core/fetchers/xiguashuwu.py +177 -0
  71. novel_downloader/core/fetchers/xs63b.py +171 -0
  72. novel_downloader/core/fetchers/xshbook.py +85 -0
  73. novel_downloader/core/fetchers/{yamibo/session.py → yamibo.py} +23 -11
  74. novel_downloader/core/fetchers/yibige.py +114 -0
  75. novel_downloader/core/interfaces/__init__.py +8 -14
  76. novel_downloader/core/interfaces/downloader.py +6 -2
  77. novel_downloader/core/interfaces/exporter.py +7 -7
  78. novel_downloader/core/interfaces/fetcher.py +4 -17
  79. novel_downloader/core/interfaces/parser.py +5 -6
  80. novel_downloader/core/interfaces/searcher.py +26 -0
  81. novel_downloader/core/parsers/__init__.py +58 -22
  82. novel_downloader/core/parsers/aaatxt.py +132 -0
  83. novel_downloader/core/parsers/b520.py +116 -0
  84. novel_downloader/core/parsers/base.py +63 -12
  85. novel_downloader/core/parsers/biquyuedu.py +133 -0
  86. novel_downloader/core/parsers/dxmwx.py +162 -0
  87. novel_downloader/core/parsers/eightnovel.py +224 -0
  88. novel_downloader/core/parsers/{esjzone/main_parser.py → esjzone.py} +67 -67
  89. novel_downloader/core/parsers/guidaye.py +128 -0
  90. novel_downloader/core/parsers/hetushu.py +139 -0
  91. novel_downloader/core/parsers/i25zw.py +137 -0
  92. novel_downloader/core/parsers/ixdzs8.py +186 -0
  93. novel_downloader/core/parsers/jpxs123.py +137 -0
  94. novel_downloader/core/parsers/lewenn.py +142 -0
  95. novel_downloader/core/parsers/{linovelib/main_parser.py → linovelib.py} +54 -65
  96. novel_downloader/core/parsers/piaotia.py +189 -0
  97. novel_downloader/core/parsers/qbtr.py +136 -0
  98. novel_downloader/core/parsers/{qianbi/main_parser.py → qianbi.py} +54 -51
  99. novel_downloader/core/parsers/qidian/__init__.py +2 -2
  100. novel_downloader/core/parsers/qidian/book_info_parser.py +58 -59
  101. novel_downloader/core/parsers/qidian/chapter_encrypted.py +290 -346
  102. novel_downloader/core/parsers/qidian/chapter_normal.py +25 -56
  103. novel_downloader/core/parsers/qidian/main_parser.py +19 -57
  104. novel_downloader/core/parsers/qidian/utils/__init__.py +12 -11
  105. novel_downloader/core/parsers/qidian/utils/decryptor_fetcher.py +6 -7
  106. novel_downloader/core/parsers/qidian/utils/fontmap_recover.py +143 -0
  107. novel_downloader/core/parsers/qidian/utils/helpers.py +0 -4
  108. novel_downloader/core/parsers/qidian/utils/node_decryptor.py +2 -2
  109. novel_downloader/core/parsers/quanben5.py +103 -0
  110. novel_downloader/core/parsers/registry.py +57 -0
  111. novel_downloader/core/parsers/{sfacg/main_parser.py → sfacg.py} +46 -48
  112. novel_downloader/core/parsers/shencou.py +215 -0
  113. novel_downloader/core/parsers/shuhaige.py +111 -0
  114. novel_downloader/core/parsers/tongrenquan.py +116 -0
  115. novel_downloader/core/parsers/ttkan.py +132 -0
  116. novel_downloader/core/parsers/wanbengo.py +191 -0
  117. novel_downloader/core/parsers/xiaoshuowu.py +173 -0
  118. novel_downloader/core/parsers/xiguashuwu.py +435 -0
  119. novel_downloader/core/parsers/xs63b.py +161 -0
  120. novel_downloader/core/parsers/xshbook.py +134 -0
  121. novel_downloader/core/parsers/yamibo.py +155 -0
  122. novel_downloader/core/parsers/yibige.py +166 -0
  123. novel_downloader/core/searchers/__init__.py +51 -0
  124. novel_downloader/core/searchers/aaatxt.py +107 -0
  125. novel_downloader/core/searchers/b520.py +84 -0
  126. novel_downloader/core/searchers/base.py +168 -0
  127. novel_downloader/core/searchers/dxmwx.py +105 -0
  128. novel_downloader/core/searchers/eightnovel.py +84 -0
  129. novel_downloader/core/searchers/esjzone.py +102 -0
  130. novel_downloader/core/searchers/hetushu.py +92 -0
  131. novel_downloader/core/searchers/i25zw.py +93 -0
  132. novel_downloader/core/searchers/ixdzs8.py +107 -0
  133. novel_downloader/core/searchers/jpxs123.py +107 -0
  134. novel_downloader/core/searchers/piaotia.py +100 -0
  135. novel_downloader/core/searchers/qbtr.py +106 -0
  136. novel_downloader/core/searchers/qianbi.py +165 -0
  137. novel_downloader/core/searchers/quanben5.py +144 -0
  138. novel_downloader/core/searchers/registry.py +79 -0
  139. novel_downloader/core/searchers/shuhaige.py +124 -0
  140. novel_downloader/core/searchers/tongrenquan.py +110 -0
  141. novel_downloader/core/searchers/ttkan.py +92 -0
  142. novel_downloader/core/searchers/xiaoshuowu.py +122 -0
  143. novel_downloader/core/searchers/xiguashuwu.py +95 -0
  144. novel_downloader/core/searchers/xs63b.py +104 -0
  145. novel_downloader/locales/en.json +36 -79
  146. novel_downloader/locales/zh.json +37 -80
  147. novel_downloader/models/__init__.py +23 -50
  148. novel_downloader/models/book.py +44 -0
  149. novel_downloader/models/config.py +16 -43
  150. novel_downloader/models/login.py +1 -1
  151. novel_downloader/models/search.py +21 -0
  152. novel_downloader/resources/config/settings.toml +39 -74
  153. novel_downloader/resources/css_styles/intro.css +83 -0
  154. novel_downloader/resources/css_styles/main.css +30 -89
  155. novel_downloader/resources/json/xiguashuwu.json +718 -0
  156. novel_downloader/utils/__init__.py +43 -0
  157. novel_downloader/utils/chapter_storage.py +247 -226
  158. novel_downloader/utils/constants.py +5 -50
  159. novel_downloader/utils/cookies.py +6 -18
  160. novel_downloader/utils/crypto_utils/__init__.py +13 -0
  161. novel_downloader/utils/crypto_utils/aes_util.py +90 -0
  162. novel_downloader/utils/crypto_utils/aes_v1.py +619 -0
  163. novel_downloader/utils/crypto_utils/aes_v2.py +1143 -0
  164. novel_downloader/utils/{crypto_utils.py → crypto_utils/rc4.py} +3 -10
  165. novel_downloader/utils/epub/__init__.py +34 -0
  166. novel_downloader/utils/epub/builder.py +377 -0
  167. novel_downloader/utils/epub/constants.py +118 -0
  168. novel_downloader/utils/epub/documents.py +297 -0
  169. novel_downloader/utils/epub/models.py +120 -0
  170. novel_downloader/utils/epub/utils.py +179 -0
  171. novel_downloader/utils/file_utils/__init__.py +5 -30
  172. novel_downloader/utils/file_utils/io.py +9 -150
  173. novel_downloader/utils/file_utils/normalize.py +2 -2
  174. novel_downloader/utils/file_utils/sanitize.py +2 -7
  175. novel_downloader/utils/fontocr.py +207 -0
  176. novel_downloader/utils/i18n.py +2 -0
  177. novel_downloader/utils/logger.py +10 -16
  178. novel_downloader/utils/network.py +111 -252
  179. novel_downloader/utils/state.py +5 -90
  180. novel_downloader/utils/text_utils/__init__.py +16 -21
  181. novel_downloader/utils/text_utils/diff_display.py +6 -9
  182. novel_downloader/utils/text_utils/numeric_conversion.py +253 -0
  183. novel_downloader/utils/text_utils/text_cleaner.py +179 -0
  184. novel_downloader/utils/text_utils/truncate_utils.py +62 -0
  185. novel_downloader/utils/time_utils/__init__.py +6 -12
  186. novel_downloader/utils/time_utils/datetime_utils.py +23 -33
  187. novel_downloader/utils/time_utils/sleep_utils.py +5 -10
  188. novel_downloader/web/__init__.py +13 -0
  189. novel_downloader/web/components/__init__.py +11 -0
  190. novel_downloader/web/components/navigation.py +35 -0
  191. novel_downloader/web/main.py +66 -0
  192. novel_downloader/web/pages/__init__.py +17 -0
  193. novel_downloader/web/pages/download.py +78 -0
  194. novel_downloader/web/pages/progress.py +147 -0
  195. novel_downloader/web/pages/search.py +329 -0
  196. novel_downloader/web/services/__init__.py +17 -0
  197. novel_downloader/web/services/client_dialog.py +164 -0
  198. novel_downloader/web/services/cred_broker.py +113 -0
  199. novel_downloader/web/services/cred_models.py +35 -0
  200. novel_downloader/web/services/task_manager.py +264 -0
  201. novel_downloader-2.0.0.dist-info/METADATA +171 -0
  202. novel_downloader-2.0.0.dist-info/RECORD +210 -0
  203. {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/entry_points.txt +1 -1
  204. novel_downloader/config/site_rules.py +0 -94
  205. novel_downloader/core/downloaders/biquge.py +0 -25
  206. novel_downloader/core/downloaders/esjzone.py +0 -25
  207. novel_downloader/core/downloaders/linovelib.py +0 -25
  208. novel_downloader/core/downloaders/sfacg.py +0 -25
  209. novel_downloader/core/downloaders/yamibo.py +0 -25
  210. novel_downloader/core/exporters/biquge.py +0 -25
  211. novel_downloader/core/exporters/esjzone.py +0 -25
  212. novel_downloader/core/exporters/qianbi.py +0 -25
  213. novel_downloader/core/exporters/sfacg.py +0 -25
  214. novel_downloader/core/exporters/yamibo.py +0 -25
  215. novel_downloader/core/factory/__init__.py +0 -20
  216. novel_downloader/core/factory/downloader.py +0 -73
  217. novel_downloader/core/factory/exporter.py +0 -58
  218. novel_downloader/core/factory/fetcher.py +0 -96
  219. novel_downloader/core/factory/parser.py +0 -86
  220. novel_downloader/core/fetchers/base/__init__.py +0 -14
  221. novel_downloader/core/fetchers/base/browser.py +0 -403
  222. novel_downloader/core/fetchers/biquge/__init__.py +0 -14
  223. novel_downloader/core/fetchers/common/__init__.py +0 -14
  224. novel_downloader/core/fetchers/esjzone/__init__.py +0 -14
  225. novel_downloader/core/fetchers/esjzone/browser.py +0 -204
  226. novel_downloader/core/fetchers/linovelib/__init__.py +0 -14
  227. novel_downloader/core/fetchers/linovelib/browser.py +0 -193
  228. novel_downloader/core/fetchers/qianbi/__init__.py +0 -14
  229. novel_downloader/core/fetchers/qidian/__init__.py +0 -14
  230. novel_downloader/core/fetchers/qidian/browser.py +0 -318
  231. novel_downloader/core/fetchers/sfacg/__init__.py +0 -14
  232. novel_downloader/core/fetchers/sfacg/browser.py +0 -189
  233. novel_downloader/core/fetchers/yamibo/__init__.py +0 -14
  234. novel_downloader/core/fetchers/yamibo/browser.py +0 -229
  235. novel_downloader/core/parsers/biquge/__init__.py +0 -10
  236. novel_downloader/core/parsers/biquge/main_parser.py +0 -134
  237. novel_downloader/core/parsers/common/__init__.py +0 -13
  238. novel_downloader/core/parsers/common/helper.py +0 -323
  239. novel_downloader/core/parsers/common/main_parser.py +0 -106
  240. novel_downloader/core/parsers/esjzone/__init__.py +0 -10
  241. novel_downloader/core/parsers/linovelib/__init__.py +0 -10
  242. novel_downloader/core/parsers/qianbi/__init__.py +0 -10
  243. novel_downloader/core/parsers/sfacg/__init__.py +0 -10
  244. novel_downloader/core/parsers/yamibo/__init__.py +0 -10
  245. novel_downloader/core/parsers/yamibo/main_parser.py +0 -194
  246. novel_downloader/models/browser.py +0 -21
  247. novel_downloader/models/chapter.py +0 -25
  248. novel_downloader/models/site_rules.py +0 -99
  249. novel_downloader/models/tasks.py +0 -33
  250. novel_downloader/models/types.py +0 -15
  251. novel_downloader/resources/css_styles/volume-intro.css +0 -56
  252. novel_downloader/resources/json/replace_word_map.json +0 -4
  253. novel_downloader/resources/text/blacklist.txt +0 -22
  254. novel_downloader/tui/__init__.py +0 -7
  255. novel_downloader/tui/app.py +0 -32
  256. novel_downloader/tui/main.py +0 -17
  257. novel_downloader/tui/screens/__init__.py +0 -14
  258. novel_downloader/tui/screens/home.py +0 -198
  259. novel_downloader/tui/screens/login.py +0 -74
  260. novel_downloader/tui/styles/home_layout.tcss +0 -79
  261. novel_downloader/tui/widgets/richlog_handler.py +0 -24
  262. novel_downloader/utils/cache.py +0 -24
  263. novel_downloader/utils/fontocr/__init__.py +0 -22
  264. novel_downloader/utils/fontocr/model_loader.py +0 -69
  265. novel_downloader/utils/fontocr/ocr_v1.py +0 -303
  266. novel_downloader/utils/fontocr/ocr_v2.py +0 -752
  267. novel_downloader/utils/hash_store.py +0 -279
  268. novel_downloader/utils/hash_utils.py +0 -103
  269. novel_downloader/utils/text_utils/chapter_formatting.py +0 -46
  270. novel_downloader/utils/text_utils/font_mapping.py +0 -28
  271. novel_downloader/utils/text_utils/text_cleaning.py +0 -107
  272. novel_downloader-1.4.5.dist-info/METADATA +0 -196
  273. novel_downloader-1.4.5.dist-info/RECORD +0 -165
  274. {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/WHEEL +0 -0
  275. {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/licenses/LICENSE +0 -0
  276. {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/top_level.txt +0 -0
@@ -3,8 +3,9 @@
3
3
  novel_downloader.core.exporters.linovelib
4
4
  -----------------------------------------
5
5
 
6
+ Exporter implementation for handling Linovelib novels.
6
7
  """
7
8
 
8
- from .main_exporter import LinovelibExporter
9
-
10
9
  __all__ = ["LinovelibExporter"]
10
+
11
+ from .main_exporter import LinovelibExporter
@@ -8,37 +8,33 @@ Contains the logic for exporting novel content as a single `.epub` file.
8
8
 
9
9
  from __future__ import annotations
10
10
 
11
- import html
12
- import json
13
- import re
14
11
  from pathlib import Path
15
12
  from typing import TYPE_CHECKING
16
13
 
17
14
  from novel_downloader.core.exporters.epub_util import (
18
- Book,
19
- Chapter,
20
- StyleSheet,
21
- Volume,
15
+ build_epub_chapter,
16
+ download_cover,
17
+ finalize_export,
18
+ inline_remote_images,
19
+ prepare_builder,
20
+ remove_all_images,
21
+ )
22
+ from novel_downloader.utils import (
23
+ download,
24
+ get_cleaner,
22
25
  )
23
26
  from novel_downloader.utils.constants import (
24
- CSS_MAIN_PATH,
25
27
  DEFAULT_HEADERS,
28
+ DEFAULT_IMAGE_SUFFIX,
29
+ )
30
+ from novel_downloader.utils.epub import (
31
+ Chapter,
32
+ Volume,
26
33
  )
27
- from novel_downloader.utils.file_utils import sanitize_filename
28
- from novel_downloader.utils.network import download_image
29
34
 
30
35
  if TYPE_CHECKING:
31
36
  from .main_exporter import LinovelibExporter
32
37
 
33
- _IMAGE_WRAPPER = (
34
- '<div class="duokan-image-single illus"><img src="../Images/{filename}" /></div>'
35
- )
36
- _IMG_TAG_PATTERN = re.compile(
37
- r'<img\s+[^>]*src=[\'"]([^\'"]+)[\'"][^>]*>', re.IGNORECASE
38
- )
39
- _RAW_HTML_RE = re.compile(
40
- r'^(<img\b[^>]*?\/>|<div class="duokan-image-single illus">.*?<\/div>)$', re.DOTALL
41
- )
42
38
  _IMG_HEADERS = DEFAULT_HEADERS.copy()
43
39
  _IMG_HEADERS["Referer"] = "https://www.linovelib.com/"
44
40
 
@@ -46,7 +42,7 @@ _IMG_HEADERS["Referer"] = "https://www.linovelib.com/"
46
42
  def export_whole_book(
47
43
  exporter: LinovelibExporter,
48
44
  book_id: str,
49
- ) -> None:
45
+ ) -> Path | None:
50
46
  """
51
47
  Export a single novel (identified by `book_id`) to an EPUB file.
52
48
 
@@ -54,132 +50,142 @@ def export_whole_book(
54
50
  1. Load `book_info.json` for metadata.
55
51
  2. Generate introductory HTML and optionally include the cover image.
56
52
  3. Initialize the EPUB container.
57
- 4. Iterate through volumes and chapters, convert each to XHTML.
53
+ 4. Iterate through volumes and chapters in volume-batches, convert each to XHTML.
58
54
  5. Assemble the spine, TOC, CSS and write out the final `.epub`.
59
55
 
56
+ :param exporter: The exporter instance, carrying config and path info.
60
57
  :param book_id: Identifier of the novel (used as subdirectory name).
61
58
  """
62
59
  TAG = "[exporter]"
63
60
  config = exporter._config
64
- # --- Paths & options ---
61
+
65
62
  raw_base = exporter._raw_data_dir / book_id
66
- img_dir = exporter._cache_dir / book_id / "images"
63
+ img_dir = raw_base / "images"
67
64
  out_dir = exporter.output_dir
65
+
68
66
  img_dir.mkdir(parents=True, exist_ok=True)
69
67
  out_dir.mkdir(parents=True, exist_ok=True)
70
68
 
69
+ cleaner = get_cleaner(
70
+ enabled=config.clean_text,
71
+ config=config.cleaner_cfg,
72
+ )
73
+
71
74
  # --- Load book_info.json ---
72
- info_path = raw_base / "book_info.json"
73
- try:
74
- info_text = info_path.read_text(encoding="utf-8")
75
- book_info = json.loads(info_text)
76
- except Exception as e:
77
- exporter.logger.error("%s Failed to load %s: %s", TAG, info_path, e)
78
- return
75
+ book_info = exporter._load_book_info(book_id)
76
+ if not book_info:
77
+ return None
79
78
 
80
79
  book_name = book_info.get("book_name", book_id)
81
80
  book_author = book_info.get("author", "")
81
+
82
82
  exporter.logger.info(
83
83
  "%s Starting EPUB generation: %s (ID: %s)", TAG, book_name, book_id
84
84
  )
85
85
 
86
86
  # --- Generate intro + cover ---
87
- cover_path: Path | None = None
88
- cover_url = book_info.get("cover_url", "")
89
- if config.include_cover and cover_url:
90
- cover_path = download_image(
91
- cover_url,
92
- raw_base,
93
- target_name="cover",
94
- headers=_IMG_HEADERS,
95
- on_exist="overwrite",
96
- )
97
- if not cover_path:
98
- exporter.logger.warning("Failed to download cover from %s", cover_url)
87
+ cover_path = download_cover(
88
+ book_info.get("cover_url", ""),
89
+ raw_base,
90
+ config.include_cover,
91
+ exporter.logger,
92
+ TAG,
93
+ headers=_IMG_HEADERS,
94
+ )
99
95
 
100
96
  # --- Initialize EPUB ---
101
- book = Book(
97
+ book, main_css = prepare_builder(
98
+ site_name=exporter.site,
99
+ book_id=book_id,
102
100
  title=book_name,
103
101
  author=book_author,
104
102
  description=book_info.get("summary", ""),
105
- cover_path=cover_path,
106
- subject=book_info.get("subject", []),
103
+ subject=book_info.get("tags", []),
107
104
  serial_status=book_info.get("serial_status", ""),
108
105
  word_count=book_info.get("word_count", ""),
109
- uid=f"{exporter.site}_{book_id}",
110
- )
111
- main_css = StyleSheet(
112
- id="main_style",
113
- content=CSS_MAIN_PATH.read_text(encoding="utf-8"),
114
- filename="main.css",
106
+ cover_path=cover_path,
115
107
  )
116
- book.add_stylesheet(main_css)
117
108
 
118
109
  # --- Compile chapters ---
119
110
  volumes = book_info.get("volumes", [])
111
+ if not volumes:
112
+ exporter.logger.warning("%s No volumes found in metadata.", TAG)
113
+
120
114
  for vol_index, vol in enumerate(volumes, start=1):
121
- raw_vol_name = vol.get("volume_name", "")
122
- raw_vol_name = raw_vol_name.replace(book_name, "").strip()
123
- vol_name = raw_vol_name or f"Volume {vol_index}"
115
+ raw_name = vol.get("volume_name", "")
116
+ raw_name = raw_name.replace(book_name, "").strip()
117
+ vol_name = raw_name or f"Volume {vol_index}"
124
118
  exporter.logger.info("Processing volume %d: %s", vol_index, vol_name)
125
119
 
126
- vol_cover_path: Path | None = None
120
+ # Batch-fetch chapters for this volume
121
+ chap_ids = [
122
+ chap["chapterId"]
123
+ for chap in vol.get("chapters", [])
124
+ if chap.get("chapterId")
125
+ ]
126
+ chap_map = exporter._get_chapters(book_id, chap_ids)
127
+
128
+ vol_cover: Path | None = None
127
129
  vol_cover_url = vol.get("volume_cover", "")
128
130
  if vol_cover_url:
129
- vol_cover_path = download_image(
131
+ vol_cover = download(
130
132
  vol_cover_url,
131
133
  img_dir,
132
134
  on_exist="skip",
135
+ default_suffix=DEFAULT_IMAGE_SUFFIX,
136
+ headers=_IMG_HEADERS,
133
137
  )
134
138
 
135
139
  curr_vol = Volume(
136
140
  id=f"vol_{vol_index}",
137
141
  title=vol_name,
138
- intro=vol.get("volume_intro", ""),
139
- cover=vol_cover_path,
142
+ intro=cleaner.clean_content(vol.get("volume_intro", "")),
143
+ cover=vol_cover,
140
144
  )
141
145
 
142
- for chap in vol.get("chapters", []):
143
- chap_id = chap.get("chapterId")
144
- chap_title = chap.get("title", "")
146
+ for chap_meta in vol.get("chapters", []):
147
+ chap_id = chap_meta.get("chapterId")
145
148
  if not chap_id:
146
149
  exporter.logger.warning(
147
150
  "%s Missing chapterId, skipping: %s",
148
151
  TAG,
149
- chap,
152
+ chap_meta,
150
153
  )
151
154
  continue
152
155
 
153
- chapter_data = exporter._get_chapter(book_id, chap_id)
154
- if not chapter_data:
156
+ chap_title = chap_meta.get("title", "")
157
+ data = chap_map.get(chap_id)
158
+ if not data:
155
159
  exporter.logger.info(
156
- "%s Missing chapter file: %s (%s), skipping.",
160
+ "%s Missing chapter: %s (%s), skipping.",
157
161
  TAG,
158
162
  chap_title,
159
163
  chap_id,
160
164
  )
161
165
  continue
162
166
 
163
- title = chapter_data.get("title") or chap_id
164
- content: str = chapter_data.get("content", "")
165
- content, img_paths = _inline_remote_images(content, img_dir)
166
- chap_html = _txt_to_html(
167
- chapter_title=title,
168
- chapter_text=content,
169
- extras={
170
- "作者说": chapter_data.get("author_say", ""),
171
- },
167
+ title = cleaner.clean_title(data.get("title", chap_title)) or chap_id
168
+ content = cleaner.clean_content(data.get("content", ""))
169
+ content = (
170
+ inline_remote_images(book, content, img_dir, headers=_IMG_HEADERS)
171
+ if config.include_picture
172
+ else remove_all_images(content)
172
173
  )
173
- curr_vol.add_chapter(
174
+
175
+ chap_html = build_epub_chapter(
176
+ title=title,
177
+ paragraphs=content,
178
+ extras={},
179
+ )
180
+ curr_vol.chapters.append(
174
181
  Chapter(
175
182
  id=f"c_{chap_id}",
183
+ filename=f"c{chap_id}.xhtml",
176
184
  title=title,
177
185
  content=chap_html,
178
186
  css=[main_css],
179
187
  )
180
188
  )
181
- for img_path in img_paths:
182
- book.add_image(img_path)
183
189
 
184
190
  book.add_volume(curr_vol)
185
191
 
@@ -189,218 +195,155 @@ def export_whole_book(
189
195
  author=book_info.get("author"),
190
196
  ext="epub",
191
197
  )
192
- out_path = out_dir / sanitize_filename(out_name)
193
-
194
- try:
195
- book.export(out_path)
196
- exporter.logger.info("%s EPUB successfully written to %s", TAG, out_path)
197
- except Exception as e:
198
- exporter.logger.error("%s Failed to write EPUB to %s: %s", TAG, out_path, e)
199
- return
198
+ return finalize_export(
199
+ book=book,
200
+ out_dir=out_dir,
201
+ filename=out_name,
202
+ logger=exporter.logger,
203
+ tag=TAG,
204
+ )
200
205
 
201
206
 
202
207
  def export_by_volume(
203
208
  exporter: LinovelibExporter,
204
209
  book_id: str,
205
- ) -> None:
210
+ ) -> Path | None:
206
211
  """
207
- Export a single novel (identified by `book_id`) to multi EPUB file.
212
+ Export each volume of a novel as a separate EPUB file.
213
+
214
+ Steps:
215
+ 1. Load metadata from `book_info.json`.
216
+ 2. For each volume:
217
+ a. Clean the volume title and determine output filename.
218
+ b. Batch-fetch all chapters in this volume to minimize SQLite overhead.
219
+ c. Initialize an EPUB builder for the volume, including cover and intro.
220
+ d. For each chapter: clean title & content, inline remote images.
221
+ e. Finalize and write the volume EPUB.
208
222
 
209
223
  :param book_id: Identifier of the novel (used as subdirectory name).
210
224
  """
211
225
  TAG = "[exporter]"
212
226
  config = exporter._config
213
- # --- Paths & options ---
227
+
214
228
  raw_base = exporter._raw_data_dir / book_id
215
- img_dir = exporter._cache_dir / book_id / "images"
229
+ img_dir = raw_base / "images"
216
230
  out_dir = exporter.output_dir
231
+
217
232
  img_dir.mkdir(parents=True, exist_ok=True)
218
233
  out_dir.mkdir(parents=True, exist_ok=True)
219
234
 
235
+ cleaner = get_cleaner(
236
+ enabled=config.clean_text,
237
+ config=config.cleaner_cfg,
238
+ )
239
+
220
240
  # --- Load book_info.json ---
221
- info_path = raw_base / "book_info.json"
222
- try:
223
- info_text = info_path.read_text(encoding="utf-8")
224
- book_info = json.loads(info_text)
225
- except Exception as e:
226
- exporter.logger.error("%s Failed to load %s: %s", TAG, info_path, e)
227
- return
241
+ book_info = exporter._load_book_info(book_id)
242
+ if not book_info:
243
+ return None
228
244
 
229
245
  book_name = book_info.get("book_name", book_id)
230
246
  book_author = book_info.get("author", "")
231
247
  book_summary = book_info.get("summary", "")
248
+
232
249
  exporter.logger.info(
233
250
  "%s Starting EPUB generation: %s (ID: %s)", TAG, book_name, book_id
234
251
  )
235
252
 
236
- main_css = StyleSheet(
237
- id="main_style",
238
- content=CSS_MAIN_PATH.read_text(encoding="utf-8"),
239
- filename="main.css",
240
- )
241
-
242
253
  # --- Compile columes ---
243
254
  volumes = book_info.get("volumes", [])
244
- for vol_index, vol in enumerate(volumes, start=1):
245
- raw_vol_name = vol.get("volume_name", "")
246
- raw_vol_name = raw_vol_name.replace(book_name, "").strip()
247
- vol_name = raw_vol_name or f"Volume {vol_index}"
255
+ if not volumes:
256
+ exporter.logger.warning("%s No volumes found in metadata.", TAG)
248
257
 
249
- vol_cover_path: Path | None = None
258
+ for vol_index, vol in enumerate(volumes, start=1):
259
+ raw_name = vol.get("volume_name", "")
260
+ raw_name = cleaner.clean_title(raw_name.replace(book_name, ""))
261
+ vol_name = raw_name or f"Volume {vol_index}"
262
+
263
+ # Batch-fetch chapters for this volume
264
+ chap_ids = [
265
+ chap["chapterId"]
266
+ for chap in vol.get("chapters", [])
267
+ if chap.get("chapterId")
268
+ ]
269
+ chap_map = exporter._get_chapters(book_id, chap_ids)
270
+
271
+ vol_cover: Path | None = None
250
272
  vol_cover_url = vol.get("volume_cover", "")
251
273
  if config.include_cover and vol_cover_url:
252
- vol_cover_path = download_image(
274
+ vol_cover = download(
253
275
  vol_cover_url,
254
276
  img_dir,
255
277
  headers=_IMG_HEADERS,
256
278
  on_exist="skip",
279
+ default_suffix=DEFAULT_IMAGE_SUFFIX,
257
280
  )
258
281
 
259
- book = Book(
260
- title=vol_name,
282
+ book, main_css = prepare_builder(
283
+ site_name=exporter.site,
284
+ book_id=book_id,
285
+ title=book_name,
261
286
  author=book_author,
262
287
  description=vol.get("volume_intro") or book_summary,
263
- cover_path=vol_cover_path,
264
- subject=book_info.get("subject", []),
265
- serial_status=vol.get("serial_status", ""),
288
+ subject=book_info.get("tags", []),
289
+ serial_status=book_info.get("serial_status", ""),
266
290
  word_count=vol.get("word_count", ""),
267
- uid=f"{exporter.site}_{book_id}_v{vol_index}",
291
+ cover_path=vol_cover,
268
292
  )
269
- book.add_stylesheet(main_css)
270
293
 
271
- for chap in vol.get("chapters", []):
272
- chap_id = chap.get("chapterId")
273
- chap_title = chap.get("title", "")
294
+ for chap_meta in vol.get("chapters", []):
295
+ chap_id = chap_meta.get("chapterId")
274
296
  if not chap_id:
275
297
  exporter.logger.warning(
276
298
  "%s Missing chapterId, skipping: %s",
277
299
  TAG,
278
- chap,
300
+ chap_meta,
279
301
  )
280
302
  continue
281
303
 
282
- chapter_data = exporter._get_chapter(book_id, chap_id)
283
- if not chapter_data:
304
+ chap_title = chap_meta.get("title", "")
305
+ data = chap_map.get(chap_id)
306
+ if not data:
284
307
  exporter.logger.info(
285
- "%s Missing chapter file: %s (%s), skipping.",
308
+ "%s Missing chapter: %s (%s), skipping.",
286
309
  TAG,
287
310
  chap_title,
288
311
  chap_id,
289
312
  )
290
313
  continue
291
314
 
292
- title = chapter_data.get("title", "") or chap_id
293
- content: str = chapter_data.get("content", "")
294
- content, imgs = _inline_remote_images(content, img_dir)
295
- chap_html = _txt_to_html(
296
- chapter_title=title,
297
- chapter_text=content,
315
+ title = cleaner.clean_title(data.get("title", chap_title)) or chap_id
316
+ content = cleaner.clean_content(data.get("content", ""))
317
+ content = (
318
+ inline_remote_images(book, content, img_dir, headers=_IMG_HEADERS)
319
+ if config.include_picture
320
+ else remove_all_images(content)
321
+ )
322
+ chap_html = build_epub_chapter(
323
+ title=title,
324
+ paragraphs=content,
298
325
  extras={},
299
326
  )
300
327
  book.add_chapter(
301
328
  Chapter(
302
329
  id=f"c_{chap_id}",
330
+ filename=f"c{chap_id}.xhtml",
303
331
  title=title,
304
332
  content=chap_html,
305
333
  css=[main_css],
306
334
  )
307
335
  )
308
- for img_path in imgs:
309
- book.add_image(img_path)
310
336
 
311
337
  out_name = exporter.get_filename(
312
338
  title=vol_name,
313
339
  author=book_info.get("author"),
314
340
  ext="epub",
315
341
  )
316
- out_path = out_dir / sanitize_filename(out_name)
317
-
318
- try:
319
- book.export(out_path)
320
- exporter.logger.info("%s EPUB successfully written to %s", TAG, out_path)
321
- except Exception as e:
322
- exporter.logger.error("%s Failed to write EPUB to %s: %s", TAG, out_path, e)
323
- return
324
-
325
-
326
- def _inline_remote_images(
327
- content: str,
328
- image_dir: str | Path,
329
- ) -> tuple[str, list[Path]]:
330
- """
331
- Download every remote `<img src="...">` in `content` into `image_dir`,
332
- and replace the original tag with _IMAGE_WRAPPER
333
- pointing to the local filename.
334
-
335
- :param content: HTML/text of the chapter containing <img> tags.
336
- :param image_dir: Directory to save downloaded images into.
337
- :return: A tuple (modified_content, list_of_downloaded_image_paths).
338
- """
339
- downloaded_images: list[Path] = []
340
-
341
- def _replace(match: re.Match[str]) -> str:
342
- url = match.group(1)
343
- try:
344
- # download_image returns a Path or None
345
- local_path = download_image(
346
- url,
347
- image_dir,
348
- target_name=None,
349
- headers=_IMG_HEADERS,
350
- on_exist="skip",
351
- )
352
- if not local_path:
353
- return match.group(0)
354
-
355
- downloaded_images.append(local_path)
356
- return _IMAGE_WRAPPER.format(filename=local_path.name)
357
- except Exception:
358
- return match.group(0)
359
-
360
- modified_content = _IMG_TAG_PATTERN.sub(_replace, content)
361
- return modified_content, downloaded_images
362
-
363
-
364
- def _txt_to_html(
365
- chapter_title: str,
366
- chapter_text: str,
367
- extras: dict[str, str] | None = None,
368
- ) -> str:
369
- """
370
- Convert chapter text and author note to styled HTML.
371
-
372
- :param chapter_title: Title of the chapter.
373
- :param chapter_text: Main content of the chapter.
374
- :param extras: Optional dict of titles and content, e.g. {"作者说": "text"}.
375
- :return: Rendered HTML as a string.
376
- """
377
-
378
- def _render_block(text: str) -> str:
379
- lines = (line.strip() for line in text.splitlines() if line.strip())
380
- out = []
381
- for line in lines:
382
- # preserve raw HTML, otherwise wrap in <p>
383
- if _RAW_HTML_RE.match(line):
384
- out.append(line)
385
- else:
386
- out.append(f"<p>{html.escape(line)}</p>")
387
- return "\n".join(out)
388
-
389
- parts = []
390
- parts.append(f"<h2>{html.escape(chapter_title)}</h2>")
391
- parts.append(_render_block(chapter_text))
392
-
393
- if extras:
394
- for title, note in extras.items():
395
- note = note.strip()
396
- if not note:
397
- continue
398
- parts.extend(
399
- [
400
- "<hr />",
401
- f"<p>{html.escape(title)}</p>",
402
- _render_block(note),
403
- ]
404
- )
405
-
406
- return "\n".join(parts)
342
+ finalize_export(
343
+ book=book,
344
+ out_dir=out_dir,
345
+ filename=out_name,
346
+ logger=exporter.logger,
347
+ tag=TAG,
348
+ )
349
+ return None