novel-downloader 1.4.5__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (276) hide show
  1. novel_downloader/__init__.py +1 -1
  2. novel_downloader/cli/__init__.py +2 -4
  3. novel_downloader/cli/clean.py +21 -88
  4. novel_downloader/cli/config.py +27 -104
  5. novel_downloader/cli/download.py +78 -66
  6. novel_downloader/cli/export.py +20 -21
  7. novel_downloader/cli/main.py +3 -1
  8. novel_downloader/cli/search.py +120 -0
  9. novel_downloader/cli/ui.py +156 -0
  10. novel_downloader/config/__init__.py +10 -14
  11. novel_downloader/config/adapter.py +195 -99
  12. novel_downloader/config/{loader.py → file_io.py} +53 -27
  13. novel_downloader/core/__init__.py +14 -13
  14. novel_downloader/core/archived/deqixs/fetcher.py +115 -0
  15. novel_downloader/core/archived/deqixs/parser.py +132 -0
  16. novel_downloader/core/archived/deqixs/searcher.py +89 -0
  17. novel_downloader/core/archived/qidian/searcher.py +79 -0
  18. novel_downloader/core/archived/wanbengo/searcher.py +98 -0
  19. novel_downloader/core/archived/xshbook/searcher.py +93 -0
  20. novel_downloader/core/downloaders/__init__.py +8 -30
  21. novel_downloader/core/downloaders/base.py +182 -30
  22. novel_downloader/core/downloaders/common.py +217 -384
  23. novel_downloader/core/downloaders/qianbi.py +332 -4
  24. novel_downloader/core/downloaders/qidian.py +250 -290
  25. novel_downloader/core/downloaders/registry.py +69 -0
  26. novel_downloader/core/downloaders/signals.py +46 -0
  27. novel_downloader/core/exporters/__init__.py +8 -26
  28. novel_downloader/core/exporters/base.py +107 -31
  29. novel_downloader/core/exporters/common/__init__.py +3 -4
  30. novel_downloader/core/exporters/common/epub.py +92 -171
  31. novel_downloader/core/exporters/common/main_exporter.py +14 -67
  32. novel_downloader/core/exporters/common/txt.py +90 -86
  33. novel_downloader/core/exporters/epub_util.py +184 -1327
  34. novel_downloader/core/exporters/linovelib/__init__.py +3 -2
  35. novel_downloader/core/exporters/linovelib/epub.py +165 -222
  36. novel_downloader/core/exporters/linovelib/main_exporter.py +10 -71
  37. novel_downloader/core/exporters/linovelib/txt.py +76 -66
  38. novel_downloader/core/exporters/qidian.py +15 -11
  39. novel_downloader/core/exporters/registry.py +55 -0
  40. novel_downloader/core/exporters/txt_util.py +67 -0
  41. novel_downloader/core/fetchers/__init__.py +57 -56
  42. novel_downloader/core/fetchers/aaatxt.py +83 -0
  43. novel_downloader/core/fetchers/{biquge/session.py → b520.py} +10 -10
  44. novel_downloader/core/fetchers/{base/session.py → base.py} +63 -47
  45. novel_downloader/core/fetchers/biquyuedu.py +83 -0
  46. novel_downloader/core/fetchers/dxmwx.py +110 -0
  47. novel_downloader/core/fetchers/eightnovel.py +139 -0
  48. novel_downloader/core/fetchers/{esjzone/session.py → esjzone.py} +23 -11
  49. novel_downloader/core/fetchers/guidaye.py +85 -0
  50. novel_downloader/core/fetchers/hetushu.py +92 -0
  51. novel_downloader/core/fetchers/{qianbi/browser.py → i25zw.py} +22 -26
  52. novel_downloader/core/fetchers/ixdzs8.py +113 -0
  53. novel_downloader/core/fetchers/jpxs123.py +101 -0
  54. novel_downloader/core/fetchers/{biquge/browser.py → lewenn.py} +15 -15
  55. novel_downloader/core/fetchers/{linovelib/session.py → linovelib.py} +16 -12
  56. novel_downloader/core/fetchers/piaotia.py +105 -0
  57. novel_downloader/core/fetchers/qbtr.py +101 -0
  58. novel_downloader/core/fetchers/{qianbi/session.py → qianbi.py} +9 -9
  59. novel_downloader/core/fetchers/{qidian/session.py → qidian.py} +55 -40
  60. novel_downloader/core/fetchers/quanben5.py +92 -0
  61. novel_downloader/core/fetchers/{base/rate_limiter.py → rate_limiter.py} +2 -2
  62. novel_downloader/core/fetchers/registry.py +60 -0
  63. novel_downloader/core/fetchers/{sfacg/session.py → sfacg.py} +11 -9
  64. novel_downloader/core/fetchers/shencou.py +106 -0
  65. novel_downloader/core/fetchers/{common/browser.py → shuhaige.py} +24 -19
  66. novel_downloader/core/fetchers/tongrenquan.py +84 -0
  67. novel_downloader/core/fetchers/ttkan.py +95 -0
  68. novel_downloader/core/fetchers/{common/session.py → wanbengo.py} +21 -17
  69. novel_downloader/core/fetchers/xiaoshuowu.py +106 -0
  70. novel_downloader/core/fetchers/xiguashuwu.py +177 -0
  71. novel_downloader/core/fetchers/xs63b.py +171 -0
  72. novel_downloader/core/fetchers/xshbook.py +85 -0
  73. novel_downloader/core/fetchers/{yamibo/session.py → yamibo.py} +23 -11
  74. novel_downloader/core/fetchers/yibige.py +114 -0
  75. novel_downloader/core/interfaces/__init__.py +8 -14
  76. novel_downloader/core/interfaces/downloader.py +6 -2
  77. novel_downloader/core/interfaces/exporter.py +7 -7
  78. novel_downloader/core/interfaces/fetcher.py +4 -17
  79. novel_downloader/core/interfaces/parser.py +5 -6
  80. novel_downloader/core/interfaces/searcher.py +26 -0
  81. novel_downloader/core/parsers/__init__.py +58 -22
  82. novel_downloader/core/parsers/aaatxt.py +132 -0
  83. novel_downloader/core/parsers/b520.py +116 -0
  84. novel_downloader/core/parsers/base.py +63 -12
  85. novel_downloader/core/parsers/biquyuedu.py +133 -0
  86. novel_downloader/core/parsers/dxmwx.py +162 -0
  87. novel_downloader/core/parsers/eightnovel.py +224 -0
  88. novel_downloader/core/parsers/{esjzone/main_parser.py → esjzone.py} +67 -67
  89. novel_downloader/core/parsers/guidaye.py +128 -0
  90. novel_downloader/core/parsers/hetushu.py +139 -0
  91. novel_downloader/core/parsers/i25zw.py +137 -0
  92. novel_downloader/core/parsers/ixdzs8.py +186 -0
  93. novel_downloader/core/parsers/jpxs123.py +137 -0
  94. novel_downloader/core/parsers/lewenn.py +142 -0
  95. novel_downloader/core/parsers/{linovelib/main_parser.py → linovelib.py} +54 -65
  96. novel_downloader/core/parsers/piaotia.py +189 -0
  97. novel_downloader/core/parsers/qbtr.py +136 -0
  98. novel_downloader/core/parsers/{qianbi/main_parser.py → qianbi.py} +54 -51
  99. novel_downloader/core/parsers/qidian/__init__.py +2 -2
  100. novel_downloader/core/parsers/qidian/book_info_parser.py +58 -59
  101. novel_downloader/core/parsers/qidian/chapter_encrypted.py +290 -346
  102. novel_downloader/core/parsers/qidian/chapter_normal.py +25 -56
  103. novel_downloader/core/parsers/qidian/main_parser.py +19 -57
  104. novel_downloader/core/parsers/qidian/utils/__init__.py +12 -11
  105. novel_downloader/core/parsers/qidian/utils/decryptor_fetcher.py +6 -7
  106. novel_downloader/core/parsers/qidian/utils/fontmap_recover.py +143 -0
  107. novel_downloader/core/parsers/qidian/utils/helpers.py +0 -4
  108. novel_downloader/core/parsers/qidian/utils/node_decryptor.py +2 -2
  109. novel_downloader/core/parsers/quanben5.py +103 -0
  110. novel_downloader/core/parsers/registry.py +57 -0
  111. novel_downloader/core/parsers/{sfacg/main_parser.py → sfacg.py} +46 -48
  112. novel_downloader/core/parsers/shencou.py +215 -0
  113. novel_downloader/core/parsers/shuhaige.py +111 -0
  114. novel_downloader/core/parsers/tongrenquan.py +116 -0
  115. novel_downloader/core/parsers/ttkan.py +132 -0
  116. novel_downloader/core/parsers/wanbengo.py +191 -0
  117. novel_downloader/core/parsers/xiaoshuowu.py +173 -0
  118. novel_downloader/core/parsers/xiguashuwu.py +435 -0
  119. novel_downloader/core/parsers/xs63b.py +161 -0
  120. novel_downloader/core/parsers/xshbook.py +134 -0
  121. novel_downloader/core/parsers/yamibo.py +155 -0
  122. novel_downloader/core/parsers/yibige.py +166 -0
  123. novel_downloader/core/searchers/__init__.py +51 -0
  124. novel_downloader/core/searchers/aaatxt.py +107 -0
  125. novel_downloader/core/searchers/b520.py +84 -0
  126. novel_downloader/core/searchers/base.py +168 -0
  127. novel_downloader/core/searchers/dxmwx.py +105 -0
  128. novel_downloader/core/searchers/eightnovel.py +84 -0
  129. novel_downloader/core/searchers/esjzone.py +102 -0
  130. novel_downloader/core/searchers/hetushu.py +92 -0
  131. novel_downloader/core/searchers/i25zw.py +93 -0
  132. novel_downloader/core/searchers/ixdzs8.py +107 -0
  133. novel_downloader/core/searchers/jpxs123.py +107 -0
  134. novel_downloader/core/searchers/piaotia.py +100 -0
  135. novel_downloader/core/searchers/qbtr.py +106 -0
  136. novel_downloader/core/searchers/qianbi.py +165 -0
  137. novel_downloader/core/searchers/quanben5.py +144 -0
  138. novel_downloader/core/searchers/registry.py +79 -0
  139. novel_downloader/core/searchers/shuhaige.py +124 -0
  140. novel_downloader/core/searchers/tongrenquan.py +110 -0
  141. novel_downloader/core/searchers/ttkan.py +92 -0
  142. novel_downloader/core/searchers/xiaoshuowu.py +122 -0
  143. novel_downloader/core/searchers/xiguashuwu.py +95 -0
  144. novel_downloader/core/searchers/xs63b.py +104 -0
  145. novel_downloader/locales/en.json +36 -79
  146. novel_downloader/locales/zh.json +37 -80
  147. novel_downloader/models/__init__.py +23 -50
  148. novel_downloader/models/book.py +44 -0
  149. novel_downloader/models/config.py +16 -43
  150. novel_downloader/models/login.py +1 -1
  151. novel_downloader/models/search.py +21 -0
  152. novel_downloader/resources/config/settings.toml +39 -74
  153. novel_downloader/resources/css_styles/intro.css +83 -0
  154. novel_downloader/resources/css_styles/main.css +30 -89
  155. novel_downloader/resources/json/xiguashuwu.json +718 -0
  156. novel_downloader/utils/__init__.py +43 -0
  157. novel_downloader/utils/chapter_storage.py +247 -226
  158. novel_downloader/utils/constants.py +5 -50
  159. novel_downloader/utils/cookies.py +6 -18
  160. novel_downloader/utils/crypto_utils/__init__.py +13 -0
  161. novel_downloader/utils/crypto_utils/aes_util.py +90 -0
  162. novel_downloader/utils/crypto_utils/aes_v1.py +619 -0
  163. novel_downloader/utils/crypto_utils/aes_v2.py +1143 -0
  164. novel_downloader/utils/{crypto_utils.py → crypto_utils/rc4.py} +3 -10
  165. novel_downloader/utils/epub/__init__.py +34 -0
  166. novel_downloader/utils/epub/builder.py +377 -0
  167. novel_downloader/utils/epub/constants.py +118 -0
  168. novel_downloader/utils/epub/documents.py +297 -0
  169. novel_downloader/utils/epub/models.py +120 -0
  170. novel_downloader/utils/epub/utils.py +179 -0
  171. novel_downloader/utils/file_utils/__init__.py +5 -30
  172. novel_downloader/utils/file_utils/io.py +9 -150
  173. novel_downloader/utils/file_utils/normalize.py +2 -2
  174. novel_downloader/utils/file_utils/sanitize.py +2 -7
  175. novel_downloader/utils/fontocr.py +207 -0
  176. novel_downloader/utils/i18n.py +2 -0
  177. novel_downloader/utils/logger.py +10 -16
  178. novel_downloader/utils/network.py +111 -252
  179. novel_downloader/utils/state.py +5 -90
  180. novel_downloader/utils/text_utils/__init__.py +16 -21
  181. novel_downloader/utils/text_utils/diff_display.py +6 -9
  182. novel_downloader/utils/text_utils/numeric_conversion.py +253 -0
  183. novel_downloader/utils/text_utils/text_cleaner.py +179 -0
  184. novel_downloader/utils/text_utils/truncate_utils.py +62 -0
  185. novel_downloader/utils/time_utils/__init__.py +6 -12
  186. novel_downloader/utils/time_utils/datetime_utils.py +23 -33
  187. novel_downloader/utils/time_utils/sleep_utils.py +5 -10
  188. novel_downloader/web/__init__.py +13 -0
  189. novel_downloader/web/components/__init__.py +11 -0
  190. novel_downloader/web/components/navigation.py +35 -0
  191. novel_downloader/web/main.py +66 -0
  192. novel_downloader/web/pages/__init__.py +17 -0
  193. novel_downloader/web/pages/download.py +78 -0
  194. novel_downloader/web/pages/progress.py +147 -0
  195. novel_downloader/web/pages/search.py +329 -0
  196. novel_downloader/web/services/__init__.py +17 -0
  197. novel_downloader/web/services/client_dialog.py +164 -0
  198. novel_downloader/web/services/cred_broker.py +113 -0
  199. novel_downloader/web/services/cred_models.py +35 -0
  200. novel_downloader/web/services/task_manager.py +264 -0
  201. novel_downloader-2.0.0.dist-info/METADATA +171 -0
  202. novel_downloader-2.0.0.dist-info/RECORD +210 -0
  203. {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/entry_points.txt +1 -1
  204. novel_downloader/config/site_rules.py +0 -94
  205. novel_downloader/core/downloaders/biquge.py +0 -25
  206. novel_downloader/core/downloaders/esjzone.py +0 -25
  207. novel_downloader/core/downloaders/linovelib.py +0 -25
  208. novel_downloader/core/downloaders/sfacg.py +0 -25
  209. novel_downloader/core/downloaders/yamibo.py +0 -25
  210. novel_downloader/core/exporters/biquge.py +0 -25
  211. novel_downloader/core/exporters/esjzone.py +0 -25
  212. novel_downloader/core/exporters/qianbi.py +0 -25
  213. novel_downloader/core/exporters/sfacg.py +0 -25
  214. novel_downloader/core/exporters/yamibo.py +0 -25
  215. novel_downloader/core/factory/__init__.py +0 -20
  216. novel_downloader/core/factory/downloader.py +0 -73
  217. novel_downloader/core/factory/exporter.py +0 -58
  218. novel_downloader/core/factory/fetcher.py +0 -96
  219. novel_downloader/core/factory/parser.py +0 -86
  220. novel_downloader/core/fetchers/base/__init__.py +0 -14
  221. novel_downloader/core/fetchers/base/browser.py +0 -403
  222. novel_downloader/core/fetchers/biquge/__init__.py +0 -14
  223. novel_downloader/core/fetchers/common/__init__.py +0 -14
  224. novel_downloader/core/fetchers/esjzone/__init__.py +0 -14
  225. novel_downloader/core/fetchers/esjzone/browser.py +0 -204
  226. novel_downloader/core/fetchers/linovelib/__init__.py +0 -14
  227. novel_downloader/core/fetchers/linovelib/browser.py +0 -193
  228. novel_downloader/core/fetchers/qianbi/__init__.py +0 -14
  229. novel_downloader/core/fetchers/qidian/__init__.py +0 -14
  230. novel_downloader/core/fetchers/qidian/browser.py +0 -318
  231. novel_downloader/core/fetchers/sfacg/__init__.py +0 -14
  232. novel_downloader/core/fetchers/sfacg/browser.py +0 -189
  233. novel_downloader/core/fetchers/yamibo/__init__.py +0 -14
  234. novel_downloader/core/fetchers/yamibo/browser.py +0 -229
  235. novel_downloader/core/parsers/biquge/__init__.py +0 -10
  236. novel_downloader/core/parsers/biquge/main_parser.py +0 -134
  237. novel_downloader/core/parsers/common/__init__.py +0 -13
  238. novel_downloader/core/parsers/common/helper.py +0 -323
  239. novel_downloader/core/parsers/common/main_parser.py +0 -106
  240. novel_downloader/core/parsers/esjzone/__init__.py +0 -10
  241. novel_downloader/core/parsers/linovelib/__init__.py +0 -10
  242. novel_downloader/core/parsers/qianbi/__init__.py +0 -10
  243. novel_downloader/core/parsers/sfacg/__init__.py +0 -10
  244. novel_downloader/core/parsers/yamibo/__init__.py +0 -10
  245. novel_downloader/core/parsers/yamibo/main_parser.py +0 -194
  246. novel_downloader/models/browser.py +0 -21
  247. novel_downloader/models/chapter.py +0 -25
  248. novel_downloader/models/site_rules.py +0 -99
  249. novel_downloader/models/tasks.py +0 -33
  250. novel_downloader/models/types.py +0 -15
  251. novel_downloader/resources/css_styles/volume-intro.css +0 -56
  252. novel_downloader/resources/json/replace_word_map.json +0 -4
  253. novel_downloader/resources/text/blacklist.txt +0 -22
  254. novel_downloader/tui/__init__.py +0 -7
  255. novel_downloader/tui/app.py +0 -32
  256. novel_downloader/tui/main.py +0 -17
  257. novel_downloader/tui/screens/__init__.py +0 -14
  258. novel_downloader/tui/screens/home.py +0 -198
  259. novel_downloader/tui/screens/login.py +0 -74
  260. novel_downloader/tui/styles/home_layout.tcss +0 -79
  261. novel_downloader/tui/widgets/richlog_handler.py +0 -24
  262. novel_downloader/utils/cache.py +0 -24
  263. novel_downloader/utils/fontocr/__init__.py +0 -22
  264. novel_downloader/utils/fontocr/model_loader.py +0 -69
  265. novel_downloader/utils/fontocr/ocr_v1.py +0 -303
  266. novel_downloader/utils/fontocr/ocr_v2.py +0 -752
  267. novel_downloader/utils/hash_store.py +0 -279
  268. novel_downloader/utils/hash_utils.py +0 -103
  269. novel_downloader/utils/text_utils/chapter_formatting.py +0 -46
  270. novel_downloader/utils/text_utils/font_mapping.py +0 -28
  271. novel_downloader/utils/text_utils/text_cleaning.py +0 -107
  272. novel_downloader-1.4.5.dist-info/METADATA +0 -196
  273. novel_downloader-1.4.5.dist-info/RECORD +0 -165
  274. {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/WHEEL +0 -0
  275. {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/licenses/LICENSE +0 -0
  276. {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/top_level.txt +0 -0
@@ -3,1356 +3,213 @@
3
3
  novel_downloader.core.exporters.epub_util
4
4
  -----------------------------------------
5
5
 
6
+ Utilities for preparing and formatting chapter HTML for EPUB exports.
6
7
  """
7
8
 
8
- import zipfile
9
- from collections.abc import Sequence
10
- from contextlib import suppress
11
- from datetime import UTC, datetime
9
+ __all__ = [
10
+ "download_cover",
11
+ "prepare_builder",
12
+ "finalize_export",
13
+ "inline_remote_images",
14
+ "remove_all_images",
15
+ "build_epub_chapter",
16
+ ]
17
+
18
+ import logging
19
+ import re
20
+ from html import escape
12
21
  from pathlib import Path
13
- from typing import NotRequired, Self, TypedDict
14
- from zipfile import ZIP_DEFLATED, ZIP_STORED
15
-
16
- from lxml import etree, html
17
- from lxml.etree import _Element
18
22
 
23
+ from novel_downloader.utils import download, sanitize_filename
19
24
  from novel_downloader.utils.constants import (
20
- CSS_VOLUME_INTRO_PATH,
21
- VOLUME_BORDER_IMAGE_PATH,
25
+ CSS_MAIN_PATH,
26
+ DEFAULT_HEADERS,
27
+ DEFAULT_IMAGE_SUFFIX,
22
28
  )
29
+ from novel_downloader.utils.epub import EpubBuilder, StyleSheet
23
30
 
24
- _ROOT_PATH = "OEBPS"
25
- _IMAGE_FOLDER = "Images"
26
- _TEXT_FOLDER = "Text"
27
- _CSS_FOLDER = "Styles"
28
-
29
- _IMAGE_MEDIA_TYPES: dict[str, str] = {
30
- "png": "image/png",
31
- "jpg": "image/jpeg",
32
- "jpeg": "image/jpeg",
33
- "gif": "image/gif",
34
- "svg": "image/svg+xml",
35
- "webp": "image/webp",
36
- }
37
-
38
- _CONTAINER_TEMPLATE = """<?xml version="1.0" encoding="UTF-8"?>
39
- <container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
40
- <rootfiles>
41
- <rootfile full-path="{root_path}/content.opf"
42
- media-type="application/oebps-package+xml"/>
43
- </rootfiles>
44
- </container>"""
45
-
46
- _COVER_IMAGE_TEMPLATE = (
47
- f'<div style="text-align: center; margin: 0; padding: 0;">'
48
- f'<img src="../{_IMAGE_FOLDER}/cover.{{ext}}" alt="cover" '
49
- f'style="max-width: 100%; height: auto;" />'
50
- f"</div>"
31
+ _IMAGE_WRAPPER = '<div class="duokan-image-single illus">{img}</div>'
32
+ _IMG_TAG_RE = re.compile(r"<img[^>]*>", re.IGNORECASE)
33
+ _IMG_SRC_RE = re.compile(
34
+ r'<img[^>]*\bsrc=["\'](https?://[^"\']+)["\'][^>]*>',
35
+ re.IGNORECASE,
51
36
  )
52
37
 
53
38
 
54
- class ChapterEntry(TypedDict):
55
- id: str
56
- label: str
57
- src: str
58
- chapters: NotRequired[list["ChapterEntry"]]
59
-
60
-
61
- class VolumeEntry(TypedDict):
62
- id: str
63
- label: str
64
- src: str
65
- chapters: list[ChapterEntry]
66
-
67
-
68
- class ManifestEntry(TypedDict):
69
- id: str
70
- href: str
71
- media_type: str
72
- properties: str | None
73
-
74
-
75
- class SpineEntry(TypedDict):
76
- idref: str
77
- properties: str | None
78
-
79
-
80
- class NavPoint:
81
- def __init__(
82
- self,
83
- id: str,
84
- label: str,
85
- src: str,
86
- children: list[Self] | None = None,
87
- ):
88
- self._id = id
89
- self._label = label
90
- self._src = src
91
- self._children = children or []
92
-
93
- def add_child(self, point: Self) -> None:
94
- """
95
- Append a child nav point under this one.
96
- """
97
- self._children.append(point)
98
-
99
- @property
100
- def id(self) -> str:
101
- """
102
- Unique identifier for this navigation point.
103
- """
104
- return self._id
105
-
106
- @property
107
- def label(self) -> str:
108
- """
109
- Display text shown in the TOC for this point.
110
- """
111
- return self._label
112
-
113
- @property
114
- def src(self) -> str:
115
- """
116
- Path to the target content file (e.g., chapter XHTML).
117
- """
118
- return self._src
119
-
120
- @property
121
- def children(self) -> list[Self]:
122
- """
123
- Nested navigation points under this one, if any.
124
- """
125
- return self._children
126
-
127
-
128
- class EpubResource:
129
- def __init__(
130
- self,
131
- id: str,
132
- filename: str,
133
- media_type: str,
134
- ):
135
- self._id = id
136
- self._filename = filename
137
- self._media_type = media_type
138
-
139
- @property
140
- def id(self) -> str:
141
- return self._id
142
-
143
- @property
144
- def filename(self) -> str:
145
- return self._filename
146
-
147
- @property
148
- def media_type(self) -> str:
149
- return self._media_type
150
-
151
-
152
- class StyleSheet(EpubResource):
153
- def __init__(
154
- self,
155
- id: str,
156
- content: str,
157
- filename: str = "style.css",
158
- ):
159
- super().__init__(
160
- id=id,
161
- filename=filename,
162
- media_type="text/css",
163
- )
164
- self._content = content
165
-
166
- @property
167
- def content(self) -> str:
168
- return self._content
169
-
170
-
171
- class ImageResource(EpubResource):
172
- def __init__(
173
- self,
174
- id: str,
175
- data: bytes,
176
- media_type: str,
177
- filename: str,
178
- ):
179
- super().__init__(
180
- id=id,
181
- filename=filename,
182
- media_type=media_type,
183
- )
184
- self._data = data
185
-
186
- @property
187
- def data(self) -> bytes:
188
- return self._data
189
-
190
-
191
- class NavDocument(EpubResource):
192
- def __init__(
193
- self,
194
- title: str = "未命名",
195
- language: str = "zh-CN",
196
- id: str = "nav",
197
- filename: str = "nav.xhtml",
198
- ):
199
- super().__init__(
200
- id=id,
201
- filename=filename,
202
- media_type="application/xhtml+xml",
203
- )
204
- self._title = title
205
- self._language = language
206
- self._content_items: list[ChapterEntry | VolumeEntry] = []
207
-
208
- def add_chapter(
209
- self,
210
- id: str,
211
- label: str,
212
- src: str,
213
- ) -> None:
214
- """
215
- Add a top-level chapter entry to the navigation structure.
216
-
217
- :param id: The unique ID for the chapter.
218
- :param label: The display title for the chapter.
219
- :param src: The href target for the chapter's XHTML file.
220
- """
221
- self._content_items.append(
222
- {
223
- "id": id,
224
- "label": label,
225
- "src": src,
226
- }
227
- )
228
-
229
- def add_volume(
230
- self,
231
- id: str,
232
- label: str,
233
- src: str,
234
- chapters: list[ChapterEntry],
235
- ) -> None:
236
- """
237
- Add a volume entry with nested chapters to the navigation.
238
-
239
- :param id: The unique ID for the volume.
240
- :param label: The display title for the volume.
241
- :param src: The href target for the volume's intro XHTML file.
242
- :param chapters: A list of chapter entries under this volume.
243
- """
244
- self._content_items.append(
245
- {
246
- "id": id,
247
- "label": label,
248
- "src": src,
249
- "chapters": chapters,
250
- }
251
- )
252
-
253
- @property
254
- def title(self) -> str:
255
- return self._title
256
-
257
- @property
258
- def language(self) -> str:
259
- return self._language
260
-
261
- @property
262
- def content_items(self) -> list[ChapterEntry | VolumeEntry]:
263
- return self._content_items
264
-
265
-
266
- class NCXDocument(EpubResource):
267
- def __init__(
268
- self,
269
- title: str = "未命名",
270
- uid: str = "",
271
- id: str = "ncx",
272
- filename: str = "toc.ncx",
273
- ):
274
- super().__init__(
275
- id=id,
276
- filename=filename,
277
- media_type="application/x-dtbncx+xml",
278
- )
279
- self._title = title
280
- self._uid = uid
281
- self._nav_points: list[NavPoint] = []
282
-
283
- def add_chapter(
284
- self,
285
- id: str,
286
- label: str,
287
- src: str,
288
- ) -> None:
289
- """
290
- Add a single flat chapter entry to the NCX nav map.
291
- """
292
- self._nav_points.append(NavPoint(id=id, label=label, src=src))
293
-
294
- def add_volume(
295
- self,
296
- id: str,
297
- label: str,
298
- src: str,
299
- chapters: list[ChapterEntry],
300
- ) -> None:
301
- """
302
- Add a volume with nested chapters to the NCX nav map.
303
- """
304
- children = [
305
- NavPoint(id=c["id"], label=c["label"], src=c["src"]) for c in chapters
306
- ]
307
- self._nav_points.append(
308
- NavPoint(id=id, label=label, src=src, children=children)
309
- )
310
-
311
- @property
312
- def nav_points(self) -> list[NavPoint]:
313
- return self._nav_points
314
-
315
- @property
316
- def title(self) -> str:
317
- return self._title
318
-
319
- @property
320
- def uid(self) -> str:
321
- return self._uid
322
-
323
-
324
- class OpfDocument(EpubResource):
325
- def __init__(
326
- self,
327
- title: str,
328
- author: str = "",
329
- description: str = "",
330
- uid: str = "",
331
- subject: list[str] | None = None,
332
- language: str = "zh-CN",
333
- id: str = "opf",
334
- filename: str = "content.opf",
335
- ):
336
- super().__init__(
337
- id=id,
338
- filename=filename,
339
- media_type="application/oebps-package+xml",
340
- )
341
- self._title = title
342
- self._author = author
343
- self._description = description
344
- self._uid = uid
345
- self._language = language
346
- self._include_cover = False
347
- self._subject: list[str] = subject or []
348
- self._manifest: list[ManifestEntry] = []
349
- self._spine: list[SpineEntry] = []
350
-
351
- def add_manifest_item(
352
- self,
353
- id: str,
354
- href: str,
355
- media_type: str,
356
- properties: str | None = None,
357
- ) -> None:
358
- self._manifest.append(
359
- {
360
- "id": id,
361
- "href": href,
362
- "media_type": media_type,
363
- "properties": properties,
364
- }
365
- )
366
-
367
- def add_spine_item(
368
- self,
369
- idref: str,
370
- properties: str | None = None,
371
- ) -> None:
372
- self._spine.append({"idref": idref, "properties": properties})
373
-
374
- def set_subject(self, subject: list[str]) -> None:
375
- self._subject = subject
376
-
377
- @property
378
- def title(self) -> str:
379
- """
380
- Book title metadata.
381
- """
382
- return self._title
383
-
384
- @property
385
- def author(self) -> str:
386
- """
387
- Author metadata.
388
- """
389
- return self._author
390
-
391
- @property
392
- def description(self) -> str:
393
- """
394
- Book description metadata.
395
- """
396
- return self._description
397
-
398
- @property
399
- def subject(self) -> list[str]:
400
- return self._subject
401
-
402
- @property
403
- def uid(self) -> str:
404
- """
405
- Unique identifier for the book, used in dc:identifier and NCX UID.
406
- """
407
- return self._uid
408
-
409
- @property
410
- def language(self) -> str:
411
- return self._language
412
-
413
- @property
414
- def include_cover(self) -> bool:
415
- """
416
- Whether to include a cover item in the <guide> section.
417
- """
418
- return self._include_cover
419
-
420
- @include_cover.setter
421
- def include_cover(self, value: bool) -> None:
422
- self._include_cover = value
423
-
424
- @property
425
- def manifest(self) -> list[ManifestEntry]:
426
- """
427
- All resources used by the book (XHTML, CSS, images, nav, etc.).
428
- """
429
- return self._manifest
430
-
431
- @property
432
- def spine(self) -> list[SpineEntry]:
433
- """
434
- Defines the reading order of the book's contents.
435
- """
436
- return self._spine
437
-
438
-
439
- class Chapter(EpubResource):
440
- def __init__(
441
- self,
442
- id: str,
443
- title: str,
444
- content: str,
445
- css: list[StyleSheet] | None = None,
446
- filename: str | None = None,
447
- ):
448
- filename = filename or f"{id}.xhtml"
449
- super().__init__(
450
- id=id,
451
- filename=filename,
452
- media_type="application/xhtml+xml",
453
- )
454
- self._title = title
455
- self._content = content
456
- self._css = css or []
457
-
458
- @property
459
- def title(self) -> str:
460
- return self._title
461
-
462
- def to_xhtml(self, lang: str = "zh-CN") -> str:
463
- # Prepare namespace map
464
- NSMAP = {
465
- None: "http://www.w3.org/1999/xhtml",
466
- "epub": "http://www.idpf.org/2007/ops",
467
- }
468
- # Create <html> root with xml:lang and lang
469
- html_el = etree.Element(
470
- "{http://www.w3.org/1999/xhtml}html",
471
- nsmap=NSMAP,
472
- attrib={
473
- "{http://www.w3.org/XML/1998/namespace}lang": lang,
474
- "lang": lang,
475
- },
476
- )
477
-
478
- # Build <head>
479
- head = etree.SubElement(html_el, "head")
480
- title = etree.SubElement(head, "title")
481
- title.text = self._title
482
-
483
- # Add stylesheet links
484
- for css in self._css:
485
- etree.SubElement(
486
- head,
487
- "link",
488
- attrib={
489
- "href": f"../{_CSS_FOLDER}/{css.filename}",
490
- "rel": "stylesheet",
491
- "type": css.media_type,
492
- },
493
- )
494
-
495
- # Build <body>
496
- body = etree.SubElement(html_el, "body")
497
- wrapper = html.fromstring(
498
- f'<div xmlns="http://www.w3.org/1999/xhtml">{self._content}</div>'
499
- )
500
- for node in wrapper:
501
- body.append(node)
502
-
503
- xhtml_bytes: bytes = etree.tostring(
504
- html_el,
505
- pretty_print=True,
506
- xml_declaration=False, # we'll do it ourselves
507
- encoding="utf-8",
508
- method="xml",
509
- )
510
- doctype = (
511
- '<?xml version="1.0" encoding="utf-8"?>\n'
512
- "<!DOCTYPE html PUBLIC "
513
- '"-//W3C//DTD XHTML 1.1//EN" '
514
- '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">\n'
515
- )
516
- return doctype + xhtml_bytes.decode("utf-8")
517
-
518
-
519
- class Volume:
520
- def __init__(
521
- self,
522
- id: str,
523
- title: str,
524
- intro: str = "",
525
- cover: Path | None = None,
526
- chapters: list[Chapter] | None = None,
527
- ):
528
- self._id = id
529
- self._title = title
530
- self._intro = intro
531
- self._cover = cover
532
- self._chapters = chapters or []
533
-
534
- def add_chapter(self, chapter: Chapter) -> None:
535
- """
536
- Append a chapter to this volume.
537
- """
538
- self._chapters.append(chapter)
539
-
540
- @property
541
- def id(self) -> str:
542
- return self._id
543
-
544
- @property
545
- def title(self) -> str:
546
- return self._title
547
-
548
- @property
549
- def intro(self) -> str:
550
- """
551
- Optional volume description or introduction text.
552
- """
553
- return self._intro
554
-
555
- @property
556
- def cover(self) -> Path | None:
557
- """
558
- Optional volume-specific cover image.
559
- """
560
- return self._cover
561
-
562
- @property
563
- def chapters(self) -> list[Chapter]:
564
- return self._chapters
565
-
566
-
567
- class Book:
568
- def __init__(
569
- self,
570
- title: str,
571
- author: str = "",
572
- description: str = "",
573
- cover_path: Path | None = None,
574
- subject: list[str] | None = None,
575
- serial_status: str = "",
576
- word_count: str = "",
577
- uid: str = "",
578
- language: str = "zh-CN",
579
- ):
580
- self._title = title
581
- self._author = author
582
- self._description = description
583
- self._language = language
584
-
585
- self._subject: list[str] = subject or []
586
- self._serial_status = serial_status
587
- self._word_count = word_count
588
-
589
- self._content_items: list[Chapter] = []
590
- self._images: list[ImageResource] = []
591
- self._img_set: set[Path] = set()
592
- self._stylesheets: list[StyleSheet] = []
593
- self._vol_idx = 0
594
-
595
- self._nav = NavDocument(title=title, language=language)
596
- self._ncx = NCXDocument(title=title, uid=uid)
597
- self._opf = OpfDocument(
598
- title=title,
599
- author=author,
600
- description=description,
601
- uid=uid,
602
- subject=subject,
603
- language=language,
604
- )
605
- self._opf.add_manifest_item(
606
- id="ncx",
607
- href="toc.ncx",
608
- media_type="application/x-dtbncx+xml",
609
- )
610
- self._opf.add_manifest_item(
611
- id="nav",
612
- href="nav.xhtml",
613
- media_type="application/xhtml+xml",
614
- properties="nav",
615
- )
616
-
617
- self._vol_intro_css = StyleSheet(
618
- id="volume_style",
619
- content=CSS_VOLUME_INTRO_PATH.read_text(encoding="utf-8"),
620
- filename="volume_style.css",
621
- )
622
- with suppress(FileNotFoundError):
623
- self._images.append(
624
- ImageResource(
625
- id="img-volume-border",
626
- data=VOLUME_BORDER_IMAGE_PATH.read_bytes(),
627
- media_type="image/png",
628
- filename="volume_border.png",
629
- )
630
- )
631
- self._opf.add_manifest_item(
632
- id="img-volume-border",
633
- href=f"{_IMAGE_FOLDER}/volume_border.png",
634
- media_type="image/png",
635
- )
636
- self._opf.add_manifest_item(
637
- id="volume_style",
638
- href=f"{_CSS_FOLDER}/volume_style.css",
639
- media_type="text/css",
640
- )
641
- self._stylesheets.append(self._vol_intro_css)
642
-
643
- if cover_path and cover_path.exists() and cover_path.is_file():
644
- ext = cover_path.suffix.lower().lstrip(".")
645
- media_type = _IMAGE_MEDIA_TYPES.get(ext)
646
- if media_type:
647
- data = cover_path.read_bytes()
648
-
649
- # create the CoverImage
650
- self._images.append(
651
- ImageResource(
652
- id="cover-img",
653
- data=data,
654
- media_type=media_type,
655
- filename=f"cover.{ext}",
656
- )
657
- )
658
- self._content_items.append(
659
- Chapter(
660
- id="cover",
661
- title="Cover",
662
- content=_COVER_IMAGE_TEMPLATE.format(ext=ext),
663
- filename="cover.xhtml",
664
- )
665
- )
666
-
667
- self._opf.add_manifest_item(
668
- id="cover-img",
669
- href=f"{_IMAGE_FOLDER}/cover.{ext}",
670
- media_type=media_type,
671
- properties="cover-image",
672
- )
673
-
674
- self._opf.add_manifest_item(
675
- id="cover",
676
- href=f"{_TEXT_FOLDER}/cover.xhtml",
677
- media_type="application/xhtml+xml",
678
- )
679
- self._opf.add_spine_item(
680
- idref="cover",
681
- properties="duokan-page-fullscreen",
682
- )
683
-
684
- self._opf.include_cover = True
685
-
686
- # intro
687
- intro_html = _gene_book_intro(
688
- book_name=title,
689
- author=author,
690
- serial_status=serial_status,
691
- word_count=word_count,
692
- summary=description,
693
- )
694
- self._content_items.append(
695
- Chapter(
696
- id="intro",
697
- title="书籍简介",
698
- content=intro_html,
699
- filename="intro.xhtml",
700
- )
701
- )
702
- self._opf.add_manifest_item(
703
- id="intro",
704
- href=f"{_TEXT_FOLDER}/intro.xhtml",
705
- media_type="application/xhtml+xml",
706
- )
707
- self._opf.add_spine_item(
708
- idref="intro",
709
- )
710
- self._nav.add_chapter(
711
- id="intro",
712
- label="书籍简介",
713
- src=f"{_TEXT_FOLDER}/intro.xhtml",
714
- )
715
- self._ncx.add_chapter(
716
- id="intro",
717
- label="书籍简介",
718
- src=f"{_TEXT_FOLDER}/intro.xhtml",
719
- )
720
-
721
- def export(self, output_path: str | Path) -> bool:
722
- """
723
- Build and export the current book as an EPUB file.
724
-
725
- :param output_path: Path to save the final .epub file.
726
- """
727
- return _build_epub(
728
- book=self,
729
- output_path=Path(output_path),
730
- )
731
-
732
- @property
733
- def content_items(self) -> list[Chapter]:
734
- """
735
- Ordered list of contents.
736
- """
737
- return self._content_items
738
-
739
- @property
740
- def images(self) -> list[ImageResource]:
741
- return self._images
742
-
743
- @property
744
- def stylesheets(self) -> list[StyleSheet]:
745
- return self._stylesheets
746
-
747
- @property
748
- def nav(self) -> NavDocument:
749
- return self._nav
750
-
751
- @property
752
- def ncx(self) -> NCXDocument:
753
- return self._ncx
754
-
755
- @property
756
- def opf(self) -> OpfDocument:
757
- return self._opf
758
-
759
- def add_chapter(self, chapter: Chapter) -> None:
760
- self._ncx.add_chapter(
761
- id=chapter.id,
762
- label=chapter.title,
763
- src=f"{_TEXT_FOLDER}/{chapter.filename}",
764
- )
765
- self._nav.add_chapter(
766
- id=chapter.id,
767
- label=chapter.title,
768
- src=f"{_TEXT_FOLDER}/{chapter.filename}",
769
- )
770
- self._opf.add_manifest_item(
771
- id=chapter.id,
772
- href=f"{_TEXT_FOLDER}/{chapter.filename}",
773
- media_type=chapter.media_type,
774
- )
775
- self._opf.add_spine_item(idref=chapter.id)
776
-
777
- self._content_items.append(chapter)
778
-
779
- def add_volume(self, volume: Volume) -> None:
780
- if volume.cover:
781
- cover = (
782
- f'<img class="width100" src="../{_IMAGE_FOLDER}/{volume.cover.name}"/>'
783
- )
784
- self._content_items.append(
785
- Chapter(
786
- id=f"vol_{self._vol_idx}_cover",
787
- title=volume.title,
788
- content=cover,
789
- filename=f"vol_{self._vol_idx}_cover.xhtml",
790
- )
791
- )
792
- self.add_image(volume.cover)
793
- self._opf.add_manifest_item(
794
- id=f"vol_{self._vol_idx}_cover",
795
- href=f"{_TEXT_FOLDER}/vol_{self._vol_idx}_cover.xhtml",
796
- media_type="application/xhtml+xml",
797
- )
798
- self._opf.add_spine_item(
799
- idref=f"vol_{self._vol_idx}_cover",
800
- properties="duokan-page-fullscreen",
801
- )
802
-
803
- self._content_items.append(
804
- Chapter(
805
- id=f"vol_{self._vol_idx}",
806
- title=volume.title,
807
- content=_create_volume_intro(volume.title, volume.intro),
808
- filename=f"vol_{self._vol_idx}.xhtml",
809
- css=[self._vol_intro_css],
810
- )
811
- )
812
- self._opf.add_manifest_item(
813
- id=f"vol_{self._vol_idx}",
814
- href=f"{_TEXT_FOLDER}/vol_{self._vol_idx}.xhtml",
815
- media_type="application/xhtml+xml",
816
- )
817
- self._opf.add_spine_item(
818
- idref=f"vol_{self._vol_idx}",
819
- )
820
- vol_chapters: list[ChapterEntry] = []
821
- for chap in volume.chapters:
822
- chap_id = chap.id
823
- chap_label = chap.title
824
- chap_src = f"{_TEXT_FOLDER}/{chap.filename}"
825
- vol_chapters.append(
826
- {
827
- "id": chap_id,
828
- "label": chap_label,
829
- "src": chap_src,
830
- }
831
- )
832
- self._opf.add_manifest_item(
833
- id=chap_id,
834
- href=chap_src,
835
- media_type=chap.media_type,
836
- )
837
- self._opf.add_spine_item(
838
- idref=chap_id,
839
- )
840
- self._ncx.add_volume(
841
- id=f"vol_{self._vol_idx}",
842
- label=volume.title,
843
- src=f"{_TEXT_FOLDER}/vol_{self._vol_idx}.xhtml",
844
- chapters=vol_chapters,
845
- )
846
- self._nav.add_volume(
847
- id=f"vol_{self._vol_idx}",
848
- label=volume.title,
849
- src=f"{_TEXT_FOLDER}/vol_{self._vol_idx}.xhtml",
850
- chapters=vol_chapters,
851
- )
852
- self._content_items.extend(volume.chapters)
853
- self._vol_idx += 1
854
-
855
- def add_image(self, image_path: Path) -> bool:
856
- if image_path in self._img_set:
857
- return False
858
- self._img_set.add(image_path)
859
- if not image_path.exists() or not image_path.is_file():
860
- return False
861
-
862
- ext = image_path.suffix.lower().lstrip(".")
863
- media_type = _IMAGE_MEDIA_TYPES.get(ext)
864
- if media_type is None:
865
- return False
866
-
867
- filename = image_path.name
868
- resource_id = f"img_{filename}"
869
- data = image_path.read_bytes()
870
- href = f"{_IMAGE_FOLDER}/{filename}"
871
-
872
- img_res = ImageResource(
873
- id=resource_id,
874
- data=data,
875
- media_type=media_type,
876
- filename=filename,
877
- )
878
- self._images.append(img_res)
879
-
880
- self._opf.add_manifest_item(
881
- id=resource_id,
882
- href=href,
883
- media_type=media_type,
884
- )
885
-
886
- return True
887
-
888
- def add_stylesheet(self, css: StyleSheet) -> None:
889
- self._stylesheets.append(css)
890
- self._opf.add_manifest_item(
891
- id=css.id,
892
- href=f"{_CSS_FOLDER}/{css.filename}",
893
- media_type=css.media_type,
894
- )
895
-
896
-
897
- def generate_container_xml(
898
- root_path: str = _ROOT_PATH,
899
- ) -> str:
900
- """
901
- Generate the XML content for META-INF/container.xml in an EPUB archive.
902
-
903
- :param root_path: The folder where the OPF file is stored.
904
- :return: A string containing the full XML for container.xml.
905
- """
906
- return _CONTAINER_TEMPLATE.format(root_path=root_path)
907
-
908
-
909
- def generate_nav_xhtml(nav: NavDocument) -> str:
910
- """
911
- Generate the XHTML content for nav.xhtml based on the NavDocument.
912
-
913
- :param nav: A NavDocument instance containing navigation data.
914
- :return: A string containing the full XHTML for nav.xhtml.
915
- """
916
- XHTML_NS = "http://www.w3.org/1999/xhtml"
917
- EPUB_NS = "http://www.idpf.org/2007/ops"
918
- XML_NS = "http://www.w3.org/XML/1998/namespace"
919
-
920
- nsmap_root = {
921
- None: XHTML_NS,
922
- "epub": EPUB_NS,
923
- }
924
-
925
- html = etree.Element(
926
- f"{{{XHTML_NS}}}html",
927
- nsmap=nsmap_root,
928
- lang=nav.language,
929
- )
930
- # xml:lang
931
- html.set(f"{{{XML_NS}}}lang", nav.language)
932
-
933
- # <head><title>
934
- head = etree.SubElement(html, f"{{{XHTML_NS}}}head")
935
- title_el = etree.SubElement(head, f"{{{XHTML_NS}}}title")
936
- title_el.text = nav.title
937
-
938
- # <body><nav epub:type="toc" id="..." role="doc-toc">
939
- body = etree.SubElement(html, f"{{{XHTML_NS}}}body")
940
- nav_el = etree.SubElement(
941
- body,
942
- f"{{{XHTML_NS}}}nav",
943
- {
944
- f"{{{EPUB_NS}}}type": "toc",
945
- "id": nav.id,
946
- "role": "doc-toc",
947
- },
948
- )
949
-
950
- h2 = etree.SubElement(nav_el, f"{{{XHTML_NS}}}h2")
951
- h2.text = nav.title
952
-
953
- # <ol> ... </ol>
954
- def _add_items(
955
- parent_ol: _Element,
956
- items: Sequence[ChapterEntry | VolumeEntry],
957
- ) -> None:
958
- for item in items:
959
- li = etree.SubElement(parent_ol, f"{{{XHTML_NS}}}li")
960
- a = etree.SubElement(li, f"{{{XHTML_NS}}}a", href=item["src"])
961
- a.text = item["label"]
962
- if "chapters" in item and item["chapters"]:
963
- sub_ol = etree.SubElement(li, f"{{{XHTML_NS}}}ol")
964
- _add_items(sub_ol, item["chapters"])
965
-
966
- top_ol = etree.SubElement(nav_el, f"{{{XHTML_NS}}}ol")
967
- _add_items(top_ol, nav.content_items)
968
-
969
- xml_bytes: bytes = etree.tostring(
970
- html,
971
- xml_declaration=True,
972
- encoding="utf-8",
973
- pretty_print=True,
974
- doctype="<!DOCTYPE html>",
975
- )
976
- return xml_bytes.decode("utf-8")
977
-
978
-
979
- def generate_ncx_xml(ncx: NCXDocument) -> str:
980
- """
981
- Generate the XML content for toc.ncx used in EPUB 2 navigation.
982
-
983
- :param ncx: An NCXDocument instance representing the table of contents.
984
- :return: A string containing the full NCX XML document.
985
- """
986
- nsmap_root = {None: "http://www.daisy.org/z3986/2005/ncx/"}
987
- root = etree.Element("ncx", nsmap=nsmap_root, version="2005-1")
988
-
989
- # head
990
- head = etree.SubElement(root, "head")
991
- etree.SubElement(head, "meta", name="dtb:uid", content=ncx.uid)
992
-
993
- def _depth(points: list[NavPoint]) -> int:
994
- if not points:
995
- return 0
996
- return 1 + max(_depth(p.children) for p in points)
997
-
998
- depth = _depth(ncx.nav_points)
999
- etree.SubElement(head, "meta", name="dtb:depth", content=str(depth))
1000
- etree.SubElement(head, "meta", name="dtb:totalPageCount", content="0")
1001
- etree.SubElement(head, "meta", name="dtb:maxPageNumber", content="0")
1002
-
1003
- # docTitle
1004
- docTitle = etree.SubElement(root, "docTitle")
1005
- text = etree.SubElement(docTitle, "text")
1006
- text.text = ncx.title
1007
-
1008
- # navMap
1009
- navMap = etree.SubElement(root, "navMap")
1010
- play_order = 1
1011
-
1012
- def _add_navpoint(point: NavPoint, parent: _Element) -> None:
1013
- nonlocal play_order
1014
- np = etree.SubElement(
1015
- parent,
1016
- "navPoint",
1017
- id=point.id,
1018
- playOrder=str(play_order),
1019
- )
1020
- play_order += 1
1021
-
1022
- navLabel = etree.SubElement(np, "navLabel")
1023
- lbl_text = etree.SubElement(navLabel, "text")
1024
- lbl_text.text = point.label
1025
-
1026
- etree.SubElement(np, "content", src=point.src)
1027
-
1028
- for child in point.children:
1029
- _add_navpoint(child, np)
1030
-
1031
- for pt in ncx.nav_points:
1032
- _add_navpoint(pt, navMap)
1033
-
1034
- xml_bytes: bytes = etree.tostring(
1035
- root,
1036
- xml_declaration=True,
1037
- encoding="utf-8",
1038
- pretty_print=True,
1039
- )
1040
- return xml_bytes.decode("utf-8")
1041
-
1042
-
1043
- def generate_opf_xml(opf: OpfDocument) -> str:
1044
- """
1045
- Generate the content.opf XML, which defines metadata, manifest, and spine.
1046
-
1047
- This function outputs a complete OPF package document that includes:
1048
- - <metadata>: title, author, language, identifiers, etc.
1049
- - <manifest>: all resource entries
1050
- - <spine>: the reading order of the content
1051
- - <guide>: optional references like cover page
1052
-
1053
- :param opf: An OpfDocument instance with metadata and content listings.
1054
- :return: A string containing the full OPF XML content.
1055
- """
1056
- OPF_NS = "http://www.idpf.org/2007/opf"
1057
- DC_NS = "http://purl.org/dc/elements/1.1/"
1058
- # package root
1059
- nsmap_root = {None: OPF_NS}
1060
- meta_nsmap = {
1061
- "dc": DC_NS,
1062
- "opf": OPF_NS,
1063
- }
1064
-
1065
- # <package>
1066
- pkg_attrib = {
1067
- "version": "3.0",
1068
- "unique-identifier": "id",
1069
- "prefix": "rendition: http://www.idpf.org/vocab/rendition/#",
1070
- }
1071
- package = etree.Element(f"{{{OPF_NS}}}package", attrib=pkg_attrib, nsmap=nsmap_root)
1072
-
1073
- # <metadata>
1074
- metadata = etree.SubElement(package, f"{{{OPF_NS}}}metadata", nsmap=meta_nsmap)
1075
-
1076
- now = datetime.now(UTC).replace(microsecond=0).isoformat()
1077
- m = etree.SubElement(
1078
- metadata,
1079
- f"{{{OPF_NS}}}meta",
1080
- attrib={"property": "dcterms:modified"},
1081
- )
1082
- m.text = now
1083
-
1084
- dc_id = etree.SubElement(
1085
- metadata,
1086
- f"{{{DC_NS}}}identifier",
1087
- id="id",
1088
- )
1089
- dc_id.text = opf.uid
1090
-
1091
- dc_title = etree.SubElement(
1092
- metadata,
1093
- f"{{{DC_NS}}}title",
1094
- )
1095
- dc_title.text = opf.title
1096
-
1097
- dc_lang = etree.SubElement(
1098
- metadata,
1099
- f"{{{DC_NS}}}language",
1100
- )
1101
- dc_lang.text = opf.language
1102
-
1103
- if opf.author:
1104
- dc_creator = etree.SubElement(
1105
- metadata,
1106
- f"{{{DC_NS}}}creator",
1107
- id="creator",
1108
- )
1109
- dc_creator.text = opf.author
1110
-
1111
- if opf.description:
1112
- dc_desc = etree.SubElement(
1113
- metadata,
1114
- f"{{{DC_NS}}}description",
1115
- )
1116
- dc_desc.text = opf.description
1117
-
1118
- if opf.subject:
1119
- subj_text = ",".join(opf.subject)
1120
- dc_subject = etree.SubElement(
1121
- metadata,
1122
- f"{{{DC_NS}}}subject",
1123
- )
1124
- dc_subject.text = subj_text
1125
-
1126
- if opf.include_cover:
1127
- cover = next(
1128
- (m for m in opf.manifest if m["properties"] == "cover-image"),
1129
- None,
1130
- )
1131
- if cover:
1132
- etree.SubElement(
1133
- metadata,
1134
- f"{{{OPF_NS}}}meta",
1135
- name="cover",
1136
- content=cover["id"],
1137
- )
1138
-
1139
- # <manifest>
1140
- manifest = etree.SubElement(package, f"{{{OPF_NS}}}manifest")
1141
- for item in opf.manifest:
1142
- attrs = {
1143
- "href": item["href"],
1144
- "id": item["id"],
1145
- "media-type": item["media_type"],
1146
- }
1147
- if item["properties"]:
1148
- attrs["properties"] = item["properties"]
1149
- etree.SubElement(manifest, f"{{{OPF_NS}}}item", attrib=attrs)
1150
-
1151
- spine_attrs = {}
1152
- toc_item = next(
1153
- (m for m in opf.manifest if m["media_type"] == "application/x-dtbncx+xml"),
1154
- None,
1155
- )
1156
- if toc_item:
1157
- spine_attrs["toc"] = toc_item["id"]
1158
- spine = etree.SubElement(package, f"{{{OPF_NS}}}spine", **spine_attrs)
1159
- for ref in opf.spine:
1160
- attrs = {"idref": ref["idref"]}
1161
- if ref["properties"]:
1162
- attrs["properties"] = ref["properties"]
1163
- etree.SubElement(spine, f"{{{OPF_NS}}}itemref", attrib=attrs)
1164
-
1165
- # <guide>
1166
- if opf.include_cover:
1167
- cover_ref = next((m for m in opf.manifest if m["id"] == "cover"), None)
1168
- if cover_ref:
1169
- guide = etree.SubElement(package, f"{{{OPF_NS}}}guide")
1170
- etree.SubElement(
1171
- guide,
1172
- f"{{{OPF_NS}}}reference",
1173
- type="cover",
1174
- title="Cover",
1175
- href=cover_ref["href"],
1176
- )
1177
-
1178
- xml_bytes: bytes = etree.tostring(
1179
- package,
1180
- xml_declaration=True,
1181
- encoding="utf-8",
1182
- pretty_print=True,
1183
- )
1184
- return xml_bytes.decode("utf-8")
1185
-
1186
-
1187
- def _split_volume_title(volume_title: str) -> tuple[str, str]:
1188
- """
1189
- Split volume title into two parts for better display.
1190
-
1191
- :param volume_title: Original volume title string.
1192
- :return: Tuple of (line1, line2)
1193
- """
1194
- if " " in volume_title:
1195
- parts = volume_title.split(" ")
1196
- elif "-" in volume_title:
1197
- parts = volume_title.split("-")
1198
- else:
1199
- return volume_title, ""
1200
-
1201
- return parts[0], "".join(parts[1:])
1202
-
1203
-
1204
- def _create_volume_intro(
1205
- volume_title: str,
1206
- volume_intro_text: str = "",
1207
- ) -> str:
1208
- """
1209
- Generate the HTML snippet for a volume's introduction section.
1210
-
1211
- :param volume_title: Title of the volume.
1212
- :param volume_intro_text: Optional introduction text for the volume.
1213
- :return: HTML string representing the volume's intro section.
1214
- """
1215
- line1, line2 = _split_volume_title(volume_title)
1216
-
1217
- def _make_border_img(class_name: str) -> str:
1218
- return (
1219
- f'<div class="{class_name}">'
1220
- f'<img alt="" class="{class_name}" '
1221
- f'src="../{_IMAGE_FOLDER}/volume_border.png" />'
1222
- f"</div>"
1223
- )
1224
-
1225
- html_parts = [_make_border_img("border1")]
1226
- html_parts.append(f'<h1 class="volume-title-line1">{line1}</h1>')
1227
- html_parts.append(_make_border_img("border2"))
1228
- if line2:
1229
- html_parts.append(f'<p class="volume-title-line2">{line2}</p>')
1230
-
1231
- if volume_intro_text:
1232
- lines = [line.strip() for line in volume_intro_text.split("\n") if line.strip()]
1233
- html_parts.extend(f'<p class="intro">{line}</p>' for line in lines)
1234
-
1235
- return "\n".join(html_parts)
1236
-
1237
-
1238
- def _gene_book_intro(
1239
- book_name: str,
39
+ def download_cover(
40
+ cover_url: str,
41
+ raw_base: Path,
42
+ include_cover: bool,
43
+ logger: logging.Logger,
44
+ tag: str,
45
+ headers: dict[str, str] | None = None,
46
+ ) -> Path | None:
47
+ if include_cover and cover_url:
48
+ path = download(
49
+ cover_url,
50
+ raw_base,
51
+ filename="cover",
52
+ headers=headers or DEFAULT_HEADERS,
53
+ on_exist="overwrite",
54
+ default_suffix=DEFAULT_IMAGE_SUFFIX,
55
+ )
56
+ if not path:
57
+ logger.warning("%s Failed to download cover from %s", tag, cover_url)
58
+ return path
59
+ return None
60
+
61
+
62
+ def prepare_builder(
63
+ site_name: str,
64
+ book_id: str,
65
+ title: str,
1240
66
  author: str,
67
+ description: str,
68
+ subject: list[str],
1241
69
  serial_status: str,
1242
70
  word_count: str,
1243
- summary: str,
71
+ cover_path: Path | None,
72
+ ) -> tuple[EpubBuilder, StyleSheet]:
73
+ book = EpubBuilder(
74
+ title=title,
75
+ author=author,
76
+ description=description,
77
+ cover_path=cover_path,
78
+ subject=subject,
79
+ serial_status=serial_status,
80
+ word_count=word_count,
81
+ uid=f"{site_name}_{book_id}",
82
+ )
83
+ css_text = CSS_MAIN_PATH.read_text(encoding="utf-8")
84
+ main_css = StyleSheet(id="main_style", content=css_text, filename="main.css")
85
+ book.add_stylesheet(main_css)
86
+ return book, main_css
87
+
88
+
89
+ def finalize_export(
90
+ book: EpubBuilder,
91
+ out_dir: Path,
92
+ filename: str,
93
+ logger: logging.Logger,
94
+ tag: str,
95
+ ) -> Path | None:
96
+ out_path = out_dir / sanitize_filename(filename)
97
+ try:
98
+ book.export(out_path)
99
+ logger.info("%s EPUB successfully written to %s", tag, out_path)
100
+ return out_path
101
+ except OSError as e:
102
+ logger.error("%s Failed to write EPUB to %s: %s", tag, out_path, e)
103
+ return None
104
+
105
+
106
+ def inline_remote_images(
107
+ book: EpubBuilder,
108
+ content: str,
109
+ image_dir: Path,
110
+ headers: dict[str, str] | None = None,
1244
111
  ) -> str:
1245
112
  """
1246
- Generate HTML string for a book's information and summary.
113
+ Download every remote `<img src="...">` in `content` into `image_dir`,
114
+ and replace the original url with local path.
1247
115
 
1248
- :return: An HTML-formatted string presenting the book's information.
116
+ :param content: HTML/text of the chapter containing <img> tags.
117
+ :param image_dir: Directory to save downloaded images into.
118
+ :return: modified_content.
1249
119
  """
1250
- # Start composing the HTML output
1251
- html_parts = ["<h1>书籍简介</h1>", '<div class="list">', "<ul>"]
1252
-
1253
- if book_name:
1254
- html_parts.append(f"<li>书名: 《{book_name}》</li>")
1255
- if author:
1256
- html_parts.append(f"<li>作者: {author}</li>")
1257
-
1258
- if word_count:
1259
- html_parts.append(f"<li>字数: {word_count}</li>")
1260
- if serial_status:
1261
- html_parts.append(f"<li>状态: {serial_status}</li>")
1262
-
1263
- html_parts.append("</ul>")
1264
- html_parts.append("</div>")
1265
- html_parts.append('<p class="new-page-after"><br/></p>')
1266
120
 
1267
- if summary:
1268
- html_parts.append("<h2>简介</h2>")
1269
- for paragraph in summary.split("\n"):
1270
- paragraph = paragraph.strip()
1271
- if paragraph:
1272
- html_parts.append(f"<p>{paragraph}</p>")
121
+ def _replace(m: re.Match[str]) -> str:
122
+ url = m.group(1)
123
+ try:
124
+ local_path = download(
125
+ url,
126
+ image_dir,
127
+ headers=headers or DEFAULT_HEADERS,
128
+ on_exist="skip",
129
+ default_suffix=DEFAULT_IMAGE_SUFFIX,
130
+ )
131
+ if not local_path:
132
+ return m.group(0)
133
+ filename = book.add_image(local_path)
134
+ return f'<img src="../Images/{filename}" />'
135
+ except Exception:
136
+ return m.group(0)
1273
137
 
1274
- return "\n".join(html_parts)
138
+ return _IMG_SRC_RE.sub(_replace, content)
1275
139
 
1276
140
 
1277
- def _build_epub(
1278
- book: Book,
1279
- output_path: Path,
1280
- ) -> bool:
141
+ def remove_all_images(content: str) -> str:
1281
142
  """
1282
- Build an EPUB file at `output_path` from the given `book`.
143
+ Remove all <img> tags from the given content.
1283
144
 
1284
- Returns True on success, False on failure.
145
+ :param content: HTML/text of the chapter containing <img> tags.
1285
146
  """
1286
- # make sure output directory exists
1287
- output_path.parent.mkdir(parents=True, exist_ok=True)
1288
-
1289
- # generate all the XML/XHTML strings up front
1290
- container_xml = generate_container_xml()
1291
- nav_xhtml = generate_nav_xhtml(book.nav)
1292
- ncx_xml = generate_ncx_xml(book.ncx)
1293
- opf_xml = generate_opf_xml(book.opf)
1294
-
1295
- try:
1296
- with zipfile.ZipFile(output_path, "w") as epub:
1297
- # 1) The very first file must be the uncompressed mimetype
1298
- epub.writestr(
1299
- "mimetype",
1300
- "application/epub+zip",
1301
- compress_type=ZIP_STORED,
1302
- )
1303
-
1304
- # 2) META-INF/container.xml
1305
- epub.writestr(
1306
- "META-INF/container.xml",
1307
- container_xml,
1308
- compress_type=ZIP_DEFLATED,
1309
- )
1310
-
1311
- # 3) OEBPS/nav.xhtml, toc.ncx, content.opf
1312
- epub.writestr(
1313
- f"{_ROOT_PATH}/nav.xhtml",
1314
- nav_xhtml,
1315
- compress_type=ZIP_DEFLATED,
1316
- )
1317
- epub.writestr(
1318
- f"{_ROOT_PATH}/toc.ncx",
1319
- ncx_xml,
1320
- compress_type=ZIP_DEFLATED,
1321
- )
1322
- epub.writestr(
1323
- f"{_ROOT_PATH}/content.opf",
1324
- opf_xml,
1325
- compress_type=ZIP_DEFLATED,
1326
- )
1327
-
1328
- # 4) CSS files
1329
- for css in book.stylesheets:
1330
- css_path = f"{_ROOT_PATH}/{_CSS_FOLDER}/{css.filename}"
1331
- epub.writestr(
1332
- css_path,
1333
- css.content,
1334
- compress_type=ZIP_DEFLATED,
1335
- )
147
+ return _IMG_TAG_RE.sub("", content)
1336
148
 
1337
- # 5) XHTML content items (chapters, etc.)
1338
- for item in book.content_items:
1339
- chap_path = f"{_ROOT_PATH}/{_TEXT_FOLDER}/{item.filename}"
1340
- epub.writestr(
1341
- chap_path,
1342
- item.to_xhtml(),
1343
- compress_type=ZIP_DEFLATED,
1344
- )
1345
149
 
1346
- # 6) images
1347
- for img in book.images:
1348
- img_path = f"{_ROOT_PATH}/{_IMAGE_FOLDER}/{img.filename}"
1349
- epub.writestr(
1350
- img_path,
1351
- img.data, # bytes
1352
- compress_type=ZIP_DEFLATED,
1353
- )
1354
-
1355
- return True
1356
-
1357
- except Exception:
1358
- return False
150
+ def build_epub_chapter(
151
+ title: str,
152
+ paragraphs: str,
153
+ extras: dict[str, str] | None = None,
154
+ ) -> str:
155
+ """
156
+ Build a formatted chapter epub HTML including title, body paragraphs,
157
+ and optional extra sections.
158
+
159
+ :param title: Chapter title.
160
+ :param paragraphs: Raw multi-line string. Blank lines are ignored.
161
+ :param extras: Optional dict mapping section titles to multi-line strings.
162
+ :return: A HTML include title, paragraphs, and extras.
163
+ """
164
+
165
+ def _render_block(text: str) -> str:
166
+ out: list[str] = []
167
+ for raw in text.splitlines():
168
+ line = raw.strip()
169
+ if not line:
170
+ continue
171
+
172
+ # case 1: already wrapped in a <div>...</div>
173
+ if line.startswith("<div") and line.endswith("</div>"):
174
+ out.append(line)
175
+ continue
176
+
177
+ # case 2: single <img> line
178
+ if _IMG_TAG_RE.fullmatch(line):
179
+ out.append(_IMAGE_WRAPPER.format(img=line))
180
+ continue
181
+
182
+ # case 3: inline <img> in text -> escape other text, preserve <img>
183
+ if "<img " in line:
184
+ pieces = []
185
+ last = 0
186
+ for m in _IMG_TAG_RE.finditer(line):
187
+ pieces.append(escape(line[last : m.start()]))
188
+ pieces.append(m.group(0))
189
+ last = m.end()
190
+ pieces.append(escape(line[last:]))
191
+ out.append("<p>" + "".join(pieces) + "</p>")
192
+ else:
193
+ # plain text line
194
+ out.append(f"<p>{escape(line)}</p>")
195
+
196
+ return "\n".join(out)
197
+
198
+ parts = []
199
+ parts.append(f"<h2>{escape(title)}</h2>")
200
+ parts.append(_render_block(paragraphs))
201
+
202
+ if extras:
203
+ for title, note in extras.items():
204
+ note = note.strip()
205
+ if not note:
206
+ continue
207
+ parts.extend(
208
+ [
209
+ "<hr />",
210
+ f"<h3>{escape(title)}</h3>",
211
+ _render_block(note),
212
+ ]
213
+ )
214
+
215
+ return "\n".join(parts)