novel-downloader 1.4.4__py3-none-any.whl → 1.5.0__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the respective public registry.
Files changed (165)
  1. novel_downloader/__init__.py +1 -1
  2. novel_downloader/cli/__init__.py +2 -2
  3. novel_downloader/cli/config.py +1 -83
  4. novel_downloader/cli/download.py +4 -5
  5. novel_downloader/cli/export.py +4 -1
  6. novel_downloader/cli/main.py +2 -0
  7. novel_downloader/cli/search.py +123 -0
  8. novel_downloader/config/__init__.py +3 -10
  9. novel_downloader/config/adapter.py +190 -54
  10. novel_downloader/config/loader.py +2 -3
  11. novel_downloader/core/__init__.py +13 -13
  12. novel_downloader/core/downloaders/__init__.py +10 -11
  13. novel_downloader/core/downloaders/base.py +152 -26
  14. novel_downloader/core/downloaders/biquge.py +5 -1
  15. novel_downloader/core/downloaders/common.py +157 -378
  16. novel_downloader/core/downloaders/esjzone.py +5 -1
  17. novel_downloader/core/downloaders/linovelib.py +5 -1
  18. novel_downloader/core/downloaders/qianbi.py +291 -4
  19. novel_downloader/core/downloaders/qidian.py +199 -285
  20. novel_downloader/core/downloaders/registry.py +67 -0
  21. novel_downloader/core/downloaders/sfacg.py +5 -1
  22. novel_downloader/core/downloaders/yamibo.py +5 -1
  23. novel_downloader/core/exporters/__init__.py +10 -11
  24. novel_downloader/core/exporters/base.py +87 -7
  25. novel_downloader/core/exporters/biquge.py +5 -8
  26. novel_downloader/core/exporters/common/__init__.py +2 -2
  27. novel_downloader/core/exporters/common/epub.py +82 -166
  28. novel_downloader/core/exporters/common/main_exporter.py +0 -60
  29. novel_downloader/core/exporters/common/txt.py +82 -83
  30. novel_downloader/core/exporters/epub_util.py +157 -1330
  31. novel_downloader/core/exporters/esjzone.py +5 -8
  32. novel_downloader/core/exporters/linovelib/__init__.py +2 -2
  33. novel_downloader/core/exporters/linovelib/epub.py +157 -212
  34. novel_downloader/core/exporters/linovelib/main_exporter.py +2 -59
  35. novel_downloader/core/exporters/linovelib/txt.py +67 -63
  36. novel_downloader/core/exporters/qianbi.py +5 -8
  37. novel_downloader/core/exporters/qidian.py +14 -4
  38. novel_downloader/core/exporters/registry.py +53 -0
  39. novel_downloader/core/exporters/sfacg.py +5 -8
  40. novel_downloader/core/exporters/txt_util.py +67 -0
  41. novel_downloader/core/exporters/yamibo.py +5 -8
  42. novel_downloader/core/fetchers/__init__.py +19 -24
  43. novel_downloader/core/fetchers/base/__init__.py +3 -3
  44. novel_downloader/core/fetchers/base/browser.py +23 -4
  45. novel_downloader/core/fetchers/base/session.py +30 -5
  46. novel_downloader/core/fetchers/biquge/__init__.py +3 -3
  47. novel_downloader/core/fetchers/biquge/browser.py +5 -0
  48. novel_downloader/core/fetchers/biquge/session.py +6 -1
  49. novel_downloader/core/fetchers/esjzone/__init__.py +3 -3
  50. novel_downloader/core/fetchers/esjzone/browser.py +5 -0
  51. novel_downloader/core/fetchers/esjzone/session.py +6 -1
  52. novel_downloader/core/fetchers/linovelib/__init__.py +3 -3
  53. novel_downloader/core/fetchers/linovelib/browser.py +6 -1
  54. novel_downloader/core/fetchers/linovelib/session.py +6 -1
  55. novel_downloader/core/fetchers/qianbi/__init__.py +3 -3
  56. novel_downloader/core/fetchers/qianbi/browser.py +5 -0
  57. novel_downloader/core/fetchers/qianbi/session.py +5 -0
  58. novel_downloader/core/fetchers/qidian/__init__.py +3 -3
  59. novel_downloader/core/fetchers/qidian/browser.py +12 -4
  60. novel_downloader/core/fetchers/qidian/session.py +11 -3
  61. novel_downloader/core/fetchers/registry.py +71 -0
  62. novel_downloader/core/fetchers/sfacg/__init__.py +3 -3
  63. novel_downloader/core/fetchers/sfacg/browser.py +5 -0
  64. novel_downloader/core/fetchers/sfacg/session.py +5 -0
  65. novel_downloader/core/fetchers/yamibo/__init__.py +3 -3
  66. novel_downloader/core/fetchers/yamibo/browser.py +5 -0
  67. novel_downloader/core/fetchers/yamibo/session.py +6 -1
  68. novel_downloader/core/interfaces/__init__.py +7 -5
  69. novel_downloader/core/interfaces/searcher.py +18 -0
  70. novel_downloader/core/parsers/__init__.py +10 -11
  71. novel_downloader/core/parsers/{biquge/main_parser.py → biquge.py} +7 -2
  72. novel_downloader/core/parsers/{esjzone/main_parser.py → esjzone.py} +7 -2
  73. novel_downloader/core/parsers/{linovelib/main_parser.py → linovelib.py} +7 -2
  74. novel_downloader/core/parsers/{qianbi/main_parser.py → qianbi.py} +7 -2
  75. novel_downloader/core/parsers/qidian/__init__.py +2 -2
  76. novel_downloader/core/parsers/qidian/chapter_encrypted.py +23 -21
  77. novel_downloader/core/parsers/qidian/chapter_normal.py +1 -1
  78. novel_downloader/core/parsers/qidian/main_parser.py +10 -21
  79. novel_downloader/core/parsers/qidian/utils/__init__.py +11 -11
  80. novel_downloader/core/parsers/qidian/utils/decryptor_fetcher.py +5 -6
  81. novel_downloader/core/parsers/qidian/utils/node_decryptor.py +2 -2
  82. novel_downloader/core/parsers/registry.py +68 -0
  83. novel_downloader/core/parsers/{sfacg/main_parser.py → sfacg.py} +7 -2
  84. novel_downloader/core/parsers/{yamibo/main_parser.py → yamibo.py} +7 -2
  85. novel_downloader/core/searchers/__init__.py +20 -0
  86. novel_downloader/core/searchers/base.py +92 -0
  87. novel_downloader/core/searchers/biquge.py +83 -0
  88. novel_downloader/core/searchers/esjzone.py +84 -0
  89. novel_downloader/core/searchers/qianbi.py +131 -0
  90. novel_downloader/core/searchers/qidian.py +87 -0
  91. novel_downloader/core/searchers/registry.py +63 -0
  92. novel_downloader/locales/en.json +12 -4
  93. novel_downloader/locales/zh.json +12 -4
  94. novel_downloader/models/__init__.py +4 -30
  95. novel_downloader/models/config.py +12 -6
  96. novel_downloader/models/search.py +16 -0
  97. novel_downloader/models/types.py +0 -2
  98. novel_downloader/resources/config/settings.toml +31 -4
  99. novel_downloader/resources/css_styles/intro.css +83 -0
  100. novel_downloader/resources/css_styles/main.css +30 -89
  101. novel_downloader/utils/__init__.py +52 -0
  102. novel_downloader/utils/chapter_storage.py +244 -224
  103. novel_downloader/utils/constants.py +1 -21
  104. novel_downloader/utils/epub/__init__.py +34 -0
  105. novel_downloader/utils/epub/builder.py +377 -0
  106. novel_downloader/utils/epub/constants.py +77 -0
  107. novel_downloader/utils/epub/documents.py +403 -0
  108. novel_downloader/utils/epub/models.py +134 -0
  109. novel_downloader/utils/epub/utils.py +212 -0
  110. novel_downloader/utils/file_utils/__init__.py +10 -14
  111. novel_downloader/utils/file_utils/io.py +20 -51
  112. novel_downloader/utils/file_utils/normalize.py +2 -2
  113. novel_downloader/utils/file_utils/sanitize.py +2 -3
  114. novel_downloader/utils/fontocr/__init__.py +5 -5
  115. novel_downloader/utils/{hash_store.py → fontocr/hash_store.py} +4 -3
  116. novel_downloader/utils/{hash_utils.py → fontocr/hash_utils.py} +2 -2
  117. novel_downloader/utils/fontocr/ocr_v1.py +13 -1
  118. novel_downloader/utils/fontocr/ocr_v2.py +13 -1
  119. novel_downloader/utils/fontocr/ocr_v3.py +744 -0
  120. novel_downloader/utils/i18n.py +2 -0
  121. novel_downloader/utils/logger.py +2 -0
  122. novel_downloader/utils/network.py +110 -251
  123. novel_downloader/utils/state.py +1 -0
  124. novel_downloader/utils/text_utils/__init__.py +18 -17
  125. novel_downloader/utils/text_utils/diff_display.py +4 -5
  126. novel_downloader/utils/text_utils/numeric_conversion.py +253 -0
  127. novel_downloader/utils/text_utils/text_cleaner.py +179 -0
  128. novel_downloader/utils/text_utils/truncate_utils.py +62 -0
  129. novel_downloader/utils/time_utils/__init__.py +3 -3
  130. novel_downloader/utils/time_utils/datetime_utils.py +4 -5
  131. novel_downloader/utils/time_utils/sleep_utils.py +2 -3
  132. {novel_downloader-1.4.4.dist-info → novel_downloader-1.5.0.dist-info}/METADATA +2 -2
  133. novel_downloader-1.5.0.dist-info/RECORD +164 -0
  134. novel_downloader/config/site_rules.py +0 -94
  135. novel_downloader/core/factory/__init__.py +0 -20
  136. novel_downloader/core/factory/downloader.py +0 -73
  137. novel_downloader/core/factory/exporter.py +0 -58
  138. novel_downloader/core/factory/fetcher.py +0 -96
  139. novel_downloader/core/factory/parser.py +0 -86
  140. novel_downloader/core/fetchers/common/__init__.py +0 -14
  141. novel_downloader/core/fetchers/common/browser.py +0 -79
  142. novel_downloader/core/fetchers/common/session.py +0 -79
  143. novel_downloader/core/parsers/biquge/__init__.py +0 -10
  144. novel_downloader/core/parsers/common/__init__.py +0 -13
  145. novel_downloader/core/parsers/common/helper.py +0 -323
  146. novel_downloader/core/parsers/common/main_parser.py +0 -106
  147. novel_downloader/core/parsers/esjzone/__init__.py +0 -10
  148. novel_downloader/core/parsers/linovelib/__init__.py +0 -10
  149. novel_downloader/core/parsers/qianbi/__init__.py +0 -10
  150. novel_downloader/core/parsers/sfacg/__init__.py +0 -10
  151. novel_downloader/core/parsers/yamibo/__init__.py +0 -10
  152. novel_downloader/models/browser.py +0 -21
  153. novel_downloader/models/site_rules.py +0 -99
  154. novel_downloader/models/tasks.py +0 -33
  155. novel_downloader/resources/css_styles/volume-intro.css +0 -56
  156. novel_downloader/resources/json/replace_word_map.json +0 -4
  157. novel_downloader/resources/text/blacklist.txt +0 -22
  158. novel_downloader/utils/text_utils/chapter_formatting.py +0 -46
  159. novel_downloader/utils/text_utils/font_mapping.py +0 -28
  160. novel_downloader/utils/text_utils/text_cleaning.py +0 -107
  161. novel_downloader-1.4.4.dist-info/RECORD +0 -165
  162. {novel_downloader-1.4.4.dist-info → novel_downloader-1.5.0.dist-info}/WHEEL +0 -0
  163. {novel_downloader-1.4.4.dist-info → novel_downloader-1.5.0.dist-info}/entry_points.txt +0 -0
  164. {novel_downloader-1.4.4.dist-info → novel_downloader-1.5.0.dist-info}/licenses/LICENSE +0 -0
  165. {novel_downloader-1.4.4.dist-info → novel_downloader-1.5.0.dist-info}/top_level.txt +0 -0
@@ -13,19 +13,10 @@ Currently supported platforms:
13
13
  - qidian (起点中文网)
14
14
  - sfacg (SF轻小说)
15
15
  - yamibo (百合会)
16
- - common (通用架构)
17
16
  """
18
17
 
19
- from .biquge import BiqugeExporter
20
- from .common import CommonExporter
21
- from .esjzone import EsjzoneExporter
22
- from .linovelib import LinovelibExporter
23
- from .qianbi import QianbiExporter
24
- from .qidian import QidianExporter
25
- from .sfacg import SfacgExporter
26
- from .yamibo import YamiboExporter
27
-
28
18
  __all__ = [
19
+ "get_exporter",
29
20
  "BiqugeExporter",
30
21
  "EsjzoneExporter",
31
22
  "LinovelibExporter",
@@ -33,5 +24,13 @@ __all__ = [
33
24
  "QidianExporter",
34
25
  "SfacgExporter",
35
26
  "YamiboExporter",
36
- "CommonExporter",
37
27
  ]
28
+
29
+ from .biquge import BiqugeExporter
30
+ from .esjzone import EsjzoneExporter
31
+ from .linovelib import LinovelibExporter
32
+ from .qianbi import QianbiExporter
33
+ from .qidian import QidianExporter
34
+ from .registry import get_exporter
35
+ from .sfacg import SfacgExporter
36
+ from .yamibo import YamiboExporter
@@ -9,6 +9,7 @@ content into various output formats.
9
9
  """
10
10
 
11
11
  import abc
12
+ import json
12
13
  import logging
13
14
  import types
14
15
  from datetime import datetime
@@ -16,7 +17,8 @@ from pathlib import Path
16
17
  from typing import Any, Self
17
18
 
18
19
  from novel_downloader.core.interfaces import ExporterProtocol
19
- from novel_downloader.models import ExporterConfig
20
+ from novel_downloader.models import ChapterDict, ExporterConfig
21
+ from novel_downloader.utils import ChapterStorage
20
22
 
21
23
 
22
24
  class SafeDict(dict[str, Any]):
@@ -31,24 +33,33 @@ class BaseExporter(ExporterProtocol, abc.ABC):
31
33
  such as TXT, EPUB, Markdown, or PDF.
32
34
  """
33
35
 
36
+ DEFAULT_SOURCE_ID = 0
37
+ DEFAULT_PRIORITIES_MAP = {
38
+ DEFAULT_SOURCE_ID: 0,
39
+ }
40
+
34
41
  def __init__(
35
42
  self,
36
43
  config: ExporterConfig,
37
44
  site: str,
45
+ priorities: dict[int, int] | None = None,
38
46
  ):
39
47
  """
40
48
  Initialize the exporter with given configuration.
41
49
 
42
- :param config: A ExporterConfig object that defines
43
- save paths, formats, and options.
50
+ :param config: Exporter configuration settings.
51
+ :param site: Identifier for the target website or source.
52
+ :param priorities: Mapping of source_id to priority value.
53
+ Lower numbers indicate higher priority.
54
+ E.X. {0: 10, 1: 100} means source 0 is preferred.
44
55
  """
45
56
  self._config = config
46
57
  self._site = site
58
+ self._priorities = priorities or self.DEFAULT_PRIORITIES_MAP
59
+ self._storage_cache: dict[str, ChapterStorage] = {}
47
60
 
48
- self._cache_dir = Path(config.cache_dir) / site
49
61
  self._raw_data_dir = Path(config.raw_data_dir) / site
50
62
  self._output_dir = Path(config.output_dir)
51
- self._cache_dir.mkdir(parents=True, exist_ok=True)
52
63
  self._output_dir.mkdir(parents=True, exist_ok=True)
53
64
 
54
65
  self.logger = logging.getLogger(f"{self.__class__.__name__}")
@@ -160,16 +171,84 @@ class BaseExporter(ExporterProtocol, abc.ABC):
160
171
 
161
172
  return f"{name}.{ext}"
162
173
 
174
+ @property
175
+ def site(self) -> str:
176
+ """
177
+ Get the site identifier.
178
+
179
+ :return: The site string.
180
+ """
181
+ return self._site
182
+
163
183
  @property
164
184
  def output_dir(self) -> Path:
165
- """Access the output directory for saving files."""
185
+ """
186
+ Access the output directory for saving files.
187
+ """
166
188
  return self._output_dir
167
189
 
168
190
  @property
169
191
  def filename_template(self) -> str:
170
- """Access the filename template."""
192
+ """
193
+ Access the filename template.
194
+ """
171
195
  return self._config.filename_template
172
196
 
197
+ def _get_chapter(
198
+ self,
199
+ book_id: str,
200
+ chap_id: str,
201
+ ) -> ChapterDict | None:
202
+ if book_id not in self._storage_cache:
203
+ return None
204
+ return self._storage_cache[book_id].get_best_chapter(chap_id)
205
+
206
+ def _get_chapters(
207
+ self,
208
+ book_id: str,
209
+ chap_ids: list[str],
210
+ ) -> dict[str, ChapterDict | None]:
211
+ if book_id not in self._storage_cache:
212
+ return {}
213
+ return self._storage_cache[book_id].get_best_chapters(chap_ids)
214
+
215
+ def _load_book_info(self, book_id: str) -> dict[str, Any]:
216
+ info_path = self._raw_data_dir / book_id / "book_info.json"
217
+ if not info_path.is_file():
218
+ self.logger.error("Missing metadata file: %s", info_path)
219
+ return {}
220
+
221
+ try:
222
+ text = info_path.read_text(encoding="utf-8")
223
+ data: Any = json.loads(text)
224
+ if not isinstance(data, dict):
225
+ self.logger.error(
226
+ "Invalid JSON structure in %s: expected an object at the top",
227
+ info_path,
228
+ )
229
+ return {}
230
+ return data
231
+ except json.JSONDecodeError as e:
232
+ self.logger.error("Corrupt JSON in %s: %s", info_path, e)
233
+ return {}
234
+
235
+ def _init_chapter_storages(self, book_id: str) -> None:
236
+ if book_id in self._storage_cache:
237
+ return
238
+ self._storage_cache[book_id] = ChapterStorage(
239
+ raw_base=self._raw_data_dir / book_id,
240
+ priorities=self._priorities,
241
+ )
242
+ self._storage_cache[book_id].connect()
243
+
244
+ def _close_chapter_storages(self) -> None:
245
+ for storage in self._storage_cache.values():
246
+ try:
247
+ storage.close()
248
+ except Exception as e:
249
+ self.logger.warning("Failed to close storage %s: %s", storage, e)
250
+ self._storage_cache.clear()
251
+
173
252
  def _on_close(self) -> None:
174
253
  """
175
254
  Hook method called at the beginning of close().
@@ -182,6 +261,7 @@ class BaseExporter(ExporterProtocol, abc.ABC):
182
261
  Shutdown and clean up the exporter.
183
262
  """
184
263
  self._on_close()
264
+ self._close_chapter_storages()
185
265
 
186
266
  def __enter__(self) -> Self:
187
267
  return self
@@ -5,21 +5,18 @@ novel_downloader.core.exporters.biquge
5
5
 
6
6
  """
7
7
 
8
+ __all__ = ["BiqugeExporter"]
9
+
10
+ from novel_downloader.core.exporters.registry import register_exporter
8
11
  from novel_downloader.models import ExporterConfig
9
12
 
10
13
  from .common import CommonExporter
11
14
 
12
15
 
16
+ @register_exporter(site_keys=["biquge", "bqg"])
13
17
  class BiqugeExporter(CommonExporter):
14
18
  def __init__(
15
19
  self,
16
20
  config: ExporterConfig,
17
21
  ):
18
- super().__init__(
19
- config,
20
- site="biquge",
21
- chap_folders=["chapters"],
22
- )
23
-
24
-
25
- __all__ = ["BiqugeExporter"]
22
+ super().__init__(config, site="biquge")
@@ -7,6 +7,6 @@ This module provides the `CommonExporter` class for
7
7
  handling the saving process of novels.
8
8
  """
9
9
 
10
- from .main_exporter import CommonExporter
11
-
12
10
  __all__ = ["CommonExporter"]
11
+
12
+ from .main_exporter import CommonExporter
@@ -8,36 +8,29 @@ Contains the logic for exporting novel content as a single `.epub` file.
8
8
 
9
9
  from __future__ import annotations
10
10
 
11
- import html
12
- import json
13
- import re
14
11
  from pathlib import Path
15
12
  from typing import TYPE_CHECKING
16
13
 
17
14
  from novel_downloader.core.exporters.epub_util import (
18
- Book,
15
+ build_epub_chapter,
16
+ download_cover,
17
+ finalize_export,
18
+ inline_remote_images,
19
+ prepare_builder,
20
+ )
21
+ from novel_downloader.utils import (
22
+ download,
23
+ get_cleaner,
24
+ )
25
+ from novel_downloader.utils.constants import DEFAULT_IMAGE_SUFFIX
26
+ from novel_downloader.utils.epub import (
19
27
  Chapter,
20
- StyleSheet,
21
28
  Volume,
22
29
  )
23
- from novel_downloader.utils.constants import CSS_MAIN_PATH
24
- from novel_downloader.utils.file_utils import sanitize_filename
25
- from novel_downloader.utils.network import download_image
26
- from novel_downloader.utils.text_utils import clean_chapter_title
27
30
 
28
31
  if TYPE_CHECKING:
29
32
  from .main_exporter import CommonExporter
30
33
 
31
- _IMAGE_WRAPPER = (
32
- '<div class="duokan-image-single illus"><img src="../Images/{filename}" /></div>'
33
- )
34
- _IMG_TAG_PATTERN = re.compile(
35
- r'<img\s+[^>]*src=[\'"]([^\'"]+)[\'"][^>]*>', re.IGNORECASE
36
- )
37
- _RAW_HTML_RE = re.compile(
38
- r'^(<img\b[^>]*?\/>|<div class="duokan-image-single illus">.*?<\/div>)$', re.DOTALL
39
- )
40
-
41
34
 
42
35
  def common_export_as_epub(
43
36
  exporter: CommonExporter,
@@ -50,132 +43,137 @@ def common_export_as_epub(
50
43
  1. Load `book_info.json` for metadata.
51
44
  2. Generate introductory HTML and optionally include the cover image.
52
45
  3. Initialize the EPUB container.
53
- 4. Iterate through volumes and chapters, convert each to XHTML.
46
+ 4. Iterate through volumes and chapters in volume-batches, convert each to XHTML.
54
47
  5. Assemble the spine, TOC, CSS and write out the final `.epub`.
55
48
 
56
- :param saver: The saver instance, carrying config and path info.
49
+ :param exporter: The exporter instance, carrying config and path info.
57
50
  :param book_id: Identifier of the novel (used as subdirectory name).
58
51
  """
59
52
  TAG = "[exporter]"
60
53
  config = exporter._config
61
- # --- Paths & options ---
54
+
62
55
  raw_base = exporter._raw_data_dir / book_id
63
- img_dir = exporter._cache_dir / book_id / "images"
56
+ img_dir = raw_base / "images"
64
57
  out_dir = exporter.output_dir
58
+
65
59
  img_dir.mkdir(parents=True, exist_ok=True)
66
60
  out_dir.mkdir(parents=True, exist_ok=True)
67
61
 
62
+ cleaner = get_cleaner(
63
+ enabled=config.clean_text,
64
+ config=config.cleaner_cfg,
65
+ )
66
+
68
67
  # --- Load book_info.json ---
69
- info_path = raw_base / "book_info.json"
70
- try:
71
- info_text = info_path.read_text(encoding="utf-8")
72
- book_info = json.loads(info_text)
73
- except Exception as e:
74
- exporter.logger.error("%s Failed to load %s: %s", TAG, info_path, e)
68
+ book_info = exporter._load_book_info(book_id)
69
+ if not book_info:
75
70
  return
76
71
 
77
72
  book_name = book_info.get("book_name", book_id)
78
73
  book_author = book_info.get("author", "")
74
+
79
75
  exporter.logger.info(
80
76
  "%s Starting EPUB generation: %s (ID: %s)", TAG, book_name, book_id
81
77
  )
82
78
 
83
- # --- Generate intro + cover ---
84
- cover_path: Path | None = None
85
- cover_url = book_info.get("cover_url", "")
86
- if config.include_cover and cover_url:
87
- cover_path = download_image(
88
- cover_url,
89
- raw_base,
90
- target_name="cover",
91
- on_exist="overwrite",
92
- )
93
- if not cover_path:
94
- exporter.logger.warning("Failed to download cover from %s", cover_url)
79
+ cover_path = download_cover(
80
+ book_info.get("cover_url", ""),
81
+ raw_base,
82
+ config.include_cover,
83
+ exporter.logger,
84
+ TAG,
85
+ )
95
86
 
96
87
  # --- Initialize EPUB ---
97
- book = Book(
88
+ book, main_css = prepare_builder(
89
+ site_name=exporter.site,
90
+ book_id=book_id,
98
91
  title=book_name,
99
92
  author=book_author,
100
93
  description=book_info.get("summary", ""),
101
- cover_path=cover_path,
102
94
  subject=book_info.get("subject", []),
103
95
  serial_status=book_info.get("serial_status", ""),
104
96
  word_count=book_info.get("word_count", ""),
105
- uid=f"{exporter.site}_{book_id}",
106
- )
107
- main_css = StyleSheet(
108
- id="main_style",
109
- content=CSS_MAIN_PATH.read_text(encoding="utf-8"),
110
- filename="main.css",
97
+ cover_path=cover_path,
111
98
  )
112
- book.add_stylesheet(main_css)
113
99
 
114
100
  # --- Compile chapters ---
115
101
  volumes = book_info.get("volumes", [])
116
- for vol_index, vol in enumerate(volumes, start=1):
117
- raw_vol_name = vol.get("volume_name", "")
118
- raw_vol_name = raw_vol_name.replace(book_name, "").strip()
119
- vol_name = raw_vol_name or f"Volume {vol_index}"
120
- exporter.logger.info("Processing volume %d: %s", vol_index, vol_name)
102
+ if not volumes:
103
+ exporter.logger.warning("%s No volumes found in metadata.", TAG)
121
104
 
122
- vol_cover_path: Path | None = None
105
+ for vol_index, vol in enumerate(volumes, start=1):
106
+ raw_name = vol.get("volume_name", "")
107
+ raw_name = cleaner.clean_title(raw_name.replace(book_name, ""))
108
+ vol_name = raw_name or f"Volume {vol_index}"
109
+ exporter.logger.info("%s Processing volume %d: %s", TAG, vol_index, vol_name)
110
+
111
+ # Batch-fetch chapters for this volume
112
+ chap_ids = [
113
+ chap.get("chapterId")
114
+ for chap in vol.get("chapters", [])
115
+ if chap.get("chapterId")
116
+ ]
117
+ chap_map = exporter._get_chapters(book_id, chap_ids)
118
+
119
+ vol_cover: Path | None = None
123
120
  vol_cover_url = vol.get("volume_cover", "")
124
121
  if vol_cover_url:
125
- vol_cover_path = download_image(
122
+ vol_cover = download(
126
123
  vol_cover_url,
127
124
  img_dir,
128
125
  on_exist="skip",
126
+ default_suffix=DEFAULT_IMAGE_SUFFIX,
129
127
  )
130
128
 
131
129
  curr_vol = Volume(
132
130
  id=f"vol_{vol_index}",
133
131
  title=vol_name,
134
- intro=vol.get("volume_intro", ""),
135
- cover=vol_cover_path,
132
+ intro=cleaner.clean_content(vol.get("volume_intro", "")),
133
+ cover=vol_cover,
136
134
  )
137
135
 
138
- for chap in vol.get("chapters", []):
139
- chap_id = chap.get("chapterId")
140
- chap_title = chap.get("title", "")
136
+ for chap_meta in vol.get("chapters", []):
137
+ chap_id = chap_meta.get("chapterId")
141
138
  if not chap_id:
142
139
  exporter.logger.warning(
143
140
  "%s Missing chapterId, skipping: %s",
144
141
  TAG,
145
- chap,
142
+ chap_meta,
146
143
  )
147
144
  continue
148
145
 
149
- chapter_data = exporter._get_chapter(book_id, chap_id)
150
- if not chapter_data:
146
+ chap_title = cleaner.clean_title(chap_meta.get("title", ""))
147
+ data = chap_map.get(chap_id)
148
+ if not data:
151
149
  exporter.logger.info(
152
- "%s Missing chapter file: %s (%s), skipping.",
150
+ "%s Missing chapter: %s (%s), skipping.",
153
151
  TAG,
154
152
  chap_title,
155
153
  chap_id,
156
154
  )
157
155
  continue
158
156
 
159
- title = clean_chapter_title(chapter_data.get("title", "")) or chap_id
160
- content: str = chapter_data.get("content", "")
161
- content, img_paths = _inline_remote_images(content, img_dir)
162
- chap_html = _txt_to_html(
163
- chapter_title=title,
164
- chapter_text=content,
165
- extras={
166
- "作者说": chapter_data.get("author_say", ""),
167
- },
157
+ title = cleaner.clean_title(data.get("title", chap_title)) or chap_id
158
+ content = cleaner.clean_content(data.get("content", ""))
159
+ extra = data.get("extra", {})
160
+ author_note = cleaner.clean_content(extra.get("author_say", ""))
161
+ content = inline_remote_images(book, content, img_dir)
162
+
163
+ chap_html = build_epub_chapter(
164
+ title=title,
165
+ paragraphs=content,
166
+ extras={"作者说": author_note},
168
167
  )
169
168
  curr_vol.add_chapter(
170
169
  Chapter(
171
170
  id=f"c_{chap_id}",
171
+ filename=f"c{chap_id}.xhtml",
172
172
  title=title,
173
173
  content=chap_html,
174
174
  css=[main_css],
175
175
  )
176
176
  )
177
- for img_path in img_paths:
178
- book.add_image(img_path)
179
177
 
180
178
  book.add_volume(curr_vol)
181
179
 
@@ -185,93 +183,11 @@ def common_export_as_epub(
185
183
  author=book_info.get("author"),
186
184
  ext="epub",
187
185
  )
188
- out_path = out_dir / sanitize_filename(out_name)
189
-
190
- try:
191
- book.export(out_path)
192
- exporter.logger.info("%s EPUB successfully written to %s", TAG, out_path)
193
- except Exception as e:
194
- exporter.logger.error("%s Failed to write EPUB to %s: %s", TAG, out_path, e)
186
+ finalize_export(
187
+ book=book,
188
+ out_dir=out_dir,
189
+ filename=out_name,
190
+ logger=exporter.logger,
191
+ tag=TAG,
192
+ )
195
193
  return
196
-
197
-
198
- def _inline_remote_images(
199
- content: str,
200
- image_dir: str | Path,
201
- ) -> tuple[str, list[Path]]:
202
- """
203
- Download every remote `<img src="...">` in `content` into `image_dir`,
204
- and replace the original tag with _IMAGE_WRAPPER
205
- pointing to the local filename.
206
-
207
- :param content: HTML/text of the chapter containing <img> tags.
208
- :param image_dir: Directory to save downloaded images into.
209
- :return: A tuple (modified_content, list_of_downloaded_image_paths).
210
- """
211
- downloaded_images: list[Path] = []
212
-
213
- def _replace(match: re.Match[str]) -> str:
214
- url = match.group(1)
215
- try:
216
- # download_image returns a Path or None
217
- local_path = download_image(
218
- url,
219
- image_dir,
220
- target_name=None,
221
- on_exist="skip",
222
- )
223
- if not local_path:
224
- return match.group(0)
225
-
226
- downloaded_images.append(local_path)
227
- return _IMAGE_WRAPPER.format(filename=local_path.name)
228
- except Exception:
229
- return match.group(0)
230
-
231
- modified_content = _IMG_TAG_PATTERN.sub(_replace, content)
232
- return modified_content, downloaded_images
233
-
234
-
235
- def _txt_to_html(
236
- chapter_title: str,
237
- chapter_text: str,
238
- extras: dict[str, str] | None = None,
239
- ) -> str:
240
- """
241
- Convert chapter text and author note to styled HTML.
242
-
243
- :param chapter_title: Title of the chapter.
244
- :param chapter_text: Main content of the chapter.
245
- :param extras: Optional dict of titles and content, e.g. {"作者说": "text"}.
246
- :return: Rendered HTML as a string.
247
- """
248
-
249
- def _render_block(text: str) -> str:
250
- lines = (line.strip() for line in text.splitlines() if line.strip())
251
- out = []
252
- for line in lines:
253
- # preserve raw HTML, otherwise wrap in <p>
254
- if _RAW_HTML_RE.match(line):
255
- out.append(line)
256
- else:
257
- out.append(f"<p>{html.escape(line)}</p>")
258
- return "\n".join(out)
259
-
260
- parts = []
261
- parts.append(f"<h2>{html.escape(chapter_title)}</h2>")
262
- parts.append(_render_block(chapter_text))
263
-
264
- if extras:
265
- for title, note in extras.items():
266
- note = note.strip()
267
- if not note:
268
- continue
269
- parts.extend(
270
- [
271
- "<hr />",
272
- f"<p>{html.escape(title)}</p>",
273
- _render_block(note),
274
- ]
275
- )
276
-
277
- return "\n".join(parts)
@@ -8,12 +8,7 @@ novel data. It defines the logic to compile, structure, and export novel content
8
8
  in plain text format based on the platform's metadata and chapter files.
9
9
  """
10
10
 
11
- from collections.abc import Mapping
12
- from typing import Any
13
-
14
11
  from novel_downloader.core.exporters.base import BaseExporter
15
- from novel_downloader.models import ExporterConfig
16
- from novel_downloader.utils.chapter_storage import ChapterStorage
17
12
 
18
13
  from .txt import common_export_as_txt
19
14
 
@@ -26,16 +21,6 @@ class CommonExporter(BaseExporter):
26
21
  and EPUB (.epub) files.
27
22
  """
28
23
 
29
- def __init__(
30
- self,
31
- config: ExporterConfig,
32
- site: str,
33
- chap_folders: list[str] | None = None,
34
- ):
35
- super().__init__(config, site)
36
- self._chapter_storage_cache: dict[str, list[ChapterStorage]] = {}
37
- self._chap_folders: list[str] = chap_folders or ["chapters"]
38
-
39
24
  def export_as_txt(self, book_id: str) -> None:
40
25
  """
41
26
  Compile and export a complete novel as a single .txt file.
@@ -70,48 +55,3 @@ class CommonExporter(BaseExporter):
70
55
 
71
56
  self._init_chapter_storages(book_id)
72
57
  return common_export_as_epub(self, book_id)
73
-
74
- @property
75
- def site(self) -> str:
76
- """
77
- Get the site identifier.
78
-
79
- :return: The site string.
80
- """
81
- return self._site
82
-
83
- def _get_chapter(
84
- self,
85
- book_id: str,
86
- chap_id: str,
87
- ) -> Mapping[str, Any]:
88
- for storage in self._chapter_storage_cache[book_id]:
89
- data = storage.get(chap_id)
90
- if data:
91
- return data
92
- return {}
93
-
94
- def _init_chapter_storages(self, book_id: str) -> None:
95
- if book_id in self._chapter_storage_cache:
96
- return
97
- raw_base = self._raw_data_dir / book_id
98
- self._chapter_storage_cache[book_id] = [
99
- ChapterStorage(
100
- raw_base=raw_base,
101
- namespace=ns,
102
- backend_type=self._config.storage_backend,
103
- )
104
- for ns in self._chap_folders
105
- ]
106
-
107
- def _on_close(self) -> None:
108
- """
109
- Close all ChapterStorage connections in the cache.
110
- """
111
- for storages in self._chapter_storage_cache.values():
112
- for storage in storages:
113
- try:
114
- storage.close()
115
- except Exception as e:
116
- self.logger.warning("Failed to close storage %s: %s", storage, e)
117
- self._chapter_storage_cache.clear()