novel-downloader 1.4.5__py3-none-any.whl → 1.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (165) hide show
  1. novel_downloader/__init__.py +1 -1
  2. novel_downloader/cli/__init__.py +2 -2
  3. novel_downloader/cli/config.py +1 -83
  4. novel_downloader/cli/download.py +4 -5
  5. novel_downloader/cli/export.py +4 -1
  6. novel_downloader/cli/main.py +2 -0
  7. novel_downloader/cli/search.py +123 -0
  8. novel_downloader/config/__init__.py +3 -10
  9. novel_downloader/config/adapter.py +190 -54
  10. novel_downloader/config/loader.py +2 -3
  11. novel_downloader/core/__init__.py +13 -13
  12. novel_downloader/core/downloaders/__init__.py +10 -11
  13. novel_downloader/core/downloaders/base.py +152 -26
  14. novel_downloader/core/downloaders/biquge.py +5 -1
  15. novel_downloader/core/downloaders/common.py +157 -378
  16. novel_downloader/core/downloaders/esjzone.py +5 -1
  17. novel_downloader/core/downloaders/linovelib.py +5 -1
  18. novel_downloader/core/downloaders/qianbi.py +291 -4
  19. novel_downloader/core/downloaders/qidian.py +199 -285
  20. novel_downloader/core/downloaders/registry.py +67 -0
  21. novel_downloader/core/downloaders/sfacg.py +5 -1
  22. novel_downloader/core/downloaders/yamibo.py +5 -1
  23. novel_downloader/core/exporters/__init__.py +10 -11
  24. novel_downloader/core/exporters/base.py +87 -7
  25. novel_downloader/core/exporters/biquge.py +5 -8
  26. novel_downloader/core/exporters/common/__init__.py +2 -2
  27. novel_downloader/core/exporters/common/epub.py +82 -166
  28. novel_downloader/core/exporters/common/main_exporter.py +0 -60
  29. novel_downloader/core/exporters/common/txt.py +82 -83
  30. novel_downloader/core/exporters/epub_util.py +157 -1330
  31. novel_downloader/core/exporters/esjzone.py +5 -8
  32. novel_downloader/core/exporters/linovelib/__init__.py +2 -2
  33. novel_downloader/core/exporters/linovelib/epub.py +157 -212
  34. novel_downloader/core/exporters/linovelib/main_exporter.py +2 -59
  35. novel_downloader/core/exporters/linovelib/txt.py +67 -63
  36. novel_downloader/core/exporters/qianbi.py +5 -8
  37. novel_downloader/core/exporters/qidian.py +14 -4
  38. novel_downloader/core/exporters/registry.py +53 -0
  39. novel_downloader/core/exporters/sfacg.py +5 -8
  40. novel_downloader/core/exporters/txt_util.py +67 -0
  41. novel_downloader/core/exporters/yamibo.py +5 -8
  42. novel_downloader/core/fetchers/__init__.py +19 -24
  43. novel_downloader/core/fetchers/base/__init__.py +3 -3
  44. novel_downloader/core/fetchers/base/browser.py +23 -4
  45. novel_downloader/core/fetchers/base/session.py +30 -5
  46. novel_downloader/core/fetchers/biquge/__init__.py +3 -3
  47. novel_downloader/core/fetchers/biquge/browser.py +5 -0
  48. novel_downloader/core/fetchers/biquge/session.py +6 -1
  49. novel_downloader/core/fetchers/esjzone/__init__.py +3 -3
  50. novel_downloader/core/fetchers/esjzone/browser.py +5 -0
  51. novel_downloader/core/fetchers/esjzone/session.py +6 -1
  52. novel_downloader/core/fetchers/linovelib/__init__.py +3 -3
  53. novel_downloader/core/fetchers/linovelib/browser.py +6 -1
  54. novel_downloader/core/fetchers/linovelib/session.py +6 -1
  55. novel_downloader/core/fetchers/qianbi/__init__.py +3 -3
  56. novel_downloader/core/fetchers/qianbi/browser.py +5 -0
  57. novel_downloader/core/fetchers/qianbi/session.py +5 -0
  58. novel_downloader/core/fetchers/qidian/__init__.py +3 -3
  59. novel_downloader/core/fetchers/qidian/browser.py +12 -4
  60. novel_downloader/core/fetchers/qidian/session.py +11 -3
  61. novel_downloader/core/fetchers/registry.py +71 -0
  62. novel_downloader/core/fetchers/sfacg/__init__.py +3 -3
  63. novel_downloader/core/fetchers/sfacg/browser.py +5 -0
  64. novel_downloader/core/fetchers/sfacg/session.py +5 -0
  65. novel_downloader/core/fetchers/yamibo/__init__.py +3 -3
  66. novel_downloader/core/fetchers/yamibo/browser.py +5 -0
  67. novel_downloader/core/fetchers/yamibo/session.py +6 -1
  68. novel_downloader/core/interfaces/__init__.py +7 -5
  69. novel_downloader/core/interfaces/searcher.py +18 -0
  70. novel_downloader/core/parsers/__init__.py +10 -11
  71. novel_downloader/core/parsers/{biquge/main_parser.py → biquge.py} +7 -2
  72. novel_downloader/core/parsers/{esjzone/main_parser.py → esjzone.py} +7 -2
  73. novel_downloader/core/parsers/{linovelib/main_parser.py → linovelib.py} +7 -2
  74. novel_downloader/core/parsers/{qianbi/main_parser.py → qianbi.py} +7 -2
  75. novel_downloader/core/parsers/qidian/__init__.py +2 -2
  76. novel_downloader/core/parsers/qidian/chapter_encrypted.py +23 -21
  77. novel_downloader/core/parsers/qidian/chapter_normal.py +1 -1
  78. novel_downloader/core/parsers/qidian/main_parser.py +10 -21
  79. novel_downloader/core/parsers/qidian/utils/__init__.py +11 -11
  80. novel_downloader/core/parsers/qidian/utils/decryptor_fetcher.py +5 -6
  81. novel_downloader/core/parsers/qidian/utils/node_decryptor.py +2 -2
  82. novel_downloader/core/parsers/registry.py +68 -0
  83. novel_downloader/core/parsers/{sfacg/main_parser.py → sfacg.py} +7 -2
  84. novel_downloader/core/parsers/{yamibo/main_parser.py → yamibo.py} +7 -2
  85. novel_downloader/core/searchers/__init__.py +20 -0
  86. novel_downloader/core/searchers/base.py +92 -0
  87. novel_downloader/core/searchers/biquge.py +83 -0
  88. novel_downloader/core/searchers/esjzone.py +84 -0
  89. novel_downloader/core/searchers/qianbi.py +131 -0
  90. novel_downloader/core/searchers/qidian.py +87 -0
  91. novel_downloader/core/searchers/registry.py +63 -0
  92. novel_downloader/locales/en.json +12 -4
  93. novel_downloader/locales/zh.json +12 -4
  94. novel_downloader/models/__init__.py +4 -30
  95. novel_downloader/models/config.py +12 -6
  96. novel_downloader/models/search.py +16 -0
  97. novel_downloader/models/types.py +0 -2
  98. novel_downloader/resources/config/settings.toml +31 -4
  99. novel_downloader/resources/css_styles/intro.css +83 -0
  100. novel_downloader/resources/css_styles/main.css +30 -89
  101. novel_downloader/utils/__init__.py +52 -0
  102. novel_downloader/utils/chapter_storage.py +244 -224
  103. novel_downloader/utils/constants.py +1 -21
  104. novel_downloader/utils/epub/__init__.py +34 -0
  105. novel_downloader/utils/epub/builder.py +377 -0
  106. novel_downloader/utils/epub/constants.py +77 -0
  107. novel_downloader/utils/epub/documents.py +403 -0
  108. novel_downloader/utils/epub/models.py +134 -0
  109. novel_downloader/utils/epub/utils.py +212 -0
  110. novel_downloader/utils/file_utils/__init__.py +10 -14
  111. novel_downloader/utils/file_utils/io.py +20 -51
  112. novel_downloader/utils/file_utils/normalize.py +2 -2
  113. novel_downloader/utils/file_utils/sanitize.py +2 -3
  114. novel_downloader/utils/fontocr/__init__.py +5 -5
  115. novel_downloader/utils/{hash_store.py → fontocr/hash_store.py} +4 -3
  116. novel_downloader/utils/{hash_utils.py → fontocr/hash_utils.py} +2 -2
  117. novel_downloader/utils/fontocr/ocr_v1.py +13 -1
  118. novel_downloader/utils/fontocr/ocr_v2.py +13 -1
  119. novel_downloader/utils/fontocr/ocr_v3.py +744 -0
  120. novel_downloader/utils/i18n.py +2 -0
  121. novel_downloader/utils/logger.py +2 -0
  122. novel_downloader/utils/network.py +110 -251
  123. novel_downloader/utils/state.py +1 -0
  124. novel_downloader/utils/text_utils/__init__.py +18 -17
  125. novel_downloader/utils/text_utils/diff_display.py +4 -5
  126. novel_downloader/utils/text_utils/numeric_conversion.py +253 -0
  127. novel_downloader/utils/text_utils/text_cleaner.py +179 -0
  128. novel_downloader/utils/text_utils/truncate_utils.py +62 -0
  129. novel_downloader/utils/time_utils/__init__.py +3 -3
  130. novel_downloader/utils/time_utils/datetime_utils.py +4 -5
  131. novel_downloader/utils/time_utils/sleep_utils.py +2 -3
  132. {novel_downloader-1.4.5.dist-info → novel_downloader-1.5.0.dist-info}/METADATA +2 -2
  133. novel_downloader-1.5.0.dist-info/RECORD +164 -0
  134. novel_downloader/config/site_rules.py +0 -94
  135. novel_downloader/core/factory/__init__.py +0 -20
  136. novel_downloader/core/factory/downloader.py +0 -73
  137. novel_downloader/core/factory/exporter.py +0 -58
  138. novel_downloader/core/factory/fetcher.py +0 -96
  139. novel_downloader/core/factory/parser.py +0 -86
  140. novel_downloader/core/fetchers/common/__init__.py +0 -14
  141. novel_downloader/core/fetchers/common/browser.py +0 -79
  142. novel_downloader/core/fetchers/common/session.py +0 -79
  143. novel_downloader/core/parsers/biquge/__init__.py +0 -10
  144. novel_downloader/core/parsers/common/__init__.py +0 -13
  145. novel_downloader/core/parsers/common/helper.py +0 -323
  146. novel_downloader/core/parsers/common/main_parser.py +0 -106
  147. novel_downloader/core/parsers/esjzone/__init__.py +0 -10
  148. novel_downloader/core/parsers/linovelib/__init__.py +0 -10
  149. novel_downloader/core/parsers/qianbi/__init__.py +0 -10
  150. novel_downloader/core/parsers/sfacg/__init__.py +0 -10
  151. novel_downloader/core/parsers/yamibo/__init__.py +0 -10
  152. novel_downloader/models/browser.py +0 -21
  153. novel_downloader/models/site_rules.py +0 -99
  154. novel_downloader/models/tasks.py +0 -33
  155. novel_downloader/resources/css_styles/volume-intro.css +0 -56
  156. novel_downloader/resources/json/replace_word_map.json +0 -4
  157. novel_downloader/resources/text/blacklist.txt +0 -22
  158. novel_downloader/utils/text_utils/chapter_formatting.py +0 -46
  159. novel_downloader/utils/text_utils/font_mapping.py +0 -28
  160. novel_downloader/utils/text_utils/text_cleaning.py +0 -107
  161. novel_downloader-1.4.5.dist-info/RECORD +0 -165
  162. {novel_downloader-1.4.5.dist-info → novel_downloader-1.5.0.dist-info}/WHEEL +0 -0
  163. {novel_downloader-1.4.5.dist-info → novel_downloader-1.5.0.dist-info}/entry_points.txt +0 -0
  164. {novel_downloader-1.4.5.dist-info → novel_downloader-1.5.0.dist-info}/licenses/LICENSE +0 -0
  165. {novel_downloader-1.4.5.dist-info → novel_downloader-1.5.0.dist-info}/top_level.txt +0 -0
@@ -5,21 +5,18 @@ novel_downloader.core.exporters.esjzone
5
5
 
6
6
  """
7
7
 
8
+ __all__ = ["EsjzoneExporter"]
9
+
10
+ from novel_downloader.core.exporters.registry import register_exporter
8
11
  from novel_downloader.models import ExporterConfig
9
12
 
10
13
  from .common import CommonExporter
11
14
 
12
15
 
16
+ @register_exporter(site_keys=["esjzone"])
13
17
  class EsjzoneExporter(CommonExporter):
14
18
  def __init__(
15
19
  self,
16
20
  config: ExporterConfig,
17
21
  ):
18
- super().__init__(
19
- config,
20
- site="esjzone",
21
- chap_folders=["chapters"],
22
- )
23
-
24
-
25
- __all__ = ["EsjzoneExporter"]
22
+ super().__init__(config, site="esjzone")
@@ -5,6 +5,6 @@ novel_downloader.core.exporters.linovelib
5
5
 
6
6
  """
7
7
 
8
- from .main_exporter import LinovelibExporter
9
-
10
8
  __all__ = ["LinovelibExporter"]
9
+
10
+ from .main_exporter import LinovelibExporter
@@ -8,37 +8,32 @@ Contains the logic for exporting novel content as a single `.epub` file.
8
8
 
9
9
  from __future__ import annotations
10
10
 
11
- import html
12
- import json
13
- import re
14
11
  from pathlib import Path
15
12
  from typing import TYPE_CHECKING
16
13
 
17
14
  from novel_downloader.core.exporters.epub_util import (
18
- Book,
19
- Chapter,
20
- StyleSheet,
21
- Volume,
15
+ build_epub_chapter,
16
+ download_cover,
17
+ finalize_export,
18
+ inline_remote_images,
19
+ prepare_builder,
20
+ )
21
+ from novel_downloader.utils import (
22
+ download,
23
+ get_cleaner,
22
24
  )
23
25
  from novel_downloader.utils.constants import (
24
- CSS_MAIN_PATH,
25
26
  DEFAULT_HEADERS,
27
+ DEFAULT_IMAGE_SUFFIX,
28
+ )
29
+ from novel_downloader.utils.epub import (
30
+ Chapter,
31
+ Volume,
26
32
  )
27
- from novel_downloader.utils.file_utils import sanitize_filename
28
- from novel_downloader.utils.network import download_image
29
33
 
30
34
  if TYPE_CHECKING:
31
35
  from .main_exporter import LinovelibExporter
32
36
 
33
- _IMAGE_WRAPPER = (
34
- '<div class="duokan-image-single illus"><img src="../Images/{filename}" /></div>'
35
- )
36
- _IMG_TAG_PATTERN = re.compile(
37
- r'<img\s+[^>]*src=[\'"]([^\'"]+)[\'"][^>]*>', re.IGNORECASE
38
- )
39
- _RAW_HTML_RE = re.compile(
40
- r'^(<img\b[^>]*?\/>|<div class="duokan-image-single illus">.*?<\/div>)$', re.DOTALL
41
- )
42
37
  _IMG_HEADERS = DEFAULT_HEADERS.copy()
43
38
  _IMG_HEADERS["Referer"] = "https://www.linovelib.com/"
44
39
 
@@ -54,132 +49,143 @@ def export_whole_book(
54
49
  1. Load `book_info.json` for metadata.
55
50
  2. Generate introductory HTML and optionally include the cover image.
56
51
  3. Initialize the EPUB container.
57
- 4. Iterate through volumes and chapters, convert each to XHTML.
52
+ 4. Iterate through volumes and chapters in volume-batches, convert each to XHTML.
58
53
  5. Assemble the spine, TOC, CSS and write out the final `.epub`.
59
54
 
55
+ :param exporter: The exporter instance, carrying config and path info.
60
56
  :param book_id: Identifier of the novel (used as subdirectory name).
61
57
  """
62
58
  TAG = "[exporter]"
63
59
  config = exporter._config
64
- # --- Paths & options ---
60
+
65
61
  raw_base = exporter._raw_data_dir / book_id
66
- img_dir = exporter._cache_dir / book_id / "images"
62
+ img_dir = raw_base / "images"
67
63
  out_dir = exporter.output_dir
64
+
68
65
  img_dir.mkdir(parents=True, exist_ok=True)
69
66
  out_dir.mkdir(parents=True, exist_ok=True)
70
67
 
68
+ cleaner = get_cleaner(
69
+ enabled=config.clean_text,
70
+ config=config.cleaner_cfg,
71
+ )
72
+
71
73
  # --- Load book_info.json ---
72
- info_path = raw_base / "book_info.json"
73
- try:
74
- info_text = info_path.read_text(encoding="utf-8")
75
- book_info = json.loads(info_text)
76
- except Exception as e:
77
- exporter.logger.error("%s Failed to load %s: %s", TAG, info_path, e)
74
+ book_info = exporter._load_book_info(book_id)
75
+ if not book_info:
78
76
  return
79
77
 
80
78
  book_name = book_info.get("book_name", book_id)
81
79
  book_author = book_info.get("author", "")
80
+
82
81
  exporter.logger.info(
83
82
  "%s Starting EPUB generation: %s (ID: %s)", TAG, book_name, book_id
84
83
  )
85
84
 
86
85
  # --- Generate intro + cover ---
87
- cover_path: Path | None = None
88
- cover_url = book_info.get("cover_url", "")
89
- if config.include_cover and cover_url:
90
- cover_path = download_image(
91
- cover_url,
92
- raw_base,
93
- target_name="cover",
94
- headers=_IMG_HEADERS,
95
- on_exist="overwrite",
96
- )
97
- if not cover_path:
98
- exporter.logger.warning("Failed to download cover from %s", cover_url)
86
+ cover_path = download_cover(
87
+ book_info.get("cover_url", ""),
88
+ raw_base,
89
+ config.include_cover,
90
+ exporter.logger,
91
+ TAG,
92
+ headers=_IMG_HEADERS,
93
+ )
99
94
 
100
95
  # --- Initialize EPUB ---
101
- book = Book(
96
+ book, main_css = prepare_builder(
97
+ site_name=exporter.site,
98
+ book_id=book_id,
102
99
  title=book_name,
103
100
  author=book_author,
104
101
  description=book_info.get("summary", ""),
105
- cover_path=cover_path,
106
102
  subject=book_info.get("subject", []),
107
103
  serial_status=book_info.get("serial_status", ""),
108
104
  word_count=book_info.get("word_count", ""),
109
- uid=f"{exporter.site}_{book_id}",
110
- )
111
- main_css = StyleSheet(
112
- id="main_style",
113
- content=CSS_MAIN_PATH.read_text(encoding="utf-8"),
114
- filename="main.css",
105
+ cover_path=cover_path,
115
106
  )
116
- book.add_stylesheet(main_css)
117
107
 
118
108
  # --- Compile chapters ---
119
109
  volumes = book_info.get("volumes", [])
110
+ if not volumes:
111
+ exporter.logger.warning("%s No volumes found in metadata.", TAG)
112
+
120
113
  for vol_index, vol in enumerate(volumes, start=1):
121
- raw_vol_name = vol.get("volume_name", "")
122
- raw_vol_name = raw_vol_name.replace(book_name, "").strip()
123
- vol_name = raw_vol_name or f"Volume {vol_index}"
114
+ raw_name = vol.get("volume_name", "")
115
+ raw_name = raw_name.replace(book_name, "").strip()
116
+ vol_name = raw_name or f"Volume {vol_index}"
124
117
  exporter.logger.info("Processing volume %d: %s", vol_index, vol_name)
125
118
 
126
- vol_cover_path: Path | None = None
119
+ # Batch-fetch chapters for this volume
120
+ chap_ids = [
121
+ chap.get("chapterId")
122
+ for chap in vol.get("chapters", [])
123
+ if chap.get("chapterId")
124
+ ]
125
+ chap_map = exporter._get_chapters(book_id, chap_ids)
126
+
127
+ vol_cover: Path | None = None
127
128
  vol_cover_url = vol.get("volume_cover", "")
128
129
  if vol_cover_url:
129
- vol_cover_path = download_image(
130
+ vol_cover = download(
130
131
  vol_cover_url,
131
132
  img_dir,
132
133
  on_exist="skip",
134
+ default_suffix=DEFAULT_IMAGE_SUFFIX,
135
+ headers=_IMG_HEADERS,
133
136
  )
134
137
 
135
138
  curr_vol = Volume(
136
139
  id=f"vol_{vol_index}",
137
140
  title=vol_name,
138
- intro=vol.get("volume_intro", ""),
139
- cover=vol_cover_path,
141
+ intro=cleaner.clean_content(vol.get("volume_intro", "")),
142
+ cover=vol_cover,
140
143
  )
141
144
 
142
- for chap in vol.get("chapters", []):
143
- chap_id = chap.get("chapterId")
144
- chap_title = chap.get("title", "")
145
+ for chap_meta in vol.get("chapters", []):
146
+ chap_id = chap_meta.get("chapterId")
145
147
  if not chap_id:
146
148
  exporter.logger.warning(
147
149
  "%s Missing chapterId, skipping: %s",
148
150
  TAG,
149
- chap,
151
+ chap_meta,
150
152
  )
151
153
  continue
152
154
 
153
- chapter_data = exporter._get_chapter(book_id, chap_id)
154
- if not chapter_data:
155
+ chap_title = cleaner.clean_title(chap_meta.get("title", ""))
156
+ data = chap_map.get(chap_id)
157
+ if not data:
155
158
  exporter.logger.info(
156
- "%s Missing chapter file: %s (%s), skipping.",
159
+ "%s Missing chapter: %s (%s), skipping.",
157
160
  TAG,
158
161
  chap_title,
159
162
  chap_id,
160
163
  )
161
164
  continue
162
165
 
163
- title = chapter_data.get("title") or chap_id
164
- content: str = chapter_data.get("content", "")
165
- content, img_paths = _inline_remote_images(content, img_dir)
166
- chap_html = _txt_to_html(
167
- chapter_title=title,
168
- chapter_text=content,
169
- extras={
170
- "作者说": chapter_data.get("author_say", ""),
171
- },
166
+ title = cleaner.clean_title(data.get("title", chap_title)) or chap_id
167
+ content = cleaner.clean_content(data.get("content", ""))
168
+ content = inline_remote_images(
169
+ book,
170
+ content,
171
+ img_dir,
172
+ headers=_IMG_HEADERS,
173
+ )
174
+
175
+ chap_html = build_epub_chapter(
176
+ title=title,
177
+ paragraphs=content,
178
+ extras={},
172
179
  )
173
180
  curr_vol.add_chapter(
174
181
  Chapter(
175
182
  id=f"c_{chap_id}",
183
+ filename=f"c{chap_id}.xhtml",
176
184
  title=title,
177
185
  content=chap_html,
178
186
  css=[main_css],
179
187
  )
180
188
  )
181
- for img_path in img_paths:
182
- book.add_image(img_path)
183
189
 
184
190
  book.add_volume(curr_vol)
185
191
 
@@ -189,13 +195,13 @@ def export_whole_book(
189
195
  author=book_info.get("author"),
190
196
  ext="epub",
191
197
  )
192
- out_path = out_dir / sanitize_filename(out_name)
193
-
194
- try:
195
- book.export(out_path)
196
- exporter.logger.info("%s EPUB successfully written to %s", TAG, out_path)
197
- except Exception as e:
198
- exporter.logger.error("%s Failed to write EPUB to %s: %s", TAG, out_path, e)
198
+ finalize_export(
199
+ book=book,
200
+ out_dir=out_dir,
201
+ filename=out_name,
202
+ logger=exporter.logger,
203
+ tag=TAG,
204
+ )
199
205
  return
200
206
 
201
207
 
@@ -204,203 +210,142 @@ def export_by_volume(
204
210
  book_id: str,
205
211
  ) -> None:
206
212
  """
207
- Export a single novel (identified by `book_id`) to multi EPUB file.
213
+ Export each volume of a novel as a separate EPUB file.
214
+
215
+ Steps:
216
+ 1. Load metadata from `book_info.json`.
217
+ 2. For each volume:
218
+ a. Clean the volume title and determine output filename.
219
+ b. Batch-fetch all chapters in this volume to minimize SQLite overhead.
220
+ c. Initialize an EPUB builder for the volume, including cover and intro.
221
+ d. For each chapter: clean title & content, inline remote images.
222
+ e. Finalize and write the volume EPUB.
208
223
 
209
224
  :param book_id: Identifier of the novel (used as subdirectory name).
210
225
  """
211
226
  TAG = "[exporter]"
212
227
  config = exporter._config
213
- # --- Paths & options ---
228
+
214
229
  raw_base = exporter._raw_data_dir / book_id
215
- img_dir = exporter._cache_dir / book_id / "images"
230
+ img_dir = raw_base / "images"
216
231
  out_dir = exporter.output_dir
232
+
217
233
  img_dir.mkdir(parents=True, exist_ok=True)
218
234
  out_dir.mkdir(parents=True, exist_ok=True)
219
235
 
236
+ cleaner = get_cleaner(
237
+ enabled=config.clean_text,
238
+ config=config.cleaner_cfg,
239
+ )
240
+
220
241
  # --- Load book_info.json ---
221
- info_path = raw_base / "book_info.json"
222
- try:
223
- info_text = info_path.read_text(encoding="utf-8")
224
- book_info = json.loads(info_text)
225
- except Exception as e:
226
- exporter.logger.error("%s Failed to load %s: %s", TAG, info_path, e)
242
+ book_info = exporter._load_book_info(book_id)
243
+ if not book_info:
227
244
  return
228
245
 
229
246
  book_name = book_info.get("book_name", book_id)
230
247
  book_author = book_info.get("author", "")
231
248
  book_summary = book_info.get("summary", "")
249
+
232
250
  exporter.logger.info(
233
251
  "%s Starting EPUB generation: %s (ID: %s)", TAG, book_name, book_id
234
252
  )
235
253
 
236
- main_css = StyleSheet(
237
- id="main_style",
238
- content=CSS_MAIN_PATH.read_text(encoding="utf-8"),
239
- filename="main.css",
240
- )
241
-
242
254
  # --- Compile columes ---
243
255
  volumes = book_info.get("volumes", [])
244
- for vol_index, vol in enumerate(volumes, start=1):
245
- raw_vol_name = vol.get("volume_name", "")
246
- raw_vol_name = raw_vol_name.replace(book_name, "").strip()
247
- vol_name = raw_vol_name or f"Volume {vol_index}"
256
+ if not volumes:
257
+ exporter.logger.warning("%s No volumes found in metadata.", TAG)
248
258
 
249
- vol_cover_path: Path | None = None
259
+ for vol_index, vol in enumerate(volumes, start=1):
260
+ raw_name = vol.get("volume_name", "")
261
+ raw_name = cleaner.clean_title(raw_name.replace(book_name, ""))
262
+ vol_name = raw_name or f"Volume {vol_index}"
263
+
264
+ # Batch-fetch chapters for this volume
265
+ chap_ids = [
266
+ chap.get("chapterId")
267
+ for chap in vol.get("chapters", [])
268
+ if chap.get("chapterId")
269
+ ]
270
+ chap_map = exporter._get_chapters(book_id, chap_ids)
271
+
272
+ vol_cover: Path | None = None
250
273
  vol_cover_url = vol.get("volume_cover", "")
251
274
  if config.include_cover and vol_cover_url:
252
- vol_cover_path = download_image(
275
+ vol_cover = download(
253
276
  vol_cover_url,
254
277
  img_dir,
255
278
  headers=_IMG_HEADERS,
256
279
  on_exist="skip",
280
+ default_suffix=DEFAULT_IMAGE_SUFFIX,
257
281
  )
258
282
 
259
- book = Book(
260
- title=vol_name,
283
+ book, main_css = prepare_builder(
284
+ site_name=exporter.site,
285
+ book_id=book_id,
286
+ title=book_name,
261
287
  author=book_author,
262
288
  description=vol.get("volume_intro") or book_summary,
263
- cover_path=vol_cover_path,
264
289
  subject=book_info.get("subject", []),
265
290
  serial_status=vol.get("serial_status", ""),
266
291
  word_count=vol.get("word_count", ""),
267
- uid=f"{exporter.site}_{book_id}_v{vol_index}",
292
+ cover_path=vol_cover,
268
293
  )
269
- book.add_stylesheet(main_css)
270
294
 
271
- for chap in vol.get("chapters", []):
272
- chap_id = chap.get("chapterId")
273
- chap_title = chap.get("title", "")
295
+ for chap_meta in vol.get("chapters", []):
296
+ chap_id = chap_meta.get("chapterId")
274
297
  if not chap_id:
275
298
  exporter.logger.warning(
276
299
  "%s Missing chapterId, skipping: %s",
277
300
  TAG,
278
- chap,
301
+ chap_meta,
279
302
  )
280
303
  continue
281
304
 
282
- chapter_data = exporter._get_chapter(book_id, chap_id)
283
- if not chapter_data:
305
+ chap_title = cleaner.clean_title(chap_meta.get("title", ""))
306
+ data = chap_map.get(chap_id)
307
+ if not data:
284
308
  exporter.logger.info(
285
- "%s Missing chapter file: %s (%s), skipping.",
309
+ "%s Missing chapter: %s (%s), skipping.",
286
310
  TAG,
287
311
  chap_title,
288
312
  chap_id,
289
313
  )
290
314
  continue
291
315
 
292
- title = chapter_data.get("title", "") or chap_id
293
- content: str = chapter_data.get("content", "")
294
- content, imgs = _inline_remote_images(content, img_dir)
295
- chap_html = _txt_to_html(
296
- chapter_title=title,
297
- chapter_text=content,
316
+ title = cleaner.clean_title(data.get("title", chap_title)) or chap_id
317
+ content = cleaner.clean_content(data.get("content", ""))
318
+ content = inline_remote_images(
319
+ book,
320
+ content,
321
+ img_dir,
322
+ headers=_IMG_HEADERS,
323
+ )
324
+ chap_html = build_epub_chapter(
325
+ title=title,
326
+ paragraphs=content,
298
327
  extras={},
299
328
  )
300
329
  book.add_chapter(
301
330
  Chapter(
302
331
  id=f"c_{chap_id}",
332
+ filename=f"c{chap_id}.xhtml",
303
333
  title=title,
304
334
  content=chap_html,
305
335
  css=[main_css],
306
336
  )
307
337
  )
308
- for img_path in imgs:
309
- book.add_image(img_path)
310
338
 
311
339
  out_name = exporter.get_filename(
312
340
  title=vol_name,
313
341
  author=book_info.get("author"),
314
342
  ext="epub",
315
343
  )
316
- out_path = out_dir / sanitize_filename(out_name)
317
-
318
- try:
319
- book.export(out_path)
320
- exporter.logger.info("%s EPUB successfully written to %s", TAG, out_path)
321
- except Exception as e:
322
- exporter.logger.error("%s Failed to write EPUB to %s: %s", TAG, out_path, e)
344
+ finalize_export(
345
+ book=book,
346
+ out_dir=out_dir,
347
+ filename=out_name,
348
+ logger=exporter.logger,
349
+ tag=TAG,
350
+ )
323
351
  return
324
-
325
-
326
- def _inline_remote_images(
327
- content: str,
328
- image_dir: str | Path,
329
- ) -> tuple[str, list[Path]]:
330
- """
331
- Download every remote `<img src="...">` in `content` into `image_dir`,
332
- and replace the original tag with _IMAGE_WRAPPER
333
- pointing to the local filename.
334
-
335
- :param content: HTML/text of the chapter containing <img> tags.
336
- :param image_dir: Directory to save downloaded images into.
337
- :return: A tuple (modified_content, list_of_downloaded_image_paths).
338
- """
339
- downloaded_images: list[Path] = []
340
-
341
- def _replace(match: re.Match[str]) -> str:
342
- url = match.group(1)
343
- try:
344
- # download_image returns a Path or None
345
- local_path = download_image(
346
- url,
347
- image_dir,
348
- target_name=None,
349
- headers=_IMG_HEADERS,
350
- on_exist="skip",
351
- )
352
- if not local_path:
353
- return match.group(0)
354
-
355
- downloaded_images.append(local_path)
356
- return _IMAGE_WRAPPER.format(filename=local_path.name)
357
- except Exception:
358
- return match.group(0)
359
-
360
- modified_content = _IMG_TAG_PATTERN.sub(_replace, content)
361
- return modified_content, downloaded_images
362
-
363
-
364
- def _txt_to_html(
365
- chapter_title: str,
366
- chapter_text: str,
367
- extras: dict[str, str] | None = None,
368
- ) -> str:
369
- """
370
- Convert chapter text and author note to styled HTML.
371
-
372
- :param chapter_title: Title of the chapter.
373
- :param chapter_text: Main content of the chapter.
374
- :param extras: Optional dict of titles and content, e.g. {"作者说": "text"}.
375
- :return: Rendered HTML as a string.
376
- """
377
-
378
- def _render_block(text: str) -> str:
379
- lines = (line.strip() for line in text.splitlines() if line.strip())
380
- out = []
381
- for line in lines:
382
- # preserve raw HTML, otherwise wrap in <p>
383
- if _RAW_HTML_RE.match(line):
384
- out.append(line)
385
- else:
386
- out.append(f"<p>{html.escape(line)}</p>")
387
- return "\n".join(out)
388
-
389
- parts = []
390
- parts.append(f"<h2>{html.escape(chapter_title)}</h2>")
391
- parts.append(_render_block(chapter_text))
392
-
393
- if extras:
394
- for title, note in extras.items():
395
- note = note.strip()
396
- if not note:
397
- continue
398
- parts.extend(
399
- [
400
- "<hr />",
401
- f"<p>{html.escape(title)}</p>",
402
- _render_block(note),
403
- ]
404
- )
405
-
406
- return "\n".join(parts)
@@ -5,16 +5,15 @@ novel_downloader.core.exporters.linovelib.main_exporter
5
5
 
6
6
  """
7
7
 
8
- from collections.abc import Mapping
9
- from typing import Any
10
8
 
11
9
  from novel_downloader.core.exporters.base import BaseExporter
10
+ from novel_downloader.core.exporters.registry import register_exporter
12
11
  from novel_downloader.models import ExporterConfig
13
- from novel_downloader.utils.chapter_storage import ChapterStorage
14
12
 
15
13
  from .txt import linovelib_export_as_txt
16
14
 
17
15
 
16
+ @register_exporter(site_keys=["linovelib"])
18
17
  class LinovelibExporter(BaseExporter):
19
18
  """"""
20
19
 
@@ -29,8 +28,6 @@ class LinovelibExporter(BaseExporter):
29
28
  save paths, formats, and options.
30
29
  """
31
30
  super().__init__(config, "linovelib")
32
- self._chapter_storage_cache: dict[str, list[ChapterStorage]] = {}
33
- self._chap_folders: list[str] = ["chapters"]
34
31
 
35
32
  def export_as_txt(self, book_id: str) -> None:
36
33
  """
@@ -71,57 +68,3 @@ class LinovelibExporter(BaseExporter):
71
68
  f"Unsupported split_mode: {self._config.split_mode!r}"
72
69
  ) from err
73
70
  return export_fn(self, book_id)
74
-
75
- @property
76
- def site(self) -> str:
77
- """
78
- Get the site identifier.
79
-
80
- :return: The site string.
81
- """
82
- return self._site
83
-
84
- @site.setter
85
- def site(self, value: str) -> None:
86
- """
87
- Set the site identifier.
88
-
89
- :param value: New site string to set.
90
- """
91
- self._site = value
92
-
93
- def _get_chapter(
94
- self,
95
- book_id: str,
96
- chap_id: str,
97
- ) -> Mapping[str, Any]:
98
- for storage in self._chapter_storage_cache[book_id]:
99
- data = storage.get(chap_id)
100
- if data:
101
- return data
102
- return {}
103
-
104
- def _init_chapter_storages(self, book_id: str) -> None:
105
- if book_id in self._chapter_storage_cache:
106
- return
107
- raw_base = self._raw_data_dir / book_id
108
- self._chapter_storage_cache[book_id] = [
109
- ChapterStorage(
110
- raw_base=raw_base,
111
- namespace=ns,
112
- backend_type=self._config.storage_backend,
113
- )
114
- for ns in self._chap_folders
115
- ]
116
-
117
- def _on_close(self) -> None:
118
- """
119
- Close all ChapterStorage connections in the cache.
120
- """
121
- for storages in self._chapter_storage_cache.values():
122
- for storage in storages:
123
- try:
124
- storage.close()
125
- except Exception as e:
126
- self.logger.warning("Failed to close storage %s: %s", storage, e)
127
- self._chapter_storage_cache.clear()