novel-downloader 2.0.0__py3-none-any.whl → 2.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (137) hide show
  1. novel_downloader/__init__.py +1 -1
  2. novel_downloader/cli/download.py +14 -11
  3. novel_downloader/cli/export.py +19 -19
  4. novel_downloader/cli/ui.py +35 -8
  5. novel_downloader/config/adapter.py +216 -153
  6. novel_downloader/core/__init__.py +5 -6
  7. novel_downloader/core/archived/deqixs/fetcher.py +1 -28
  8. novel_downloader/core/downloaders/__init__.py +2 -0
  9. novel_downloader/core/downloaders/base.py +34 -85
  10. novel_downloader/core/downloaders/common.py +147 -171
  11. novel_downloader/core/downloaders/qianbi.py +30 -64
  12. novel_downloader/core/downloaders/qidian.py +157 -184
  13. novel_downloader/core/downloaders/qqbook.py +292 -0
  14. novel_downloader/core/downloaders/registry.py +2 -2
  15. novel_downloader/core/exporters/__init__.py +2 -0
  16. novel_downloader/core/exporters/base.py +37 -59
  17. novel_downloader/core/exporters/common.py +620 -0
  18. novel_downloader/core/exporters/linovelib.py +47 -0
  19. novel_downloader/core/exporters/qidian.py +41 -12
  20. novel_downloader/core/exporters/qqbook.py +28 -0
  21. novel_downloader/core/exporters/registry.py +2 -2
  22. novel_downloader/core/fetchers/__init__.py +4 -2
  23. novel_downloader/core/fetchers/aaatxt.py +2 -22
  24. novel_downloader/core/fetchers/b520.py +3 -23
  25. novel_downloader/core/fetchers/base.py +80 -105
  26. novel_downloader/core/fetchers/biquyuedu.py +2 -22
  27. novel_downloader/core/fetchers/dxmwx.py +10 -22
  28. novel_downloader/core/fetchers/esjzone.py +6 -29
  29. novel_downloader/core/fetchers/guidaye.py +2 -22
  30. novel_downloader/core/fetchers/hetushu.py +9 -29
  31. novel_downloader/core/fetchers/i25zw.py +2 -16
  32. novel_downloader/core/fetchers/ixdzs8.py +2 -16
  33. novel_downloader/core/fetchers/jpxs123.py +2 -16
  34. novel_downloader/core/fetchers/lewenn.py +2 -22
  35. novel_downloader/core/fetchers/linovelib.py +4 -20
  36. novel_downloader/core/fetchers/{eightnovel.py → n8novel.py} +12 -40
  37. novel_downloader/core/fetchers/piaotia.py +2 -16
  38. novel_downloader/core/fetchers/qbtr.py +2 -16
  39. novel_downloader/core/fetchers/qianbi.py +1 -20
  40. novel_downloader/core/fetchers/qidian.py +27 -68
  41. novel_downloader/core/fetchers/qqbook.py +177 -0
  42. novel_downloader/core/fetchers/quanben5.py +9 -29
  43. novel_downloader/core/fetchers/rate_limiter.py +22 -53
  44. novel_downloader/core/fetchers/sfacg.py +3 -16
  45. novel_downloader/core/fetchers/shencou.py +2 -16
  46. novel_downloader/core/fetchers/shuhaige.py +2 -22
  47. novel_downloader/core/fetchers/tongrenquan.py +2 -22
  48. novel_downloader/core/fetchers/ttkan.py +3 -14
  49. novel_downloader/core/fetchers/wanbengo.py +2 -22
  50. novel_downloader/core/fetchers/xiaoshuowu.py +2 -16
  51. novel_downloader/core/fetchers/xiguashuwu.py +4 -20
  52. novel_downloader/core/fetchers/xs63b.py +3 -15
  53. novel_downloader/core/fetchers/xshbook.py +2 -22
  54. novel_downloader/core/fetchers/yamibo.py +4 -28
  55. novel_downloader/core/fetchers/yibige.py +13 -26
  56. novel_downloader/core/interfaces/exporter.py +19 -7
  57. novel_downloader/core/interfaces/fetcher.py +23 -49
  58. novel_downloader/core/interfaces/parser.py +2 -2
  59. novel_downloader/core/parsers/__init__.py +4 -2
  60. novel_downloader/core/parsers/b520.py +2 -2
  61. novel_downloader/core/parsers/base.py +5 -39
  62. novel_downloader/core/parsers/esjzone.py +3 -3
  63. novel_downloader/core/parsers/{eightnovel.py → n8novel.py} +7 -7
  64. novel_downloader/core/parsers/qidian.py +717 -0
  65. novel_downloader/core/parsers/qqbook.py +709 -0
  66. novel_downloader/core/parsers/xiguashuwu.py +8 -15
  67. novel_downloader/core/searchers/__init__.py +2 -2
  68. novel_downloader/core/searchers/b520.py +1 -1
  69. novel_downloader/core/searchers/base.py +2 -2
  70. novel_downloader/core/searchers/{eightnovel.py → n8novel.py} +5 -5
  71. novel_downloader/locales/en.json +3 -3
  72. novel_downloader/locales/zh.json +3 -3
  73. novel_downloader/models/__init__.py +2 -0
  74. novel_downloader/models/book.py +1 -0
  75. novel_downloader/models/config.py +12 -0
  76. novel_downloader/resources/config/settings.toml +23 -5
  77. novel_downloader/resources/js_scripts/expr_to_json.js +14 -0
  78. novel_downloader/resources/js_scripts/qidian_decrypt_node.js +21 -16
  79. novel_downloader/resources/js_scripts/qq_decrypt_node.js +92 -0
  80. novel_downloader/utils/__init__.py +0 -2
  81. novel_downloader/utils/chapter_storage.py +2 -3
  82. novel_downloader/utils/constants.py +7 -3
  83. novel_downloader/utils/cookies.py +32 -17
  84. novel_downloader/utils/crypto_utils/__init__.py +0 -6
  85. novel_downloader/utils/crypto_utils/aes_util.py +1 -1
  86. novel_downloader/utils/crypto_utils/rc4.py +40 -50
  87. novel_downloader/utils/epub/__init__.py +2 -3
  88. novel_downloader/utils/epub/builder.py +6 -6
  89. novel_downloader/utils/epub/constants.py +1 -6
  90. novel_downloader/utils/epub/documents.py +7 -7
  91. novel_downloader/utils/epub/models.py +8 -8
  92. novel_downloader/utils/epub/utils.py +10 -10
  93. novel_downloader/utils/file_utils/io.py +48 -73
  94. novel_downloader/utils/file_utils/normalize.py +1 -7
  95. novel_downloader/utils/file_utils/sanitize.py +4 -11
  96. novel_downloader/utils/fontocr/__init__.py +13 -0
  97. novel_downloader/utils/{fontocr.py → fontocr/core.py} +72 -61
  98. novel_downloader/utils/fontocr/loader.py +52 -0
  99. novel_downloader/utils/logger.py +80 -56
  100. novel_downloader/utils/network.py +16 -40
  101. novel_downloader/utils/node_decryptor/__init__.py +13 -0
  102. novel_downloader/utils/node_decryptor/decryptor.py +342 -0
  103. novel_downloader/{core/parsers/qidian/utils → utils/node_decryptor}/decryptor_fetcher.py +5 -6
  104. novel_downloader/utils/text_utils/text_cleaner.py +39 -30
  105. novel_downloader/utils/text_utils/truncate_utils.py +3 -14
  106. novel_downloader/utils/time_utils/sleep_utils.py +53 -43
  107. novel_downloader/web/main.py +1 -1
  108. novel_downloader/web/pages/download.py +1 -1
  109. novel_downloader/web/pages/search.py +4 -4
  110. novel_downloader/web/services/task_manager.py +2 -0
  111. {novel_downloader-2.0.0.dist-info → novel_downloader-2.0.2.dist-info}/METADATA +5 -1
  112. novel_downloader-2.0.2.dist-info/RECORD +203 -0
  113. novel_downloader/core/exporters/common/__init__.py +0 -11
  114. novel_downloader/core/exporters/common/epub.py +0 -198
  115. novel_downloader/core/exporters/common/main_exporter.py +0 -64
  116. novel_downloader/core/exporters/common/txt.py +0 -146
  117. novel_downloader/core/exporters/epub_util.py +0 -215
  118. novel_downloader/core/exporters/linovelib/__init__.py +0 -11
  119. novel_downloader/core/exporters/linovelib/epub.py +0 -349
  120. novel_downloader/core/exporters/linovelib/main_exporter.py +0 -66
  121. novel_downloader/core/exporters/linovelib/txt.py +0 -139
  122. novel_downloader/core/exporters/txt_util.py +0 -67
  123. novel_downloader/core/parsers/qidian/__init__.py +0 -10
  124. novel_downloader/core/parsers/qidian/book_info_parser.py +0 -89
  125. novel_downloader/core/parsers/qidian/chapter_encrypted.py +0 -470
  126. novel_downloader/core/parsers/qidian/chapter_normal.py +0 -126
  127. novel_downloader/core/parsers/qidian/chapter_router.py +0 -68
  128. novel_downloader/core/parsers/qidian/main_parser.py +0 -101
  129. novel_downloader/core/parsers/qidian/utils/__init__.py +0 -30
  130. novel_downloader/core/parsers/qidian/utils/fontmap_recover.py +0 -143
  131. novel_downloader/core/parsers/qidian/utils/helpers.py +0 -110
  132. novel_downloader/core/parsers/qidian/utils/node_decryptor.py +0 -175
  133. novel_downloader-2.0.0.dist-info/RECORD +0 -210
  134. {novel_downloader-2.0.0.dist-info → novel_downloader-2.0.2.dist-info}/WHEEL +0 -0
  135. {novel_downloader-2.0.0.dist-info → novel_downloader-2.0.2.dist-info}/entry_points.txt +0 -0
  136. {novel_downloader-2.0.0.dist-info → novel_downloader-2.0.2.dist-info}/licenses/LICENSE +0 -0
  137. {novel_downloader-2.0.0.dist-info → novel_downloader-2.0.2.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,620 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ novel_downloader.core.exporters.common
4
+ --------------------------------------
5
+
6
+ Shared exporter implementation for producing standard TXT and EPUB outputs.
7
+ """
8
+
9
+ import re
10
+ from html import escape
11
+ from pathlib import Path
12
+ from typing import Any
13
+
14
+ from novel_downloader.core.exporters.base import BaseExporter
15
+ from novel_downloader.models import (
16
+ BookInfoDict,
17
+ ChapterDict,
18
+ VolumeInfoDict,
19
+ )
20
+ from novel_downloader.utils import (
21
+ download,
22
+ sanitize_filename,
23
+ write_file,
24
+ )
25
+ from novel_downloader.utils.constants import (
26
+ CSS_MAIN_PATH,
27
+ DEFAULT_HEADERS,
28
+ DEFAULT_IMAGE_SUFFIX,
29
+ )
30
+ from novel_downloader.utils.epub import (
31
+ Chapter,
32
+ EpubBuilder,
33
+ StyleSheet,
34
+ Volume,
35
+ )
36
+
37
+
38
class CommonExporter(BaseExporter):
    """
    CommonExporter is an exporter that processes and exports novels.

    It extends the BaseExporter interface and provides
    logic for exporting full novels as plain text (.txt) files
    and EPUB (.epub) files.

    Site-specific exporters can subclass it and override the small hooks
    (`_normalize_book_id`, `_render_txt_extras`, `_render_epub_extras`,
    `_download_image`) without re-implementing the export pipelines.
    """

    # Wrapper applied to a line that consists of a single <img> tag.
    _IMAGE_WRAPPER = '<div class="duokan-image-single illus">{img}</div>'
    # Matches any <img ...> tag (case-insensitive).
    _IMG_TAG_RE = re.compile(r"<img[^>]*>", re.IGNORECASE)
    # Matches an <img> tag with an http(s) src; group(1) is the URL.
    _IMG_SRC_RE = re.compile(
        r'<img[^>]*\bsrc\s*=\s*["\'](https?://[^"\']+)["\'][^>]*>',
        re.IGNORECASE,
    )

    def export_as_txt(self, book_id: str) -> Path | None:
        """
        Export a novel as a single text file by merging all chapter data.

        Steps:
          1. Load book metadata.
          2. Build a header with book metadata.
          3. For each volume:
             a. Append the volume heading.
             b. Batch-fetch all chapters in that volume.
             c. Append each chapter's title, content, and optional extra data.
          4. Concatenate header and all chapter contents.
          5. Save the resulting .txt file to the output directory.

        :param book_id: The book identifier (used to locate raw data).
        :return: Whatever `write_file` returns on success; `None` when the
                 book metadata cannot be loaded or the write fails.
        """
        book_id = self._normalize_book_id(book_id)
        self._init_chapter_storages(book_id)

        # --- Load book_info.json ---
        book_info = self._load_book_info(book_id)
        if not book_info:
            return None

        # --- Prepare header (book metadata) ---
        name = book_info["book_name"]
        author = book_info.get("author") or ""
        header_txt = self._build_txt_header(book_info, name, author)

        # --- Build body by volumes & chapters ---
        parts: list[str] = [header_txt]

        for v_idx, volume in enumerate(book_info.get("volumes", []), start=1):
            vol_title = volume.get("volume_name") or f"卷 {v_idx}"
            vol_title = self._cleaner.clean_title(vol_title)
            parts.append(self._build_txt_volume_heading(vol_title, volume))

            # Collect chapter ids then batch fetch
            chap_ids = [
                c["chapterId"] for c in volume.get("chapters", []) if c.get("chapterId")
            ]
            if not chap_ids:
                continue
            chap_map = self._get_chapters(book_id, chap_ids)

            # Append each chapter (in the order listed by the volume)
            for ch_info in volume.get("chapters", []):
                chap_id = ch_info.get("chapterId")
                if not chap_id:
                    continue

                ch = chap_map.get(chap_id)
                if not ch:
                    self.logger.warning(
                        "Missing chapter content for chapterId=%s", chap_id
                    )
                    continue

                parts.append(self._build_txt_chapter(ch))

        final_text = "\n".join(parts)

        # --- Determine output file path ---
        out_name = self.get_filename(title=name, author=author, ext="txt")
        out_path = self._output_dir / sanitize_filename(out_name)

        # --- Save final text ---
        try:
            result = write_file(
                content=final_text,
                filepath=out_path,
                on_exist="overwrite",
            )
            self.logger.info("Exported TXT: %s", out_path)
        except Exception as e:
            self.logger.error(
                "Failed to write TXT to %s: %s", out_path, e, exc_info=True
            )
            return None
        return result

    def export_as_epub(self, book_id: str) -> Path | None:
        """
        Persist the assembled book as EPUB (.epub) file(s).

        Dispatches on `self._split_mode`: ``"book"`` produces one EPUB for
        the whole novel, ``"volume"`` produces one EPUB per volume.

        :param book_id: The book identifier.
        :raises ValueError: If the split mode is neither "book" nor "volume".
        """
        book_id = self._normalize_book_id(book_id)
        self._init_chapter_storages(book_id)

        mode = self._split_mode
        if mode == "book":
            return self._export_epub_by_book(book_id)
        if mode == "volume":
            return self._export_epub_by_volume(book_id)
        raise ValueError(f"Unsupported split_mode: {mode!r}")

    def _export_epub_by_volume(self, book_id: str) -> Path | None:
        """
        Export each volume of a novel as a separate EPUB file.

        Steps:
          1. Load metadata from `book_info.json`.
          2. For each volume:
             a. Clean the volume title and pick a cover (volume cover,
                falling back to the book cover).
             b. Initialize an EPUB builder for the volume.
             c. Batch-fetch all chapters in this volume.
             d. For each chapter: clean title & content, handle images.
             e. Finalize and write the volume EPUB.

        :param book_id: Identifier of the novel (used as subdirectory name).
        :return: Always `None`; per-volume results are only logged.
        """
        # --- Load book_info.json ---
        book_info = self._load_book_info(book_id)
        if not book_info:
            return None

        # --- Prepare path ---
        raw_base = self._raw_data_dir / book_id
        img_dir = raw_base / "images"
        img_dir.mkdir(parents=True, exist_ok=True)

        # --- Prepare header (book metadata) ---
        name = book_info["book_name"]
        author = book_info.get("author") or ""
        book_summary = book_info.get("summary", "")

        # --- Generate intro + cover ---
        cover_url = book_info.get("cover_url") or ""
        cover_path: Path | None = None
        if self._include_cover and cover_url:
            cover_path = self._download_image(
                img_url=cover_url,
                target_dir=raw_base,
                filename="cover",
            )

        css_text = CSS_MAIN_PATH.read_text(encoding="utf-8")
        main_css = StyleSheet(id="main_style", content=css_text, filename="main.css")

        # --- Compile volumes ---
        for v_idx, vol in enumerate(book_info.get("volumes", []), start=1):
            vol_title = vol.get("volume_name") or f"卷 {v_idx}"
            # Drop the book name from the volume title before cleaning it.
            vol_title = self._cleaner.clean_title(vol_title.replace(name, ""))

            vol_cover_url = vol.get("volume_cover") or ""
            vol_cover: Path | None = None
            if self._include_cover and vol_cover_url:
                vol_cover = self._download_image(
                    img_url=vol_cover_url,
                    target_dir=img_dir,
                )
            # Fall back to the book-level cover when the volume has none.
            vol_cover = vol_cover or cover_path

            book = EpubBuilder(
                title=f"{name} - {vol_title}",
                author=author,
                description=vol.get("volume_intro") or book_summary,
                cover_path=vol_cover,
                subject=book_info.get("tags", []),
                serial_status=book_info.get("serial_status", ""),
                word_count=vol.get("word_count", ""),
                uid=f"{self._site}_{book_id}_v{v_idx}",
            )
            book.add_stylesheet(main_css)

            # Collect chapter ids then batch fetch
            chap_ids = [
                c["chapterId"] for c in vol.get("chapters", []) if c.get("chapterId")
            ]
            if not chap_ids:
                # Nothing to export for this volume; no file is written.
                continue
            chap_map = self._get_chapters(book_id, chap_ids)

            # Append each chapter
            for ch_info in vol.get("chapters", []):
                chap_id = ch_info.get("chapterId")
                if not chap_id:
                    continue

                ch = chap_map.get(chap_id)
                if not ch:
                    self.logger.warning(
                        "Missing chapter content for chapterId=%s", chap_id
                    )
                    continue

                book.add_chapter(
                    self._make_epub_chapter(book, chap_id, ch, img_dir, main_css)
                )

            out_name = self.get_filename(title=vol_title, author=author, ext="epub")
            out_path = self._output_dir / sanitize_filename(out_name)

            try:
                book.export(out_path)
                self.logger.info("Exported EPUB: %s", out_path)
            except Exception as e:
                # Best effort: a failing volume must not abort the others.
                self.logger.error(
                    "Failed to write EPUB to %s: %s", out_path, e, exc_info=True
                )

        # NOTE(review): None is returned even when volumes were exported
        # successfully; kept as-is because callers may rely on it.
        return None

    def _export_epub_by_book(self, book_id: str) -> Path | None:
        """
        Export a single novel (identified by `book_id`) to an EPUB file.

        This function will:
          1. Load `book_info.json` for metadata.
          2. Optionally download the cover image.
          3. Initialize the EPUB builder and attach the main stylesheet.
          4. Iterate through volumes and chapters in volume-batches,
             converting each chapter to XHTML.
          5. Write out the final `.epub`.

        :param book_id: Identifier of the novel (used as subdirectory name).
        :return: Path of the written EPUB, or `None` when the metadata is
                 missing or the export fails.
        """
        # --- Load book_info.json ---
        book_info = self._load_book_info(book_id)
        if not book_info:
            return None

        # --- Prepare path ---
        raw_base = self._raw_data_dir / book_id
        img_dir = raw_base / "images"
        img_dir.mkdir(parents=True, exist_ok=True)

        # --- Prepare header (book metadata) ---
        name = book_info["book_name"]
        author = book_info.get("author") or ""

        # --- Generate intro + cover ---
        cover_url = book_info.get("cover_url") or ""
        cover_path: Path | None = None
        if self._include_cover and cover_url:
            cover_path = self._download_image(
                img_url=cover_url,
                target_dir=raw_base,
                filename="cover",
            )

        # --- Initialize EPUB ---
        book = EpubBuilder(
            title=name,
            author=author,
            description=book_info.get("summary", ""),
            cover_path=cover_path,
            subject=book_info.get("tags", []),
            serial_status=book_info.get("serial_status", ""),
            word_count=book_info.get("word_count", ""),
            uid=f"{self._site}_{book_id}",
        )
        css_text = CSS_MAIN_PATH.read_text(encoding="utf-8")
        main_css = StyleSheet(id="main_style", content=css_text, filename="main.css")
        book.add_stylesheet(main_css)

        # --- Compile volumes ---
        for v_idx, vol in enumerate(book_info.get("volumes", []), start=1):
            vol_title = vol.get("volume_name") or f"卷 {v_idx}"
            # Drop the book name from the volume title before cleaning it.
            vol_title = self._cleaner.clean_title(vol_title.replace(name, ""))

            vol_cover_url = vol.get("volume_cover") or ""
            vol_cover: Path | None = None
            if self._include_cover and vol_cover_url:
                vol_cover = self._download_image(
                    img_url=vol_cover_url,
                    target_dir=img_dir,
                )

            curr_vol = Volume(
                id=f"vol_{v_idx}",
                title=vol_title,
                intro=self._cleaner.clean_content(vol.get("volume_intro") or ""),
                cover=vol_cover,
            )

            # Collect chapter ids then batch fetch
            chap_ids = [
                c["chapterId"] for c in vol.get("chapters", []) if c.get("chapterId")
            ]
            if not chap_ids:
                # Keep the (empty) volume so it still appears in the book.
                book.add_volume(curr_vol)
                continue
            chap_map = self._get_chapters(book_id, chap_ids)

            # Append each chapter
            for ch_info in vol.get("chapters", []):
                chap_id = ch_info.get("chapterId")
                if not chap_id:
                    continue

                ch = chap_map.get(chap_id)
                if not ch:
                    self.logger.warning(
                        "Missing chapter content for chapterId=%s", chap_id
                    )
                    continue

                curr_vol.chapters.append(
                    self._make_epub_chapter(book, chap_id, ch, img_dir, main_css)
                )

            book.add_volume(curr_vol)

        # --- Finalize EPUB ---
        out_name = self.get_filename(title=name, author=author, ext="epub")
        out_path = self._output_dir / sanitize_filename(out_name)

        try:
            book.export(out_path)
            self.logger.info("Exported EPUB: %s", out_path)
        except Exception as e:
            self.logger.error(
                "Failed to write EPUB to %s: %s", out_path, e, exc_info=True
            )
            return None
        return out_path

    def _make_epub_chapter(
        self,
        book: EpubBuilder,
        chap_id: str,
        ch: ChapterDict,
        img_dir: Path,
        main_css: StyleSheet,
    ) -> Chapter:
        """
        Build the EPUB `Chapter` object for one stored chapter.

        Cleans the title and content, then either localizes remote images
        (registering them on `book`) or strips all `<img>` tags, depending
        on `self._include_picture`.

        :param book: Builder that downloaded images are registered with.
        :param chap_id: Chapter identifier; also the fallback title.
        :param ch: Stored chapter data (title, content, extra).
        :param img_dir: Directory downloaded images are saved into.
        :param main_css: Stylesheet attached to the chapter.
        """
        title = self._cleaner.clean_title(ch.get("title", "")) or chap_id
        content = self._cleaner.clean_content(ch.get("content", ""))

        content = (
            self._inline_remote_images(book, content, img_dir)
            if self._include_picture
            else self._remove_all_images(content)
        )

        chap_html = self._build_epub_chapter(
            title=title,
            paragraphs=content,
            extras=ch.get("extra", {}),
        )
        return Chapter(
            id=f"c_{chap_id}",
            filename=f"c{chap_id}.xhtml",
            title=title,
            content=chap_html,
            css=[main_css],
        )

    @staticmethod
    def _normalize_book_id(book_id: str) -> str:
        """
        Normalize a book identifier by replacing "/" with "-".

        Subclasses may override this method to transform the book ID
        into their preferred format.
        """
        return book_id.replace("/", "-")

    def _render_txt_extras(self, extras: dict[str, Any]) -> str:
        """
        Format the extras dict into a string for TXT output.

        The default implementation renders nothing.
        Subclasses may override this method to render extra info.
        """
        return ""

    def _render_epub_extras(self, extras: dict[str, Any]) -> str:
        """
        Format the extras dict into a string for EPUB output.

        The default implementation renders nothing.
        Subclasses may override this method to render extra info.
        """
        return ""

    @staticmethod
    def _download_image(
        img_url: str,
        target_dir: Path,
        filename: str | None = None,
        *,
        on_exist: str = "overwrite",
    ) -> Path | None:
        """
        Download image from url to target dir with given name.

        Subclasses may override this method if the site needs more info
        (for example extra request headers).

        :param img_url: Remote image URL.
        :param target_dir: Directory to save the image into.
        :param filename: Optional file name passed through to `download`.
        :param on_exist: Policy forwarded to `download` for the case where
                         the target file already exists.
        :return: Whatever `download` returns (a path, or `None`).
        """
        return download(
            img_url,
            target_dir,
            filename=filename,
            headers=DEFAULT_HEADERS,
            # Honor the caller's policy; it was previously hard-coded to
            # "overwrite", which silently ignored on_exist="skip".
            on_exist=on_exist,
            default_suffix=DEFAULT_IMAGE_SUFFIX,
        )

    def _build_txt_header(self, book_info: BookInfoDict, name: str, author: str) -> str:
        """
        Top-of-file metadata block.

        Emits the book name followed by any of author, serial status,
        word count, tags, update time and summary that are present,
        terminated by a blank line.
        """
        lines: list[str] = [name.strip()]

        if author:
            lines.append(f"作者:{author.strip()}")

        if serial_status := book_info.get("serial_status"):
            lines.append(f"状态:{serial_status.strip()}")

        if word_count := book_info.get("word_count"):
            lines.append(f"字数:{word_count.strip()}")

        if tags_list := book_info.get("tags"):
            tags = "、".join(t.strip() for t in tags_list if t)
            if tags:
                lines.append(f"标签:{tags}")

        if update_time := (book_info.get("update_time") or "").strip():
            lines.append(f"更新:{update_time}")

        if summary := (book_info.get("summary") or "").strip():
            # Blank line separates the metadata lines from the summary.
            lines.extend(["", summary])

        return "\n".join(lines).strip() + "\n\n"

    def _build_txt_volume_heading(self, vol_title: str, volume: VolumeInfoDict) -> str:
        """
        Render a volume heading. Include optional info if present.

        The heading line is ``=== <title> ===``, optionally followed by
        update time, word count and intro lines, then a blank line.
        """
        meta_bits: list[str] = []

        if v_update_time := volume.get("update_time"):
            meta_bits.append(f"更新时间:{v_update_time}")

        if v_word_count := volume.get("word_count"):
            meta_bits.append(f"字数:{v_word_count}")

        if v_intro := (volume.get("volume_intro") or "").strip():
            meta_bits.append(f"简介:{v_intro}")

        line = f"=== {vol_title.strip()} ==="
        return f"{line}\n" + ("\n".join(meta_bits) + "\n\n" if meta_bits else "\n\n")

    def _build_txt_chapter(self, chap: ChapterDict) -> str:
        """
        Render one chapter to text.

        Output is the cleaned title, the body with images removed and
        blank lines dropped, and the rendered extras when non-empty.
        """
        # Title
        raw_title = chap.get("title", "")
        title_line = self._cleaner.clean_title(raw_title).strip()

        # Body: strip <img> tags, trim each line, drop empty lines
        cleaned = self._cleaner.clean_content(chap.get("content") or "").strip()
        cleaned = self._remove_all_images(cleaned)
        body = "\n".join(s for line in cleaned.splitlines() if (s := line.strip()))

        # Extras
        extras_txt = self._render_txt_extras(chap.get("extra", {}) or {})

        return (
            f"{title_line}\n\n{body}\n\n{extras_txt}\n\n"
            if extras_txt
            else f"{title_line}\n\n{body}\n\n"
        )

    def _inline_remote_images(
        self,
        book: EpubBuilder,
        content: str,
        image_dir: Path,
    ) -> str:
        """
        Download every remote `<img src="...">` in `content` into `image_dir`,
        and replace the original url with local path.

        Tags whose image cannot be downloaded or registered are left
        unchanged.

        :param book: Builder the downloaded images are registered with.
        :param content: HTML/text of the chapter containing <img> tags.
        :param image_dir: Directory to save downloaded images into.
        :return: Content with successfully localized `<img>` tags rewritten.
        """
        # Cheap pre-check to skip the regex pass for image-free chapters.
        if "<img" not in content.lower():
            return content

        def _replace(m: re.Match[str]) -> str:
            url = m.group(1)
            try:
                local_path = self._download_image(url, image_dir, on_exist="skip")
                if not local_path:
                    return m.group(0)
                filename = book.add_image(local_path)
                return f'<img src="../Images/{filename}" />'
            except Exception as e:
                # Best effort: keep the original tag if anything goes wrong.
                self.logger.debug("Inline image failed for %s: %s", url, e)
                return m.group(0)

        return self._IMG_SRC_RE.sub(_replace, content)

    @classmethod
    def _remove_all_images(cls, content: str) -> str:
        """
        Remove all <img> tags from the given content.

        :param content: HTML/text of the chapter containing <img> tags.
        """
        return cls._IMG_TAG_RE.sub("", content)

    def _build_epub_chapter(
        self,
        title: str,
        paragraphs: str,
        extras: dict[str, str],
    ) -> str:
        """
        Build a formatted chapter epub HTML including title, body paragraphs,
        and optional extra sections.

        :param title: Chapter title; HTML-escaped into an `<h2>`.
        :param paragraphs: Chapter body text, one paragraph per line.
        :param extras: Extra data handed to `_render_epub_extras`.
        """
        parts = [
            f"<h2>{escape(title)}</h2>",
            self._render_html_block(paragraphs),
        ]

        extras_epub = self._render_epub_extras(extras)
        if extras_epub:
            parts.append(extras_epub)

        return "\n".join(parts)

    @classmethod
    def _render_html_block(cls, text: str) -> str:
        """
        Convert chapter text into HTML, one element per non-empty line.

        Lines already wrapped in a `<div>` pass through unchanged, a line
        that is a single `<img>` tag is wrapped in the image container,
        and all other lines become escaped `<p>` paragraphs (with inline
        `<img>` tags preserved).
        """
        out: list[str] = []
        for raw in text.splitlines():
            line = raw.strip()
            if not line:
                continue

            # case 1: already wrapped in a <div>...</div>
            if line.startswith("<div") and line.endswith("</div>"):
                out.append(line)
                continue

            # case 2: single <img> line
            if cls._IMG_TAG_RE.fullmatch(line):
                out.append(cls._IMAGE_WRAPPER.format(img=line))
                continue

            # case 3: inline <img> in text -> escape other text, preserve <img>
            if "<img " in line.lower():
                pieces = []
                last = 0
                for m in cls._IMG_TAG_RE.finditer(line):
                    pieces.append(escape(line[last : m.start()]))
                    pieces.append(m.group(0))
                    last = m.end()
                pieces.append(escape(line[last:]))
                out.append("<p>" + "".join(pieces) + "</p>")
                continue

            # plain text line
            out.append(f"<p>{escape(line)}</p>")

        return "\n".join(out)
@@ -0,0 +1,47 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ novel_downloader.core.exporters.linovelib
4
+ -----------------------------------------
5
+
6
+ Exporter implementation for handling Linovelib novels.
7
+ """
8
+
9
+ from pathlib import Path
10
+
11
+ from novel_downloader.core.exporters.common import CommonExporter
12
+ from novel_downloader.core.exporters.registry import register_exporter
13
+ from novel_downloader.utils import download
14
+ from novel_downloader.utils.constants import (
15
+ DEFAULT_HEADERS,
16
+ DEFAULT_IMAGE_SUFFIX,
17
+ )
18
+
19
# Image requests reuse the default headers plus a site Referer.
_IMG_HEADERS = DEFAULT_HEADERS.copy()
_IMG_HEADERS["Referer"] = "https://www.linovelib.com/"


@register_exporter(site_keys=["linovelib"])
class LinovelibExporter(CommonExporter):
    """
    Exporter for 哔哩轻小说 novels.

    Identical to `CommonExporter` except that images are downloaded with
    a Linovelib `Referer` header.
    """

    @staticmethod
    def _download_image(
        img_url: str,
        target_dir: Path,
        filename: str | None = None,
        *,
        on_exist: str = "overwrite",
    ) -> Path | None:
        """
        Download image from url to target dir with given name.

        :param img_url: Remote image URL.
        :param target_dir: Directory to save the image into.
        :param filename: Optional file name passed through to `download`.
        :param on_exist: Policy forwarded to `download` for the case where
                         the target file already exists.
        :return: Whatever `download` returns (a path, or `None`).
        """
        return download(
            img_url,
            target_dir,
            filename=filename,
            headers=_IMG_HEADERS,
            # Honor the caller's policy; it was previously hard-coded to
            # "overwrite", which silently ignored the on_exist argument.
            on_exist=on_exist,
            default_suffix=DEFAULT_IMAGE_SUFFIX,
        )