novel-downloader 2.0.0__py3-none-any.whl → 2.0.2__py3-none-any.whl
This diff reflects the changes between publicly released package versions as they appear in their respective registries, and is provided for informational purposes only.
- novel_downloader/__init__.py +1 -1
- novel_downloader/cli/download.py +14 -11
- novel_downloader/cli/export.py +19 -19
- novel_downloader/cli/ui.py +35 -8
- novel_downloader/config/adapter.py +216 -153
- novel_downloader/core/__init__.py +5 -6
- novel_downloader/core/archived/deqixs/fetcher.py +1 -28
- novel_downloader/core/downloaders/__init__.py +2 -0
- novel_downloader/core/downloaders/base.py +34 -85
- novel_downloader/core/downloaders/common.py +147 -171
- novel_downloader/core/downloaders/qianbi.py +30 -64
- novel_downloader/core/downloaders/qidian.py +157 -184
- novel_downloader/core/downloaders/qqbook.py +292 -0
- novel_downloader/core/downloaders/registry.py +2 -2
- novel_downloader/core/exporters/__init__.py +2 -0
- novel_downloader/core/exporters/base.py +37 -59
- novel_downloader/core/exporters/common.py +620 -0
- novel_downloader/core/exporters/linovelib.py +47 -0
- novel_downloader/core/exporters/qidian.py +41 -12
- novel_downloader/core/exporters/qqbook.py +28 -0
- novel_downloader/core/exporters/registry.py +2 -2
- novel_downloader/core/fetchers/__init__.py +4 -2
- novel_downloader/core/fetchers/aaatxt.py +2 -22
- novel_downloader/core/fetchers/b520.py +3 -23
- novel_downloader/core/fetchers/base.py +80 -105
- novel_downloader/core/fetchers/biquyuedu.py +2 -22
- novel_downloader/core/fetchers/dxmwx.py +10 -22
- novel_downloader/core/fetchers/esjzone.py +6 -29
- novel_downloader/core/fetchers/guidaye.py +2 -22
- novel_downloader/core/fetchers/hetushu.py +9 -29
- novel_downloader/core/fetchers/i25zw.py +2 -16
- novel_downloader/core/fetchers/ixdzs8.py +2 -16
- novel_downloader/core/fetchers/jpxs123.py +2 -16
- novel_downloader/core/fetchers/lewenn.py +2 -22
- novel_downloader/core/fetchers/linovelib.py +4 -20
- novel_downloader/core/fetchers/{eightnovel.py → n8novel.py} +12 -40
- novel_downloader/core/fetchers/piaotia.py +2 -16
- novel_downloader/core/fetchers/qbtr.py +2 -16
- novel_downloader/core/fetchers/qianbi.py +1 -20
- novel_downloader/core/fetchers/qidian.py +27 -68
- novel_downloader/core/fetchers/qqbook.py +177 -0
- novel_downloader/core/fetchers/quanben5.py +9 -29
- novel_downloader/core/fetchers/rate_limiter.py +22 -53
- novel_downloader/core/fetchers/sfacg.py +3 -16
- novel_downloader/core/fetchers/shencou.py +2 -16
- novel_downloader/core/fetchers/shuhaige.py +2 -22
- novel_downloader/core/fetchers/tongrenquan.py +2 -22
- novel_downloader/core/fetchers/ttkan.py +3 -14
- novel_downloader/core/fetchers/wanbengo.py +2 -22
- novel_downloader/core/fetchers/xiaoshuowu.py +2 -16
- novel_downloader/core/fetchers/xiguashuwu.py +4 -20
- novel_downloader/core/fetchers/xs63b.py +3 -15
- novel_downloader/core/fetchers/xshbook.py +2 -22
- novel_downloader/core/fetchers/yamibo.py +4 -28
- novel_downloader/core/fetchers/yibige.py +13 -26
- novel_downloader/core/interfaces/exporter.py +19 -7
- novel_downloader/core/interfaces/fetcher.py +23 -49
- novel_downloader/core/interfaces/parser.py +2 -2
- novel_downloader/core/parsers/__init__.py +4 -2
- novel_downloader/core/parsers/b520.py +2 -2
- novel_downloader/core/parsers/base.py +5 -39
- novel_downloader/core/parsers/esjzone.py +3 -3
- novel_downloader/core/parsers/{eightnovel.py → n8novel.py} +7 -7
- novel_downloader/core/parsers/qidian.py +717 -0
- novel_downloader/core/parsers/qqbook.py +709 -0
- novel_downloader/core/parsers/xiguashuwu.py +8 -15
- novel_downloader/core/searchers/__init__.py +2 -2
- novel_downloader/core/searchers/b520.py +1 -1
- novel_downloader/core/searchers/base.py +2 -2
- novel_downloader/core/searchers/{eightnovel.py → n8novel.py} +5 -5
- novel_downloader/locales/en.json +3 -3
- novel_downloader/locales/zh.json +3 -3
- novel_downloader/models/__init__.py +2 -0
- novel_downloader/models/book.py +1 -0
- novel_downloader/models/config.py +12 -0
- novel_downloader/resources/config/settings.toml +23 -5
- novel_downloader/resources/js_scripts/expr_to_json.js +14 -0
- novel_downloader/resources/js_scripts/qidian_decrypt_node.js +21 -16
- novel_downloader/resources/js_scripts/qq_decrypt_node.js +92 -0
- novel_downloader/utils/__init__.py +0 -2
- novel_downloader/utils/chapter_storage.py +2 -3
- novel_downloader/utils/constants.py +7 -3
- novel_downloader/utils/cookies.py +32 -17
- novel_downloader/utils/crypto_utils/__init__.py +0 -6
- novel_downloader/utils/crypto_utils/aes_util.py +1 -1
- novel_downloader/utils/crypto_utils/rc4.py +40 -50
- novel_downloader/utils/epub/__init__.py +2 -3
- novel_downloader/utils/epub/builder.py +6 -6
- novel_downloader/utils/epub/constants.py +1 -6
- novel_downloader/utils/epub/documents.py +7 -7
- novel_downloader/utils/epub/models.py +8 -8
- novel_downloader/utils/epub/utils.py +10 -10
- novel_downloader/utils/file_utils/io.py +48 -73
- novel_downloader/utils/file_utils/normalize.py +1 -7
- novel_downloader/utils/file_utils/sanitize.py +4 -11
- novel_downloader/utils/fontocr/__init__.py +13 -0
- novel_downloader/utils/{fontocr.py → fontocr/core.py} +72 -61
- novel_downloader/utils/fontocr/loader.py +52 -0
- novel_downloader/utils/logger.py +80 -56
- novel_downloader/utils/network.py +16 -40
- novel_downloader/utils/node_decryptor/__init__.py +13 -0
- novel_downloader/utils/node_decryptor/decryptor.py +342 -0
- novel_downloader/{core/parsers/qidian/utils → utils/node_decryptor}/decryptor_fetcher.py +5 -6
- novel_downloader/utils/text_utils/text_cleaner.py +39 -30
- novel_downloader/utils/text_utils/truncate_utils.py +3 -14
- novel_downloader/utils/time_utils/sleep_utils.py +53 -43
- novel_downloader/web/main.py +1 -1
- novel_downloader/web/pages/download.py +1 -1
- novel_downloader/web/pages/search.py +4 -4
- novel_downloader/web/services/task_manager.py +2 -0
- {novel_downloader-2.0.0.dist-info → novel_downloader-2.0.2.dist-info}/METADATA +5 -1
- novel_downloader-2.0.2.dist-info/RECORD +203 -0
- novel_downloader/core/exporters/common/__init__.py +0 -11
- novel_downloader/core/exporters/common/epub.py +0 -198
- novel_downloader/core/exporters/common/main_exporter.py +0 -64
- novel_downloader/core/exporters/common/txt.py +0 -146
- novel_downloader/core/exporters/epub_util.py +0 -215
- novel_downloader/core/exporters/linovelib/__init__.py +0 -11
- novel_downloader/core/exporters/linovelib/epub.py +0 -349
- novel_downloader/core/exporters/linovelib/main_exporter.py +0 -66
- novel_downloader/core/exporters/linovelib/txt.py +0 -139
- novel_downloader/core/exporters/txt_util.py +0 -67
- novel_downloader/core/parsers/qidian/__init__.py +0 -10
- novel_downloader/core/parsers/qidian/book_info_parser.py +0 -89
- novel_downloader/core/parsers/qidian/chapter_encrypted.py +0 -470
- novel_downloader/core/parsers/qidian/chapter_normal.py +0 -126
- novel_downloader/core/parsers/qidian/chapter_router.py +0 -68
- novel_downloader/core/parsers/qidian/main_parser.py +0 -101
- novel_downloader/core/parsers/qidian/utils/__init__.py +0 -30
- novel_downloader/core/parsers/qidian/utils/fontmap_recover.py +0 -143
- novel_downloader/core/parsers/qidian/utils/helpers.py +0 -110
- novel_downloader/core/parsers/qidian/utils/node_decryptor.py +0 -175
- novel_downloader-2.0.0.dist-info/RECORD +0 -210
- {novel_downloader-2.0.0.dist-info → novel_downloader-2.0.2.dist-info}/WHEEL +0 -0
- {novel_downloader-2.0.0.dist-info → novel_downloader-2.0.2.dist-info}/entry_points.txt +0 -0
- {novel_downloader-2.0.0.dist-info → novel_downloader-2.0.2.dist-info}/licenses/LICENSE +0 -0
- {novel_downloader-2.0.0.dist-info → novel_downloader-2.0.2.dist-info}/top_level.txt +0 -0
novel_downloader/core/exporters/common.py (new file)
@@ -0,0 +1,620 @@
#!/usr/bin/env python3
"""
novel_downloader.core.exporters.common
--------------------------------------

Shared exporter implementation for producing standard TXT and EPUB outputs.
"""

import re
from html import escape
from pathlib import Path
from typing import Any

from novel_downloader.core.exporters.base import BaseExporter
from novel_downloader.models import (
    BookInfoDict,
    ChapterDict,
    VolumeInfoDict,
)
from novel_downloader.utils import (
    download,
    sanitize_filename,
    write_file,
)
from novel_downloader.utils.constants import (
    CSS_MAIN_PATH,
    DEFAULT_HEADERS,
    DEFAULT_IMAGE_SUFFIX,
)
from novel_downloader.utils.epub import (
    Chapter,
    EpubBuilder,
    StyleSheet,
    Volume,
)


class CommonExporter(BaseExporter):
    """
    CommonExporter is an exporter that processes and exports novels.

    It extends the BaseExporter interface and provides
    logic for exporting full novels as plain text (.txt) files
    and EPUB (.epub) files.
    """

    _IMAGE_WRAPPER = '<div class="duokan-image-single illus">{img}</div>'
    _IMG_TAG_RE = re.compile(r"<img[^>]*>", re.IGNORECASE)
    _IMG_SRC_RE = re.compile(
        r'<img[^>]*\bsrc\s*=\s*["\'](https?://[^"\']+)["\'][^>]*>',
        re.IGNORECASE,
    )

    def export_as_txt(self, book_id: str) -> Path | None:
        """
        Export a novel as a single text file by merging all chapter data.

        Steps:
        1. Load book metadata.
        2. For each volume:
           a. Append the volume title.
           b. Batch-fetch all chapters in that volume to minimize SQLite calls.
           c. Append each chapter's title, content, and optional extra data.
        3. Build a header with book metadata.
        4. Concatenate header and all chapter contents.
        5. Save the resulting .txt file to the output directory.

        :param book_id: The book identifier (used to locate raw data).
        """
        book_id = self._normalize_book_id(book_id)
        self._init_chapter_storages(book_id)

        # --- Load book_info.json ---
        book_info = self._load_book_info(book_id)
        if not book_info:
            return None

        # --- Prepare header (book metadata) ---
        name = book_info["book_name"]
        author = book_info.get("author") or ""
        header_txt = self._build_txt_header(book_info, name, author)

        # --- Build body by volumes & chapters ---
        parts: list[str] = [header_txt]

        for v_idx, volume in enumerate(book_info.get("volumes", []), start=1):
            vol_title = volume.get("volume_name") or f"卷 {v_idx}"
            vol_title = self._cleaner.clean_title(vol_title)
            parts.append(self._build_txt_volume_heading(vol_title, volume))

            # Collect chapter ids then batch fetch
            chap_ids = [
                c["chapterId"] for c in volume.get("chapters", []) if c.get("chapterId")
            ]
            if not chap_ids:
                continue
            chap_map = self._get_chapters(book_id, chap_ids)

            # Append each chapter
            for ch_info in volume.get("chapters", []):
                chap_id = ch_info.get("chapterId")
                if not chap_id:
                    continue

                ch = chap_map.get(chap_id)
                if not ch:
                    self.logger.warning(
                        "Missing chapter content for chapterId=%s", chap_id
                    )
                    continue

                parts.append(self._build_txt_chapter(ch))

        final_text = "\n".join(parts)

        # --- Determine output file path ---
        out_name = self.get_filename(title=name, author=author, ext="txt")
        out_path = self._output_dir / sanitize_filename(out_name)

        # --- Save final text ---
        try:
            result = write_file(
                content=final_text,
                filepath=out_path,
                on_exist="overwrite",
            )
            self.logger.info("Exported TXT: %s", out_path)
        except Exception as e:
            self.logger.error(
                "Failed to write TXT to %s: %s", out_path, e, exc_info=True
            )
            return None
        return result

    def export_as_epub(self, book_id: str) -> Path | None:
        """
        Persist the assembled book as an EPUB (.epub) file.

        :param book_id: The book identifier.
        """
        book_id = self._normalize_book_id(book_id)
        self._init_chapter_storages(book_id)

        mode = self._split_mode
        if mode == "book":
            return self._export_epub_by_book(book_id)
        if mode == "volume":
            return self._export_epub_by_volume(book_id)
        raise ValueError(f"Unsupported split_mode: {mode!r}")

    def _export_epub_by_volume(self, book_id: str) -> Path | None:
        """
        Export each volume of a novel as a separate EPUB file.

        Steps:
        1. Load metadata from `book_info.json`.
        2. For each volume:
           a. Clean the volume title and determine the output filename.
           b. Batch-fetch all chapters in this volume to minimize SQLite overhead.
           c. Initialize an EPUB builder for the volume, including cover and intro.
           d. For each chapter: clean title & content, inline remote images.
           e. Finalize and write the volume EPUB.

        :param book_id: Identifier of the novel (used as subdirectory name).
        """
        # --- Load book_info.json ---
        book_info = self._load_book_info(book_id)
        if not book_info:
            return None

        # --- Prepare path ---
        raw_base = self._raw_data_dir / book_id
        img_dir = raw_base / "images"
        img_dir.mkdir(parents=True, exist_ok=True)

        # --- Prepare header (book metadata) ---
        name = book_info["book_name"]
        author = book_info.get("author") or ""
        book_summary = book_info.get("summary", "")

        # --- Generate intro + cover ---
        cover_url = book_info.get("cover_url") or ""
        cover_path: Path | None = None
        if self._include_cover and cover_url:
            cover_path = self._download_image(
                img_url=cover_url,
                target_dir=raw_base,
                filename="cover",
            )

        css_text = CSS_MAIN_PATH.read_text(encoding="utf-8")
        main_css = StyleSheet(id="main_style", content=css_text, filename="main.css")

        # --- Compile volumes ---
        for v_idx, vol in enumerate(book_info.get("volumes", []), start=1):
            vol_title = vol.get("volume_name") or f"卷 {v_idx}"
            vol_title = self._cleaner.clean_title(vol_title.replace(name, ""))

            vol_cover_url = vol.get("volume_cover") or ""
            vol_cover: Path | None = None
            if self._include_cover and vol_cover_url:
                vol_cover = self._download_image(
                    img_url=vol_cover_url,
                    target_dir=img_dir,
                )
            vol_cover = vol_cover or cover_path

            book = EpubBuilder(
                title=f"{name} - {vol_title}",
                author=author,
                description=vol.get("volume_intro") or book_summary,
                cover_path=vol_cover,
                subject=book_info.get("tags", []),
                serial_status=book_info.get("serial_status", ""),
                word_count=vol.get("word_count", ""),
                uid=f"{self._site}_{book_id}_v{v_idx}",
            )
            book.add_stylesheet(main_css)

            # Collect chapter ids then batch fetch
            chap_ids = [
                c["chapterId"] for c in vol.get("chapters", []) if c.get("chapterId")
            ]
            if not chap_ids:
                continue
            chap_map = self._get_chapters(book_id, chap_ids)

            # Append each chapter
            for ch_info in vol.get("chapters", []):
                chap_id = ch_info.get("chapterId")
                if not chap_id:
                    continue

                ch = chap_map.get(chap_id)
                if not ch:
                    self.logger.warning(
                        "Missing chapter content for chapterId=%s", chap_id
                    )
                    continue

                title = self._cleaner.clean_title(ch.get("title", "")) or chap_id
                content = self._cleaner.clean_content(ch.get("content", ""))

                content = (
                    self._inline_remote_images(book, content, img_dir)
                    if self._include_picture
                    else self._remove_all_images(content)
                )

                chap_html = self._build_epub_chapter(
                    title=title,
                    paragraphs=content,
                    extras=ch.get("extra", {}),
                )
                book.add_chapter(
                    Chapter(
                        id=f"c_{chap_id}",
                        filename=f"c{chap_id}.xhtml",
                        title=title,
                        content=chap_html,
                        css=[main_css],
                    )
                )

            out_name = self.get_filename(title=vol_title, author=author, ext="epub")
            out_path = self._output_dir / sanitize_filename(out_name)

            try:
                book.export(out_path)
                self.logger.info("Exported EPUB: %s", out_path)
            except Exception as e:
                self.logger.error(
                    "Failed to write EPUB to %s: %s", out_path, e, exc_info=True
                )

        return None

    def _export_epub_by_book(self, book_id: str) -> Path | None:
        """
        Export a single novel (identified by `book_id`) to an EPUB file.

        This function will:
        1. Load `book_info.json` for metadata.
        2. Generate introductory HTML and optionally include the cover image.
        3. Initialize the EPUB container.
        4. Iterate through volumes and chapters in volume-batches, convert to XHTML.
        5. Assemble the spine, TOC, CSS and write out the final `.epub`.

        :param book_id: Identifier of the novel (used as subdirectory name).
        """
        # --- Load book_info.json ---
        book_info = self._load_book_info(book_id)
        if not book_info:
            return None

        # --- Prepare path ---
        raw_base = self._raw_data_dir / book_id
        img_dir = raw_base / "images"
        img_dir.mkdir(parents=True, exist_ok=True)

        # --- Prepare header (book metadata) ---
        name = book_info["book_name"]
        author = book_info.get("author") or ""

        # --- Generate intro + cover ---
        cover_url = book_info.get("cover_url") or ""
        cover_path: Path | None = None
        if self._include_cover and cover_url:
            cover_path = self._download_image(
                img_url=cover_url,
                target_dir=raw_base,
                filename="cover",
            )

        # --- Initialize EPUB ---
        book = EpubBuilder(
            title=name,
            author=author,
            description=book_info.get("summary", ""),
            cover_path=cover_path,
            subject=book_info.get("tags", []),
            serial_status=book_info.get("serial_status", ""),
            word_count=book_info.get("word_count", ""),
            uid=f"{self._site}_{book_id}",
        )
        css_text = CSS_MAIN_PATH.read_text(encoding="utf-8")
        main_css = StyleSheet(id="main_style", content=css_text, filename="main.css")
        book.add_stylesheet(main_css)

        # --- Compile volumes ---
        for v_idx, vol in enumerate(book_info.get("volumes", []), start=1):
            vol_title = vol.get("volume_name") or f"卷 {v_idx}"
            vol_title = self._cleaner.clean_title(vol_title.replace(name, ""))

            vol_cover_url = vol.get("volume_cover") or ""
            vol_cover: Path | None = None
            if self._include_cover and vol_cover_url:
                vol_cover = self._download_image(
                    img_url=vol_cover_url,
                    target_dir=img_dir,
                )

            curr_vol = Volume(
                id=f"vol_{v_idx}",
                title=vol_title,
                intro=self._cleaner.clean_content(vol.get("volume_intro") or ""),
                cover=vol_cover,
            )

            # Collect chapter ids then batch fetch
            chap_ids = [
                c["chapterId"] for c in vol.get("chapters", []) if c.get("chapterId")
            ]
            if not chap_ids:
                book.add_volume(curr_vol)
                continue
            chap_map = self._get_chapters(book_id, chap_ids)

            # Append each chapter
            for ch_info in vol.get("chapters", []):
                chap_id = ch_info.get("chapterId")
                if not chap_id:
                    continue

                ch = chap_map.get(chap_id)
                if not ch:
                    self.logger.warning(
                        "Missing chapter content for chapterId=%s", chap_id
                    )
                    continue

                title = self._cleaner.clean_title(ch.get("title", "")) or chap_id
                content = self._cleaner.clean_content(ch.get("content", ""))

                content = (
                    self._inline_remote_images(book, content, img_dir)
                    if self._include_picture
                    else self._remove_all_images(content)
                )

                chap_html = self._build_epub_chapter(
                    title=title,
                    paragraphs=content,
                    extras=ch.get("extra", {}),
                )

                curr_vol.chapters.append(
                    Chapter(
                        id=f"c_{chap_id}",
                        filename=f"c{chap_id}.xhtml",
                        title=title,
                        content=chap_html,
                        css=[main_css],
                    )
                )

            book.add_volume(curr_vol)

        # --- Finalize EPUB ---
        out_name = self.get_filename(title=name, author=author, ext="epub")
        out_path = self._output_dir / sanitize_filename(out_name)

        try:
            book.export(out_path)
            self.logger.info("Exported EPUB: %s", out_path)
        except Exception as e:
            self.logger.error(
                "Failed to write EPUB to %s: %s", out_path, e, exc_info=True
            )
            return None
        return out_path

    @staticmethod
    def _normalize_book_id(book_id: str) -> str:
        """
        Normalize a book identifier.

        Subclasses may override this method to transform the book ID
        into their preferred format.
        """
        return book_id.replace("/", "-")

    def _render_txt_extras(self, extras: dict[str, Any]) -> str:
        """
        Format the extras dict into a string.

        Subclasses may override this method to render extra info.
        """
        return ""

    def _render_epub_extras(self, extras: dict[str, Any]) -> str:
        """
        Format the extras dict into a string.

        Subclasses may override this method to render extra info.
        """
        return ""

    @staticmethod
    def _download_image(
        img_url: str,
        target_dir: Path,
        filename: str | None = None,
        *,
        on_exist: str = "overwrite",
    ) -> Path | None:
        """
        Download an image from a URL into the target dir with the given filename.

        Subclasses may override this method if the site needs more info.
        """
        return download(
            img_url,
            target_dir,
            filename=filename,
            headers=DEFAULT_HEADERS,
            on_exist="overwrite",
            default_suffix=DEFAULT_IMAGE_SUFFIX,
        )

    def _build_txt_header(self, book_info: BookInfoDict, name: str, author: str) -> str:
        """
        Top-of-file metadata block.
        """
        lines: list[str] = [name.strip()]

        if author:
            lines.append(f"作者：{author.strip()}")

        if serial_status := book_info.get("serial_status"):
            lines.append(f"状态：{serial_status.strip()}")

        if word_count := book_info.get("word_count"):
            lines.append(f"字数：{word_count.strip()}")

        if tags_list := book_info.get("tags"):
            tags = "、".join(t.strip() for t in tags_list if t)
            if tags:
                lines.append(f"标签：{tags}")

        if update_time := (book_info.get("update_time") or "").strip():
            lines.append(f"更新：{update_time}")

        if summary := (book_info.get("summary") or "").strip():
            lines.extend(["", summary])

        return "\n".join(lines).strip() + "\n\n"

    def _build_txt_volume_heading(self, vol_title: str, volume: VolumeInfoDict) -> str:
        """
        Render a volume heading. Include optional info if present.
        """
        meta_bits: list[str] = []

        if v_update_time := volume.get("update_time"):
            meta_bits.append(f"更新时间：{v_update_time}")

        if v_word_count := volume.get("word_count"):
            meta_bits.append(f"字数：{v_word_count}")

        if v_intro := (volume.get("volume_intro") or "").strip():
            meta_bits.append(f"简介：{v_intro}")

        line = f"=== {vol_title.strip()} ==="
        return f"{line}\n" + ("\n".join(meta_bits) + "\n\n" if meta_bits else "\n\n")

    def _build_txt_chapter(self, chap: ChapterDict) -> str:
        """
        Render one chapter to text.
        """
        # Title
        raw_title = chap.get("title", "")
        title_line = self._cleaner.clean_title(raw_title).strip()

        cleaned = self._cleaner.clean_content(chap.get("content") or "").strip()
        cleaned = self._remove_all_images(cleaned)
        body = "\n".join(s for line in cleaned.splitlines() if (s := line.strip()))

        # Extras
        extras_txt = self._render_txt_extras(chap.get("extra", {}) or {})

        return (
            f"{title_line}\n\n{body}\n\n{extras_txt}\n\n"
            if extras_txt
            else f"{title_line}\n\n{body}\n\n"
        )

    def _inline_remote_images(
        self,
        book: EpubBuilder,
        content: str,
        image_dir: Path,
    ) -> str:
        """
        Download every remote `<img src="...">` in `content` into `image_dir`,
        and replace the original URL with the local path.

        :param content: HTML/text of the chapter containing <img> tags.
        :param image_dir: Directory to save downloaded images into.
        """
        if "<img" not in content.lower():
            return content

        def _replace(m: re.Match[str]) -> str:
            url = m.group(1)
            try:
                local_path = self._download_image(url, image_dir, on_exist="skip")
                if not local_path:
                    return m.group(0)
                filename = book.add_image(local_path)
                return f'<img src="../Images/{filename}" />'
            except Exception as e:
                self.logger.debug("Inline image failed for %s: %s", url, e)
                return m.group(0)

        return self._IMG_SRC_RE.sub(_replace, content)

    @classmethod
    def _remove_all_images(cls, content: str) -> str:
        """
        Remove all <img> tags from the given content.

        :param content: HTML/text of the chapter containing <img> tags.
        """
        return cls._IMG_TAG_RE.sub("", content)

    def _build_epub_chapter(
        self,
        title: str,
        paragraphs: str,
        extras: dict[str, str],
    ) -> str:
        """
        Build a formatted chapter EPUB HTML including title, body paragraphs,
        and optional extra sections.
        """
        parts = []
        parts.append(f"<h2>{escape(title)}</h2>")
        parts.append(self._render_html_block(paragraphs))

        extras_epub = self._render_epub_extras(extras)
        if extras_epub:
            parts.append(extras_epub)

        return "\n".join(parts)

    @classmethod
    def _render_html_block(cls, text: str) -> str:
        out: list[str] = []
        for raw in text.splitlines():
            line = raw.strip()
            if not line:
                continue

            # case 1: already wrapped in a <div>...</div>
            if line.startswith("<div") and line.endswith("</div>"):
                out.append(line)
                continue

            # case 2: single <img> line
            if cls._IMG_TAG_RE.fullmatch(line):
                out.append(cls._IMAGE_WRAPPER.format(img=line))
                continue

            # case 3: inline <img> in text -> escape other text, preserve <img>
            if "<img " in line.lower():
                pieces = []
                last = 0
                for m in cls._IMG_TAG_RE.finditer(line):
                    pieces.append(escape(line[last : m.start()]))
                    pieces.append(m.group(0))
                    last = m.end()
                pieces.append(escape(line[last:]))
                out.append("<p>" + "".join(pieces) + "</p>")
                continue

            # plain text line
            out.append(f"<p>{escape(line)}</p>")

        return "\n".join(out)
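
For context, the image handling in `_inline_remote_images` above is a plain `re.sub` callback: each remote `<img src="...">` is downloaded once, registered with the EPUB builder, and rewritten to a relative `../Images/` path, with the original tag left untouched on failure. A minimal standalone sketch of that pattern follows; `fetch` and `register` are hypothetical stand-ins for the exporter's `_download_image` and `EpubBuilder.add_image`, not the package's actual API.

import re
from pathlib import Path

# Same capture idea as CommonExporter._IMG_SRC_RE: group(1) is the remote URL.
IMG_SRC_RE = re.compile(
    r'<img[^>]*\bsrc\s*=\s*["\'](https?://[^"\']+)["\'][^>]*>',
    re.IGNORECASE,
)


def inline_remote_images(content: str, image_dir: Path, fetch, register) -> str:
    """Rewrite remote <img> tags to local EPUB paths; keep the tag on failure.

    fetch(url, image_dir) and register(local_path) are hypothetical hooks.
    """
    if "<img" not in content.lower():
        return content  # cheap pre-check avoids a useless regex pass

    def _replace(m: re.Match) -> str:
        url = m.group(1)
        local_path = fetch(url, image_dir)
        if local_path is None:
            return m.group(0)  # download failed: leave the original tag alone
        return f'<img src="../Images/{register(local_path)}" />'

    return IMG_SRC_RE.sub(_replace, content)


# Usage with trivial stand-ins (no network): every image "downloads" to a dummy path.
html = '<p>Intro</p>\n<img src="https://example.com/a.jpg" alt="">'
out = inline_remote_images(
    html,
    Path("images"),
    fetch=lambda url, d: d / Path(url).name,
    register=lambda p: p.name,
)
print(out)  # ... <img src="../Images/a.jpg" />
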
novel_downloader/core/exporters/linovelib.py (new file)
@@ -0,0 +1,47 @@
#!/usr/bin/env python3
"""
novel_downloader.core.exporters.linovelib
-----------------------------------------

Exporter implementation for handling Linovelib novels.
"""

from pathlib import Path

from novel_downloader.core.exporters.common import CommonExporter
from novel_downloader.core.exporters.registry import register_exporter
from novel_downloader.utils import download
from novel_downloader.utils.constants import (
    DEFAULT_HEADERS,
    DEFAULT_IMAGE_SUFFIX,
)

_IMG_HEADERS = DEFAULT_HEADERS.copy()
_IMG_HEADERS["Referer"] = "https://www.linovelib.com/"


@register_exporter(site_keys=["linovelib"])
class LinovelibExporter(CommonExporter):
    """
    Exporter for 哔哩轻小说 (Linovelib) novels.
    """

    @staticmethod
    def _download_image(
        img_url: str,
        target_dir: Path,
        filename: str | None = None,
        *,
        on_exist: str = "overwrite",
    ) -> Path | None:
        """
        Download an image from a URL into the target dir with the given filename.
        """
        return download(
            img_url,
            target_dir,
            filename=filename,
            headers=_IMG_HEADERS,
            on_exist="overwrite",
            default_suffix=DEFAULT_IMAGE_SUFFIX,
        )
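
The only site-specific behavior added here is the Referer header on image downloads; image CDNs commonly reject hotlinked requests that lack one (an assumption, the diff itself only shows the header being set). A self-contained sketch of the same header-carrying download using only the standard library, with a placeholder User-Agent since DEFAULT_HEADERS is not shown in this diff:

from pathlib import Path
from urllib.request import Request, urlopen

# Mirrors the module-level _IMG_HEADERS: copy the defaults, then add a Referer.
IMG_HEADERS = {
    "User-Agent": "Mozilla/5.0",  # assumed stand-in for DEFAULT_HEADERS
    "Referer": "https://www.linovelib.com/",
}


def download_image(img_url: str, target_dir: Path, filename: str) -> Path:
    """Fetch img_url with the Referer header and write it under target_dir."""
    target_dir.mkdir(parents=True, exist_ok=True)
    dest = target_dir / filename
    req = Request(img_url, headers=IMG_HEADERS)
    with urlopen(req) as resp:
        dest.write_bytes(resp.read())
    return dest
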