novel-downloader 1.4.0__py3-none-any.whl → 1.4.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- novel_downloader/__init__.py +1 -1
- novel_downloader/cli/download.py +69 -10
- novel_downloader/config/adapter.py +42 -9
- novel_downloader/core/downloaders/base.py +26 -22
- novel_downloader/core/downloaders/common.py +41 -5
- novel_downloader/core/downloaders/qidian.py +60 -32
- novel_downloader/core/exporters/common/epub.py +153 -68
- novel_downloader/core/exporters/epub_util.py +1358 -0
- novel_downloader/core/exporters/linovelib/epub.py +147 -190
- novel_downloader/core/fetchers/linovelib/browser.py +15 -0
- novel_downloader/core/fetchers/linovelib/session.py +15 -0
- novel_downloader/core/fetchers/qidian/browser.py +62 -10
- novel_downloader/core/interfaces/downloader.py +13 -12
- novel_downloader/locales/en.json +2 -0
- novel_downloader/locales/zh.json +2 -0
- novel_downloader/models/__init__.py +2 -0
- novel_downloader/models/config.py +8 -0
- novel_downloader/tui/screens/home.py +5 -4
- novel_downloader/utils/constants.py +0 -29
- {novel_downloader-1.4.0.dist-info → novel_downloader-1.4.2.dist-info}/METADATA +4 -2
- {novel_downloader-1.4.0.dist-info → novel_downloader-1.4.2.dist-info}/RECORD +25 -30
- novel_downloader/core/exporters/epub_utils/__init__.py +0 -40
- novel_downloader/core/exporters/epub_utils/css_builder.py +0 -75
- novel_downloader/core/exporters/epub_utils/image_loader.py +0 -131
- novel_downloader/core/exporters/epub_utils/initializer.py +0 -100
- novel_downloader/core/exporters/epub_utils/text_to_html.py +0 -178
- novel_downloader/core/exporters/epub_utils/volume_intro.py +0 -60
- {novel_downloader-1.4.0.dist-info → novel_downloader-1.4.2.dist-info}/WHEEL +0 -0
- {novel_downloader-1.4.0.dist-info → novel_downloader-1.4.2.dist-info}/entry_points.txt +0 -0
- {novel_downloader-1.4.0.dist-info → novel_downloader-1.4.2.dist-info}/licenses/LICENSE +0 -0
- {novel_downloader-1.4.0.dist-info → novel_downloader-1.4.2.dist-info}/top_level.txt +0 -0
@@ -8,25 +8,19 @@ Contains the logic for exporting novel content as a single `.epub` file.
|
|
8
8
|
|
9
9
|
from __future__ import annotations
|
10
10
|
|
11
|
+
import html
|
11
12
|
import json
|
13
|
+
import re
|
12
14
|
from pathlib import Path
|
13
15
|
from typing import TYPE_CHECKING
|
14
16
|
|
15
|
-
from
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
create_css_items,
|
21
|
-
create_volume_intro,
|
22
|
-
generate_book_intro_html,
|
23
|
-
init_epub,
|
24
|
-
inline_remote_images,
|
25
|
-
)
|
26
|
-
from novel_downloader.utils.constants import (
|
27
|
-
EPUB_OPTIONS,
|
28
|
-
EPUB_TEXT_FOLDER,
|
17
|
+
from novel_downloader.core.exporters.epub_util import (
|
18
|
+
Book,
|
19
|
+
Chapter,
|
20
|
+
StyleSheet,
|
21
|
+
Volume,
|
29
22
|
)
|
23
|
+
from novel_downloader.utils.constants import CSS_MAIN_PATH
|
30
24
|
from novel_downloader.utils.file_utils import sanitize_filename
|
31
25
|
from novel_downloader.utils.network import download_image
|
32
26
|
from novel_downloader.utils.text_utils import clean_chapter_title
|
@@ -34,6 +28,16 @@ from novel_downloader.utils.text_utils import clean_chapter_title
|
|
34
28
|
if TYPE_CHECKING:
|
35
29
|
from .main_exporter import CommonExporter
|
36
30
|
|
31
|
+
_IMAGE_WRAPPER = (
|
32
|
+
'<div class="duokan-image-single illus"><img src="../Images/{filename}" /></div>'
|
33
|
+
)
|
34
|
+
_IMG_TAG_PATTERN = re.compile(
|
35
|
+
r'<img\s+[^>]*src=[\'"]([^\'"]+)[\'"][^>]*>', re.IGNORECASE
|
36
|
+
)
|
37
|
+
_RAW_HTML_RE = re.compile(
|
38
|
+
r'^(<img\b[^>]*?\/>|<div class="duokan-image-single illus">.*?<\/div>)$', re.DOTALL
|
39
|
+
)
|
40
|
+
|
37
41
|
|
38
42
|
def common_export_as_epub(
|
39
43
|
exporter: CommonExporter,
|
@@ -71,12 +75,12 @@ def common_export_as_epub(
|
|
71
75
|
return
|
72
76
|
|
73
77
|
book_name = book_info.get("book_name", book_id)
|
78
|
+
book_author = book_info.get("author", "")
|
74
79
|
exporter.logger.info(
|
75
80
|
"%s Starting EPUB generation: %s (ID: %s)", TAG, book_name, book_id
|
76
81
|
)
|
77
82
|
|
78
83
|
# --- Generate intro + cover ---
|
79
|
-
intro_html = generate_book_intro_html(book_info)
|
80
84
|
cover_path: Path | None = None
|
81
85
|
cover_url = book_info.get("cover_url", "")
|
82
86
|
if config.include_cover and cover_url:
|
@@ -90,49 +94,56 @@ def common_export_as_epub(
|
|
90
94
|
exporter.logger.warning("Failed to download cover from %s", cover_url)
|
91
95
|
|
92
96
|
# --- Initialize EPUB ---
|
93
|
-
book
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
97
|
+
book = Book(
|
98
|
+
title=book_name,
|
99
|
+
author=book_author,
|
100
|
+
description=book_info.get("summary", ""),
|
101
|
+
cover_path=cover_path,
|
102
|
+
subject=book_info.get("subject", []),
|
103
|
+
serial_status=book_info.get("serial_status", ""),
|
104
|
+
word_count=book_info.get("word_count", ""),
|
105
|
+
uid=f"{exporter.site}_{book_id}",
|
106
|
+
)
|
107
|
+
main_css = StyleSheet(
|
108
|
+
id="main_style",
|
109
|
+
content=CSS_MAIN_PATH.read_text(encoding="utf-8"),
|
110
|
+
filename="main.css",
|
99
111
|
)
|
100
|
-
|
101
|
-
include_main=True,
|
102
|
-
include_volume=True,
|
103
|
-
):
|
104
|
-
book.add_item(css)
|
112
|
+
book.add_stylesheet(main_css)
|
105
113
|
|
106
114
|
# --- Compile chapters ---
|
107
115
|
volumes = book_info.get("volumes", [])
|
108
116
|
for vol_index, vol in enumerate(volumes, start=1):
|
109
|
-
raw_vol_name = vol.get("volume_name", "")
|
110
|
-
|
117
|
+
raw_vol_name = vol.get("volume_name", "")
|
118
|
+
raw_vol_name = raw_vol_name.replace(book_name, "").strip()
|
119
|
+
vol_name = raw_vol_name or f"Volume {vol_index}"
|
111
120
|
exporter.logger.info("Processing volume %d: %s", vol_index, vol_name)
|
112
121
|
|
113
|
-
|
114
|
-
|
122
|
+
vol_cover_path: Path | None = None
|
123
|
+
vol_cover_url = vol.get("volume_cover", "")
|
124
|
+
if vol_cover_url:
|
125
|
+
vol_cover_path = download_image(
|
126
|
+
vol_cover_url,
|
127
|
+
img_dir,
|
128
|
+
on_exist="skip",
|
129
|
+
)
|
130
|
+
|
131
|
+
curr_vol = Volume(
|
132
|
+
id=f"vol_{vol_index}",
|
115
133
|
title=vol_name,
|
116
|
-
|
117
|
-
|
134
|
+
intro=vol.get("volume_intro", ""),
|
135
|
+
cover=vol_cover_path,
|
118
136
|
)
|
119
|
-
vol_intro.content = create_volume_intro(vol_name, vol.get("volume_intro", ""))
|
120
|
-
vol_intro.add_link(
|
121
|
-
href="../Styles/volume-intro.css",
|
122
|
-
rel="stylesheet",
|
123
|
-
type="text/css",
|
124
|
-
)
|
125
|
-
book.add_item(vol_intro)
|
126
|
-
spine.append(vol_intro)
|
127
|
-
|
128
|
-
section = epub.Section(vol_name, vol_intro.file_name)
|
129
|
-
chapter_items: list[epub.EpubHtml] = []
|
130
137
|
|
131
138
|
for chap in vol.get("chapters", []):
|
132
139
|
chap_id = chap.get("chapterId")
|
133
140
|
chap_title = chap.get("title", "")
|
134
141
|
if not chap_id:
|
135
|
-
exporter.logger.warning(
|
142
|
+
exporter.logger.warning(
|
143
|
+
"%s Missing chapterId, skipping: %s",
|
144
|
+
TAG,
|
145
|
+
chap,
|
146
|
+
)
|
136
147
|
continue
|
137
148
|
|
138
149
|
chapter_data = exporter._get_chapter(book_id, chap_id)
|
@@ -147,36 +158,28 @@ def common_export_as_epub(
|
|
147
158
|
|
148
159
|
title = clean_chapter_title(chapter_data.get("title", "")) or chap_id
|
149
160
|
content: str = chapter_data.get("content", "")
|
150
|
-
content =
|
151
|
-
chap_html =
|
161
|
+
content, img_paths = _inline_remote_images(content, img_dir)
|
162
|
+
chap_html = _txt_to_html(
|
152
163
|
chapter_title=title,
|
153
164
|
chapter_text=content,
|
154
|
-
|
165
|
+
extras={
|
166
|
+
"作者说": chapter_data.get("author_say", ""),
|
167
|
+
},
|
155
168
|
)
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
type="text/css",
|
169
|
+
curr_vol.add_chapter(
|
170
|
+
Chapter(
|
171
|
+
id=f"c_{chap_id}",
|
172
|
+
title=title,
|
173
|
+
content=chap_html,
|
174
|
+
css=[main_css],
|
175
|
+
)
|
164
176
|
)
|
165
|
-
|
166
|
-
|
167
|
-
chapter_items.append(item)
|
168
|
-
|
169
|
-
toc_list.append((section, chapter_items))
|
177
|
+
for img_path in img_paths:
|
178
|
+
book.add_image(img_path)
|
170
179
|
|
171
|
-
|
180
|
+
book.add_volume(curr_vol)
|
172
181
|
|
173
182
|
# --- 5. Finalize EPUB ---
|
174
|
-
exporter.logger.info("%s Building TOC and spine...", TAG)
|
175
|
-
book.toc = toc_list
|
176
|
-
book.spine = spine
|
177
|
-
book.add_item(epub.EpubNcx())
|
178
|
-
book.add_item(epub.EpubNav())
|
179
|
-
|
180
183
|
out_name = exporter.get_filename(
|
181
184
|
title=book_name,
|
182
185
|
author=book_info.get("author"),
|
@@ -185,8 +188,90 @@ def common_export_as_epub(
|
|
185
188
|
out_path = out_dir / sanitize_filename(out_name)
|
186
189
|
|
187
190
|
try:
|
188
|
-
|
191
|
+
book.export(out_path)
|
189
192
|
exporter.logger.info("%s EPUB successfully written to %s", TAG, out_path)
|
190
193
|
except Exception as e:
|
191
194
|
exporter.logger.error("%s Failed to write EPUB to %s: %s", TAG, out_path, e)
|
192
195
|
return
|
196
|
+
|
197
|
+
|
198
|
+
def _inline_remote_images(
    content: str,
    image_dir: str | Path,
) -> tuple[str, list[Path]]:
    """
    Download every remote `<img src="...">` in `content` into `image_dir`,
    and replace the original tag with _IMAGE_WRAPPER
    pointing to the local filename.

    This is best-effort: when an image cannot be downloaded (the helper
    raises or returns nothing) the original tag is left in place and a
    warning is logged, so a single broken image never aborts the export.

    :param content: HTML/text of the chapter containing <img> tags.
    :param image_dir: Directory to save downloaded images into.
    :return: A tuple (modified_content, list_of_downloaded_image_paths).
             Each local path appears once, in order of first appearance,
             even if the chapter references the same image several times.
    """
    # Local import: keeps this block self-contained without touching the
    # module's import section.
    import logging

    logger = logging.getLogger(__name__)
    downloaded_images: list[Path] = []
    seen_paths: set[Path] = set()

    def _replace(match: re.Match[str]) -> str:
        url = match.group(1)
        try:
            # download_image returns a Path or None
            local_path = download_image(
                url,
                image_dir,
                target_name=None,
                on_exist="skip",
            )
        except Exception as exc:
            # Deliberate best-effort: keep the original tag, but do not
            # hide the failure from whoever reads the logs.
            logger.warning("Failed to download image %s: %s", url, exc)
            return match.group(0)

        if not local_path:
            return match.group(0)

        # Register each file only once so the caller does not add the same
        # image to the book repeatedly.
        if local_path not in seen_paths:
            seen_paths.add(local_path)
            downloaded_images.append(local_path)
        return _IMAGE_WRAPPER.format(filename=local_path.name)

    modified_content = _IMG_TAG_PATTERN.sub(_replace, content)
    return modified_content, downloaded_images
|
233
|
+
|
234
|
+
|
235
|
+
def _txt_to_html(
|
236
|
+
chapter_title: str,
|
237
|
+
chapter_text: str,
|
238
|
+
extras: dict[str, str] | None = None,
|
239
|
+
) -> str:
|
240
|
+
"""
|
241
|
+
Convert chapter text and author note to styled HTML.
|
242
|
+
|
243
|
+
:param chapter_title: Title of the chapter.
|
244
|
+
:param chapter_text: Main content of the chapter.
|
245
|
+
:param extras: Optional dict of titles and content, e.g. {"作者说": "text"}.
|
246
|
+
:return: Rendered HTML as a string.
|
247
|
+
"""
|
248
|
+
|
249
|
+
def _render_block(text: str) -> str:
|
250
|
+
lines = (line.strip() for line in text.splitlines() if line.strip())
|
251
|
+
out = []
|
252
|
+
for line in lines:
|
253
|
+
# preserve raw HTML, otherwise wrap in <p>
|
254
|
+
if _RAW_HTML_RE.match(line):
|
255
|
+
out.append(line)
|
256
|
+
else:
|
257
|
+
out.append(f"<p>{html.escape(line)}</p>")
|
258
|
+
return "\n".join(out)
|
259
|
+
|
260
|
+
parts = []
|
261
|
+
parts.append(f"<h2>{html.escape(chapter_title)}</h2>")
|
262
|
+
parts.append(_render_block(chapter_text))
|
263
|
+
|
264
|
+
if extras:
|
265
|
+
for title, note in extras.items():
|
266
|
+
note = note.strip()
|
267
|
+
if not note:
|
268
|
+
continue
|
269
|
+
parts.extend(
|
270
|
+
[
|
271
|
+
"<hr />",
|
272
|
+
f"<p>{html.escape(title)}</p>",
|
273
|
+
_render_block(note),
|
274
|
+
]
|
275
|
+
)
|
276
|
+
|
277
|
+
return "\n".join(parts)
|