novel-downloader 1.4.1__py3-none-any.whl → 1.4.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- novel_downloader/__init__.py +1 -1
- novel_downloader/cli/download.py +70 -11
- novel_downloader/config/adapter.py +43 -9
- novel_downloader/core/__init__.py +19 -1
- novel_downloader/core/downloaders/base.py +26 -29
- novel_downloader/core/downloaders/biquge.py +1 -3
- novel_downloader/core/downloaders/common.py +41 -7
- novel_downloader/core/downloaders/esjzone.py +1 -3
- novel_downloader/core/downloaders/linovelib.py +1 -3
- novel_downloader/core/downloaders/qianbi.py +1 -3
- novel_downloader/core/downloaders/qidian.py +61 -37
- novel_downloader/core/downloaders/sfacg.py +1 -3
- novel_downloader/core/downloaders/yamibo.py +1 -3
- novel_downloader/core/exporters/common/epub.py +153 -68
- novel_downloader/core/exporters/epub_util.py +1358 -0
- novel_downloader/core/exporters/linovelib/epub.py +147 -190
- novel_downloader/core/factory/downloader.py +3 -6
- novel_downloader/core/fetchers/base/browser.py +32 -12
- novel_downloader/core/fetchers/esjzone/browser.py +8 -6
- novel_downloader/core/fetchers/qidian/browser.py +62 -10
- novel_downloader/core/fetchers/yamibo/browser.py +3 -3
- novel_downloader/core/interfaces/downloader.py +13 -12
- novel_downloader/core/parsers/qidian/chapter_encrypted.py +11 -2
- novel_downloader/core/parsers/qidian/chapter_normal.py +8 -1
- novel_downloader/core/parsers/qidian/main_parser.py +7 -2
- novel_downloader/core/parsers/qidian/utils/__init__.py +2 -0
- novel_downloader/core/parsers/qidian/utils/helpers.py +9 -0
- novel_downloader/locales/en.json +2 -0
- novel_downloader/locales/zh.json +2 -0
- novel_downloader/models/__init__.py +2 -0
- novel_downloader/models/config.py +9 -0
- novel_downloader/resources/config/settings.toml +1 -0
- novel_downloader/tui/screens/home.py +13 -6
- novel_downloader/utils/constants.py +0 -29
- novel_downloader/utils/{model_loader.py → fontocr/model_loader.py} +2 -2
- novel_downloader/utils/fontocr/ocr_v1.py +2 -1
- novel_downloader/utils/fontocr/ocr_v2.py +2 -1
- novel_downloader/utils/text_utils/__init__.py +8 -1
- novel_downloader/utils/text_utils/text_cleaning.py +51 -0
- {novel_downloader-1.4.1.dist-info → novel_downloader-1.4.3.dist-info}/METADATA +5 -2
- {novel_downloader-1.4.1.dist-info → novel_downloader-1.4.3.dist-info}/RECORD +45 -50
- novel_downloader/core/exporters/epub_utils/__init__.py +0 -40
- novel_downloader/core/exporters/epub_utils/css_builder.py +0 -75
- novel_downloader/core/exporters/epub_utils/image_loader.py +0 -131
- novel_downloader/core/exporters/epub_utils/initializer.py +0 -100
- novel_downloader/core/exporters/epub_utils/text_to_html.py +0 -178
- novel_downloader/core/exporters/epub_utils/volume_intro.py +0 -60
- {novel_downloader-1.4.1.dist-info → novel_downloader-1.4.3.dist-info}/WHEEL +0 -0
- {novel_downloader-1.4.1.dist-info → novel_downloader-1.4.3.dist-info}/entry_points.txt +0 -0
- {novel_downloader-1.4.1.dist-info → novel_downloader-1.4.3.dist-info}/licenses/LICENSE +0 -0
- {novel_downloader-1.4.1.dist-info → novel_downloader-1.4.3.dist-info}/top_level.txt +0 -0
@@ -13,11 +13,11 @@ from typing import Any, cast
|
|
13
13
|
|
14
14
|
from novel_downloader.core.downloaders.base import BaseDownloader
|
15
15
|
from novel_downloader.core.interfaces import (
|
16
|
-
ExporterProtocol,
|
17
16
|
FetcherProtocol,
|
18
17
|
ParserProtocol,
|
19
18
|
)
|
20
19
|
from novel_downloader.models import (
|
20
|
+
BookConfig,
|
21
21
|
ChapterDict,
|
22
22
|
CidTask,
|
23
23
|
DownloaderConfig,
|
@@ -40,15 +40,14 @@ class QidianDownloader(BaseDownloader):
|
|
40
40
|
self,
|
41
41
|
fetcher: FetcherProtocol,
|
42
42
|
parser: ParserProtocol,
|
43
|
-
exporter: ExporterProtocol,
|
44
43
|
config: DownloaderConfig,
|
45
44
|
):
|
46
45
|
config.request_interval = max(1.0, config.request_interval)
|
47
|
-
super().__init__(fetcher, parser,
|
46
|
+
super().__init__(fetcher, parser, config, "qidian")
|
48
47
|
|
49
48
|
async def _download_one(
|
50
49
|
self,
|
51
|
-
|
50
|
+
book: BookConfig,
|
52
51
|
*,
|
53
52
|
progress_hook: Callable[[int, int], Awaitable[None]] | None = None,
|
54
53
|
**kwargs: Any,
|
@@ -56,9 +55,13 @@ class QidianDownloader(BaseDownloader):
|
|
56
55
|
"""
|
57
56
|
The full download logic for a single book.
|
58
57
|
|
59
|
-
:param
|
58
|
+
:param book: BookConfig with at least 'book_id'.
|
60
59
|
"""
|
61
60
|
TAG = "[Downloader]"
|
61
|
+
book_id = book["book_id"]
|
62
|
+
start_id = book.get("start_id")
|
63
|
+
end_id = book.get("end_id")
|
64
|
+
ignore_set = set(book.get("ignore_ids", []))
|
62
65
|
|
63
66
|
raw_base = self.raw_data_dir / book_id
|
64
67
|
cache_base = self.cache_dir / book_id
|
@@ -140,6 +143,10 @@ class QidianDownloader(BaseDownloader):
|
|
140
143
|
cid_queue.task_done()
|
141
144
|
continue
|
142
145
|
|
146
|
+
if cid in ignore_set:
|
147
|
+
cid_queue.task_done()
|
148
|
+
continue
|
149
|
+
|
143
150
|
try:
|
144
151
|
html_list = await self.fetcher.get_book_chapter(book_id, cid)
|
145
152
|
await html_queue.put(
|
@@ -194,40 +201,39 @@ class QidianDownloader(BaseDownloader):
|
|
194
201
|
skip_retry = False
|
195
202
|
try:
|
196
203
|
chap_json: ChapterDict | None = None
|
197
|
-
if self.
|
204
|
+
if self.check_restricted(task.html_list):
|
198
205
|
self.logger.info(
|
199
206
|
"[Parser] Skipped restricted page for cid %s", task.cid
|
200
207
|
)
|
201
208
|
skip_retry = True
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
209
|
+
raise ValueError("Restricted content detected")
|
210
|
+
|
211
|
+
is_encrypted = self.check_encrypted(task.html_list)
|
212
|
+
chap_json = await asyncio.to_thread(
|
213
|
+
self.parser.parse_chapter,
|
214
|
+
task.html_list,
|
215
|
+
task.cid,
|
216
|
+
)
|
217
|
+
if is_encrypted:
|
218
|
+
skip_retry = True
|
219
|
+
if self.save_html:
|
220
|
+
folder = chapters_html_dir / (
|
221
|
+
"html_encrypted" if is_encrypted else "html_plain"
|
222
|
+
)
|
223
|
+
html_path = folder / f"{task.cid}.html"
|
224
|
+
save_as_txt(task.html_list[0], html_path, on_exist="skip")
|
225
|
+
self.logger.debug(
|
226
|
+
"%s Saved raw HTML for chapter %s to %s",
|
227
|
+
TAG,
|
206
228
|
task.cid,
|
229
|
+
html_path,
|
207
230
|
)
|
208
|
-
if self.check_encrypted(task.html_list):
|
209
|
-
skip_retry = True
|
210
231
|
if chap_json:
|
211
232
|
await save_queue.put(chap_json)
|
212
233
|
self.logger.info(
|
213
234
|
"[Parser] saved chapter %s",
|
214
235
|
task.cid,
|
215
236
|
)
|
216
|
-
if self.save_html:
|
217
|
-
is_encrypted = chap_json.get("extra", {}).get(
|
218
|
-
"encrypted", False
|
219
|
-
)
|
220
|
-
folder = chapters_html_dir / (
|
221
|
-
"html_encrypted" if is_encrypted else "html_plain"
|
222
|
-
)
|
223
|
-
html_path = folder / f"{task.cid}.html"
|
224
|
-
save_as_txt(task.html_list[0], html_path, on_exist="skip")
|
225
|
-
self.logger.debug(
|
226
|
-
"%s Saved raw HTML for chapter %s to %s",
|
227
|
-
TAG,
|
228
|
-
task.cid,
|
229
|
-
html_path,
|
230
|
-
)
|
231
237
|
else:
|
232
238
|
raise ValueError("Empty parse result")
|
233
239
|
except Exception as e:
|
@@ -296,20 +302,40 @@ class QidianDownloader(BaseDownloader):
|
|
296
302
|
)
|
297
303
|
)
|
298
304
|
|
299
|
-
|
305
|
+
found_start = start_id is None
|
306
|
+
stop_early = False
|
307
|
+
|
300
308
|
for vol in book_info.get("volumes", []):
|
301
309
|
chapters = vol.get("chapters", [])
|
302
310
|
for chap in chapters:
|
311
|
+
if stop_early:
|
312
|
+
break
|
313
|
+
|
303
314
|
cid = chap.get("chapterId")
|
304
|
-
if
|
315
|
+
if not cid:
|
316
|
+
continue
|
317
|
+
|
318
|
+
if not found_start:
|
319
|
+
if cid == start_id:
|
320
|
+
found_start = True
|
321
|
+
else:
|
322
|
+
completed_count += 1
|
323
|
+
continue
|
324
|
+
|
325
|
+
if end_id is not None and cid == end_id:
|
326
|
+
stop_early = True
|
327
|
+
|
328
|
+
if cid in ignore_set:
|
329
|
+
continue
|
330
|
+
|
331
|
+
if normal_cs.exists(cid) and self.skip_existing:
|
305
332
|
completed_count += 1
|
306
|
-
if progress_hook:
|
307
|
-
await progress_hook(completed_count, total_chapters)
|
308
|
-
last_cid = cid
|
309
333
|
continue
|
310
334
|
|
311
|
-
await cid_queue.put(CidTask(cid=cid, prev_cid=
|
312
|
-
|
335
|
+
await cid_queue.put(CidTask(cid=cid, prev_cid=None))
|
336
|
+
|
337
|
+
if stop_early:
|
338
|
+
break
|
313
339
|
|
314
340
|
await cid_queue.join()
|
315
341
|
await html_queue.join()
|
@@ -323,8 +349,6 @@ class QidianDownloader(BaseDownloader):
|
|
323
349
|
normal_cs.close()
|
324
350
|
encrypted_cs.close()
|
325
351
|
|
326
|
-
await asyncio.to_thread(self.exporter.export, book_id)
|
327
|
-
|
328
352
|
self.logger.info(
|
329
353
|
"%s Novel '%s' download completed.",
|
330
354
|
TAG,
|
@@ -333,7 +357,7 @@ class QidianDownloader(BaseDownloader):
|
|
333
357
|
return
|
334
358
|
|
335
359
|
@staticmethod
|
336
|
-
def
|
360
|
+
def check_restricted(html_list: list[str]) -> bool:
|
337
361
|
"""
|
338
362
|
Return True if page content indicates access restriction
|
339
363
|
(e.g. not subscribed/purchased).
|
@@ -7,7 +7,6 @@ novel_downloader.core.downloaders.sfacg
|
|
7
7
|
|
8
8
|
from novel_downloader.core.downloaders.common import CommonDownloader
|
9
9
|
from novel_downloader.core.interfaces import (
|
10
|
-
ExporterProtocol,
|
11
10
|
FetcherProtocol,
|
12
11
|
ParserProtocol,
|
13
12
|
)
|
@@ -21,7 +20,6 @@ class SfacgDownloader(CommonDownloader):
|
|
21
20
|
self,
|
22
21
|
fetcher: FetcherProtocol,
|
23
22
|
parser: ParserProtocol,
|
24
|
-
exporter: ExporterProtocol,
|
25
23
|
config: DownloaderConfig,
|
26
24
|
):
|
27
|
-
super().__init__(fetcher, parser,
|
25
|
+
super().__init__(fetcher, parser, config, "sfacg")
|
@@ -7,7 +7,6 @@ novel_downloader.core.downloaders.yamibo
|
|
7
7
|
|
8
8
|
from novel_downloader.core.downloaders.common import CommonDownloader
|
9
9
|
from novel_downloader.core.interfaces import (
|
10
|
-
ExporterProtocol,
|
11
10
|
FetcherProtocol,
|
12
11
|
ParserProtocol,
|
13
12
|
)
|
@@ -21,7 +20,6 @@ class YamiboDownloader(CommonDownloader):
|
|
21
20
|
self,
|
22
21
|
fetcher: FetcherProtocol,
|
23
22
|
parser: ParserProtocol,
|
24
|
-
exporter: ExporterProtocol,
|
25
23
|
config: DownloaderConfig,
|
26
24
|
):
|
27
|
-
super().__init__(fetcher, parser,
|
25
|
+
super().__init__(fetcher, parser, config, "yamibo")
|
@@ -8,25 +8,19 @@ Contains the logic for exporting novel content as a single `.epub` file.
|
|
8
8
|
|
9
9
|
from __future__ import annotations
|
10
10
|
|
11
|
+
import html
|
11
12
|
import json
|
13
|
+
import re
|
12
14
|
from pathlib import Path
|
13
15
|
from typing import TYPE_CHECKING
|
14
16
|
|
15
|
-
from
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
create_css_items,
|
21
|
-
create_volume_intro,
|
22
|
-
generate_book_intro_html,
|
23
|
-
init_epub,
|
24
|
-
inline_remote_images,
|
25
|
-
)
|
26
|
-
from novel_downloader.utils.constants import (
|
27
|
-
EPUB_OPTIONS,
|
28
|
-
EPUB_TEXT_FOLDER,
|
17
|
+
from novel_downloader.core.exporters.epub_util import (
|
18
|
+
Book,
|
19
|
+
Chapter,
|
20
|
+
StyleSheet,
|
21
|
+
Volume,
|
29
22
|
)
|
23
|
+
from novel_downloader.utils.constants import CSS_MAIN_PATH
|
30
24
|
from novel_downloader.utils.file_utils import sanitize_filename
|
31
25
|
from novel_downloader.utils.network import download_image
|
32
26
|
from novel_downloader.utils.text_utils import clean_chapter_title
|
@@ -34,6 +28,16 @@ from novel_downloader.utils.text_utils import clean_chapter_title
|
|
34
28
|
if TYPE_CHECKING:
|
35
29
|
from .main_exporter import CommonExporter
|
36
30
|
|
31
|
+
# Markup that replaces an <img> tag once its image has been saved locally.
# `{filename}` is filled with the local file name; the src is relative
# ("../Images/<filename>").
_IMAGE_WRAPPER = (
    '<div class="duokan-image-single illus"><img src="../Images/{filename}" /></div>'
)
# Matches an <img ...> tag and captures the value of its `src` attribute.
# The lookbehind stops `src=` from matching the tail of a different attribute
# such as `data-src=`, and the lazy prefix picks the first real `src`.
_IMG_TAG_PATTERN = re.compile(
    r'<img\s+[^>]*?(?<![\w-])src=[\'"]([^\'"]+)[\'"][^>]*>', re.IGNORECASE
)
# Matches a string that consists solely of a self-closed <img ... /> tag or of
# a <div class="duokan-image-single illus">...</div> element (the shape
# produced by _IMAGE_WRAPPER).
_RAW_HTML_RE = re.compile(
    r'^(<img\b[^>]*?\/>|<div class="duokan-image-single illus">.*?<\/div>)$', re.DOTALL
)
|
40
|
+
|
37
41
|
|
38
42
|
def common_export_as_epub(
|
39
43
|
exporter: CommonExporter,
|
@@ -71,12 +75,12 @@ def common_export_as_epub(
|
|
71
75
|
return
|
72
76
|
|
73
77
|
book_name = book_info.get("book_name", book_id)
|
78
|
+
book_author = book_info.get("author", "")
|
74
79
|
exporter.logger.info(
|
75
80
|
"%s Starting EPUB generation: %s (ID: %s)", TAG, book_name, book_id
|
76
81
|
)
|
77
82
|
|
78
83
|
# --- Generate intro + cover ---
|
79
|
-
intro_html = generate_book_intro_html(book_info)
|
80
84
|
cover_path: Path | None = None
|
81
85
|
cover_url = book_info.get("cover_url", "")
|
82
86
|
if config.include_cover and cover_url:
|
@@ -90,49 +94,56 @@ def common_export_as_epub(
|
|
90
94
|
exporter.logger.warning("Failed to download cover from %s", cover_url)
|
91
95
|
|
92
96
|
# --- Initialize EPUB ---
|
93
|
-
book
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
97
|
+
book = Book(
|
98
|
+
title=book_name,
|
99
|
+
author=book_author,
|
100
|
+
description=book_info.get("summary", ""),
|
101
|
+
cover_path=cover_path,
|
102
|
+
subject=book_info.get("subject", []),
|
103
|
+
serial_status=book_info.get("serial_status", ""),
|
104
|
+
word_count=book_info.get("word_count", ""),
|
105
|
+
uid=f"{exporter.site}_{book_id}",
|
106
|
+
)
|
107
|
+
main_css = StyleSheet(
|
108
|
+
id="main_style",
|
109
|
+
content=CSS_MAIN_PATH.read_text(encoding="utf-8"),
|
110
|
+
filename="main.css",
|
99
111
|
)
|
100
|
-
|
101
|
-
include_main=True,
|
102
|
-
include_volume=True,
|
103
|
-
):
|
104
|
-
book.add_item(css)
|
112
|
+
book.add_stylesheet(main_css)
|
105
113
|
|
106
114
|
# --- Compile chapters ---
|
107
115
|
volumes = book_info.get("volumes", [])
|
108
116
|
for vol_index, vol in enumerate(volumes, start=1):
|
109
|
-
raw_vol_name = vol.get("volume_name", "")
|
110
|
-
|
117
|
+
raw_vol_name = vol.get("volume_name", "")
|
118
|
+
raw_vol_name = raw_vol_name.replace(book_name, "").strip()
|
119
|
+
vol_name = raw_vol_name or f"Volume {vol_index}"
|
111
120
|
exporter.logger.info("Processing volume %d: %s", vol_index, vol_name)
|
112
121
|
|
113
|
-
|
114
|
-
|
122
|
+
vol_cover_path: Path | None = None
|
123
|
+
vol_cover_url = vol.get("volume_cover", "")
|
124
|
+
if vol_cover_url:
|
125
|
+
vol_cover_path = download_image(
|
126
|
+
vol_cover_url,
|
127
|
+
img_dir,
|
128
|
+
on_exist="skip",
|
129
|
+
)
|
130
|
+
|
131
|
+
curr_vol = Volume(
|
132
|
+
id=f"vol_{vol_index}",
|
115
133
|
title=vol_name,
|
116
|
-
|
117
|
-
|
134
|
+
intro=vol.get("volume_intro", ""),
|
135
|
+
cover=vol_cover_path,
|
118
136
|
)
|
119
|
-
vol_intro.content = create_volume_intro(vol_name, vol.get("volume_intro", ""))
|
120
|
-
vol_intro.add_link(
|
121
|
-
href="../Styles/volume-intro.css",
|
122
|
-
rel="stylesheet",
|
123
|
-
type="text/css",
|
124
|
-
)
|
125
|
-
book.add_item(vol_intro)
|
126
|
-
spine.append(vol_intro)
|
127
|
-
|
128
|
-
section = epub.Section(vol_name, vol_intro.file_name)
|
129
|
-
chapter_items: list[epub.EpubHtml] = []
|
130
137
|
|
131
138
|
for chap in vol.get("chapters", []):
|
132
139
|
chap_id = chap.get("chapterId")
|
133
140
|
chap_title = chap.get("title", "")
|
134
141
|
if not chap_id:
|
135
|
-
exporter.logger.warning(
|
142
|
+
exporter.logger.warning(
|
143
|
+
"%s Missing chapterId, skipping: %s",
|
144
|
+
TAG,
|
145
|
+
chap,
|
146
|
+
)
|
136
147
|
continue
|
137
148
|
|
138
149
|
chapter_data = exporter._get_chapter(book_id, chap_id)
|
@@ -147,36 +158,28 @@ def common_export_as_epub(
|
|
147
158
|
|
148
159
|
title = clean_chapter_title(chapter_data.get("title", "")) or chap_id
|
149
160
|
content: str = chapter_data.get("content", "")
|
150
|
-
content =
|
151
|
-
chap_html =
|
161
|
+
content, img_paths = _inline_remote_images(content, img_dir)
|
162
|
+
chap_html = _txt_to_html(
|
152
163
|
chapter_title=title,
|
153
164
|
chapter_text=content,
|
154
|
-
|
165
|
+
extras={
|
166
|
+
"作者说": chapter_data.get("author_say", ""),
|
167
|
+
},
|
155
168
|
)
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
type="text/css",
|
169
|
+
curr_vol.add_chapter(
|
170
|
+
Chapter(
|
171
|
+
id=f"c_{chap_id}",
|
172
|
+
title=title,
|
173
|
+
content=chap_html,
|
174
|
+
css=[main_css],
|
175
|
+
)
|
164
176
|
)
|
165
|
-
|
166
|
-
|
167
|
-
chapter_items.append(item)
|
168
|
-
|
169
|
-
toc_list.append((section, chapter_items))
|
177
|
+
for img_path in img_paths:
|
178
|
+
book.add_image(img_path)
|
170
179
|
|
171
|
-
|
180
|
+
book.add_volume(curr_vol)
|
172
181
|
|
173
182
|
# --- 5. Finalize EPUB ---
|
174
|
-
exporter.logger.info("%s Building TOC and spine...", TAG)
|
175
|
-
book.toc = toc_list
|
176
|
-
book.spine = spine
|
177
|
-
book.add_item(epub.EpubNcx())
|
178
|
-
book.add_item(epub.EpubNav())
|
179
|
-
|
180
183
|
out_name = exporter.get_filename(
|
181
184
|
title=book_name,
|
182
185
|
author=book_info.get("author"),
|
@@ -185,8 +188,90 @@ def common_export_as_epub(
|
|
185
188
|
out_path = out_dir / sanitize_filename(out_name)
|
186
189
|
|
187
190
|
try:
|
188
|
-
|
191
|
+
book.export(out_path)
|
189
192
|
exporter.logger.info("%s EPUB successfully written to %s", TAG, out_path)
|
190
193
|
except Exception as e:
|
191
194
|
exporter.logger.error("%s Failed to write EPUB to %s: %s", TAG, out_path, e)
|
192
195
|
return
|
196
|
+
|
197
|
+
|
198
|
+
def _inline_remote_images(
    content: str,
    image_dir: str | Path,
) -> tuple[str, list[Path]]:
    """
    Download every `<img src="...">` found in `content` into `image_dir`
    and replace the original tag with _IMAGE_WRAPPER pointing to the
    local filename.

    A tag is left untouched when its image cannot be downloaded
    (``download_image`` returns a falsy value or raises).

    :param content: HTML/text of the chapter containing <img> tags.
    :param image_dir: Directory to save downloaded images into.
    :return: A tuple (modified_content, list_of_downloaded_image_paths).
             Each path appears once, in order of first appearance, even
             when the same image is referenced several times.
    """
    downloaded_images: list[Path] = []
    seen: set[Path] = set()

    def _replace(match: re.Match[str]) -> str:
        url = match.group(1)
        try:
            # download_image returns a Path or None
            local_path = download_image(
                url,
                image_dir,
                target_name=None,
                on_exist="skip",
            )
            if not local_path:
                return match.group(0)

            # Record each file once so callers do not register the same
            # image repeatedly when a chapter references it more than once.
            if local_path not in seen:
                seen.add(local_path)
                downloaded_images.append(local_path)
            return _IMAGE_WRAPPER.format(filename=local_path.name)
        except Exception:
            # Deliberate best effort: a failed download keeps the original
            # tag instead of aborting the whole chapter.
            return match.group(0)

    modified_content = _IMG_TAG_PATTERN.sub(_replace, content)
    return modified_content, downloaded_images
|
233
|
+
|
234
|
+
|
235
|
+
def _txt_to_html(
    chapter_title: str,
    chapter_text: str,
    extras: dict[str, str] | None = None,
) -> str:
    """
    Convert chapter text and optional extra sections to HTML.

    The result is an ``<h2>`` heading holding the escaped title, then the
    rendered chapter body, then one ``<hr />``-separated section per
    non-empty entry of ``extras``. All pieces are joined with newlines.

    :param chapter_title: Title of the chapter.
    :param chapter_text: Main content of the chapter.
    :param extras: Optional dict mapping a section heading to its text,
                   e.g. {"Author's Note": "text"}. Entries whose text is
                   ``None``, empty or whitespace-only are skipped.
    :return: Rendered HTML as a string.
    """

    def _render_block(text: str) -> str:
        rendered = []
        for raw_line in text.splitlines():
            line = raw_line.strip()
            if not line:
                continue
            # Lines that are already image markup are kept verbatim;
            # everything else is escaped and wrapped in <p>.
            if _RAW_HTML_RE.match(line):
                rendered.append(line)
            else:
                rendered.append(f"<p>{html.escape(line)}</p>")
        return "\n".join(rendered)

    parts = [
        f"<h2>{html.escape(chapter_title)}</h2>",
        _render_block(chapter_text),
    ]

    for heading, note in (extras or {}).items():
        # A stored value may be None rather than ""; treat it as empty
        # instead of failing on `.strip()`.
        note = (note or "").strip()
        if not note:
            continue
        parts.extend(
            [
                "<hr />",
                f"<p>{html.escape(heading)}</p>",
                _render_block(note),
            ]
        )

    return "\n".join(parts)
|