novel-downloader 1.4.5__py3-none-any.whl → 1.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- novel_downloader/__init__.py +1 -1
- novel_downloader/cli/__init__.py +2 -2
- novel_downloader/cli/config.py +1 -83
- novel_downloader/cli/download.py +4 -5
- novel_downloader/cli/export.py +4 -1
- novel_downloader/cli/main.py +2 -0
- novel_downloader/cli/search.py +123 -0
- novel_downloader/config/__init__.py +3 -10
- novel_downloader/config/adapter.py +190 -54
- novel_downloader/config/loader.py +2 -3
- novel_downloader/core/__init__.py +13 -13
- novel_downloader/core/downloaders/__init__.py +10 -11
- novel_downloader/core/downloaders/base.py +152 -26
- novel_downloader/core/downloaders/biquge.py +5 -1
- novel_downloader/core/downloaders/common.py +157 -378
- novel_downloader/core/downloaders/esjzone.py +5 -1
- novel_downloader/core/downloaders/linovelib.py +5 -1
- novel_downloader/core/downloaders/qianbi.py +291 -4
- novel_downloader/core/downloaders/qidian.py +199 -285
- novel_downloader/core/downloaders/registry.py +67 -0
- novel_downloader/core/downloaders/sfacg.py +5 -1
- novel_downloader/core/downloaders/yamibo.py +5 -1
- novel_downloader/core/exporters/__init__.py +10 -11
- novel_downloader/core/exporters/base.py +87 -7
- novel_downloader/core/exporters/biquge.py +5 -8
- novel_downloader/core/exporters/common/__init__.py +2 -2
- novel_downloader/core/exporters/common/epub.py +82 -166
- novel_downloader/core/exporters/common/main_exporter.py +0 -60
- novel_downloader/core/exporters/common/txt.py +82 -83
- novel_downloader/core/exporters/epub_util.py +157 -1330
- novel_downloader/core/exporters/esjzone.py +5 -8
- novel_downloader/core/exporters/linovelib/__init__.py +2 -2
- novel_downloader/core/exporters/linovelib/epub.py +157 -212
- novel_downloader/core/exporters/linovelib/main_exporter.py +2 -59
- novel_downloader/core/exporters/linovelib/txt.py +67 -63
- novel_downloader/core/exporters/qianbi.py +5 -8
- novel_downloader/core/exporters/qidian.py +14 -4
- novel_downloader/core/exporters/registry.py +53 -0
- novel_downloader/core/exporters/sfacg.py +5 -8
- novel_downloader/core/exporters/txt_util.py +67 -0
- novel_downloader/core/exporters/yamibo.py +5 -8
- novel_downloader/core/fetchers/__init__.py +19 -24
- novel_downloader/core/fetchers/base/__init__.py +3 -3
- novel_downloader/core/fetchers/base/browser.py +23 -4
- novel_downloader/core/fetchers/base/session.py +30 -5
- novel_downloader/core/fetchers/biquge/__init__.py +3 -3
- novel_downloader/core/fetchers/biquge/browser.py +5 -0
- novel_downloader/core/fetchers/biquge/session.py +6 -1
- novel_downloader/core/fetchers/esjzone/__init__.py +3 -3
- novel_downloader/core/fetchers/esjzone/browser.py +5 -0
- novel_downloader/core/fetchers/esjzone/session.py +6 -1
- novel_downloader/core/fetchers/linovelib/__init__.py +3 -3
- novel_downloader/core/fetchers/linovelib/browser.py +6 -1
- novel_downloader/core/fetchers/linovelib/session.py +6 -1
- novel_downloader/core/fetchers/qianbi/__init__.py +3 -3
- novel_downloader/core/fetchers/qianbi/browser.py +5 -0
- novel_downloader/core/fetchers/qianbi/session.py +5 -0
- novel_downloader/core/fetchers/qidian/__init__.py +3 -3
- novel_downloader/core/fetchers/qidian/browser.py +12 -4
- novel_downloader/core/fetchers/qidian/session.py +11 -3
- novel_downloader/core/fetchers/registry.py +71 -0
- novel_downloader/core/fetchers/sfacg/__init__.py +3 -3
- novel_downloader/core/fetchers/sfacg/browser.py +5 -0
- novel_downloader/core/fetchers/sfacg/session.py +5 -0
- novel_downloader/core/fetchers/yamibo/__init__.py +3 -3
- novel_downloader/core/fetchers/yamibo/browser.py +5 -0
- novel_downloader/core/fetchers/yamibo/session.py +6 -1
- novel_downloader/core/interfaces/__init__.py +7 -5
- novel_downloader/core/interfaces/searcher.py +18 -0
- novel_downloader/core/parsers/__init__.py +10 -11
- novel_downloader/core/parsers/{biquge/main_parser.py → biquge.py} +7 -2
- novel_downloader/core/parsers/{esjzone/main_parser.py → esjzone.py} +7 -2
- novel_downloader/core/parsers/{linovelib/main_parser.py → linovelib.py} +7 -2
- novel_downloader/core/parsers/{qianbi/main_parser.py → qianbi.py} +7 -2
- novel_downloader/core/parsers/qidian/__init__.py +2 -2
- novel_downloader/core/parsers/qidian/chapter_encrypted.py +23 -21
- novel_downloader/core/parsers/qidian/chapter_normal.py +1 -1
- novel_downloader/core/parsers/qidian/main_parser.py +10 -21
- novel_downloader/core/parsers/qidian/utils/__init__.py +11 -11
- novel_downloader/core/parsers/qidian/utils/decryptor_fetcher.py +5 -6
- novel_downloader/core/parsers/qidian/utils/node_decryptor.py +2 -2
- novel_downloader/core/parsers/registry.py +68 -0
- novel_downloader/core/parsers/{sfacg/main_parser.py → sfacg.py} +7 -2
- novel_downloader/core/parsers/{yamibo/main_parser.py → yamibo.py} +7 -2
- novel_downloader/core/searchers/__init__.py +20 -0
- novel_downloader/core/searchers/base.py +92 -0
- novel_downloader/core/searchers/biquge.py +83 -0
- novel_downloader/core/searchers/esjzone.py +84 -0
- novel_downloader/core/searchers/qianbi.py +131 -0
- novel_downloader/core/searchers/qidian.py +87 -0
- novel_downloader/core/searchers/registry.py +63 -0
- novel_downloader/locales/en.json +12 -4
- novel_downloader/locales/zh.json +12 -4
- novel_downloader/models/__init__.py +4 -30
- novel_downloader/models/config.py +12 -6
- novel_downloader/models/search.py +16 -0
- novel_downloader/models/types.py +0 -2
- novel_downloader/resources/config/settings.toml +31 -4
- novel_downloader/resources/css_styles/intro.css +83 -0
- novel_downloader/resources/css_styles/main.css +30 -89
- novel_downloader/utils/__init__.py +52 -0
- novel_downloader/utils/chapter_storage.py +244 -224
- novel_downloader/utils/constants.py +1 -21
- novel_downloader/utils/epub/__init__.py +34 -0
- novel_downloader/utils/epub/builder.py +377 -0
- novel_downloader/utils/epub/constants.py +77 -0
- novel_downloader/utils/epub/documents.py +403 -0
- novel_downloader/utils/epub/models.py +134 -0
- novel_downloader/utils/epub/utils.py +212 -0
- novel_downloader/utils/file_utils/__init__.py +10 -14
- novel_downloader/utils/file_utils/io.py +20 -51
- novel_downloader/utils/file_utils/normalize.py +2 -2
- novel_downloader/utils/file_utils/sanitize.py +2 -3
- novel_downloader/utils/fontocr/__init__.py +5 -5
- novel_downloader/utils/{hash_store.py → fontocr/hash_store.py} +4 -3
- novel_downloader/utils/{hash_utils.py → fontocr/hash_utils.py} +2 -2
- novel_downloader/utils/fontocr/ocr_v1.py +13 -1
- novel_downloader/utils/fontocr/ocr_v2.py +13 -1
- novel_downloader/utils/fontocr/ocr_v3.py +744 -0
- novel_downloader/utils/i18n.py +2 -0
- novel_downloader/utils/logger.py +2 -0
- novel_downloader/utils/network.py +110 -251
- novel_downloader/utils/state.py +1 -0
- novel_downloader/utils/text_utils/__init__.py +18 -17
- novel_downloader/utils/text_utils/diff_display.py +4 -5
- novel_downloader/utils/text_utils/numeric_conversion.py +253 -0
- novel_downloader/utils/text_utils/text_cleaner.py +179 -0
- novel_downloader/utils/text_utils/truncate_utils.py +62 -0
- novel_downloader/utils/time_utils/__init__.py +3 -3
- novel_downloader/utils/time_utils/datetime_utils.py +4 -5
- novel_downloader/utils/time_utils/sleep_utils.py +2 -3
- {novel_downloader-1.4.5.dist-info → novel_downloader-1.5.0.dist-info}/METADATA +2 -2
- novel_downloader-1.5.0.dist-info/RECORD +164 -0
- novel_downloader/config/site_rules.py +0 -94
- novel_downloader/core/factory/__init__.py +0 -20
- novel_downloader/core/factory/downloader.py +0 -73
- novel_downloader/core/factory/exporter.py +0 -58
- novel_downloader/core/factory/fetcher.py +0 -96
- novel_downloader/core/factory/parser.py +0 -86
- novel_downloader/core/fetchers/common/__init__.py +0 -14
- novel_downloader/core/fetchers/common/browser.py +0 -79
- novel_downloader/core/fetchers/common/session.py +0 -79
- novel_downloader/core/parsers/biquge/__init__.py +0 -10
- novel_downloader/core/parsers/common/__init__.py +0 -13
- novel_downloader/core/parsers/common/helper.py +0 -323
- novel_downloader/core/parsers/common/main_parser.py +0 -106
- novel_downloader/core/parsers/esjzone/__init__.py +0 -10
- novel_downloader/core/parsers/linovelib/__init__.py +0 -10
- novel_downloader/core/parsers/qianbi/__init__.py +0 -10
- novel_downloader/core/parsers/sfacg/__init__.py +0 -10
- novel_downloader/core/parsers/yamibo/__init__.py +0 -10
- novel_downloader/models/browser.py +0 -21
- novel_downloader/models/site_rules.py +0 -99
- novel_downloader/models/tasks.py +0 -33
- novel_downloader/resources/css_styles/volume-intro.css +0 -56
- novel_downloader/resources/json/replace_word_map.json +0 -4
- novel_downloader/resources/text/blacklist.txt +0 -22
- novel_downloader/utils/text_utils/chapter_formatting.py +0 -46
- novel_downloader/utils/text_utils/font_mapping.py +0 -28
- novel_downloader/utils/text_utils/text_cleaning.py +0 -107
- novel_downloader-1.4.5.dist-info/RECORD +0 -165
- {novel_downloader-1.4.5.dist-info → novel_downloader-1.5.0.dist-info}/WHEEL +0 -0
- {novel_downloader-1.4.5.dist-info → novel_downloader-1.5.0.dist-info}/entry_points.txt +0 -0
- {novel_downloader-1.4.5.dist-info → novel_downloader-1.5.0.dist-info}/licenses/LICENSE +0 -0
- {novel_downloader-1.4.5.dist-info → novel_downloader-1.5.0.dist-info}/top_level.txt +0 -0
novel_downloader/core/exporters/esjzone.py

@@ -5,21 +5,18 @@ novel_downloader.core.exporters.esjzone
 
 """
 
+__all__ = ["EsjzoneExporter"]
+
+from novel_downloader.core.exporters.registry import register_exporter
 from novel_downloader.models import ExporterConfig
 
 from .common import CommonExporter
 
 
+@register_exporter(site_keys=["esjzone"])
 class EsjzoneExporter(CommonExporter):
     def __init__(
         self,
         config: ExporterConfig,
     ):
-        super().__init__(
-            config,
-            site="esjzone",
-            chap_folders=["chapters"],
-        )
-
-
-__all__ = ["EsjzoneExporter"]
+        super().__init__(config, site="esjzone")
novel_downloader/core/exporters/linovelib/epub.py

@@ -8,37 +8,32 @@ Contains the logic for exporting novel content as a single `.epub` file.
 
 from __future__ import annotations
 
-import html
-import json
-import re
 from pathlib import Path
 from typing import TYPE_CHECKING
 
 from novel_downloader.core.exporters.epub_util import (
-
-
-
-
+    build_epub_chapter,
+    download_cover,
+    finalize_export,
+    inline_remote_images,
+    prepare_builder,
+)
+from novel_downloader.utils import (
+    download,
+    get_cleaner,
 )
 from novel_downloader.utils.constants import (
-    CSS_MAIN_PATH,
     DEFAULT_HEADERS,
+    DEFAULT_IMAGE_SUFFIX,
+)
+from novel_downloader.utils.epub import (
+    Chapter,
+    Volume,
 )
-from novel_downloader.utils.file_utils import sanitize_filename
-from novel_downloader.utils.network import download_image
 
 if TYPE_CHECKING:
     from .main_exporter import LinovelibExporter
 
-_IMAGE_WRAPPER = (
-    '<div class="duokan-image-single illus"><img src="../Images/{filename}" /></div>'
-)
-_IMG_TAG_PATTERN = re.compile(
-    r'<img\s+[^>]*src=[\'"]([^\'"]+)[\'"][^>]*>', re.IGNORECASE
-)
-_RAW_HTML_RE = re.compile(
-    r'^(<img\b[^>]*?\/>|<div class="duokan-image-single illus">.*?<\/div>)$', re.DOTALL
-)
 _IMG_HEADERS = DEFAULT_HEADERS.copy()
 _IMG_HEADERS["Referer"] = "https://www.linovelib.com/"
 

@@ -54,132 +49,143 @@ def export_whole_book(
     1. Load `book_info.json` for metadata.
     2. Generate introductory HTML and optionally include the cover image.
     3. Initialize the EPUB container.
-    4. Iterate through volumes and chapters, convert each to XHTML.
+    4. Iterate through volumes and chapters in volume-batches, convert each to XHTML.
     5. Assemble the spine, TOC, CSS and write out the final `.epub`.
 
+    :param exporter: The exporter instance, carrying config and path info.
     :param book_id: Identifier of the novel (used as subdirectory name).
     """
     TAG = "[exporter]"
     config = exporter._config
-
+
     raw_base = exporter._raw_data_dir / book_id
-    img_dir =
+    img_dir = raw_base / "images"
     out_dir = exporter.output_dir
+
     img_dir.mkdir(parents=True, exist_ok=True)
     out_dir.mkdir(parents=True, exist_ok=True)
 
+    cleaner = get_cleaner(
+        enabled=config.clean_text,
+        config=config.cleaner_cfg,
+    )
+
     # --- Load book_info.json ---
-
-
-        info_text = info_path.read_text(encoding="utf-8")
-        book_info = json.loads(info_text)
-    except Exception as e:
-        exporter.logger.error("%s Failed to load %s: %s", TAG, info_path, e)
+    book_info = exporter._load_book_info(book_id)
+    if not book_info:
         return
 
     book_name = book_info.get("book_name", book_id)
     book_author = book_info.get("author", "")
+
     exporter.logger.info(
         "%s Starting EPUB generation: %s (ID: %s)", TAG, book_name, book_id
     )
 
     # --- Generate intro + cover ---
-    cover_path
-
-
-
-
-
-
-
-        on_exist="overwrite",
-    )
-    if not cover_path:
-        exporter.logger.warning("Failed to download cover from %s", cover_url)
+    cover_path = download_cover(
+        book_info.get("cover_url", ""),
+        raw_base,
+        config.include_cover,
+        exporter.logger,
+        TAG,
+        headers=_IMG_HEADERS,
+    )
 
     # --- Initialize EPUB ---
-    book =
+    book, main_css = prepare_builder(
+        site_name=exporter.site,
+        book_id=book_id,
         title=book_name,
         author=book_author,
         description=book_info.get("summary", ""),
-        cover_path=cover_path,
         subject=book_info.get("subject", []),
         serial_status=book_info.get("serial_status", ""),
        word_count=book_info.get("word_count", ""),
-
-    )
-    main_css = StyleSheet(
-        id="main_style",
-        content=CSS_MAIN_PATH.read_text(encoding="utf-8"),
-        filename="main.css",
+        cover_path=cover_path,
     )
-    book.add_stylesheet(main_css)
 
     # --- Compile chapters ---
     volumes = book_info.get("volumes", [])
+    if not volumes:
+        exporter.logger.warning("%s No volumes found in metadata.", TAG)
+
     for vol_index, vol in enumerate(volumes, start=1):
-
-
-        vol_name =
+        raw_name = vol.get("volume_name", "")
+        raw_name = raw_name.replace(book_name, "").strip()
+        vol_name = raw_name or f"Volume {vol_index}"
         exporter.logger.info("Processing volume %d: %s", vol_index, vol_name)
 
-
+        # Batch-fetch chapters for this volume
+        chap_ids = [
+            chap.get("chapterId")
+            for chap in vol.get("chapters", [])
+            if chap.get("chapterId")
+        ]
+        chap_map = exporter._get_chapters(book_id, chap_ids)
+
+        vol_cover: Path | None = None
         vol_cover_url = vol.get("volume_cover", "")
         if vol_cover_url:
-
+            vol_cover = download(
                 vol_cover_url,
                 img_dir,
                 on_exist="skip",
+                default_suffix=DEFAULT_IMAGE_SUFFIX,
+                headers=_IMG_HEADERS,
             )
 
         curr_vol = Volume(
             id=f"vol_{vol_index}",
             title=vol_name,
-            intro=vol.get("volume_intro", ""),
-            cover=
+            intro=cleaner.clean_content(vol.get("volume_intro", "")),
+            cover=vol_cover,
         )
 
-        for
-            chap_id =
-            chap_title = chap.get("title", "")
+        for chap_meta in vol.get("chapters", []):
+            chap_id = chap_meta.get("chapterId")
            if not chap_id:
                 exporter.logger.warning(
                     "%s Missing chapterId, skipping: %s",
                     TAG,
-
+                    chap_meta,
                 )
                 continue
 
-
-
+            chap_title = cleaner.clean_title(chap_meta.get("title", ""))
+            data = chap_map.get(chap_id)
+            if not data:
                 exporter.logger.info(
-                    "%s Missing chapter
+                    "%s Missing chapter: %s (%s), skipping.",
                     TAG,
                     chap_title,
                     chap_id,
                 )
                 continue
 
-            title =
-            content
-            content
-
-
-
-
-
-
+            title = cleaner.clean_title(data.get("title", chap_title)) or chap_id
+            content = cleaner.clean_content(data.get("content", ""))
+            content = inline_remote_images(
+                book,
+                content,
+                img_dir,
+                headers=_IMG_HEADERS,
+            )
+
+            chap_html = build_epub_chapter(
+                title=title,
+                paragraphs=content,
+                extras={},
             )
             curr_vol.add_chapter(
                 Chapter(
                     id=f"c_{chap_id}",
+                    filename=f"c{chap_id}.xhtml",
                     title=title,
                     content=chap_html,
                     css=[main_css],
                 )
             )
-            for img_path in img_paths:
-                book.add_image(img_path)
 
         book.add_volume(curr_vol)
 

@@ -189,13 +195,13 @@ def export_whole_book(
         author=book_info.get("author"),
         ext="epub",
     )
-
-
-
-
-    exporter.logger
-
-
+    finalize_export(
+        book=book,
+        out_dir=out_dir,
+        filename=out_name,
+        logger=exporter.logger,
+        tag=TAG,
+    )
     return
 
 

@@ -204,203 +210,142 @@ def export_by_volume(
     book_id: str,
 ) -> None:
     """
-    Export
+    Export each volume of a novel as a separate EPUB file.
+
+    Steps:
+    1. Load metadata from `book_info.json`.
+    2. For each volume:
+        a. Clean the volume title and determine output filename.
+        b. Batch-fetch all chapters in this volume to minimize SQLite overhead.
+        c. Initialize an EPUB builder for the volume, including cover and intro.
+        d. For each chapter: clean title & content, inline remote images.
+        e. Finalize and write the volume EPUB.
 
     :param book_id: Identifier of the novel (used as subdirectory name).
     """
     TAG = "[exporter]"
     config = exporter._config
-
+
     raw_base = exporter._raw_data_dir / book_id
-    img_dir =
+    img_dir = raw_base / "images"
     out_dir = exporter.output_dir
+
     img_dir.mkdir(parents=True, exist_ok=True)
     out_dir.mkdir(parents=True, exist_ok=True)
 
+    cleaner = get_cleaner(
+        enabled=config.clean_text,
+        config=config.cleaner_cfg,
+    )
+
     # --- Load book_info.json ---
-
-
-        info_text = info_path.read_text(encoding="utf-8")
-        book_info = json.loads(info_text)
-    except Exception as e:
-        exporter.logger.error("%s Failed to load %s: %s", TAG, info_path, e)
+    book_info = exporter._load_book_info(book_id)
+    if not book_info:
         return
 
     book_name = book_info.get("book_name", book_id)
     book_author = book_info.get("author", "")
     book_summary = book_info.get("summary", "")
+
     exporter.logger.info(
         "%s Starting EPUB generation: %s (ID: %s)", TAG, book_name, book_id
     )
 
-    main_css = StyleSheet(
-        id="main_style",
-        content=CSS_MAIN_PATH.read_text(encoding="utf-8"),
-        filename="main.css",
-    )
-
     # --- Compile columes ---
     volumes = book_info.get("volumes", [])
-
-
-        raw_vol_name = raw_vol_name.replace(book_name, "").strip()
-        vol_name = raw_vol_name or f"Volume {vol_index}"
+    if not volumes:
+        exporter.logger.warning("%s No volumes found in metadata.", TAG)
 
-
+    for vol_index, vol in enumerate(volumes, start=1):
+        raw_name = vol.get("volume_name", "")
+        raw_name = cleaner.clean_title(raw_name.replace(book_name, ""))
+        vol_name = raw_name or f"Volume {vol_index}"
+
+        # Batch-fetch chapters for this volume
+        chap_ids = [
+            chap.get("chapterId")
+            for chap in vol.get("chapters", [])
+            if chap.get("chapterId")
+        ]
+        chap_map = exporter._get_chapters(book_id, chap_ids)
+
+        vol_cover: Path | None = None
         vol_cover_url = vol.get("volume_cover", "")
         if config.include_cover and vol_cover_url:
-
+            vol_cover = download(
                 vol_cover_url,
                 img_dir,
                 headers=_IMG_HEADERS,
                 on_exist="skip",
+                default_suffix=DEFAULT_IMAGE_SUFFIX,
             )
 
-        book =
-
+        book, main_css = prepare_builder(
+            site_name=exporter.site,
+            book_id=book_id,
+            title=book_name,
             author=book_author,
             description=vol.get("volume_intro") or book_summary,
-            cover_path=vol_cover_path,
             subject=book_info.get("subject", []),
             serial_status=vol.get("serial_status", ""),
             word_count=vol.get("word_count", ""),
-
+            cover_path=vol_cover,
         )
-        book.add_stylesheet(main_css)
 
-        for
-            chap_id =
-            chap_title = chap.get("title", "")
+        for chap_meta in vol.get("chapters", []):
+            chap_id = chap_meta.get("chapterId")
            if not chap_id:
                 exporter.logger.warning(
                     "%s Missing chapterId, skipping: %s",
                     TAG,
-
+                    chap_meta,
                 )
                 continue
 
-
-
+            chap_title = cleaner.clean_title(chap_meta.get("title", ""))
+            data = chap_map.get(chap_id)
+            if not data:
                 exporter.logger.info(
-                    "%s Missing chapter
+                    "%s Missing chapter: %s (%s), skipping.",
                     TAG,
                     chap_title,
                     chap_id,
                 )
                 continue
 
-            title =
-            content
-            content
-
-
-
+            title = cleaner.clean_title(data.get("title", chap_title)) or chap_id
+            content = cleaner.clean_content(data.get("content", ""))
+            content = inline_remote_images(
+                book,
+                content,
+                img_dir,
+                headers=_IMG_HEADERS,
+            )
+            chap_html = build_epub_chapter(
+                title=title,
+                paragraphs=content,
                 extras={},
             )
             book.add_chapter(
                 Chapter(
                     id=f"c_{chap_id}",
+                    filename=f"c{chap_id}.xhtml",
                     title=title,
                     content=chap_html,
                     css=[main_css],
                 )
             )
-            for img_path in imgs:
-                book.add_image(img_path)
 
         out_name = exporter.get_filename(
             title=vol_name,
             author=book_info.get("author"),
             ext="epub",
         )
-
-
-
-
-        exporter.logger
-
-
+        finalize_export(
+            book=book,
+            out_dir=out_dir,
+            filename=out_name,
+            logger=exporter.logger,
+            tag=TAG,
+        )
     return
-
-
-def _inline_remote_images(
-    content: str,
-    image_dir: str | Path,
-) -> tuple[str, list[Path]]:
-    """
-    Download every remote `<img src="...">` in `content` into `image_dir`,
-    and replace the original tag with _IMAGE_WRAPPER
-    pointing to the local filename.
-
-    :param content: HTML/text of the chapter containing <img> tags.
-    :param image_dir: Directory to save downloaded images into.
-    :return: A tuple (modified_content, list_of_downloaded_image_paths).
-    """
-    downloaded_images: list[Path] = []
-
-    def _replace(match: re.Match[str]) -> str:
-        url = match.group(1)
-        try:
-            # download_image returns a Path or None
-            local_path = download_image(
-                url,
-                image_dir,
-                target_name=None,
-                headers=_IMG_HEADERS,
-                on_exist="skip",
-            )
-            if not local_path:
-                return match.group(0)
-
-            downloaded_images.append(local_path)
-            return _IMAGE_WRAPPER.format(filename=local_path.name)
-        except Exception:
-            return match.group(0)
-
-    modified_content = _IMG_TAG_PATTERN.sub(_replace, content)
-    return modified_content, downloaded_images
-
-
-def _txt_to_html(
-    chapter_title: str,
-    chapter_text: str,
-    extras: dict[str, str] | None = None,
-) -> str:
-    """
-    Convert chapter text and author note to styled HTML.
-
-    :param chapter_title: Title of the chapter.
-    :param chapter_text: Main content of the chapter.
-    :param extras: Optional dict of titles and content, e.g. {"作者说": "text"}.
-    :return: Rendered HTML as a string.
-    """
-
-    def _render_block(text: str) -> str:
-        lines = (line.strip() for line in text.splitlines() if line.strip())
-        out = []
-        for line in lines:
-            # preserve raw HTML, otherwise wrap in <p>
-            if _RAW_HTML_RE.match(line):
-                out.append(line)
-            else:
-                out.append(f"<p>{html.escape(line)}</p>")
-        return "\n".join(out)
-
-    parts = []
-    parts.append(f"<h2>{html.escape(chapter_title)}</h2>")
-    parts.append(_render_block(chapter_text))
-
-    if extras:
-        for title, note in extras.items():
-            note = note.strip()
-            if not note:
-                continue
-            parts.extend(
-                [
-                    "<hr />",
-                    f"<p>{html.escape(title)}</p>",
-                    _render_block(note),
-                ]
-            )
-
-    return "\n".join(parts)
novel_downloader/core/exporters/linovelib/main_exporter.py

@@ -5,16 +5,15 @@ novel_downloader.core.exporters.linovelib.main_exporter
 
 """
 
-from collections.abc import Mapping
-from typing import Any
 
 from novel_downloader.core.exporters.base import BaseExporter
+from novel_downloader.core.exporters.registry import register_exporter
 from novel_downloader.models import ExporterConfig
-from novel_downloader.utils.chapter_storage import ChapterStorage
 
 from .txt import linovelib_export_as_txt
 
 
+@register_exporter(site_keys=["linovelib"])
 class LinovelibExporter(BaseExporter):
     """"""
 

@@ -29,8 +28,6 @@ class LinovelibExporter(BaseExporter):
         save paths, formats, and options.
         """
         super().__init__(config, "linovelib")
-        self._chapter_storage_cache: dict[str, list[ChapterStorage]] = {}
-        self._chap_folders: list[str] = ["chapters"]
 
     def export_as_txt(self, book_id: str) -> None:
         """

@@ -71,57 +68,3 @@ class LinovelibExporter(BaseExporter):
                 f"Unsupported split_mode: {self._config.split_mode!r}"
             ) from err
         return export_fn(self, book_id)
-
-    @property
-    def site(self) -> str:
-        """
-        Get the site identifier.
-
-        :return: The site string.
-        """
-        return self._site
-
-    @site.setter
-    def site(self, value: str) -> None:
-        """
-        Set the site identifier.
-
-        :param value: New site string to set.
-        """
-        self._site = value
-
-    def _get_chapter(
-        self,
-        book_id: str,
-        chap_id: str,
-    ) -> Mapping[str, Any]:
-        for storage in self._chapter_storage_cache[book_id]:
-            data = storage.get(chap_id)
-            if data:
-                return data
-        return {}
-
-    def _init_chapter_storages(self, book_id: str) -> None:
-        if book_id in self._chapter_storage_cache:
-            return
-        raw_base = self._raw_data_dir / book_id
-        self._chapter_storage_cache[book_id] = [
-            ChapterStorage(
-                raw_base=raw_base,
-                namespace=ns,
-                backend_type=self._config.storage_backend,
-            )
-            for ns in self._chap_folders
-        ]
-
-    def _on_close(self) -> None:
-        """
-        Close all ChapterStorage connections in the cache.
-        """
-        for storages in self._chapter_storage_cache.values():
-            for storage in storages:
-                try:
-                    storage.close()
-                except Exception as e:
-                    self.logger.warning("Failed to close storage %s: %s", storage, e)
-        self._chapter_storage_cache.clear()