novel-downloader 1.4.5__py3-none-any.whl → 1.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- novel_downloader/__init__.py +1 -1
- novel_downloader/cli/__init__.py +2 -2
- novel_downloader/cli/config.py +1 -83
- novel_downloader/cli/download.py +4 -5
- novel_downloader/cli/export.py +4 -1
- novel_downloader/cli/main.py +2 -0
- novel_downloader/cli/search.py +123 -0
- novel_downloader/config/__init__.py +3 -10
- novel_downloader/config/adapter.py +190 -54
- novel_downloader/config/loader.py +2 -3
- novel_downloader/core/__init__.py +13 -13
- novel_downloader/core/downloaders/__init__.py +10 -11
- novel_downloader/core/downloaders/base.py +152 -26
- novel_downloader/core/downloaders/biquge.py +5 -1
- novel_downloader/core/downloaders/common.py +157 -378
- novel_downloader/core/downloaders/esjzone.py +5 -1
- novel_downloader/core/downloaders/linovelib.py +5 -1
- novel_downloader/core/downloaders/qianbi.py +291 -4
- novel_downloader/core/downloaders/qidian.py +199 -285
- novel_downloader/core/downloaders/registry.py +67 -0
- novel_downloader/core/downloaders/sfacg.py +5 -1
- novel_downloader/core/downloaders/yamibo.py +5 -1
- novel_downloader/core/exporters/__init__.py +10 -11
- novel_downloader/core/exporters/base.py +87 -7
- novel_downloader/core/exporters/biquge.py +5 -8
- novel_downloader/core/exporters/common/__init__.py +2 -2
- novel_downloader/core/exporters/common/epub.py +82 -166
- novel_downloader/core/exporters/common/main_exporter.py +0 -60
- novel_downloader/core/exporters/common/txt.py +82 -83
- novel_downloader/core/exporters/epub_util.py +157 -1330
- novel_downloader/core/exporters/esjzone.py +5 -8
- novel_downloader/core/exporters/linovelib/__init__.py +2 -2
- novel_downloader/core/exporters/linovelib/epub.py +157 -212
- novel_downloader/core/exporters/linovelib/main_exporter.py +2 -59
- novel_downloader/core/exporters/linovelib/txt.py +67 -63
- novel_downloader/core/exporters/qianbi.py +5 -8
- novel_downloader/core/exporters/qidian.py +14 -4
- novel_downloader/core/exporters/registry.py +53 -0
- novel_downloader/core/exporters/sfacg.py +5 -8
- novel_downloader/core/exporters/txt_util.py +67 -0
- novel_downloader/core/exporters/yamibo.py +5 -8
- novel_downloader/core/fetchers/__init__.py +19 -24
- novel_downloader/core/fetchers/base/__init__.py +3 -3
- novel_downloader/core/fetchers/base/browser.py +23 -4
- novel_downloader/core/fetchers/base/session.py +30 -5
- novel_downloader/core/fetchers/biquge/__init__.py +3 -3
- novel_downloader/core/fetchers/biquge/browser.py +5 -0
- novel_downloader/core/fetchers/biquge/session.py +6 -1
- novel_downloader/core/fetchers/esjzone/__init__.py +3 -3
- novel_downloader/core/fetchers/esjzone/browser.py +5 -0
- novel_downloader/core/fetchers/esjzone/session.py +6 -1
- novel_downloader/core/fetchers/linovelib/__init__.py +3 -3
- novel_downloader/core/fetchers/linovelib/browser.py +6 -1
- novel_downloader/core/fetchers/linovelib/session.py +6 -1
- novel_downloader/core/fetchers/qianbi/__init__.py +3 -3
- novel_downloader/core/fetchers/qianbi/browser.py +5 -0
- novel_downloader/core/fetchers/qianbi/session.py +5 -0
- novel_downloader/core/fetchers/qidian/__init__.py +3 -3
- novel_downloader/core/fetchers/qidian/browser.py +12 -4
- novel_downloader/core/fetchers/qidian/session.py +11 -3
- novel_downloader/core/fetchers/registry.py +71 -0
- novel_downloader/core/fetchers/sfacg/__init__.py +3 -3
- novel_downloader/core/fetchers/sfacg/browser.py +5 -0
- novel_downloader/core/fetchers/sfacg/session.py +5 -0
- novel_downloader/core/fetchers/yamibo/__init__.py +3 -3
- novel_downloader/core/fetchers/yamibo/browser.py +5 -0
- novel_downloader/core/fetchers/yamibo/session.py +6 -1
- novel_downloader/core/interfaces/__init__.py +7 -5
- novel_downloader/core/interfaces/searcher.py +18 -0
- novel_downloader/core/parsers/__init__.py +10 -11
- novel_downloader/core/parsers/{biquge/main_parser.py → biquge.py} +7 -2
- novel_downloader/core/parsers/{esjzone/main_parser.py → esjzone.py} +7 -2
- novel_downloader/core/parsers/{linovelib/main_parser.py → linovelib.py} +7 -2
- novel_downloader/core/parsers/{qianbi/main_parser.py → qianbi.py} +7 -2
- novel_downloader/core/parsers/qidian/__init__.py +2 -2
- novel_downloader/core/parsers/qidian/chapter_encrypted.py +23 -21
- novel_downloader/core/parsers/qidian/chapter_normal.py +1 -1
- novel_downloader/core/parsers/qidian/main_parser.py +10 -21
- novel_downloader/core/parsers/qidian/utils/__init__.py +11 -11
- novel_downloader/core/parsers/qidian/utils/decryptor_fetcher.py +5 -6
- novel_downloader/core/parsers/qidian/utils/node_decryptor.py +2 -2
- novel_downloader/core/parsers/registry.py +68 -0
- novel_downloader/core/parsers/{sfacg/main_parser.py → sfacg.py} +7 -2
- novel_downloader/core/parsers/{yamibo/main_parser.py → yamibo.py} +7 -2
- novel_downloader/core/searchers/__init__.py +20 -0
- novel_downloader/core/searchers/base.py +92 -0
- novel_downloader/core/searchers/biquge.py +83 -0
- novel_downloader/core/searchers/esjzone.py +84 -0
- novel_downloader/core/searchers/qianbi.py +131 -0
- novel_downloader/core/searchers/qidian.py +87 -0
- novel_downloader/core/searchers/registry.py +63 -0
- novel_downloader/locales/en.json +12 -4
- novel_downloader/locales/zh.json +12 -4
- novel_downloader/models/__init__.py +4 -30
- novel_downloader/models/config.py +12 -6
- novel_downloader/models/search.py +16 -0
- novel_downloader/models/types.py +0 -2
- novel_downloader/resources/config/settings.toml +31 -4
- novel_downloader/resources/css_styles/intro.css +83 -0
- novel_downloader/resources/css_styles/main.css +30 -89
- novel_downloader/utils/__init__.py +52 -0
- novel_downloader/utils/chapter_storage.py +244 -224
- novel_downloader/utils/constants.py +1 -21
- novel_downloader/utils/epub/__init__.py +34 -0
- novel_downloader/utils/epub/builder.py +377 -0
- novel_downloader/utils/epub/constants.py +77 -0
- novel_downloader/utils/epub/documents.py +403 -0
- novel_downloader/utils/epub/models.py +134 -0
- novel_downloader/utils/epub/utils.py +212 -0
- novel_downloader/utils/file_utils/__init__.py +10 -14
- novel_downloader/utils/file_utils/io.py +20 -51
- novel_downloader/utils/file_utils/normalize.py +2 -2
- novel_downloader/utils/file_utils/sanitize.py +2 -3
- novel_downloader/utils/fontocr/__init__.py +5 -5
- novel_downloader/utils/{hash_store.py → fontocr/hash_store.py} +4 -3
- novel_downloader/utils/{hash_utils.py → fontocr/hash_utils.py} +2 -2
- novel_downloader/utils/fontocr/ocr_v1.py +13 -1
- novel_downloader/utils/fontocr/ocr_v2.py +13 -1
- novel_downloader/utils/fontocr/ocr_v3.py +744 -0
- novel_downloader/utils/i18n.py +2 -0
- novel_downloader/utils/logger.py +2 -0
- novel_downloader/utils/network.py +110 -251
- novel_downloader/utils/state.py +1 -0
- novel_downloader/utils/text_utils/__init__.py +18 -17
- novel_downloader/utils/text_utils/diff_display.py +4 -5
- novel_downloader/utils/text_utils/numeric_conversion.py +253 -0
- novel_downloader/utils/text_utils/text_cleaner.py +179 -0
- novel_downloader/utils/text_utils/truncate_utils.py +62 -0
- novel_downloader/utils/time_utils/__init__.py +3 -3
- novel_downloader/utils/time_utils/datetime_utils.py +4 -5
- novel_downloader/utils/time_utils/sleep_utils.py +2 -3
- {novel_downloader-1.4.5.dist-info → novel_downloader-1.5.0.dist-info}/METADATA +2 -2
- novel_downloader-1.5.0.dist-info/RECORD +164 -0
- novel_downloader/config/site_rules.py +0 -94
- novel_downloader/core/factory/__init__.py +0 -20
- novel_downloader/core/factory/downloader.py +0 -73
- novel_downloader/core/factory/exporter.py +0 -58
- novel_downloader/core/factory/fetcher.py +0 -96
- novel_downloader/core/factory/parser.py +0 -86
- novel_downloader/core/fetchers/common/__init__.py +0 -14
- novel_downloader/core/fetchers/common/browser.py +0 -79
- novel_downloader/core/fetchers/common/session.py +0 -79
- novel_downloader/core/parsers/biquge/__init__.py +0 -10
- novel_downloader/core/parsers/common/__init__.py +0 -13
- novel_downloader/core/parsers/common/helper.py +0 -323
- novel_downloader/core/parsers/common/main_parser.py +0 -106
- novel_downloader/core/parsers/esjzone/__init__.py +0 -10
- novel_downloader/core/parsers/linovelib/__init__.py +0 -10
- novel_downloader/core/parsers/qianbi/__init__.py +0 -10
- novel_downloader/core/parsers/sfacg/__init__.py +0 -10
- novel_downloader/core/parsers/yamibo/__init__.py +0 -10
- novel_downloader/models/browser.py +0 -21
- novel_downloader/models/site_rules.py +0 -99
- novel_downloader/models/tasks.py +0 -33
- novel_downloader/resources/css_styles/volume-intro.css +0 -56
- novel_downloader/resources/json/replace_word_map.json +0 -4
- novel_downloader/resources/text/blacklist.txt +0 -22
- novel_downloader/utils/text_utils/chapter_formatting.py +0 -46
- novel_downloader/utils/text_utils/font_mapping.py +0 -28
- novel_downloader/utils/text_utils/text_cleaning.py +0 -107
- novel_downloader-1.4.5.dist-info/RECORD +0 -165
- {novel_downloader-1.4.5.dist-info → novel_downloader-1.5.0.dist-info}/WHEEL +0 -0
- {novel_downloader-1.4.5.dist-info → novel_downloader-1.5.0.dist-info}/entry_points.txt +0 -0
- {novel_downloader-1.4.5.dist-info → novel_downloader-1.5.0.dist-info}/licenses/LICENSE +0 -0
- {novel_downloader-1.4.5.dist-info → novel_downloader-1.5.0.dist-info}/top_level.txt +0 -0
@@ -13,19 +13,10 @@ Currently supported platforms:
|
|
13
13
|
- qidian (起点中文网)
|
14
14
|
- sfacg (SF轻小说)
|
15
15
|
- yamibo (百合会)
|
16
|
-
- common (通用架构)
|
17
16
|
"""
|
18
17
|
|
19
|
-
from .biquge import BiqugeExporter
|
20
|
-
from .common import CommonExporter
|
21
|
-
from .esjzone import EsjzoneExporter
|
22
|
-
from .linovelib import LinovelibExporter
|
23
|
-
from .qianbi import QianbiExporter
|
24
|
-
from .qidian import QidianExporter
|
25
|
-
from .sfacg import SfacgExporter
|
26
|
-
from .yamibo import YamiboExporter
|
27
|
-
|
28
18
|
__all__ = [
|
19
|
+
"get_exporter",
|
29
20
|
"BiqugeExporter",
|
30
21
|
"EsjzoneExporter",
|
31
22
|
"LinovelibExporter",
|
@@ -33,5 +24,13 @@ __all__ = [
|
|
33
24
|
"QidianExporter",
|
34
25
|
"SfacgExporter",
|
35
26
|
"YamiboExporter",
|
36
|
-
"CommonExporter",
|
37
27
|
]
|
28
|
+
|
29
|
+
from .biquge import BiqugeExporter
|
30
|
+
from .esjzone import EsjzoneExporter
|
31
|
+
from .linovelib import LinovelibExporter
|
32
|
+
from .qianbi import QianbiExporter
|
33
|
+
from .qidian import QidianExporter
|
34
|
+
from .registry import get_exporter
|
35
|
+
from .sfacg import SfacgExporter
|
36
|
+
from .yamibo import YamiboExporter
|
@@ -9,6 +9,7 @@ content into various output formats.
|
|
9
9
|
"""
|
10
10
|
|
11
11
|
import abc
|
12
|
+
import json
|
12
13
|
import logging
|
13
14
|
import types
|
14
15
|
from datetime import datetime
|
@@ -16,7 +17,8 @@ from pathlib import Path
|
|
16
17
|
from typing import Any, Self
|
17
18
|
|
18
19
|
from novel_downloader.core.interfaces import ExporterProtocol
|
19
|
-
from novel_downloader.models import ExporterConfig
|
20
|
+
from novel_downloader.models import ChapterDict, ExporterConfig
|
21
|
+
from novel_downloader.utils import ChapterStorage
|
20
22
|
|
21
23
|
|
22
24
|
class SafeDict(dict[str, Any]):
|
@@ -31,24 +33,33 @@ class BaseExporter(ExporterProtocol, abc.ABC):
|
|
31
33
|
such as TXT, EPUB, Markdown, or PDF.
|
32
34
|
"""
|
33
35
|
|
36
|
+
DEFAULT_SOURCE_ID = 0
|
37
|
+
DEFAULT_PRIORITIES_MAP = {
|
38
|
+
DEFAULT_SOURCE_ID: 0,
|
39
|
+
}
|
40
|
+
|
34
41
|
def __init__(
|
35
42
|
self,
|
36
43
|
config: ExporterConfig,
|
37
44
|
site: str,
|
45
|
+
priorities: dict[int, int] | None = None,
|
38
46
|
):
|
39
47
|
"""
|
40
48
|
Initialize the exporter with given configuration.
|
41
49
|
|
42
|
-
:param config:
|
43
|
-
|
50
|
+
:param config: Exporter configuration settings.
|
51
|
+
:param site: Identifier for the target website or source.
|
52
|
+
:param priorities: Mapping of source_id to priority value.
|
53
|
+
Lower numbers indicate higher priority.
|
54
|
+
E.X. {0: 10, 1: 100} means source 0 is preferred.
|
44
55
|
"""
|
45
56
|
self._config = config
|
46
57
|
self._site = site
|
58
|
+
self._priorities = priorities or self.DEFAULT_PRIORITIES_MAP
|
59
|
+
self._storage_cache: dict[str, ChapterStorage] = {}
|
47
60
|
|
48
|
-
self._cache_dir = Path(config.cache_dir) / site
|
49
61
|
self._raw_data_dir = Path(config.raw_data_dir) / site
|
50
62
|
self._output_dir = Path(config.output_dir)
|
51
|
-
self._cache_dir.mkdir(parents=True, exist_ok=True)
|
52
63
|
self._output_dir.mkdir(parents=True, exist_ok=True)
|
53
64
|
|
54
65
|
self.logger = logging.getLogger(f"{self.__class__.__name__}")
|
@@ -160,16 +171,84 @@ class BaseExporter(ExporterProtocol, abc.ABC):
|
|
160
171
|
|
161
172
|
return f"{name}.{ext}"
|
162
173
|
|
174
|
+
@property
|
175
|
+
def site(self) -> str:
|
176
|
+
"""
|
177
|
+
Get the site identifier.
|
178
|
+
|
179
|
+
:return: The site string.
|
180
|
+
"""
|
181
|
+
return self._site
|
182
|
+
|
163
183
|
@property
|
164
184
|
def output_dir(self) -> Path:
|
165
|
-
"""
|
185
|
+
"""
|
186
|
+
Access the output directory for saving files.
|
187
|
+
"""
|
166
188
|
return self._output_dir
|
167
189
|
|
168
190
|
@property
|
169
191
|
def filename_template(self) -> str:
|
170
|
-
"""
|
192
|
+
"""
|
193
|
+
Access the filename template.
|
194
|
+
"""
|
171
195
|
return self._config.filename_template
|
172
196
|
|
197
|
+
def _get_chapter(
|
198
|
+
self,
|
199
|
+
book_id: str,
|
200
|
+
chap_id: str,
|
201
|
+
) -> ChapterDict | None:
|
202
|
+
if book_id not in self._storage_cache:
|
203
|
+
return None
|
204
|
+
return self._storage_cache[book_id].get_best_chapter(chap_id)
|
205
|
+
|
206
|
+
def _get_chapters(
|
207
|
+
self,
|
208
|
+
book_id: str,
|
209
|
+
chap_ids: list[str],
|
210
|
+
) -> dict[str, ChapterDict | None]:
|
211
|
+
if book_id not in self._storage_cache:
|
212
|
+
return {}
|
213
|
+
return self._storage_cache[book_id].get_best_chapters(chap_ids)
|
214
|
+
|
215
|
+
def _load_book_info(self, book_id: str) -> dict[str, Any]:
|
216
|
+
info_path = self._raw_data_dir / book_id / "book_info.json"
|
217
|
+
if not info_path.is_file():
|
218
|
+
self.logger.error("Missing metadata file: %s", info_path)
|
219
|
+
return {}
|
220
|
+
|
221
|
+
try:
|
222
|
+
text = info_path.read_text(encoding="utf-8")
|
223
|
+
data: Any = json.loads(text)
|
224
|
+
if not isinstance(data, dict):
|
225
|
+
self.logger.error(
|
226
|
+
"Invalid JSON structure in %s: expected an object at the top",
|
227
|
+
info_path,
|
228
|
+
)
|
229
|
+
return {}
|
230
|
+
return data
|
231
|
+
except json.JSONDecodeError as e:
|
232
|
+
self.logger.error("Corrupt JSON in %s: %s", info_path, e)
|
233
|
+
return {}
|
234
|
+
|
235
|
+
def _init_chapter_storages(self, book_id: str) -> None:
|
236
|
+
if book_id in self._storage_cache:
|
237
|
+
return
|
238
|
+
self._storage_cache[book_id] = ChapterStorage(
|
239
|
+
raw_base=self._raw_data_dir / book_id,
|
240
|
+
priorities=self._priorities,
|
241
|
+
)
|
242
|
+
self._storage_cache[book_id].connect()
|
243
|
+
|
244
|
+
def _close_chapter_storages(self) -> None:
|
245
|
+
for storage in self._storage_cache.values():
|
246
|
+
try:
|
247
|
+
storage.close()
|
248
|
+
except Exception as e:
|
249
|
+
self.logger.warning("Failed to close storage %s: %s", storage, e)
|
250
|
+
self._storage_cache.clear()
|
251
|
+
|
173
252
|
def _on_close(self) -> None:
|
174
253
|
"""
|
175
254
|
Hook method called at the beginning of close().
|
@@ -182,6 +261,7 @@ class BaseExporter(ExporterProtocol, abc.ABC):
|
|
182
261
|
Shutdown and clean up the exporter.
|
183
262
|
"""
|
184
263
|
self._on_close()
|
264
|
+
self._close_chapter_storages()
|
185
265
|
|
186
266
|
def __enter__(self) -> Self:
|
187
267
|
return self
|
@@ -5,21 +5,18 @@ novel_downloader.core.exporters.biquge
|
|
5
5
|
|
6
6
|
"""
|
7
7
|
|
8
|
+
__all__ = ["BiqugeExporter"]
|
9
|
+
|
10
|
+
from novel_downloader.core.exporters.registry import register_exporter
|
8
11
|
from novel_downloader.models import ExporterConfig
|
9
12
|
|
10
13
|
from .common import CommonExporter
|
11
14
|
|
12
15
|
|
16
|
+
@register_exporter(site_keys=["biquge", "bqg"])
|
13
17
|
class BiqugeExporter(CommonExporter):
|
14
18
|
def __init__(
|
15
19
|
self,
|
16
20
|
config: ExporterConfig,
|
17
21
|
):
|
18
|
-
super().__init__(
|
19
|
-
config,
|
20
|
-
site="biquge",
|
21
|
-
chap_folders=["chapters"],
|
22
|
-
)
|
23
|
-
|
24
|
-
|
25
|
-
__all__ = ["BiqugeExporter"]
|
22
|
+
super().__init__(config, site="biquge")
|
@@ -8,36 +8,29 @@ Contains the logic for exporting novel content as a single `.epub` file.
|
|
8
8
|
|
9
9
|
from __future__ import annotations
|
10
10
|
|
11
|
-
import html
|
12
|
-
import json
|
13
|
-
import re
|
14
11
|
from pathlib import Path
|
15
12
|
from typing import TYPE_CHECKING
|
16
13
|
|
17
14
|
from novel_downloader.core.exporters.epub_util import (
|
18
|
-
|
15
|
+
build_epub_chapter,
|
16
|
+
download_cover,
|
17
|
+
finalize_export,
|
18
|
+
inline_remote_images,
|
19
|
+
prepare_builder,
|
20
|
+
)
|
21
|
+
from novel_downloader.utils import (
|
22
|
+
download,
|
23
|
+
get_cleaner,
|
24
|
+
)
|
25
|
+
from novel_downloader.utils.constants import DEFAULT_IMAGE_SUFFIX
|
26
|
+
from novel_downloader.utils.epub import (
|
19
27
|
Chapter,
|
20
|
-
StyleSheet,
|
21
28
|
Volume,
|
22
29
|
)
|
23
|
-
from novel_downloader.utils.constants import CSS_MAIN_PATH
|
24
|
-
from novel_downloader.utils.file_utils import sanitize_filename
|
25
|
-
from novel_downloader.utils.network import download_image
|
26
|
-
from novel_downloader.utils.text_utils import clean_chapter_title
|
27
30
|
|
28
31
|
if TYPE_CHECKING:
|
29
32
|
from .main_exporter import CommonExporter
|
30
33
|
|
31
|
-
_IMAGE_WRAPPER = (
|
32
|
-
'<div class="duokan-image-single illus"><img src="../Images/{filename}" /></div>'
|
33
|
-
)
|
34
|
-
_IMG_TAG_PATTERN = re.compile(
|
35
|
-
r'<img\s+[^>]*src=[\'"]([^\'"]+)[\'"][^>]*>', re.IGNORECASE
|
36
|
-
)
|
37
|
-
_RAW_HTML_RE = re.compile(
|
38
|
-
r'^(<img\b[^>]*?\/>|<div class="duokan-image-single illus">.*?<\/div>)$', re.DOTALL
|
39
|
-
)
|
40
|
-
|
41
34
|
|
42
35
|
def common_export_as_epub(
|
43
36
|
exporter: CommonExporter,
|
@@ -50,132 +43,137 @@ def common_export_as_epub(
|
|
50
43
|
1. Load `book_info.json` for metadata.
|
51
44
|
2. Generate introductory HTML and optionally include the cover image.
|
52
45
|
3. Initialize the EPUB container.
|
53
|
-
4. Iterate through volumes and chapters, convert each to XHTML.
|
46
|
+
4. Iterate through volumes and chapters in volume-batches, convert each to XHTML.
|
54
47
|
5. Assemble the spine, TOC, CSS and write out the final `.epub`.
|
55
48
|
|
56
|
-
:param
|
49
|
+
:param exporter: The exporter instance, carrying config and path info.
|
57
50
|
:param book_id: Identifier of the novel (used as subdirectory name).
|
58
51
|
"""
|
59
52
|
TAG = "[exporter]"
|
60
53
|
config = exporter._config
|
61
|
-
|
54
|
+
|
62
55
|
raw_base = exporter._raw_data_dir / book_id
|
63
|
-
img_dir =
|
56
|
+
img_dir = raw_base / "images"
|
64
57
|
out_dir = exporter.output_dir
|
58
|
+
|
65
59
|
img_dir.mkdir(parents=True, exist_ok=True)
|
66
60
|
out_dir.mkdir(parents=True, exist_ok=True)
|
67
61
|
|
62
|
+
cleaner = get_cleaner(
|
63
|
+
enabled=config.clean_text,
|
64
|
+
config=config.cleaner_cfg,
|
65
|
+
)
|
66
|
+
|
68
67
|
# --- Load book_info.json ---
|
69
|
-
|
70
|
-
|
71
|
-
info_text = info_path.read_text(encoding="utf-8")
|
72
|
-
book_info = json.loads(info_text)
|
73
|
-
except Exception as e:
|
74
|
-
exporter.logger.error("%s Failed to load %s: %s", TAG, info_path, e)
|
68
|
+
book_info = exporter._load_book_info(book_id)
|
69
|
+
if not book_info:
|
75
70
|
return
|
76
71
|
|
77
72
|
book_name = book_info.get("book_name", book_id)
|
78
73
|
book_author = book_info.get("author", "")
|
74
|
+
|
79
75
|
exporter.logger.info(
|
80
76
|
"%s Starting EPUB generation: %s (ID: %s)", TAG, book_name, book_id
|
81
77
|
)
|
82
78
|
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
target_name="cover",
|
91
|
-
on_exist="overwrite",
|
92
|
-
)
|
93
|
-
if not cover_path:
|
94
|
-
exporter.logger.warning("Failed to download cover from %s", cover_url)
|
79
|
+
cover_path = download_cover(
|
80
|
+
book_info.get("cover_url", ""),
|
81
|
+
raw_base,
|
82
|
+
config.include_cover,
|
83
|
+
exporter.logger,
|
84
|
+
TAG,
|
85
|
+
)
|
95
86
|
|
96
87
|
# --- Initialize EPUB ---
|
97
|
-
book =
|
88
|
+
book, main_css = prepare_builder(
|
89
|
+
site_name=exporter.site,
|
90
|
+
book_id=book_id,
|
98
91
|
title=book_name,
|
99
92
|
author=book_author,
|
100
93
|
description=book_info.get("summary", ""),
|
101
|
-
cover_path=cover_path,
|
102
94
|
subject=book_info.get("subject", []),
|
103
95
|
serial_status=book_info.get("serial_status", ""),
|
104
96
|
word_count=book_info.get("word_count", ""),
|
105
|
-
|
106
|
-
)
|
107
|
-
main_css = StyleSheet(
|
108
|
-
id="main_style",
|
109
|
-
content=CSS_MAIN_PATH.read_text(encoding="utf-8"),
|
110
|
-
filename="main.css",
|
97
|
+
cover_path=cover_path,
|
111
98
|
)
|
112
|
-
book.add_stylesheet(main_css)
|
113
99
|
|
114
100
|
# --- Compile chapters ---
|
115
101
|
volumes = book_info.get("volumes", [])
|
116
|
-
|
117
|
-
|
118
|
-
raw_vol_name = raw_vol_name.replace(book_name, "").strip()
|
119
|
-
vol_name = raw_vol_name or f"Volume {vol_index}"
|
120
|
-
exporter.logger.info("Processing volume %d: %s", vol_index, vol_name)
|
102
|
+
if not volumes:
|
103
|
+
exporter.logger.warning("%s No volumes found in metadata.", TAG)
|
121
104
|
|
122
|
-
|
105
|
+
for vol_index, vol in enumerate(volumes, start=1):
|
106
|
+
raw_name = vol.get("volume_name", "")
|
107
|
+
raw_name = cleaner.clean_title(raw_name.replace(book_name, ""))
|
108
|
+
vol_name = raw_name or f"Volume {vol_index}"
|
109
|
+
exporter.logger.info("%s Processing volume %d: %s", TAG, vol_index, vol_name)
|
110
|
+
|
111
|
+
# Batch-fetch chapters for this volume
|
112
|
+
chap_ids = [
|
113
|
+
chap.get("chapterId")
|
114
|
+
for chap in vol.get("chapters", [])
|
115
|
+
if chap.get("chapterId")
|
116
|
+
]
|
117
|
+
chap_map = exporter._get_chapters(book_id, chap_ids)
|
118
|
+
|
119
|
+
vol_cover: Path | None = None
|
123
120
|
vol_cover_url = vol.get("volume_cover", "")
|
124
121
|
if vol_cover_url:
|
125
|
-
|
122
|
+
vol_cover = download(
|
126
123
|
vol_cover_url,
|
127
124
|
img_dir,
|
128
125
|
on_exist="skip",
|
126
|
+
default_suffix=DEFAULT_IMAGE_SUFFIX,
|
129
127
|
)
|
130
128
|
|
131
129
|
curr_vol = Volume(
|
132
130
|
id=f"vol_{vol_index}",
|
133
131
|
title=vol_name,
|
134
|
-
intro=vol.get("volume_intro", ""),
|
135
|
-
cover=
|
132
|
+
intro=cleaner.clean_content(vol.get("volume_intro", "")),
|
133
|
+
cover=vol_cover,
|
136
134
|
)
|
137
135
|
|
138
|
-
for
|
139
|
-
chap_id =
|
140
|
-
chap_title = chap.get("title", "")
|
136
|
+
for chap_meta in vol.get("chapters", []):
|
137
|
+
chap_id = chap_meta.get("chapterId")
|
141
138
|
if not chap_id:
|
142
139
|
exporter.logger.warning(
|
143
140
|
"%s Missing chapterId, skipping: %s",
|
144
141
|
TAG,
|
145
|
-
|
142
|
+
chap_meta,
|
146
143
|
)
|
147
144
|
continue
|
148
145
|
|
149
|
-
|
150
|
-
|
146
|
+
chap_title = cleaner.clean_title(chap_meta.get("title", ""))
|
147
|
+
data = chap_map.get(chap_id)
|
148
|
+
if not data:
|
151
149
|
exporter.logger.info(
|
152
|
-
"%s Missing chapter
|
150
|
+
"%s Missing chapter: %s (%s), skipping.",
|
153
151
|
TAG,
|
154
152
|
chap_title,
|
155
153
|
chap_id,
|
156
154
|
)
|
157
155
|
continue
|
158
156
|
|
159
|
-
title =
|
160
|
-
content
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
157
|
+
title = cleaner.clean_title(data.get("title", chap_title)) or chap_id
|
158
|
+
content = cleaner.clean_content(data.get("content", ""))
|
159
|
+
extra = data.get("extra", {})
|
160
|
+
author_note = cleaner.clean_content(extra.get("author_say", ""))
|
161
|
+
content = inline_remote_images(book, content, img_dir)
|
162
|
+
|
163
|
+
chap_html = build_epub_chapter(
|
164
|
+
title=title,
|
165
|
+
paragraphs=content,
|
166
|
+
extras={"作者说": author_note},
|
168
167
|
)
|
169
168
|
curr_vol.add_chapter(
|
170
169
|
Chapter(
|
171
170
|
id=f"c_{chap_id}",
|
171
|
+
filename=f"c{chap_id}.xhtml",
|
172
172
|
title=title,
|
173
173
|
content=chap_html,
|
174
174
|
css=[main_css],
|
175
175
|
)
|
176
176
|
)
|
177
|
-
for img_path in img_paths:
|
178
|
-
book.add_image(img_path)
|
179
177
|
|
180
178
|
book.add_volume(curr_vol)
|
181
179
|
|
@@ -185,93 +183,11 @@ def common_export_as_epub(
|
|
185
183
|
author=book_info.get("author"),
|
186
184
|
ext="epub",
|
187
185
|
)
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
exporter.logger
|
193
|
-
|
194
|
-
|
186
|
+
finalize_export(
|
187
|
+
book=book,
|
188
|
+
out_dir=out_dir,
|
189
|
+
filename=out_name,
|
190
|
+
logger=exporter.logger,
|
191
|
+
tag=TAG,
|
192
|
+
)
|
195
193
|
return
|
196
|
-
|
197
|
-
|
198
|
-
def _inline_remote_images(
|
199
|
-
content: str,
|
200
|
-
image_dir: str | Path,
|
201
|
-
) -> tuple[str, list[Path]]:
|
202
|
-
"""
|
203
|
-
Download every remote `<img src="...">` in `content` into `image_dir`,
|
204
|
-
and replace the original tag with _IMAGE_WRAPPER
|
205
|
-
pointing to the local filename.
|
206
|
-
|
207
|
-
:param content: HTML/text of the chapter containing <img> tags.
|
208
|
-
:param image_dir: Directory to save downloaded images into.
|
209
|
-
:return: A tuple (modified_content, list_of_downloaded_image_paths).
|
210
|
-
"""
|
211
|
-
downloaded_images: list[Path] = []
|
212
|
-
|
213
|
-
def _replace(match: re.Match[str]) -> str:
|
214
|
-
url = match.group(1)
|
215
|
-
try:
|
216
|
-
# download_image returns a Path or None
|
217
|
-
local_path = download_image(
|
218
|
-
url,
|
219
|
-
image_dir,
|
220
|
-
target_name=None,
|
221
|
-
on_exist="skip",
|
222
|
-
)
|
223
|
-
if not local_path:
|
224
|
-
return match.group(0)
|
225
|
-
|
226
|
-
downloaded_images.append(local_path)
|
227
|
-
return _IMAGE_WRAPPER.format(filename=local_path.name)
|
228
|
-
except Exception:
|
229
|
-
return match.group(0)
|
230
|
-
|
231
|
-
modified_content = _IMG_TAG_PATTERN.sub(_replace, content)
|
232
|
-
return modified_content, downloaded_images
|
233
|
-
|
234
|
-
|
235
|
-
def _txt_to_html(
|
236
|
-
chapter_title: str,
|
237
|
-
chapter_text: str,
|
238
|
-
extras: dict[str, str] | None = None,
|
239
|
-
) -> str:
|
240
|
-
"""
|
241
|
-
Convert chapter text and author note to styled HTML.
|
242
|
-
|
243
|
-
:param chapter_title: Title of the chapter.
|
244
|
-
:param chapter_text: Main content of the chapter.
|
245
|
-
:param extras: Optional dict of titles and content, e.g. {"作者说": "text"}.
|
246
|
-
:return: Rendered HTML as a string.
|
247
|
-
"""
|
248
|
-
|
249
|
-
def _render_block(text: str) -> str:
|
250
|
-
lines = (line.strip() for line in text.splitlines() if line.strip())
|
251
|
-
out = []
|
252
|
-
for line in lines:
|
253
|
-
# preserve raw HTML, otherwise wrap in <p>
|
254
|
-
if _RAW_HTML_RE.match(line):
|
255
|
-
out.append(line)
|
256
|
-
else:
|
257
|
-
out.append(f"<p>{html.escape(line)}</p>")
|
258
|
-
return "\n".join(out)
|
259
|
-
|
260
|
-
parts = []
|
261
|
-
parts.append(f"<h2>{html.escape(chapter_title)}</h2>")
|
262
|
-
parts.append(_render_block(chapter_text))
|
263
|
-
|
264
|
-
if extras:
|
265
|
-
for title, note in extras.items():
|
266
|
-
note = note.strip()
|
267
|
-
if not note:
|
268
|
-
continue
|
269
|
-
parts.extend(
|
270
|
-
[
|
271
|
-
"<hr />",
|
272
|
-
f"<p>{html.escape(title)}</p>",
|
273
|
-
_render_block(note),
|
274
|
-
]
|
275
|
-
)
|
276
|
-
|
277
|
-
return "\n".join(parts)
|
@@ -8,12 +8,7 @@ novel data. It defines the logic to compile, structure, and export novel content
|
|
8
8
|
in plain text format based on the platform's metadata and chapter files.
|
9
9
|
"""
|
10
10
|
|
11
|
-
from collections.abc import Mapping
|
12
|
-
from typing import Any
|
13
|
-
|
14
11
|
from novel_downloader.core.exporters.base import BaseExporter
|
15
|
-
from novel_downloader.models import ExporterConfig
|
16
|
-
from novel_downloader.utils.chapter_storage import ChapterStorage
|
17
12
|
|
18
13
|
from .txt import common_export_as_txt
|
19
14
|
|
@@ -26,16 +21,6 @@ class CommonExporter(BaseExporter):
|
|
26
21
|
and EPUB (.epub) files.
|
27
22
|
"""
|
28
23
|
|
29
|
-
def __init__(
|
30
|
-
self,
|
31
|
-
config: ExporterConfig,
|
32
|
-
site: str,
|
33
|
-
chap_folders: list[str] | None = None,
|
34
|
-
):
|
35
|
-
super().__init__(config, site)
|
36
|
-
self._chapter_storage_cache: dict[str, list[ChapterStorage]] = {}
|
37
|
-
self._chap_folders: list[str] = chap_folders or ["chapters"]
|
38
|
-
|
39
24
|
def export_as_txt(self, book_id: str) -> None:
|
40
25
|
"""
|
41
26
|
Compile and export a complete novel as a single .txt file.
|
@@ -70,48 +55,3 @@ class CommonExporter(BaseExporter):
|
|
70
55
|
|
71
56
|
self._init_chapter_storages(book_id)
|
72
57
|
return common_export_as_epub(self, book_id)
|
73
|
-
|
74
|
-
@property
|
75
|
-
def site(self) -> str:
|
76
|
-
"""
|
77
|
-
Get the site identifier.
|
78
|
-
|
79
|
-
:return: The site string.
|
80
|
-
"""
|
81
|
-
return self._site
|
82
|
-
|
83
|
-
def _get_chapter(
|
84
|
-
self,
|
85
|
-
book_id: str,
|
86
|
-
chap_id: str,
|
87
|
-
) -> Mapping[str, Any]:
|
88
|
-
for storage in self._chapter_storage_cache[book_id]:
|
89
|
-
data = storage.get(chap_id)
|
90
|
-
if data:
|
91
|
-
return data
|
92
|
-
return {}
|
93
|
-
|
94
|
-
def _init_chapter_storages(self, book_id: str) -> None:
|
95
|
-
if book_id in self._chapter_storage_cache:
|
96
|
-
return
|
97
|
-
raw_base = self._raw_data_dir / book_id
|
98
|
-
self._chapter_storage_cache[book_id] = [
|
99
|
-
ChapterStorage(
|
100
|
-
raw_base=raw_base,
|
101
|
-
namespace=ns,
|
102
|
-
backend_type=self._config.storage_backend,
|
103
|
-
)
|
104
|
-
for ns in self._chap_folders
|
105
|
-
]
|
106
|
-
|
107
|
-
def _on_close(self) -> None:
|
108
|
-
"""
|
109
|
-
Close all ChapterStorage connections in the cache.
|
110
|
-
"""
|
111
|
-
for storages in self._chapter_storage_cache.values():
|
112
|
-
for storage in storages:
|
113
|
-
try:
|
114
|
-
storage.close()
|
115
|
-
except Exception as e:
|
116
|
-
self.logger.warning("Failed to close storage %s: %s", storage, e)
|
117
|
-
self._chapter_storage_cache.clear()
|