novel-downloader 1.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- novel_downloader/__init__.py +14 -0
- novel_downloader/cli/__init__.py +14 -0
- novel_downloader/cli/clean.py +134 -0
- novel_downloader/cli/download.py +98 -0
- novel_downloader/cli/interactive.py +67 -0
- novel_downloader/cli/main.py +45 -0
- novel_downloader/cli/settings.py +177 -0
- novel_downloader/config/__init__.py +52 -0
- novel_downloader/config/adapter.py +150 -0
- novel_downloader/config/loader.py +177 -0
- novel_downloader/config/models.py +170 -0
- novel_downloader/config/site_rules.py +97 -0
- novel_downloader/core/__init__.py +25 -0
- novel_downloader/core/downloaders/__init__.py +20 -0
- novel_downloader/core/downloaders/base_downloader.py +187 -0
- novel_downloader/core/downloaders/common_downloader.py +192 -0
- novel_downloader/core/downloaders/qidian_downloader.py +208 -0
- novel_downloader/core/factory/__init__.py +21 -0
- novel_downloader/core/factory/downloader_factory.py +62 -0
- novel_downloader/core/factory/parser_factory.py +62 -0
- novel_downloader/core/factory/requester_factory.py +62 -0
- novel_downloader/core/factory/saver_factory.py +49 -0
- novel_downloader/core/interfaces/__init__.py +28 -0
- novel_downloader/core/interfaces/downloader_protocol.py +37 -0
- novel_downloader/core/interfaces/parser_protocol.py +40 -0
- novel_downloader/core/interfaces/requester_protocol.py +65 -0
- novel_downloader/core/interfaces/saver_protocol.py +61 -0
- novel_downloader/core/parsers/__init__.py +28 -0
- novel_downloader/core/parsers/base_parser.py +96 -0
- novel_downloader/core/parsers/common_parser/__init__.py +14 -0
- novel_downloader/core/parsers/common_parser/helper.py +321 -0
- novel_downloader/core/parsers/common_parser/main_parser.py +86 -0
- novel_downloader/core/parsers/qidian_parser/__init__.py +20 -0
- novel_downloader/core/parsers/qidian_parser/browser/__init__.py +13 -0
- novel_downloader/core/parsers/qidian_parser/browser/chapter_encrypted.py +498 -0
- novel_downloader/core/parsers/qidian_parser/browser/chapter_normal.py +97 -0
- novel_downloader/core/parsers/qidian_parser/browser/chapter_router.py +70 -0
- novel_downloader/core/parsers/qidian_parser/browser/main_parser.py +110 -0
- novel_downloader/core/parsers/qidian_parser/session/__init__.py +13 -0
- novel_downloader/core/parsers/qidian_parser/session/chapter_encrypted.py +451 -0
- novel_downloader/core/parsers/qidian_parser/session/chapter_normal.py +119 -0
- novel_downloader/core/parsers/qidian_parser/session/chapter_router.py +67 -0
- novel_downloader/core/parsers/qidian_parser/session/main_parser.py +113 -0
- novel_downloader/core/parsers/qidian_parser/session/node_decryptor.py +164 -0
- novel_downloader/core/parsers/qidian_parser/shared/__init__.py +38 -0
- novel_downloader/core/parsers/qidian_parser/shared/book_info_parser.py +95 -0
- novel_downloader/core/parsers/qidian_parser/shared/helpers.py +133 -0
- novel_downloader/core/requesters/__init__.py +27 -0
- novel_downloader/core/requesters/base_browser.py +210 -0
- novel_downloader/core/requesters/base_session.py +243 -0
- novel_downloader/core/requesters/common_requester/__init__.py +14 -0
- novel_downloader/core/requesters/common_requester/common_session.py +126 -0
- novel_downloader/core/requesters/qidian_requester/__init__.py +22 -0
- novel_downloader/core/requesters/qidian_requester/qidian_broswer.py +377 -0
- novel_downloader/core/requesters/qidian_requester/qidian_session.py +202 -0
- novel_downloader/core/savers/__init__.py +20 -0
- novel_downloader/core/savers/base_saver.py +169 -0
- novel_downloader/core/savers/common_saver/__init__.py +13 -0
- novel_downloader/core/savers/common_saver/common_epub.py +232 -0
- novel_downloader/core/savers/common_saver/common_txt.py +176 -0
- novel_downloader/core/savers/common_saver/main_saver.py +86 -0
- novel_downloader/core/savers/epub_utils/__init__.py +27 -0
- novel_downloader/core/savers/epub_utils/css_builder.py +68 -0
- novel_downloader/core/savers/epub_utils/initializer.py +98 -0
- novel_downloader/core/savers/epub_utils/text_to_html.py +132 -0
- novel_downloader/core/savers/epub_utils/volume_intro.py +61 -0
- novel_downloader/core/savers/qidian_saver.py +22 -0
- novel_downloader/locales/en.json +91 -0
- novel_downloader/locales/zh.json +91 -0
- novel_downloader/resources/config/rules.toml +196 -0
- novel_downloader/resources/config/settings.yaml +70 -0
- novel_downloader/resources/css_styles/main.css +104 -0
- novel_downloader/resources/css_styles/volume-intro.css +56 -0
- novel_downloader/resources/images/volume_border.png +0 -0
- novel_downloader/resources/js_scripts/qidian_decrypt_node.js +82 -0
- novel_downloader/resources/json/replace_word_map.json +4 -0
- novel_downloader/resources/text/blacklist.txt +22 -0
- novel_downloader/utils/__init__.py +0 -0
- novel_downloader/utils/cache.py +24 -0
- novel_downloader/utils/constants.py +158 -0
- novel_downloader/utils/crypto_utils.py +144 -0
- novel_downloader/utils/file_utils/__init__.py +43 -0
- novel_downloader/utils/file_utils/io.py +252 -0
- novel_downloader/utils/file_utils/normalize.py +68 -0
- novel_downloader/utils/file_utils/sanitize.py +77 -0
- novel_downloader/utils/fontocr/__init__.py +23 -0
- novel_downloader/utils/fontocr/ocr_v1.py +304 -0
- novel_downloader/utils/fontocr/ocr_v2.py +658 -0
- novel_downloader/utils/hash_store.py +288 -0
- novel_downloader/utils/hash_utils.py +103 -0
- novel_downloader/utils/i18n.py +41 -0
- novel_downloader/utils/logger.py +104 -0
- novel_downloader/utils/model_loader.py +72 -0
- novel_downloader/utils/network.py +287 -0
- novel_downloader/utils/state.py +156 -0
- novel_downloader/utils/text_utils/__init__.py +27 -0
- novel_downloader/utils/text_utils/chapter_formatting.py +46 -0
- novel_downloader/utils/text_utils/diff_display.py +75 -0
- novel_downloader/utils/text_utils/font_mapping.py +31 -0
- novel_downloader/utils/text_utils/text_cleaning.py +57 -0
- novel_downloader/utils/time_utils/__init__.py +22 -0
- novel_downloader/utils/time_utils/datetime_utils.py +146 -0
- novel_downloader/utils/time_utils/sleep_utils.py +49 -0
- novel_downloader-1.1.1.dist-info/METADATA +137 -0
- novel_downloader-1.1.1.dist-info/RECORD +109 -0
- novel_downloader-1.1.1.dist-info/WHEEL +5 -0
- novel_downloader-1.1.1.dist-info/entry_points.txt +2 -0
- novel_downloader-1.1.1.dist-info/licenses/LICENSE +21 -0
- novel_downloader-1.1.1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,169 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
"""
|
4
|
+
novel_downloader.core.savers.base_saver
|
5
|
+
---------------------------------------
|
6
|
+
|
7
|
+
This module provides an abstract base class `BaseSaver` that defines the
|
8
|
+
common interface and reusable logic for saving assembled novel content
|
9
|
+
into various output formats.
|
10
|
+
"""
|
11
|
+
|
12
|
+
import abc
|
13
|
+
import logging
|
14
|
+
from datetime import datetime
|
15
|
+
from pathlib import Path
|
16
|
+
from typing import Any, Dict, Optional
|
17
|
+
|
18
|
+
from novel_downloader.config.models import SaverConfig
|
19
|
+
from novel_downloader.core.interfaces import SaverProtocol
|
20
|
+
|
21
|
+
logger = logging.getLogger(__name__)
|
22
|
+
|
23
|
+
|
24
|
+
class SafeDict(Dict[str, Any]):
    """
    Dictionary for ``str.format_map`` that tolerates unknown placeholders.

    Looking up a key that is not present yields the placeholder itself
    (``"{key}"``) instead of raising ``KeyError``, so a template keeps any
    field that was not supplied.
    """

    def __missing__(self, key: str) -> str:
        # Doubled braces are literal braces; the inner pair interpolates key.
        return f"{{{key}}}"
|
27
|
+
|
28
|
+
|
29
|
+
class BaseSaver(SaverProtocol, abc.ABC):
    """
    Abstract foundation for savers that export an assembled book.

    Subclasses must implement :meth:`save_as_txt`. The EPUB, Markdown and
    PDF exporters are optional: unless overridden they raise
    ``NotImplementedError``.
    """

    def __init__(self, config: SaverConfig):
        """
        Keep the configuration and create the working directories.

        :param config: A SaverConfig object that defines
                       save paths, formats, and options.
        """
        self._config = config

        self._raw_data_dir = Path(config.raw_data_dir)
        self._output_dir = Path(config.output_dir)
        for directory in (self._raw_data_dir, self._output_dir):
            directory.mkdir(parents=True, exist_ok=True)

        self._filename_template = config.filename_template

    def save(self, book_id: str) -> None:
        """
        Run every exporter whose flag is enabled in the configuration.

        An exporter that raises is logged (warning for
        ``NotImplementedError``, error for anything else) and the remaining
        exporters still run.

        :param book_id: The book identifier (used for filename, lookup, etc.)
        """
        TAG = "[Saver]"
        exporters = (
            ("make_txt", self.save_as_txt),
            ("make_epub", self.save_as_epub),
            ("make_md", self.save_as_md),
            ("make_pdf", self.save_as_pdf),
        )

        for flag_name, exporter in exporters:
            # A flag that is absent from the config counts as disabled.
            if not getattr(self._config, flag_name, False):
                continue

            try:
                logger.info(
                    "%s Attempting to save book_id '%s' as %s...",
                    TAG,
                    book_id,
                    flag_name,
                )
                exporter(book_id)
                logger.info("%s Successfully saved as %s.", TAG, flag_name)
            except NotImplementedError as exc:
                logger.warning(
                    "%s Save method for %s not implemented: %s",
                    TAG,
                    flag_name,
                    str(exc),
                )
            except Exception as exc:
                logger.error(
                    "%s Error while saving as %s: %s", TAG, flag_name, str(exc)
                )

    @abc.abstractmethod
    def save_as_txt(self, book_id: str) -> None:
        """
        Write the assembled book as a .txt file.

        Every concrete subclass has to provide this method.

        :param book_id: The book identifier (used for filename, lookup, etc.)
        """
        ...

    def save_as_epub(self, book_id: str) -> None:
        """
        Optional: write the assembled book as an EPUB (.epub) file.

        :param book_id: The book identifier.
        :raises NotImplementedError: If the method is not overridden.
        """
        raise NotImplementedError("EPUB export not supported by this saver.")

    def save_as_md(self, book_id: str) -> None:
        """
        Optional: write the assembled book as a Markdown file.

        :param book_id: The book identifier.
        :raises NotImplementedError: If the method is not overridden.
        """
        raise NotImplementedError("Markdown export not supported by this saver.")

    def save_as_pdf(self, book_id: str) -> None:
        """
        Optional: write the assembled book as a PDF file.

        :param book_id: The book identifier.
        :raises NotImplementedError: If the method is not overridden.
        """
        raise NotImplementedError("PDF export not supported by this saver.")

    def get_filename(
        self,
        *,
        title: str,
        author: Optional[str] = None,
        ext: str = "txt",
        **extra_fields: str,
    ) -> str:
        """
        Render the configured filename template into a file name.

        Placeholders in the template that have no matching field are left
        in the result unchanged. When ``append_timestamp`` is enabled in the
        configuration, the current local time is appended as
        ``_YYYYmmdd_HHMMSS``.

        :param title: Book title (required).
        :param author: Author name (optional); a missing value renders as "".
        :param ext: File extension without the dot (e.g., "txt", "epub").
        :param extra_fields: Any additional fields used in the filename template.
        :return: Formatted filename with extension.
        """
        fields = SafeDict(title=title, author=author or "", **extra_fields)
        stem = self._filename_template.format_map(fields)

        if self._config.append_timestamp:
            stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            stem = f"{stem}_{stamp}"

        return f"{stem}.{ext}"

    @property
    def output_dir(self) -> Path:
        """Directory that exported files are written to."""
        return self._output_dir

    @property
    def raw_data_dir(self) -> Path:
        """Directory that holds the raw downloaded data."""
        return self._raw_data_dir

    @property
    def filename_template(self) -> str:
        """Template string used by :meth:`get_filename`."""
        return self._filename_template
|
@@ -0,0 +1,13 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
"""
|
4
|
+
novel_downloader.core.savers.common_saver
|
5
|
+
-----------------------------------------
|
6
|
+
|
7
|
+
This module provides the `CommonSaver` class for handling the saving process
|
8
|
+
of novels.
|
9
|
+
"""
|
10
|
+
|
11
|
+
from .main_saver import CommonSaver
|
12
|
+
|
13
|
+
__all__ = ["CommonSaver"]
|
@@ -0,0 +1,232 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
"""
|
4
|
+
novel_downloader.core.savers.common_saver.common_epub
|
5
|
+
-----------------------------------------------------
|
6
|
+
|
7
|
+
Contains the logic for exporting novel content as a single `.epub` file.
|
8
|
+
"""
|
9
|
+
|
10
|
+
from __future__ import annotations
|
11
|
+
|
12
|
+
import json
|
13
|
+
import logging
|
14
|
+
from pathlib import Path
|
15
|
+
from typing import TYPE_CHECKING, List, Optional
|
16
|
+
from urllib.parse import unquote, urlparse
|
17
|
+
|
18
|
+
from ebooklib import epub
|
19
|
+
|
20
|
+
from novel_downloader.core.savers.epub_utils import (
|
21
|
+
chapter_txt_to_html,
|
22
|
+
create_css_items,
|
23
|
+
create_volume_intro,
|
24
|
+
generate_book_intro_html,
|
25
|
+
init_epub,
|
26
|
+
)
|
27
|
+
from novel_downloader.utils.constants import (
|
28
|
+
DEFAULT_IMAGE_SUFFIX,
|
29
|
+
EPUB_OPTIONS,
|
30
|
+
EPUB_TEXT_FOLDER,
|
31
|
+
)
|
32
|
+
from novel_downloader.utils.file_utils import sanitize_filename
|
33
|
+
from novel_downloader.utils.text_utils import clean_chapter_title
|
34
|
+
|
35
|
+
if TYPE_CHECKING:
|
36
|
+
from .main_saver import CommonSaver
|
37
|
+
|
38
|
+
logger = logging.getLogger(__name__)
|
39
|
+
|
40
|
+
# Sub-folders of a book's raw data directory that are searched for chapter
# JSON files; order matters because the first match is used.
CHAPTER_FOLDERS: List[str] = ["chapters", "encrypted_chapters"]
|
44
|
+
|
45
|
+
|
46
|
+
def _find_chapter_file(
    raw_base: Path,
    chapter_id: str,
) -> Optional[Path]:
    """
    Locate the stored JSON file of a chapter.

    Every folder listed in CHAPTER_FOLDERS is probed, in order, for
    ``<raw_base>/<folder>/<chapter_id>.json``.

    :param raw_base: Directory that contains the chapter folders.
    :param chapter_id: Chapter identifier used as the file stem.
    :return: The first candidate path that exists, or None if none does.
    """
    file_name = f"{chapter_id}.json"
    candidates = (raw_base / folder / file_name for folder in CHAPTER_FOLDERS)
    return next((path for path in candidates if path.exists()), None)
|
60
|
+
|
61
|
+
|
62
|
+
def _image_url_to_filename(url: str) -> str:
|
63
|
+
"""
|
64
|
+
Parse and sanitize a image filename from a URL.
|
65
|
+
If no filename or suffix exists, fallback to default name and extension.
|
66
|
+
|
67
|
+
:param url: URL string
|
68
|
+
:return: Safe filename string
|
69
|
+
"""
|
70
|
+
if not url:
|
71
|
+
return ""
|
72
|
+
|
73
|
+
parsed_url = urlparse(url)
|
74
|
+
path = unquote(parsed_url.path)
|
75
|
+
filename = Path(path).name
|
76
|
+
|
77
|
+
if not filename:
|
78
|
+
filename = "image"
|
79
|
+
|
80
|
+
if not Path(filename).suffix:
|
81
|
+
filename += DEFAULT_IMAGE_SUFFIX
|
82
|
+
|
83
|
+
return filename
|
84
|
+
|
85
|
+
|
86
|
+
def common_save_as_epub(
    saver: CommonSaver,
    book_id: str,
) -> None:
    """
    Export a single novel (identified by `book_id`) to an EPUB file.

    This function will:
      1. Load `book_info.json` for metadata.
      2. Generate introductory HTML and optionally include the cover image.
      3. Initialize the EPUB container.
      4. Iterate through volumes and chapters, convert each to XHTML.
      5. Assemble the spine, TOC, CSS and write out the final `.epub`.

    A failure to load `book_info.json` or to write the EPUB is logged and the
    function returns without raising. A chapter whose file is missing or
    cannot be parsed is logged and skipped.

    :param saver: The saver instance, carrying config and path info.
    :param book_id: Identifier of the novel (used as subdirectory name).
    """
    TAG = "[saver]"
    site = saver.site
    config = saver._config
    # --- Paths & options ---
    raw_base = saver.raw_data_dir / site / book_id
    out_dir = saver.output_dir
    out_dir.mkdir(parents=True, exist_ok=True)

    # --- 1. Load book_info.json ---
    info_path = raw_base / "book_info.json"
    try:
        info_text = info_path.read_text(encoding="utf-8")
        book_info = json.loads(info_text)
    except Exception as e:
        logger.error("%s Failed to load %s: %s", TAG, info_path, e)
        return

    book_name = book_info.get("book_name", book_id)
    logger.info("%s Starting EPUB generation: %s (ID: %s)", TAG, book_name, book_id)

    # --- 2. Generate intro + cover ---
    intro_html = generate_book_intro_html(book_info)
    cover_path: Optional[Path] = None
    if config.include_cover:
        cover_filename = _image_url_to_filename(book_info.get("cover_url", ""))
        if cover_filename:
            # The path is handed to init_epub as-is; its existence is not
            # checked here.
            cover_path = raw_base / cover_filename

    # --- 3. Initialize EPUB ---
    book, spine, toc_list = init_epub(
        book_info=book_info,
        book_id=book_id,
        intro_html=intro_html,
        book_cover_path=cover_path,
        include_toc=config.include_toc,
    )
    for css in create_css_items(
        include_main=True,
        include_volume=True,
    ):
        book.add_item(css)

    # --- 4. Compile chapters ---
    volumes = book_info.get("volumes", [])
    for vol_index, vol in enumerate(volumes, start=1):
        raw_vol_name = vol.get("volume_name", "").strip()
        vol_name = clean_chapter_title(raw_vol_name) or f"Unknown Volume {vol_index}"
        logger.info("%s Processing volume %d: %s", TAG, vol_index, vol_name)

        # Volume intro
        vol_intro = epub.EpubHtml(
            title=vol_name,
            file_name=f"{EPUB_TEXT_FOLDER}/volume_intro_{vol_index}.xhtml",
            lang="zh",
        )
        vol_intro.content = create_volume_intro(vol_name, vol.get("volume_intro", ""))
        vol_intro.add_link(
            href="../Styles/volume-intro.css",
            rel="stylesheet",
            type="text/css",
        )
        book.add_item(vol_intro)
        spine.append(vol_intro)

        section = epub.Section(vol_name, vol_intro.file_name)
        chapter_items: List[epub.EpubHtml] = []

        for chap in vol.get("chapters", []):
            chap_id = chap.get("chapterId")
            chap_title = chap.get("title", "")
            if not chap_id:
                logger.warning("%s Missing chapterId, skipping: %s", TAG, chap)
                continue

            json_path = _find_chapter_file(raw_base, chap_id)
            if json_path is None:
                logger.info(
                    "%s Missing chapter file: %s (%s), skipping.",
                    TAG,
                    chap_title,
                    chap_id,
                )
                continue

            try:
                data = json.loads(json_path.read_text(encoding="utf-8"))
                title = clean_chapter_title(data.get("title", "")) or chap_id
                chap_html = chapter_txt_to_html(
                    chapter_title=title,
                    chapter_text=data.get("content", ""),
                    author_say=data.get("author_say", ""),
                )
            except Exception as e:
                logger.error("%s Error parsing chapter %s: %s", TAG, json_path, e)
                continue

            chap_path = f"{EPUB_TEXT_FOLDER}/{chap_id}.xhtml"
            # Prefer the title listed in book_info, but never create an item
            # with an empty title: fall back to the title used for the page
            # body (cleaned chapter title, or the chapter id).
            item = epub.EpubHtml(
                title=chap_title or title,
                file_name=chap_path,
                lang="zh",
            )
            item.content = chap_html
            item.add_link(
                href="../Styles/main.css",
                rel="stylesheet",
                type="text/css",
            )
            book.add_item(item)
            spine.append(item)
            chapter_items.append(item)

        # One TOC entry per volume: the section followed by its chapters.
        toc_list.append((section, chapter_items))

    # --- 5. Finalize EPUB ---
    logger.info("%s Building TOC and spine...", TAG)
    book.toc = tuple(toc_list)
    book.spine = spine
    book.add_item(epub.EpubNcx())
    book.add_item(epub.EpubNav())

    out_name = saver.get_filename(
        title=book_name,
        author=book_info.get("author"),
        ext="epub",
    )
    out_path = out_dir / sanitize_filename(out_name)

    try:
        epub.write_epub(out_path, book, EPUB_OPTIONS)
        logger.info("%s EPUB successfully written to %s", TAG, out_path)
    except Exception as e:
        logger.error("%s Failed to write EPUB to %s: %s", TAG, out_path, e)
|
@@ -0,0 +1,176 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
"""
|
4
|
+
novel_downloader.core.savers.common_saver.common_txt
|
5
|
+
----------------------------------------------------
|
6
|
+
|
7
|
+
Contains the logic for exporting novel content as a single `.txt` file.
|
8
|
+
|
9
|
+
This module defines `common_save_as_txt` function, which assembles and formats
|
10
|
+
a novel based on metadata and chapter files found in the raw data directory.
|
11
|
+
It is intended to be used by `CommonSaver` as part of the save/export process.
|
12
|
+
"""
|
13
|
+
|
14
|
+
from __future__ import annotations
|
15
|
+
|
16
|
+
import json
|
17
|
+
import logging
|
18
|
+
from pathlib import Path
|
19
|
+
from typing import TYPE_CHECKING, List, Optional
|
20
|
+
|
21
|
+
from novel_downloader.utils.file_utils import save_as_txt
|
22
|
+
from novel_downloader.utils.text_utils import clean_chapter_title, format_chapter
|
23
|
+
|
24
|
+
if TYPE_CHECKING:
|
25
|
+
from .main_saver import CommonSaver
|
26
|
+
|
27
|
+
logger = logging.getLogger(__name__)
|
28
|
+
|
29
|
+
# Sub-folders of a book's raw data directory that are searched for chapter
# JSON files; order matters because the first match is used.
CHAPTER_FOLDERS: List[str] = ["chapters", "encrypted_chapters"]
|
33
|
+
|
34
|
+
|
35
|
+
def _find_chapter_file(
    raw_base: Path,
    chapter_id: str,
) -> Optional[Path]:
    """
    Return the path of the chapter's JSON file, if one is stored.

    The folders named in CHAPTER_FOLDERS are checked in order for
    ``<raw_base>/<folder>/<chapter_id>.json``.

    :param raw_base: Directory that contains the chapter folders.
    :param chapter_id: Chapter identifier used as the file stem.
    :return: The first candidate path that exists, or None if none does.
    """
    wanted = f"{chapter_id}.json"
    existing = (
        path
        for path in (raw_base / folder / wanted for folder in CHAPTER_FOLDERS)
        if path.exists()
    )
    return next(existing, None)
|
49
|
+
|
50
|
+
|
51
|
+
def common_save_as_txt(
    saver: CommonSaver,
    book_id: str,
) -> None:
    """
    Merge all chapter JSON files of a novel into one complete .txt file
    and write it to the saver's output directory.

    Chapter files are expected to be named `{chapterId}.json`.

    Processing steps:
      1. Load the book information from `book_info.json` (book name, author,
         summary and the volume/chapter list).
      2. Walk through the volumes: append each volume heading, then the title
         and content of every chapter in it, remembering the last chapter
         title written (emitted in the header as "原文截至").
      3. Join the book metadata header with all chapter content to form the
         final text.
      4. Save the result in the output directory under the name produced by
         the saver's filename template.

    A failure to load `book_info.json` or to write the output is logged and
    the function returns without raising. A chapter whose file is missing or
    unreadable is logged and skipped.

    :param saver: The saver instance, carrying config and path info.
    :param book_id: Identifier of the novel (used as subdirectory name).
    """
    TAG = "[saver]"
    site = saver.site
    # --- Paths & options ---
    raw_base = saver.raw_data_dir / site / book_id
    out_dir = saver.output_dir
    out_dir.mkdir(parents=True, exist_ok=True)

    # --- Load book_info.json ---
    info_path = raw_base / "book_info.json"
    try:
        info_text = info_path.read_text(encoding="utf-8")
        book_info = json.loads(info_text)
    except Exception as e:
        logger.error("%s Failed to load %s: %s", TAG, info_path, e)
        return

    # --- Compile chapters ---
    parts: List[str] = []
    latest_chapter: str = ""
    volumes = book_info.get("volumes", [])

    for vol in volumes:
        vol_name = vol.get("volume_name", "").strip()
        vol_name = clean_chapter_title(vol_name)
        if vol_name:
            volume_header = f"\n\n{'=' * 6} {vol_name} {'=' * 6}\n\n"
            parts.append(volume_header)
            logger.info("%s Processing volume: %s", TAG, vol_name)
        for chap in vol.get("chapters", []):
            chap_id = chap.get("chapterId")
            chap_title = chap.get("title", "")
            if not chap_id:
                logger.warning("%s Missing chapterId, skipping: %s", TAG, chap)
                continue

            # Find the JSON file in one of the known subfolders
            json_path = _find_chapter_file(raw_base, chap_id)
            if json_path is None:
                logger.info(
                    "%s Missing chapter file in: %s (%s), skipping.",
                    TAG,
                    chap_title,
                    chap_id,
                )
                continue

            try:
                chapter_data = json.loads(json_path.read_text(encoding="utf-8"))
            except Exception as e:
                logger.error("%s Error reading %s: %s", TAG, json_path, e)
                continue

            # Extract structured fields. `or` also covers JSON null, which
            # would otherwise break `.strip()` and abort the whole export; an
            # empty stored title falls back to the title from book_info.
            title = (chapter_data.get("title") or chap_title or "").strip()
            content = (chapter_data.get("content") or "").strip()
            author_say = (chapter_data.get("author_say") or "").strip()
            clean_title = clean_chapter_title(title)

            parts.append(format_chapter(clean_title, content, author_say))
            latest_chapter = clean_title

    # --- Build header ---
    name = book_info.get("book_name")
    author = book_info.get("author")
    words = book_info.get("word_count")
    updated = book_info.get("update_time")
    summary = book_info.get("summary")

    header_lines = []

    if name:
        header_lines.append(f"书名: {name}")

    if author:
        header_lines.append(f"作者: {author}")

    if words:
        header_lines.append(f"总字数: {words}")

    if updated:
        header_lines.append(f"更新日期: {updated}")

    header_lines.append(f"原文截至: {latest_chapter}")

    if summary:
        header_lines.append("内容简介:")
        header_lines.append(summary)

    header_lines.append("")
    header_lines.append("-" * 10)
    header_lines.append("")

    header = "\n".join(header_lines)

    final_text = header + "\n\n" + "\n\n".join(parts).strip()

    # --- Determine output file path ---
    # Without a book name, use the book id (as the EPUB exporter does) so the
    # file is not named after the literal string "None".
    out_name = saver.get_filename(title=name or book_id, author=author, ext="txt")
    # NOTE(review): the EPUB exporter passes its name through
    # sanitize_filename; confirm whether save_as_txt sanitizes on its own.
    out_path = out_dir / out_name

    # --- Save final text ---
    try:
        save_as_txt(content=final_text, filepath=out_path)
        logger.info("%s Novel saved to: %s", TAG, out_path)
    except Exception as e:
        logger.error("%s Failed to save file: %s", TAG, e)
|
@@ -0,0 +1,86 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
"""
|
4
|
+
novel_downloader.core.savers.common_saver.main_saver
|
5
|
+
----------------------------------------------------
|
6
|
+
|
7
|
+
This module implements the `CommonSaver` class, a concrete saver for handling
|
8
|
+
novel data from a given site. It defines the logic to compile, structure,
|
9
|
+
and export novel content in plain text format based on the platform's metadata
|
10
|
+
and chapter files.
|
11
|
+
"""
|
12
|
+
|
13
|
+
from novel_downloader.config.models import SaverConfig
|
14
|
+
|
15
|
+
from ..base_saver import BaseSaver
|
16
|
+
from .common_txt import common_save_as_txt
|
17
|
+
|
18
|
+
|
19
|
+
class CommonSaver(BaseSaver):
    """
    CommonSaver is a saver that processes and exports novels.
    It extends the BaseSaver interface and provides logic for exporting
    full novels as plain text (.txt) files and, when the EPUB exporter
    module can be imported, as EPUB (.epub) files.
    """

    def __init__(self, config: SaverConfig, site: str):
        """
        Initialize the common saver with site information.

        :param config: A SaverConfig object that defines
                       save paths, formats, and options.
        :param site: Identifier for the site the saver is handling.
        """
        super().__init__(config)
        self._site = site

    def save_as_txt(self, book_id: str) -> None:
        """
        Compile and save a complete novel as a single .txt file.

        Processing steps:
          1. Load book metadata from `book_info.json`, including title,
             author, summary, and chapter structure.
          2. Iterate through all volumes and chapters, appending each
             volume/chapter title and content.
          3. Combine metadata and content into a final formatted text.
          4. Save the final result to the output directory using the
             configured filename template.

        :param book_id: The book identifier (used to locate raw data)
        """
        return common_save_as_txt(self, book_id)

    def save_as_epub(self, book_id: str) -> None:
        """
        Persist the assembled book as an EPUB (.epub) file.

        The exporter is imported lazily so that the optional 'ebooklib'
        dependency is only needed when EPUB output is actually requested.

        :param book_id: The book identifier.
        :raises NotImplementedError: If the EPUB exporter cannot be imported
                                     (e.g. 'ebooklib' is not installed).
        """
        try:
            from .common_epub import common_save_as_epub
        except ImportError as err:
            # Chain explicitly so the original import failure stays attached
            # as the cause of the error seen by the caller.
            raise NotImplementedError(
                "EPUB export not supported. Please install 'ebooklib'"
            ) from err

        return common_save_as_epub(self, book_id)

    @property
    def site(self) -> str:
        """
        Get the site identifier.

        :return: The site string.
        """
        return self._site

    @site.setter
    def site(self, value: str) -> None:
        """
        Set the site identifier.

        :param value: New site string to set.
        """
        self._site = value
|
@@ -0,0 +1,27 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
"""
|
4
|
+
novel_downloader.core.savers.epub_utils
|
5
|
+
---------------------------------------
|
6
|
+
|
7
|
+
This package provides utility functions for constructing EPUB files,
|
8
|
+
including:
|
9
|
+
|
10
|
+
- CSS inclusion (create_css_items)
|
11
|
+
- EPUB book initialization (init_epub)
|
12
|
+
- Chapter text-to-HTML conversion (chapter_txt_to_html)
|
13
|
+
- Volume intro HTML generation (create_volume_intro)
|
14
|
+
"""
|
15
|
+
|
16
|
+
from .css_builder import create_css_items
|
17
|
+
from .initializer import init_epub
|
18
|
+
from .text_to_html import chapter_txt_to_html, generate_book_intro_html
|
19
|
+
from .volume_intro import create_volume_intro
|
20
|
+
|
21
|
+
__all__ = [
|
22
|
+
"create_css_items",
|
23
|
+
"init_epub",
|
24
|
+
"chapter_txt_to_html",
|
25
|
+
"create_volume_intro",
|
26
|
+
"generate_book_intro_html",
|
27
|
+
]
|