novel-downloader 1.3.1__py3-none-any.whl → 1.3.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- novel_downloader/__init__.py +1 -1
- novel_downloader/cli/download.py +1 -1
- novel_downloader/config/adapter.py +3 -0
- novel_downloader/config/models.py +3 -0
- novel_downloader/core/downloaders/__init__.py +23 -1
- novel_downloader/core/downloaders/biquge/__init__.py +2 -0
- novel_downloader/core/downloaders/biquge/biquge_async.py +27 -0
- novel_downloader/core/downloaders/biquge/biquge_sync.py +5 -3
- novel_downloader/core/downloaders/common/common_async.py +5 -11
- novel_downloader/core/downloaders/common/common_sync.py +18 -18
- novel_downloader/core/downloaders/esjzone/__init__.py +14 -0
- novel_downloader/core/downloaders/esjzone/esjzone_async.py +27 -0
- novel_downloader/core/downloaders/esjzone/esjzone_sync.py +27 -0
- novel_downloader/core/downloaders/qianbi/__init__.py +14 -0
- novel_downloader/core/downloaders/qianbi/qianbi_async.py +27 -0
- novel_downloader/core/downloaders/qianbi/qianbi_sync.py +27 -0
- novel_downloader/core/downloaders/qidian/qidian_sync.py +9 -14
- novel_downloader/core/downloaders/sfacg/__init__.py +14 -0
- novel_downloader/core/downloaders/sfacg/sfacg_async.py +27 -0
- novel_downloader/core/downloaders/sfacg/sfacg_sync.py +27 -0
- novel_downloader/core/downloaders/yamibo/__init__.py +14 -0
- novel_downloader/core/downloaders/yamibo/yamibo_async.py +27 -0
- novel_downloader/core/downloaders/yamibo/yamibo_sync.py +27 -0
- novel_downloader/core/factory/downloader.py +35 -7
- novel_downloader/core/factory/parser.py +23 -2
- novel_downloader/core/factory/requester.py +32 -7
- novel_downloader/core/factory/saver.py +14 -2
- novel_downloader/core/interfaces/async_requester.py +3 -3
- novel_downloader/core/interfaces/parser.py +7 -2
- novel_downloader/core/interfaces/sync_requester.py +3 -3
- novel_downloader/core/parsers/__init__.py +15 -5
- novel_downloader/core/parsers/base.py +7 -2
- novel_downloader/core/parsers/biquge/main_parser.py +13 -4
- novel_downloader/core/parsers/common/main_parser.py +13 -4
- novel_downloader/core/parsers/esjzone/__init__.py +10 -0
- novel_downloader/core/parsers/esjzone/main_parser.py +220 -0
- novel_downloader/core/parsers/qianbi/__init__.py +10 -0
- novel_downloader/core/parsers/qianbi/main_parser.py +142 -0
- novel_downloader/core/parsers/qidian/browser/main_parser.py +13 -4
- novel_downloader/core/parsers/qidian/session/main_parser.py +13 -4
- novel_downloader/core/parsers/sfacg/__init__.py +10 -0
- novel_downloader/core/parsers/sfacg/main_parser.py +166 -0
- novel_downloader/core/parsers/yamibo/__init__.py +10 -0
- novel_downloader/core/parsers/yamibo/main_parser.py +194 -0
- novel_downloader/core/requesters/__init__.py +33 -3
- novel_downloader/core/requesters/base/async_session.py +14 -10
- novel_downloader/core/requesters/base/browser.py +4 -7
- novel_downloader/core/requesters/base/session.py +25 -11
- novel_downloader/core/requesters/biquge/__init__.py +2 -0
- novel_downloader/core/requesters/biquge/async_session.py +71 -0
- novel_downloader/core/requesters/biquge/session.py +6 -6
- novel_downloader/core/requesters/common/async_session.py +4 -4
- novel_downloader/core/requesters/common/session.py +6 -6
- novel_downloader/core/requesters/esjzone/__init__.py +13 -0
- novel_downloader/core/requesters/esjzone/async_session.py +211 -0
- novel_downloader/core/requesters/esjzone/session.py +235 -0
- novel_downloader/core/requesters/qianbi/__init__.py +13 -0
- novel_downloader/core/requesters/qianbi/async_session.py +96 -0
- novel_downloader/core/requesters/qianbi/session.py +125 -0
- novel_downloader/core/requesters/qidian/broswer.py +9 -9
- novel_downloader/core/requesters/qidian/session.py +14 -11
- novel_downloader/core/requesters/sfacg/__init__.py +13 -0
- novel_downloader/core/requesters/sfacg/async_session.py +204 -0
- novel_downloader/core/requesters/sfacg/session.py +242 -0
- novel_downloader/core/requesters/yamibo/__init__.py +13 -0
- novel_downloader/core/requesters/yamibo/async_session.py +211 -0
- novel_downloader/core/requesters/yamibo/session.py +237 -0
- novel_downloader/core/savers/__init__.py +15 -3
- novel_downloader/core/savers/base.py +3 -7
- novel_downloader/core/savers/common/epub.py +21 -33
- novel_downloader/core/savers/common/main_saver.py +3 -1
- novel_downloader/core/savers/common/txt.py +1 -2
- novel_downloader/core/savers/epub_utils/__init__.py +14 -5
- novel_downloader/core/savers/epub_utils/css_builder.py +1 -0
- novel_downloader/core/savers/epub_utils/image_loader.py +89 -0
- novel_downloader/core/savers/epub_utils/initializer.py +1 -0
- novel_downloader/core/savers/epub_utils/text_to_html.py +48 -1
- novel_downloader/core/savers/epub_utils/volume_intro.py +1 -0
- novel_downloader/core/savers/esjzone.py +25 -0
- novel_downloader/core/savers/qianbi.py +25 -0
- novel_downloader/core/savers/sfacg.py +25 -0
- novel_downloader/core/savers/yamibo.py +25 -0
- novel_downloader/locales/en.json +1 -0
- novel_downloader/locales/zh.json +1 -0
- novel_downloader/resources/config/settings.toml +40 -4
- novel_downloader/utils/constants.py +4 -0
- novel_downloader/utils/file_utils/io.py +1 -1
- novel_downloader/utils/network.py +51 -38
- novel_downloader/utils/time_utils/__init__.py +2 -1
- novel_downloader/utils/time_utils/datetime_utils.py +3 -1
- novel_downloader/utils/time_utils/sleep_utils.py +44 -2
- {novel_downloader-1.3.1.dist-info → novel_downloader-1.3.3.dist-info}/METADATA +29 -24
- novel_downloader-1.3.3.dist-info/RECORD +166 -0
- novel_downloader-1.3.1.dist-info/RECORD +0 -127
- {novel_downloader-1.3.1.dist-info → novel_downloader-1.3.3.dist-info}/WHEEL +0 -0
- {novel_downloader-1.3.1.dist-info → novel_downloader-1.3.3.dist-info}/entry_points.txt +0 -0
- {novel_downloader-1.3.1.dist-info → novel_downloader-1.3.3.dist-info}/licenses/LICENSE +0 -0
- {novel_downloader-1.3.1.dist-info → novel_downloader-1.3.3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,237 @@
|
|
1
|
+
"""
|
2
|
+
novel_downloader.core.requesters.yamibo.session
|
3
|
+
-----------------------------------------------
|
4
|
+
|
5
|
+
"""
|
6
|
+
|
7
|
+
from typing import Any
|
8
|
+
|
9
|
+
from lxml import etree
|
10
|
+
|
11
|
+
from novel_downloader.config.models import RequesterConfig
|
12
|
+
from novel_downloader.core.requesters.base import BaseSession
|
13
|
+
from novel_downloader.utils.i18n import t
|
14
|
+
from novel_downloader.utils.state import state_mgr
|
15
|
+
from novel_downloader.utils.time_utils import sleep_with_random_delay
|
16
|
+
|
17
|
+
|
18
|
+
class YamiboSession(BaseSession):
    """
    A session class for interacting with the
    yamibo (www.yamibo.com) novel website.

    Cookies are loaded from ``state_mgr`` under the ``"yamibo"`` key during
    :meth:`login` and written back by :meth:`_on_close`.
    """

    BASE_URL = "https://www.yamibo.com"
    BOOKCASE_URL = "https://www.yamibo.com/my/fav"
    BOOK_INFO_URL = "https://www.yamibo.com/novel/{book_id}"
    CHAPTER_URL = "https://www.yamibo.com/novel/view-chapter?id={chapter_id}"

    LOGIN_URL = "https://www.yamibo.com/user/login"

    def __init__(
        self,
        config: RequesterConfig,
    ):
        """
        Initialise the session from the requester configuration.

        :param config: Requester settings; ``backoff_factor``, ``retry_times``,
            ``username`` and ``password`` are read here.
        """
        super().__init__(config)
        self._logged_in: bool = False
        # The backoff factor doubles as the base delay between login attempts.
        self._request_interval = config.backoff_factor
        self._retry_times = config.retry_times
        self._username = config.username
        self._password = config.password

    def login(
        self,
        username: str = "",
        password: str = "",
        manual_login: bool = False,
        **kwargs: Any,
    ) -> bool:
        """
        Restore persisted cookies and, if they are not enough, log in
        with credentials.

        Each attempt first checks whether the session is already
        authenticated. If it is not and both a username and a password are
        available, a form login is tried. Attempts are separated by a
        randomised delay.

        :param username: Account name; falls back to the configured one.
        :param password: Account password; falls back to the configured one.
        :param manual_login: Accepted for interface compatibility; unused here.
        :return: True if the session is logged in, False otherwise.
        """
        cookies: dict[str, str] = state_mgr.get_cookies("yamibo")
        username = username or self._username
        password = password or self._password

        self.update_cookies(cookies)
        for _ in range(self._retry_times):
            if self._check_login_status():
                self.logger.debug("[auth] Already logged in.")
                self._logged_in = True
                return True
            if username and password and not self._api_login(username, password):
                # Report the site this session actually belongs to.
                print(t("session_login_failed", site="yamibo"))
            # Pause between attempts so repeated checks do not hammer the site.
            sleep_with_random_delay(
                self._request_interval,
                mul_spread=1.1,
                max_sleep=self._request_interval + 2,
            )

        # One last check covers a login that succeeded on the final attempt.
        self._logged_in = self._check_login_status()
        return self._logged_in

    def get_book_info(
        self,
        book_id: str,
        **kwargs: Any,
    ) -> list[str]:
        """
        Fetch the raw HTML of the book info page.

        :param book_id: The book identifier.
        :return: A one-element list holding the page HTML,
            or an empty list if the request failed.
        """
        url = self.book_info_url(book_id=book_id)
        try:
            resp = self.get(url, **kwargs)
            resp.raise_for_status()
            return [resp.text]
        except Exception as exc:
            self.logger.warning(
                "[session] get_book_info(%s) failed: %s",
                book_id,
                exc,
            )
        return []

    def get_book_chapter(
        self,
        book_id: str,
        chapter_id: str,
        **kwargs: Any,
    ) -> list[str]:
        """
        Fetch the HTML of a single chapter.

        :param book_id: The book identifier.
        :param chapter_id: The chapter identifier.
        :return: A one-element list holding the chapter HTML,
            or an empty list if the request failed.
        """
        url = self.chapter_url(book_id=book_id, chapter_id=chapter_id)
        try:
            resp = self.get(url, **kwargs)
            resp.raise_for_status()
            return [resp.text]
        except Exception as exc:
            # Log the chapter id too: it is what identifies the failing URL.
            self.logger.warning(
                "[session] get_book_chapter(%s, %s) failed: %s",
                book_id,
                chapter_id,
                exc,
            )
        return []

    def get_bookcase(
        self,
        page: int = 1,
        **kwargs: Any,
    ) -> list[str]:
        """
        Retrieve the user's *bookcase* page.

        :param page: Accepted for interface compatibility; the same
            bookcase URL is requested regardless of its value.
        :return: A one-element list holding the bookcase HTML,
            or an empty list if the request failed.
        """
        url = self.bookcase_url()
        try:
            resp = self.get(url, **kwargs)
            resp.raise_for_status()
            return [resp.text]
        except Exception as exc:
            self.logger.warning(
                "[session] get_bookcase failed: %s",
                exc,
            )
        return []

    @classmethod
    def bookcase_url(cls) -> str:
        """
        Construct the URL for the user's bookcase page.

        :return: Fully qualified URL of the bookcase.
        """
        return cls.BOOKCASE_URL

    @classmethod
    def book_info_url(cls, book_id: str) -> str:
        """
        Construct the URL for fetching a book's info page.

        :param book_id: The identifier of the book.
        :return: Fully qualified URL for the book info page.
        """
        return cls.BOOK_INFO_URL.format(book_id=book_id)

    @classmethod
    def chapter_url(cls, book_id: str, chapter_id: str) -> str:
        """
        Construct the URL for fetching a specific chapter.

        :param book_id: The identifier of the book (not part of the URL).
        :param chapter_id: The identifier of the chapter.
        :return: Fully qualified chapter URL.
        """
        return cls.CHAPTER_URL.format(chapter_id=chapter_id)

    def _api_login(self, username: str, password: str) -> bool:
        """
        Log in through the site's login form in two steps.

        Step 1: Load the login page and read the `_csrf-frontend` token.
        Step 2: Post the token together with the credentials.

        :param username: Account name to submit.
        :param password: Account password to submit.
        :return: True if the response reports a successful login,
            False otherwise (including any request error).
        """
        try:
            resp_1 = self.get(self.LOGIN_URL)
            resp_1.raise_for_status()
            tree = etree.HTML(resp_1.text)
            csrf_value = tree.xpath('//input[@name="_csrf-frontend"]/@value')
            csrf_value = csrf_value[0] if csrf_value else ""
            if not csrf_value:
                self.logger.warning("[session] _api_login: CSRF token not found.")
                return False
        except Exception as exc:
            self.logger.warning("[session] _api_login failed at step 1: %s", exc)
            return False

        data_2 = {
            "_csrf-frontend": csrf_value,
            "LoginForm[username]": username,
            "LoginForm[password]": password,
            "LoginForm[rememberMe]": 1,
            "login-button": "",
        }
        # Copy the headers so Origin/Referer apply to this request only.
        temp_headers = dict(self.headers)
        temp_headers["Origin"] = self.BASE_URL
        temp_headers["Referer"] = self.LOGIN_URL
        try:
            resp_2 = self.post(self.LOGIN_URL, data=data_2, headers=temp_headers)
            resp_2.raise_for_status()
            return "登录成功" in resp_2.text
        except Exception as exc:
            self.logger.warning("[session] _api_login failed at step 2: %s", exc)
        return False

    def _check_login_status(self) -> bool:
        """
        Check whether the user is currently logged in by
        inspecting the bookcase page content.

        The session counts as logged out when the bookcase request fails
        or the returned page contains one of the login-page markers.

        :return: True if the user is logged in, False otherwise.
        """
        keywords = [
            "登录 - 百合会",
            "用户名/邮箱",
        ]
        pages = self.get_bookcase()
        if not pages:
            return False
        return not any(kw in pages[0] for kw in keywords)

    def _on_close(self) -> None:
        """
        Save cookies to the state manager before closing.
        """
        state_mgr.set_cookies("yamibo", self.cookies)
|
@@ -6,17 +6,29 @@ novel_downloader.core.savers
|
|
6
6
|
This module defines saver classes for different novel platforms.
|
7
7
|
|
8
8
|
Currently supported platforms:
|
9
|
-
-
|
10
|
-
-
|
11
|
-
-
|
9
|
+
- biquge (笔趣阁)
|
10
|
+
- esjzone (ESJ Zone)
|
11
|
+
- qianbi (铅笔小说)
|
12
|
+
- qidian (起点中文网)
|
13
|
+
- sfacg (SF轻小说)
|
14
|
+
- yamibo (百合会)
|
15
|
+
- common (通用架构)
|
12
16
|
"""
|
13
17
|
|
14
18
|
from .biquge import BiqugeSaver
|
15
19
|
from .common import CommonSaver
|
20
|
+
from .esjzone import EsjzoneSaver
|
21
|
+
from .qianbi import QianbiSaver
|
16
22
|
from .qidian import QidianSaver
|
23
|
+
from .sfacg import SfacgSaver
|
24
|
+
from .yamibo import YamiboSaver
|
17
25
|
|
18
26
|
__all__ = [
|
19
27
|
"BiqugeSaver",
|
20
28
|
"CommonSaver",
|
29
|
+
"EsjzoneSaver",
|
30
|
+
"QianbiSaver",
|
21
31
|
"QidianSaver",
|
32
|
+
"SfacgSaver",
|
33
|
+
"YamiboSaver",
|
22
34
|
]
|
@@ -39,9 +39,10 @@ class BaseSaver(SaverProtocol, abc.ABC):
|
|
39
39
|
"""
|
40
40
|
self._config = config
|
41
41
|
|
42
|
-
self.
|
42
|
+
self._base_cache_dir = Path(config.cache_dir)
|
43
|
+
self._base_raw_data_dir = Path(config.raw_data_dir)
|
43
44
|
self._output_dir = Path(config.output_dir)
|
44
|
-
self.
|
45
|
+
self._base_cache_dir.mkdir(parents=True, exist_ok=True)
|
45
46
|
self._output_dir.mkdir(parents=True, exist_ok=True)
|
46
47
|
|
47
48
|
self._filename_template = config.filename_template
|
@@ -157,11 +158,6 @@ class BaseSaver(SaverProtocol, abc.ABC):
|
|
157
158
|
"""Access the output directory for saving files."""
|
158
159
|
return self._output_dir
|
159
160
|
|
160
|
-
@property
|
161
|
-
def raw_data_dir(self) -> Path:
|
162
|
-
"""Access the raw data directory."""
|
163
|
-
return self._raw_data_dir
|
164
|
-
|
165
161
|
@property
|
166
162
|
def filename_template(self) -> str:
|
167
163
|
"""Access the filename template."""
|
@@ -11,53 +11,30 @@ from __future__ import annotations
|
|
11
11
|
import json
|
12
12
|
from pathlib import Path
|
13
13
|
from typing import TYPE_CHECKING
|
14
|
-
from urllib.parse import unquote, urlparse
|
15
14
|
|
16
15
|
from ebooklib import epub
|
17
16
|
|
18
17
|
from novel_downloader.core.savers.epub_utils import (
|
18
|
+
add_images_from_dir,
|
19
19
|
chapter_txt_to_html,
|
20
20
|
create_css_items,
|
21
21
|
create_volume_intro,
|
22
22
|
generate_book_intro_html,
|
23
23
|
init_epub,
|
24
|
+
inline_remote_images,
|
24
25
|
)
|
25
26
|
from novel_downloader.utils.constants import (
|
26
|
-
DEFAULT_IMAGE_SUFFIX,
|
27
27
|
EPUB_OPTIONS,
|
28
28
|
EPUB_TEXT_FOLDER,
|
29
29
|
)
|
30
30
|
from novel_downloader.utils.file_utils import sanitize_filename
|
31
|
+
from novel_downloader.utils.network import download_image
|
31
32
|
from novel_downloader.utils.text_utils import clean_chapter_title
|
32
33
|
|
33
34
|
if TYPE_CHECKING:
|
34
35
|
from .main_saver import CommonSaver
|
35
36
|
|
36
37
|
|
37
|
-
def _image_url_to_filename(url: str) -> str:
|
38
|
-
"""
|
39
|
-
Parse and sanitize a image filename from a URL.
|
40
|
-
If no filename or suffix exists, fallback to default name and extension.
|
41
|
-
|
42
|
-
:param url: URL string
|
43
|
-
:return: Safe filename string
|
44
|
-
"""
|
45
|
-
if not url:
|
46
|
-
return ""
|
47
|
-
|
48
|
-
parsed_url = urlparse(url)
|
49
|
-
path = unquote(parsed_url.path)
|
50
|
-
filename = Path(path).name
|
51
|
-
|
52
|
-
if not filename:
|
53
|
-
filename = "image"
|
54
|
-
|
55
|
-
if not Path(filename).suffix:
|
56
|
-
filename += DEFAULT_IMAGE_SUFFIX
|
57
|
-
|
58
|
-
return filename
|
59
|
-
|
60
|
-
|
61
38
|
def common_save_as_epub(
|
62
39
|
saver: CommonSaver,
|
63
40
|
book_id: str,
|
@@ -76,11 +53,12 @@ def common_save_as_epub(
|
|
76
53
|
:param book_id: Identifier of the novel (used as subdirectory name).
|
77
54
|
"""
|
78
55
|
TAG = "[saver]"
|
79
|
-
site = saver.site
|
80
56
|
config = saver._config
|
81
57
|
# --- Paths & options ---
|
82
|
-
raw_base = saver.
|
58
|
+
raw_base = saver._raw_data_dir / book_id
|
59
|
+
img_dir = saver._cache_dir / book_id / "images"
|
83
60
|
out_dir = saver.output_dir
|
61
|
+
img_dir.mkdir(parents=True, exist_ok=True)
|
84
62
|
out_dir.mkdir(parents=True, exist_ok=True)
|
85
63
|
|
86
64
|
# --- Load book_info.json ---
|
@@ -100,10 +78,16 @@ def common_save_as_epub(
|
|
100
78
|
# --- Generate intro + cover ---
|
101
79
|
intro_html = generate_book_intro_html(book_info)
|
102
80
|
cover_path: Path | None = None
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
81
|
+
cover_url = book_info.get("cover_url", "")
|
82
|
+
if config.include_cover and cover_url:
|
83
|
+
cover_path = download_image(
|
84
|
+
cover_url,
|
85
|
+
raw_base,
|
86
|
+
target_name="cover",
|
87
|
+
on_exist="overwrite",
|
88
|
+
)
|
89
|
+
if not cover_path:
|
90
|
+
saver.logger.warning("Failed to download cover from %s", cover_url)
|
107
91
|
|
108
92
|
# --- Initialize EPUB ---
|
109
93
|
book, spine, toc_list = init_epub(
|
@@ -162,9 +146,11 @@ def common_save_as_epub(
|
|
162
146
|
continue
|
163
147
|
|
164
148
|
title = clean_chapter_title(chapter_data.get("title", "")) or chap_id
|
149
|
+
content: str = chapter_data.get("content", "")
|
150
|
+
content = inline_remote_images(content, img_dir)
|
165
151
|
chap_html = chapter_txt_to_html(
|
166
152
|
chapter_title=title,
|
167
|
-
chapter_text=
|
153
|
+
chapter_text=content,
|
168
154
|
author_say=chapter_data.get("author_say", ""),
|
169
155
|
)
|
170
156
|
|
@@ -182,6 +168,8 @@ def common_save_as_epub(
|
|
182
168
|
|
183
169
|
toc_list.append((section, chapter_items))
|
184
170
|
|
171
|
+
book = add_images_from_dir(book, img_dir)
|
172
|
+
|
185
173
|
# --- 5. Finalize EPUB ---
|
186
174
|
saver.logger.info("%s Building TOC and spine...", TAG)
|
187
175
|
book.toc = toc_list
|
@@ -41,6 +41,8 @@ class CommonSaver(BaseSaver):
|
|
41
41
|
"""
|
42
42
|
super().__init__(config)
|
43
43
|
self._site = site
|
44
|
+
self._raw_data_dir = self._base_raw_data_dir / site
|
45
|
+
self._cache_dir = self._base_cache_dir / site
|
44
46
|
self._chapter_storage_cache: dict[str, list[ChapterStorage]] = {}
|
45
47
|
self._chap_folders: list[str] = chap_folders or ["chapters"]
|
46
48
|
|
@@ -109,7 +111,7 @@ class CommonSaver(BaseSaver):
|
|
109
111
|
return {}
|
110
112
|
|
111
113
|
def _init_chapter_storages(self, book_id: str) -> None:
|
112
|
-
raw_base = self.
|
114
|
+
raw_base = self._raw_data_dir / book_id
|
113
115
|
self._chapter_storage_cache[book_id] = [
|
114
116
|
ChapterStorage(
|
115
117
|
raw_base=raw_base,
|
@@ -45,9 +45,8 @@ def common_save_as_txt(
|
|
45
45
|
:param book_id: Identifier of the novel (used as subdirectory name).
|
46
46
|
"""
|
47
47
|
TAG = "[saver]"
|
48
|
-
site = saver.site
|
49
48
|
# --- Paths & options ---
|
50
|
-
raw_base = saver.
|
49
|
+
raw_base = saver._raw_data_dir / book_id
|
51
50
|
out_dir = saver.output_dir
|
52
51
|
out_dir.mkdir(parents=True, exist_ok=True)
|
53
52
|
|
@@ -6,21 +6,30 @@ novel_downloader.core.savers.epub_utils
|
|
6
6
|
This package provides utility functions for constructing EPUB files,
|
7
7
|
including:
|
8
8
|
|
9
|
-
- CSS inclusion (
|
10
|
-
-
|
11
|
-
-
|
12
|
-
-
|
9
|
+
- CSS inclusion (css_builder)
|
10
|
+
- Image embedding (image_loader)
|
11
|
+
- EPUB book initialization (initializer)
|
12
|
+
- Chapter text-to-HTML conversion (text_to_html)
|
13
|
+
- Volume intro HTML generation (volume_intro)
|
13
14
|
"""
|
14
15
|
|
15
16
|
from .css_builder import create_css_items
|
17
|
+
from .image_loader import add_images_from_dir, add_images_from_dirs
|
16
18
|
from .initializer import init_epub
|
17
|
-
from .text_to_html import
|
19
|
+
from .text_to_html import (
|
20
|
+
chapter_txt_to_html,
|
21
|
+
generate_book_intro_html,
|
22
|
+
inline_remote_images,
|
23
|
+
)
|
18
24
|
from .volume_intro import create_volume_intro
|
19
25
|
|
20
26
|
__all__ = [
|
21
27
|
"create_css_items",
|
28
|
+
"add_images_from_dir",
|
29
|
+
"add_images_from_dirs",
|
22
30
|
"init_epub",
|
23
31
|
"chapter_txt_to_html",
|
24
32
|
"create_volume_intro",
|
25
33
|
"generate_book_intro_html",
|
34
|
+
"inline_remote_images",
|
26
35
|
]
|
@@ -0,0 +1,89 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""
|
3
|
+
novel_downloader.core.savers.epub_utils.image_loader
|
4
|
+
----------------------------------------------------
|
5
|
+
|
6
|
+
Utilities for embedding image files into an EpubBook.
|
7
|
+
"""
|
8
|
+
|
9
|
+
import logging
|
10
|
+
from collections.abc import Iterable
|
11
|
+
from pathlib import Path
|
12
|
+
|
13
|
+
from ebooklib import epub
|
14
|
+
|
15
|
+
from novel_downloader.utils.constants import EPUB_IMAGE_FOLDER
|
16
|
+
|
17
|
+
logger = logging.getLogger(__name__)
|
18
|
+
|
19
|
+
_SUPPORTED_IMAGE_MEDIA_TYPES: dict[str, str] = {
|
20
|
+
"png": "image/png",
|
21
|
+
"jpg": "image/jpeg",
|
22
|
+
"jpeg": "image/jpeg",
|
23
|
+
"gif": "image/gif",
|
24
|
+
"svg": "image/svg+xml",
|
25
|
+
"webp": "image/webp",
|
26
|
+
}
|
27
|
+
_DEFAULT_IMAGE_MEDIA_TYPE = "image/jpeg"
|
28
|
+
|
29
|
+
|
30
|
+
def add_images_from_dir(
    book: epub.EpubBook,
    image_dir: str | Path,
) -> epub.EpubBook:
    """
    Load every file in `image_dir` into the EPUB's image folder.

    Files are processed in sorted order so the resulting manifest is the
    same on every run. Item ids are derived from the file stem; when two
    files in the directory share a stem (e.g. ``a.png`` and ``a.jpg``) the
    later one gets a numeric suffix so ids stay unique within this call.
    Files with an unrecognised suffix are embedded with the default
    media type and a warning is logged. A file that cannot be read or
    added is logged and skipped.

    :param book: The EpubBook object to modify.
    :param image_dir: Path to the directory containing image files.
    :return: The same EpubBook instance, with images added.
    """
    image_dir = Path(image_dir)
    if not image_dir.is_dir():
        logger.warning("Image directory not found or not a directory: %s", image_dir)
        return book

    used_uids: set[str] = set()

    # iterdir() order is filesystem-dependent; sort for reproducible output.
    for img_path in sorted(image_dir.iterdir()):
        if not img_path.is_file():
            continue

        suffix = img_path.suffix.lower().lstrip(".")
        media_type = _SUPPORTED_IMAGE_MEDIA_TYPES.get(suffix)
        if media_type is None:
            media_type = _DEFAULT_IMAGE_MEDIA_TYPE
            logger.warning(
                "Unknown image suffix '%s' - defaulting media_type to %s",
                suffix,
                media_type,
            )

        # Keep the plain "img_<stem>" id unless another file already took it.
        base_uid = f"img_{img_path.stem}"
        uid = base_uid
        counter = 1
        while uid in used_uids:
            uid = f"{base_uid}_{counter}"
            counter += 1

        try:
            content = img_path.read_bytes()
            item = epub.EpubItem(
                uid=uid,
                file_name=f"{EPUB_IMAGE_FOLDER}/{img_path.name}",
                media_type=media_type,
                content=content,
            )
            book.add_item(item)
            # Reserve the id only once the item is really in the book.
            used_uids.add(uid)
            logger.info("Embedded image: %s", img_path.name)
        except Exception:
            logger.exception("Failed to embed image %s", img_path)

    return book
|
74
|
+
|
75
|
+
|
76
|
+
def add_images_from_dirs(
    book: epub.EpubBook,
    image_dirs: Iterable[str | Path],
) -> epub.EpubBook:
    """
    Embed the images of several directories into one EpubBook.

    Each directory is handed to `add_images_from_dir` in the order given,
    and the book returned by one call is passed on to the next.

    :param book: The EpubBook object to modify.
    :param image_dirs: An iterable of directory paths to scan for images.
    :return: The book returned by the last call, or `book` itself
             when `image_dirs` is empty.
    """
    current = book
    for directory in image_dirs:
        current = add_images_from_dir(current, directory)
    return current
|
@@ -1,6 +1,7 @@
|
|
1
1
|
#!/usr/bin/env python3
|
2
2
|
"""
|
3
3
|
novel_downloader.core.savers.epub_utils.text_to_html
|
4
|
+
----------------------------------------------------
|
4
5
|
|
5
6
|
Module for converting raw chapter text to formatted HTML,
|
6
7
|
with automatic word correction and optional image/tag support.
|
@@ -8,13 +9,23 @@ with automatic word correction and optional image/tag support.
|
|
8
9
|
|
9
10
|
import json
|
10
11
|
import logging
|
12
|
+
import re
|
13
|
+
from pathlib import Path
|
11
14
|
from typing import Any
|
12
15
|
|
13
|
-
from novel_downloader.utils.constants import
|
16
|
+
from novel_downloader.utils.constants import (
|
17
|
+
EPUB_IMAGE_WRAPPER,
|
18
|
+
REPLACE_WORD_MAP_PATH,
|
19
|
+
)
|
20
|
+
from novel_downloader.utils.network import download_image
|
14
21
|
from novel_downloader.utils.text_utils import diff_inline_display
|
15
22
|
|
16
23
|
logger = logging.getLogger(__name__)
|
17
24
|
|
25
|
+
_IMG_TAG_PATTERN = re.compile(
|
26
|
+
r'<img\s+[^>]*src=[\'"]([^\'"]+)[\'"][^>]*>', re.IGNORECASE
|
27
|
+
)
|
28
|
+
|
18
29
|
|
19
30
|
# Load and sort replacement map from JSON
|
20
31
|
try:
|
@@ -87,6 +98,42 @@ def chapter_txt_to_html(
|
|
87
98
|
return "\n".join(html_parts)
|
88
99
|
|
89
100
|
|
101
|
+
def inline_remote_images(
    content: str,
    image_dir: str | Path,
) -> str:
    """
    Swap each <img src="…"> tag in `content` for a local image reference.

    Every tag matched by `_IMG_TAG_PATTERN` has its `src` passed to
    `download_image` (saving into `image_dir`, skipping existing files).
    On success the tag is replaced by EPUB_IMAGE_WRAPPER formatted with
    the saved file's name. If the download returns nothing or an error
    is raised, the problem is logged and the original tag is kept.

    :param content: HTML/text of the chapter containing <img> tags.
    :param image_dir: Directory to save downloaded images into.
    :return: Modified content with local image references.
    """

    def _localize(tag_match: re.Match[str]) -> str:
        original_tag = tag_match.group(0)
        src = tag_match.group(1)
        try:
            # download_image yields a Path on success, a falsy value otherwise
            saved = download_image(
                src, image_dir, target_name=None, on_exist="skip"
            )
            if saved:
                # only the file name goes into the wrapper, not the full path
                return EPUB_IMAGE_WRAPPER.format(filename=saved.name)
            logger.warning(
                "Failed to download image, leaving original tag: %s", src
            )
        except Exception:
            logger.exception("Error processing image URL: %s", src)
        return original_tag

    return _IMG_TAG_PATTERN.sub(_localize, content)
|
135
|
+
|
136
|
+
|
90
137
|
def generate_book_intro_html(book_info: dict[str, Any]) -> str:
|
91
138
|
"""
|
92
139
|
Generate HTML string for a book's information and summary.
|
@@ -1,6 +1,7 @@
|
|
1
1
|
#!/usr/bin/env python3
|
2
2
|
"""
|
3
3
|
novel_downloader.core.savers.epub_utils.volume_intro
|
4
|
+
----------------------------------------------------
|
4
5
|
|
5
6
|
Responsible for generating HTML code for volume introduction pages,
|
6
7
|
including two style variants and a unified entry point.
|
@@ -0,0 +1,25 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""
|
3
|
+
novel_downloader.core.savers.esjzone
|
4
|
+
------------------------------------
|
5
|
+
|
6
|
+
"""
|
7
|
+
|
8
|
+
from novel_downloader.config.models import SaverConfig
|
9
|
+
|
10
|
+
from .common import CommonSaver
|
11
|
+
|
12
|
+
|
13
|
+
class EsjzoneSaver(CommonSaver):
    """
    CommonSaver preconfigured for the "esjzone" site,
    reading chapters from the "chapters" folder.
    """

    def __init__(self, config: SaverConfig):
        """
        :param config: Saver settings passed through to CommonSaver.
        """
        super().__init__(config, site="esjzone", chap_folders=["chapters"])
|
23
|
+
|
24
|
+
|
25
|
+
__all__ = ["EsjzoneSaver"]
|