novel-downloader 1.4.1__py3-none-any.whl → 1.4.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- novel_downloader/__init__.py +1 -1
- novel_downloader/cli/download.py +70 -11
- novel_downloader/config/adapter.py +43 -9
- novel_downloader/core/__init__.py +19 -1
- novel_downloader/core/downloaders/base.py +26 -29
- novel_downloader/core/downloaders/biquge.py +1 -3
- novel_downloader/core/downloaders/common.py +41 -7
- novel_downloader/core/downloaders/esjzone.py +1 -3
- novel_downloader/core/downloaders/linovelib.py +1 -3
- novel_downloader/core/downloaders/qianbi.py +1 -3
- novel_downloader/core/downloaders/qidian.py +61 -37
- novel_downloader/core/downloaders/sfacg.py +1 -3
- novel_downloader/core/downloaders/yamibo.py +1 -3
- novel_downloader/core/exporters/common/epub.py +153 -68
- novel_downloader/core/exporters/epub_util.py +1358 -0
- novel_downloader/core/exporters/linovelib/epub.py +147 -190
- novel_downloader/core/factory/downloader.py +3 -6
- novel_downloader/core/fetchers/base/browser.py +32 -12
- novel_downloader/core/fetchers/esjzone/browser.py +8 -6
- novel_downloader/core/fetchers/qidian/browser.py +62 -10
- novel_downloader/core/fetchers/yamibo/browser.py +3 -3
- novel_downloader/core/interfaces/downloader.py +13 -12
- novel_downloader/core/parsers/qidian/chapter_encrypted.py +11 -2
- novel_downloader/core/parsers/qidian/chapter_normal.py +8 -1
- novel_downloader/core/parsers/qidian/main_parser.py +7 -2
- novel_downloader/core/parsers/qidian/utils/__init__.py +2 -0
- novel_downloader/core/parsers/qidian/utils/helpers.py +9 -0
- novel_downloader/locales/en.json +2 -0
- novel_downloader/locales/zh.json +2 -0
- novel_downloader/models/__init__.py +2 -0
- novel_downloader/models/config.py +9 -0
- novel_downloader/resources/config/settings.toml +1 -0
- novel_downloader/tui/screens/home.py +13 -6
- novel_downloader/utils/constants.py +0 -29
- novel_downloader/utils/{model_loader.py → fontocr/model_loader.py} +2 -2
- novel_downloader/utils/fontocr/ocr_v1.py +2 -1
- novel_downloader/utils/fontocr/ocr_v2.py +2 -1
- novel_downloader/utils/text_utils/__init__.py +8 -1
- novel_downloader/utils/text_utils/text_cleaning.py +51 -0
- {novel_downloader-1.4.1.dist-info → novel_downloader-1.4.3.dist-info}/METADATA +5 -2
- {novel_downloader-1.4.1.dist-info → novel_downloader-1.4.3.dist-info}/RECORD +45 -50
- novel_downloader/core/exporters/epub_utils/__init__.py +0 -40
- novel_downloader/core/exporters/epub_utils/css_builder.py +0 -75
- novel_downloader/core/exporters/epub_utils/image_loader.py +0 -131
- novel_downloader/core/exporters/epub_utils/initializer.py +0 -100
- novel_downloader/core/exporters/epub_utils/text_to_html.py +0 -178
- novel_downloader/core/exporters/epub_utils/volume_intro.py +0 -60
- {novel_downloader-1.4.1.dist-info → novel_downloader-1.4.3.dist-info}/WHEEL +0 -0
- {novel_downloader-1.4.1.dist-info → novel_downloader-1.4.3.dist-info}/entry_points.txt +0 -0
- {novel_downloader-1.4.1.dist-info → novel_downloader-1.4.3.dist-info}/licenses/LICENSE +0 -0
- {novel_downloader-1.4.1.dist-info → novel_downloader-1.4.3.dist-info}/top_level.txt +0 -0
@@ -5,6 +5,7 @@ novel_downloader.core.fetchers.qidian.browser
|
|
5
5
|
|
6
6
|
"""
|
7
7
|
|
8
|
+
import asyncio
|
8
9
|
from typing import Any
|
9
10
|
|
10
11
|
from playwright.async_api import Page
|
@@ -189,18 +190,35 @@ class QidianBrowser(BaseBrowser):
|
|
189
190
|
"""
|
190
191
|
try:
|
191
192
|
page = await self.context.new_page()
|
192
|
-
await page.goto(self.HOMEPAGE_URL, wait_until="networkidle")
|
193
193
|
await self._login_auto(page)
|
194
194
|
await self._dismiss_overlay(page)
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
195
|
+
await page.goto(self.HOMEPAGE_URL, wait_until="networkidle")
|
196
|
+
sign_in_elem = await page.query_selector("#login-box .sign-in")
|
197
|
+
sign_out_elem = await page.query_selector("#login-box .sign-out")
|
198
|
+
|
199
|
+
sign_in_class = (
|
200
|
+
(await sign_in_elem.get_attribute("class") or "")
|
201
|
+
if sign_in_elem
|
202
|
+
else ""
|
203
|
+
)
|
204
|
+
sign_out_class = (
|
205
|
+
(await sign_out_elem.get_attribute("class") or "")
|
206
|
+
if sign_out_elem
|
207
|
+
else ""
|
208
|
+
)
|
209
|
+
|
210
|
+
sign_in_hidden = "hidden" in sign_in_class
|
211
|
+
sign_out_hidden = "hidden" in sign_out_class
|
212
|
+
|
213
|
+
await page.close()
|
214
|
+
|
215
|
+
# if sign_in_visible and not sign_out_visible:
|
216
|
+
if not sign_in_hidden and sign_out_hidden:
|
217
|
+
self.logger.debug("[auth] Detected as logged in.")
|
203
218
|
return True
|
219
|
+
else:
|
220
|
+
self.logger.debug("[auth] Detected as not logged in.")
|
221
|
+
return False
|
204
222
|
except Exception as e:
|
205
223
|
self.logger.warning("[auth] Error while checking login status: %s", e)
|
206
224
|
return False
|
@@ -220,7 +238,10 @@ class QidianBrowser(BaseBrowser):
|
|
220
238
|
|
221
239
|
self.logger.debug("[auth] Overlay mask detected; attempting to close.")
|
222
240
|
|
223
|
-
iframe_element = await page.
|
241
|
+
iframe_element = await page.wait_for_selector(
|
242
|
+
"#loginIfr",
|
243
|
+
timeout=timeout * 1000,
|
244
|
+
)
|
224
245
|
if iframe_element is None:
|
225
246
|
self.logger.debug("[auth] Login iframe not found.")
|
226
247
|
return
|
@@ -261,6 +282,37 @@ class QidianBrowser(BaseBrowser):
|
|
261
282
|
btn = await page.query_selector("#login-btn")
|
262
283
|
if btn and await btn.is_visible():
|
263
284
|
await btn.click()
|
285
|
+
tasks = [
|
286
|
+
asyncio.create_task(
|
287
|
+
page.wait_for_selector(
|
288
|
+
"div.mask",
|
289
|
+
timeout=timeout * 1000,
|
290
|
+
)
|
291
|
+
),
|
292
|
+
asyncio.create_task(
|
293
|
+
page.wait_for_selector(
|
294
|
+
"div.qdlogin-wrap",
|
295
|
+
timeout=timeout * 1000,
|
296
|
+
)
|
297
|
+
),
|
298
|
+
asyncio.create_task(
|
299
|
+
page.wait_for_url(
|
300
|
+
lambda url: "login" not in url,
|
301
|
+
timeout=timeout * 1000,
|
302
|
+
)
|
303
|
+
),
|
304
|
+
]
|
305
|
+
done, pending = await asyncio.wait(
|
306
|
+
tasks,
|
307
|
+
timeout=timeout + 1,
|
308
|
+
return_when=asyncio.FIRST_COMPLETED,
|
309
|
+
)
|
310
|
+
for task in pending:
|
311
|
+
task.cancel()
|
312
|
+
if done:
|
313
|
+
self.logger.debug("[auth] Login flow proceeded after button click.")
|
314
|
+
else:
|
315
|
+
self.logger.warning("[auth] Timeout waiting for login to proceed.")
|
264
316
|
except Exception as e:
|
265
317
|
self.logger.debug("[auth] Failed to click login button: %s", e)
|
266
318
|
return
|
@@ -48,8 +48,8 @@ class YamiboBrowser(BaseBrowser):
|
|
48
48
|
return False
|
49
49
|
|
50
50
|
for i in range(1, attempt + 1):
|
51
|
+
login_page = await self.context.new_page()
|
51
52
|
try:
|
52
|
-
login_page = await self.context.new_page()
|
53
53
|
await login_page.goto(self.LOGIN_URL, wait_until="networkidle")
|
54
54
|
|
55
55
|
await login_page.fill("#loginform-username", username)
|
@@ -68,8 +68,6 @@ class YamiboBrowser(BaseBrowser):
|
|
68
68
|
f"[auth] No URL change after login attempt {i}: {e}"
|
69
69
|
)
|
70
70
|
|
71
|
-
await login_page.close()
|
72
|
-
|
73
71
|
self._is_logged_in = await self._check_login_status()
|
74
72
|
if self._is_logged_in:
|
75
73
|
self.logger.info(f"[auth] Login successful on attempt {i}.")
|
@@ -83,6 +81,8 @@ class YamiboBrowser(BaseBrowser):
|
|
83
81
|
self.logger.error(
|
84
82
|
f"[auth] Unexpected error during login attempt {i}: {e}"
|
85
83
|
)
|
84
|
+
finally:
|
85
|
+
await login_page.close()
|
86
86
|
|
87
87
|
self.logger.error(f"[auth] Login failed after {attempt} attempt(s).")
|
88
88
|
return False
|
@@ -10,45 +10,46 @@ that outlines the expected behavior of any downloader class.
|
|
10
10
|
from collections.abc import Awaitable, Callable
|
11
11
|
from typing import Any, Protocol, runtime_checkable
|
12
12
|
|
13
|
+
from novel_downloader.models import BookConfig
|
14
|
+
|
13
15
|
|
14
16
|
@runtime_checkable
|
15
17
|
class DownloaderProtocol(Protocol):
|
16
18
|
"""
|
17
|
-
Protocol for
|
19
|
+
Protocol for async downloader implementations.
|
18
20
|
|
19
|
-
|
20
|
-
|
21
|
-
as well as optional pre-download hooks.
|
21
|
+
Uses BookConfig (with book_id, optional start_id/end_id/ignore_ids)
|
22
|
+
for both single and batch downloads.
|
22
23
|
"""
|
23
24
|
|
24
25
|
async def download(
|
25
26
|
self,
|
26
|
-
|
27
|
+
book: BookConfig,
|
27
28
|
*,
|
28
29
|
progress_hook: Callable[[int, int], Awaitable[None]] | None = None,
|
29
30
|
**kwargs: Any,
|
30
31
|
) -> None:
|
31
32
|
"""
|
32
|
-
Download
|
33
|
+
Download a single book.
|
33
34
|
|
34
|
-
:param
|
35
|
-
:param progress_hook:
|
35
|
+
:param book: BookConfig with at least 'book_id'.
|
36
|
+
:param progress_hook: Optional async callback after each chapter.
|
36
37
|
args: completed_count, total_count.
|
37
38
|
"""
|
38
39
|
...
|
39
40
|
|
40
41
|
async def download_many(
|
41
42
|
self,
|
42
|
-
|
43
|
+
books: list[BookConfig],
|
43
44
|
*,
|
44
45
|
progress_hook: Callable[[int, int], Awaitable[None]] | None = None,
|
45
46
|
**kwargs: Any,
|
46
47
|
) -> None:
|
47
48
|
"""
|
48
|
-
|
49
|
+
Download multiple books.
|
49
50
|
|
50
|
-
:param
|
51
|
-
:param progress_hook:
|
51
|
+
:param books: List of BookConfig entries.
|
52
|
+
:param progress_hook: Optional async callback after each chapter.
|
52
53
|
args: completed_count, total_count.
|
53
54
|
"""
|
54
55
|
...
|
@@ -19,12 +19,16 @@ from lxml import html
|
|
19
19
|
|
20
20
|
from novel_downloader.models import ChapterDict
|
21
21
|
from novel_downloader.utils.network import download_font_file
|
22
|
-
from novel_downloader.utils.text_utils import
|
22
|
+
from novel_downloader.utils.text_utils import (
|
23
|
+
apply_font_mapping,
|
24
|
+
truncate_half_lines,
|
25
|
+
)
|
23
26
|
|
24
27
|
from .utils import (
|
25
28
|
extract_chapter_info,
|
26
29
|
find_ssr_page_context,
|
27
30
|
get_decryptor,
|
31
|
+
is_duplicated,
|
28
32
|
vip_status,
|
29
33
|
)
|
30
34
|
|
@@ -76,6 +80,7 @@ def parse_encrypted_chapter(
|
|
76
80
|
fixedFontWoff2_url = chapter_info["fixedFontWoff2"]
|
77
81
|
|
78
82
|
title = chapter_info.get("chapterName", "Untitled")
|
83
|
+
duplicated = is_duplicated(ssr_data)
|
79
84
|
raw_html = chapter_info.get("content", "")
|
80
85
|
chapter_id = chapter_info.get("chapterId", chapter_id)
|
81
86
|
fkp = chapter_info.get("fkp", "")
|
@@ -83,7 +88,7 @@ def parse_encrypted_chapter(
|
|
83
88
|
update_time = chapter_info.get("updateTime", "")
|
84
89
|
update_timestamp = chapter_info.get("updateTimestamp", 0)
|
85
90
|
modify_time = chapter_info.get("modifyTime", 0)
|
86
|
-
word_count = chapter_info.get("
|
91
|
+
word_count = chapter_info.get("actualWords", 0)
|
87
92
|
seq = chapter_info.get("seq", None)
|
88
93
|
volume = chapter_info.get("extra", {}).get("volumeName", "")
|
89
94
|
|
@@ -177,6 +182,9 @@ def parse_encrypted_chapter(
|
|
177
182
|
final_paragraphs_str = "\n\n".join(
|
178
183
|
line.strip() for line in original_text.splitlines() if line.strip()
|
179
184
|
)
|
185
|
+
if parser._use_truncation and duplicated:
|
186
|
+
final_paragraphs_str = truncate_half_lines(final_paragraphs_str)
|
187
|
+
|
180
188
|
return {
|
181
189
|
"id": str(chapter_id),
|
182
190
|
"title": str(title),
|
@@ -187,6 +195,7 @@ def parse_encrypted_chapter(
|
|
187
195
|
"update_timestamp": update_timestamp,
|
188
196
|
"modify_time": modify_time,
|
189
197
|
"word_count": word_count,
|
198
|
+
"duplicated": duplicated,
|
190
199
|
"seq": seq,
|
191
200
|
"volume": volume,
|
192
201
|
"encrypted": True,
|
@@ -15,11 +15,13 @@ from typing import TYPE_CHECKING
|
|
15
15
|
from lxml import html
|
16
16
|
|
17
17
|
from novel_downloader.models import ChapterDict
|
18
|
+
from novel_downloader.utils.text_utils import truncate_half_lines
|
18
19
|
|
19
20
|
from .utils import (
|
20
21
|
extract_chapter_info,
|
21
22
|
find_ssr_page_context,
|
22
23
|
get_decryptor,
|
24
|
+
is_duplicated,
|
23
25
|
vip_status,
|
24
26
|
)
|
25
27
|
|
@@ -51,6 +53,7 @@ def parse_normal_chapter(
|
|
51
53
|
return None
|
52
54
|
|
53
55
|
title = chapter_info.get("chapterName", "Untitled")
|
56
|
+
duplicated = is_duplicated(ssr_data)
|
54
57
|
raw_html = chapter_info.get("content", "")
|
55
58
|
chapter_id = chapter_info.get("chapterId", chapter_id)
|
56
59
|
fkp = chapter_info.get("fkp", "")
|
@@ -58,7 +61,7 @@ def parse_normal_chapter(
|
|
58
61
|
update_time = chapter_info.get("updateTime", "")
|
59
62
|
update_timestamp = chapter_info.get("updateTimestamp", 0)
|
60
63
|
modify_time = chapter_info.get("modifyTime", 0)
|
61
|
-
word_count = chapter_info.get("
|
64
|
+
word_count = chapter_info.get("actualWords", 0)
|
62
65
|
seq = chapter_info.get("seq", None)
|
63
66
|
volume = chapter_info.get("extra", {}).get("volumeName", "")
|
64
67
|
|
@@ -74,6 +77,9 @@ def parse_normal_chapter(
|
|
74
77
|
if not chapter_text:
|
75
78
|
return None
|
76
79
|
|
80
|
+
if parser._use_truncation and duplicated:
|
81
|
+
chapter_text = truncate_half_lines(chapter_text)
|
82
|
+
|
77
83
|
return {
|
78
84
|
"id": str(chapter_id),
|
79
85
|
"title": title,
|
@@ -84,6 +90,7 @@ def parse_normal_chapter(
|
|
84
90
|
"update_timestamp": update_timestamp,
|
85
91
|
"modify_time": modify_time,
|
86
92
|
"word_count": word_count,
|
93
|
+
"duplicated": duplicated,
|
87
94
|
"seq": seq,
|
88
95
|
"volume": volume,
|
89
96
|
"encrypted": False,
|
@@ -32,7 +32,11 @@ class QidianParser(BaseParser):
|
|
32
32
|
Parser for Qidian site.
|
33
33
|
"""
|
34
34
|
|
35
|
-
def __init__(
|
35
|
+
def __init__(
|
36
|
+
self,
|
37
|
+
config: ParserConfig,
|
38
|
+
fuid: str = "",
|
39
|
+
):
|
36
40
|
"""
|
37
41
|
Initialize the QidianParser with the given configuration.
|
38
42
|
|
@@ -41,6 +45,7 @@ class QidianParser(BaseParser):
|
|
41
45
|
super().__init__(config)
|
42
46
|
|
43
47
|
# Extract and store parser flags from config
|
48
|
+
self._use_truncation = config.use_truncation
|
44
49
|
self._decode_font: bool = config.decode_font
|
45
50
|
self._save_font_debug: bool = config.save_font_debug
|
46
51
|
|
@@ -52,7 +57,7 @@ class QidianParser(BaseParser):
|
|
52
57
|
DATA_DIR / "qidian" / "browser_state.cookies",
|
53
58
|
DATA_DIR / "qidian" / "session_state.cookies",
|
54
59
|
]
|
55
|
-
self._fuid: str = find_cookie_value(state_files, "ywguid")
|
60
|
+
self._fuid: str = fuid or find_cookie_value(state_files, "ywguid")
|
56
61
|
|
57
62
|
self._font_ocr: FontOCR | None = None
|
58
63
|
if self._decode_font:
|
@@ -9,6 +9,7 @@ from .helpers import (
|
|
9
9
|
can_view_chapter,
|
10
10
|
extract_chapter_info,
|
11
11
|
find_ssr_page_context,
|
12
|
+
is_duplicated,
|
12
13
|
is_encrypted,
|
13
14
|
is_restricted_page,
|
14
15
|
vip_status,
|
@@ -22,6 +23,7 @@ __all__ = [
|
|
22
23
|
"vip_status",
|
23
24
|
"can_view_chapter",
|
24
25
|
"is_encrypted",
|
26
|
+
"is_duplicated",
|
25
27
|
"QidianNodeDecryptor",
|
26
28
|
"get_decryptor",
|
27
29
|
]
|
@@ -89,6 +89,15 @@ def can_view_chapter(ssr_data: dict[str, Any]) -> bool:
|
|
89
89
|
return not (vip_status == 1 and is_buy == 0)
|
90
90
|
|
91
91
|
|
92
|
+
def is_duplicated(ssr_data: dict[str, Any]) -> bool:
    """
    Report whether the chapter carries the "duplicated" flag.

    The chapter info is obtained from ``ssr_data`` through
    ``extract_chapter_info``; its ``eFW`` entry (taken as 0 when absent)
    is then compared against 1.

    :param ssr_data: Data dictionary handed to ``extract_chapter_info``.
    :return: True only when the ``eFW`` value equals 1.
    """
    info = extract_chapter_info(ssr_data)
    return bool(info.get("eFW", 0) == 1)
|
99
|
+
|
100
|
+
|
92
101
|
def is_encrypted(content: str | dict[str, Any]) -> bool:
|
93
102
|
"""
|
94
103
|
Return True if content is encrypted.
|
novel_downloader/locales/en.json
CHANGED
@@ -66,6 +66,8 @@
|
|
66
66
|
"download_downloading": "Downloading book {book_id} from {site}...",
|
67
67
|
"download_prompt_parse": "Parse...",
|
68
68
|
"download_book_ids": "One or more book IDs to process",
|
69
|
+
"download_option_start": "Start chapter ID (applies to the first book ID only)",
|
70
|
+
"download_option_end": "End chapter ID (applies to the first book ID only)",
|
69
71
|
"login_description": "Description",
|
70
72
|
"login_hint": "Hint",
|
71
73
|
"login_manual_prompt": ">> Please complete login in your browser and press Enter to continue...",
|
novel_downloader/locales/zh.json
CHANGED
@@ -66,6 +66,8 @@
|
|
66
66
|
"download_downloading": "正在从 {site} 下载书籍 {book_id}...",
|
67
67
|
"download_prompt_parse": "结束...",
|
68
68
|
"download_book_ids": "要处理的一个或多个小说 ID",
|
69
|
+
"download_option_start": "起始章节 ID (仅用于第一个书籍 ID)",
|
70
|
+
"download_option_end": "结束章节 ID (仅用于第一个书籍 ID)",
|
69
71
|
"login_description": "说明",
|
70
72
|
"login_hint": "提示",
|
71
73
|
"login_manual_prompt": ">> 请在浏览器中完成登录后按回车继续...",
|
@@ -8,6 +8,7 @@ novel_downloader.models
|
|
8
8
|
from .browser import NewContextOptions
|
9
9
|
from .chapter import ChapterDict
|
10
10
|
from .config import (
|
11
|
+
BookConfig,
|
11
12
|
DownloaderConfig,
|
12
13
|
ExporterConfig,
|
13
14
|
FetcherConfig,
|
@@ -39,6 +40,7 @@ from .types import (
|
|
39
40
|
|
40
41
|
__all__ = [
|
41
42
|
"NewContextOptions",
|
43
|
+
"BookConfig",
|
42
44
|
"DownloaderConfig",
|
43
45
|
"ParserConfig",
|
44
46
|
"FetcherConfig",
|
@@ -17,6 +17,7 @@ strongly typed Python objects for safer and cleaner access.
|
|
17
17
|
"""
|
18
18
|
|
19
19
|
from dataclasses import dataclass
|
20
|
+
from typing import NotRequired, TypedDict
|
20
21
|
|
21
22
|
from .types import (
|
22
23
|
BrowserType,
|
@@ -67,6 +68,7 @@ class DownloaderConfig:
|
|
67
68
|
@dataclass
|
68
69
|
class ParserConfig:
|
69
70
|
cache_dir: str = "./novel_cache"
|
71
|
+
use_truncation: bool = True
|
70
72
|
decode_font: bool = False
|
71
73
|
use_freq: bool = False
|
72
74
|
use_ocr: bool = True
|
@@ -98,3 +100,10 @@ class ExporterConfig:
|
|
98
100
|
include_toc: bool = False
|
99
101
|
include_picture: bool = False
|
100
102
|
split_mode: SplitMode = "book"
|
103
|
+
|
104
|
+
|
105
|
+
class BookConfig(TypedDict):
    """
    Typed dictionary describing a single book to download.

    Only ``book_id`` is required; every other key may be omitted.
    """

    # Identifier of the book (the only mandatory key).
    book_id: str
    # Optional start chapter ID.
    # NOTE(review): presumably the first chapter to process - confirm
    # against the downloader implementations.
    start_id: NotRequired[str]
    # Optional end chapter ID.
    # NOTE(review): presumably the last chapter to process - confirm
    # whether the bound is inclusive.
    end_id: NotRequired[str]
    # Optional list of IDs to ignore.
    # NOTE(review): presumably chapter IDs to skip - confirm against callers.
    ignore_ids: NotRequired[list[str]]
|
@@ -65,7 +65,13 @@ class HomeScreen(Screen): # type: ignore[misc]
|
|
65
65
|
return
|
66
66
|
id_list = {x.strip() for x in ids.split(",") if x.strip()}
|
67
67
|
adapter = ConfigAdapter(config=self.app.config, site=str(site))
|
68
|
-
asyncio.create_task(self._download(adapter, str(site), id_list))
|
68
|
+
# asyncio.create_task(self._download(adapter, str(site), id_list))
|
69
|
+
self.run_worker(
|
70
|
+
self._download(adapter, str(site), id_list),
|
71
|
+
name="download",
|
72
|
+
group="downloads",
|
73
|
+
description="正在下载书籍...",
|
74
|
+
)
|
69
75
|
|
70
76
|
def _make_title_bar(self) -> Horizontal:
|
71
77
|
return Horizontal(
|
@@ -106,12 +112,12 @@ class HomeScreen(Screen): # type: ignore[misc]
|
|
106
112
|
self,
|
107
113
|
adapter: ConfigAdapter,
|
108
114
|
site: str,
|
109
|
-
|
115
|
+
book_ids: set[str],
|
110
116
|
) -> None:
|
111
117
|
btn = self.query_one("#download", Button)
|
112
118
|
btn.disabled = True
|
113
119
|
try:
|
114
|
-
logging.info(f"下载请求: {site} | {
|
120
|
+
logging.info(f"下载请求: {site} | {book_ids}")
|
115
121
|
downloader_cfg = adapter.get_downloader_config()
|
116
122
|
fetcher_cfg = adapter.get_fetcher_config()
|
117
123
|
parser_cfg = adapter.get_parser_config()
|
@@ -134,16 +140,17 @@ class HomeScreen(Screen): # type: ignore[misc]
|
|
134
140
|
downloader = get_downloader(
|
135
141
|
fetcher=fetcher,
|
136
142
|
parser=parser,
|
137
|
-
exporter=exporter,
|
138
143
|
site=site,
|
139
144
|
config=downloader_cfg,
|
140
145
|
)
|
141
146
|
|
142
|
-
for book_id in
|
147
|
+
for book_id in book_ids:
|
143
148
|
logging.info(t("download_downloading", book_id=book_id, site=site))
|
144
149
|
await downloader.download(
|
145
|
-
book_id,
|
150
|
+
{"book_id": book_id},
|
151
|
+
progress_hook=self._update_progress,
|
146
152
|
)
|
153
|
+
await asyncio.to_thread(exporter.export, book_id)
|
147
154
|
|
148
155
|
if downloader_cfg.login_required and fetcher.is_logged_in:
|
149
156
|
await fetcher.save_state()
|
@@ -116,35 +116,6 @@ QD_DECRYPT_SCRIPT_PATH = files("novel_downloader.resources.js_scripts").joinpath
|
|
116
116
|
# Text Files
|
117
117
|
BLACKLIST_PATH = files("novel_downloader.resources.text").joinpath("blacklist.txt")
|
118
118
|
|
119
|
-
# -----------------------------------------------------------------------------
|
120
|
-
# EPUB defaults
|
121
|
-
# -----------------------------------------------------------------------------
|
122
|
-
EPUB_IMAGE_FOLDER = "Images"
|
123
|
-
EPUB_TEXT_FOLDER = "Text"
|
124
|
-
|
125
|
-
EPUB_IMAGE_WRAPPER = (
|
126
|
-
'<div class="duokan-image-single illus"><img src="../Images/{filename}" /></div>'
|
127
|
-
)
|
128
|
-
|
129
|
-
EPUB_OPTIONS = {
|
130
|
-
# guide 是 EPUB 2 的一个部分, 包含封面, 目录, 索引等重要导航信息
|
131
|
-
"epub2_guide": True,
|
132
|
-
# landmark 是 EPUB 3 用来标识重要页面 (如目录, 封面, 起始页) 的 <nav> 结构
|
133
|
-
"epub3_landmark": True,
|
134
|
-
# EPUB 3 允许提供一个 page list, 让电子书在不同设备上仍然保持相对一致的分页结构
|
135
|
-
"epub3_pages": True,
|
136
|
-
# 这个名字会出现在 EPUB 阅读器的导航栏
|
137
|
-
"landmark_title": "Guide",
|
138
|
-
# 这个名字会显示在 EPUB 阅读器的分页导航栏
|
139
|
-
"pages_title": "Pages",
|
140
|
-
# 是否根据 book.spine 的排列顺序自动设置 EPUB 阅读器的 page-progression-direction
|
141
|
-
"spine_direction": True,
|
142
|
-
# 控制 EPUB 阅读器的默认翻页方向 (LTR 或 RTL)
|
143
|
-
"package_direction": False,
|
144
|
-
# 是否为 EPUB 书籍中的章节 添加播放顺序
|
145
|
-
"play_order": {"enabled": True, "start_from": 1},
|
146
|
-
}
|
147
|
-
|
148
119
|
# ---------------------------------------------------------------------
|
149
120
|
# Pretrained model registry (e.g. used in font recovery or OCR)
|
150
121
|
# ---------------------------------------------------------------------
|
@@ -25,7 +25,8 @@ from novel_downloader.utils.constants import (
|
|
25
25
|
REC_IMAGE_SHAPE_MAP,
|
26
26
|
)
|
27
27
|
from novel_downloader.utils.hash_store import img_hash_store
|
28
|
-
|
28
|
+
|
29
|
+
from .model_loader import get_rec_chinese_char_model_dir
|
29
30
|
|
30
31
|
logger = logging.getLogger(__name__)
|
31
32
|
|
@@ -36,7 +36,8 @@ from novel_downloader.utils.constants import (
|
|
36
36
|
REC_IMAGE_SHAPE_MAP,
|
37
37
|
)
|
38
38
|
from novel_downloader.utils.hash_store import img_hash_store
|
39
|
-
|
39
|
+
|
40
|
+
from .model_loader import (
|
40
41
|
get_rec_char_vector_dir,
|
41
42
|
get_rec_chinese_char_model_dir,
|
42
43
|
)
|
@@ -15,12 +15,19 @@ Submodules:
|
|
15
15
|
from .chapter_formatting import format_chapter
|
16
16
|
from .diff_display import diff_inline_display
|
17
17
|
from .font_mapping import apply_font_mapping
|
18
|
-
from .text_cleaning import
|
18
|
+
from .text_cleaning import (
|
19
|
+
clean_chapter_title,
|
20
|
+
content_prefix,
|
21
|
+
is_promotional_line,
|
22
|
+
truncate_half_lines,
|
23
|
+
)
|
19
24
|
|
20
25
|
__all__ = [
|
21
26
|
"apply_font_mapping",
|
22
27
|
"format_chapter",
|
23
28
|
"clean_chapter_title",
|
24
29
|
"is_promotional_line",
|
30
|
+
"content_prefix",
|
31
|
+
"truncate_half_lines",
|
25
32
|
"diff_inline_display",
|
26
33
|
]
|
@@ -6,6 +6,7 @@ novel_downloader.utils.text_utils.text_cleaning
|
|
6
6
|
Tools for detecting and removing promotional or ad-like content from text.
|
7
7
|
"""
|
8
8
|
|
9
|
+
import math
|
9
10
|
import re
|
10
11
|
|
11
12
|
from novel_downloader.utils.file_utils.io import load_blacklisted_words
|
@@ -50,7 +51,57 @@ def is_promotional_line(line: str) -> bool:
|
|
50
51
|
return False
|
51
52
|
|
52
53
|
|
54
|
+
def content_prefix(
|
55
|
+
text: str,
|
56
|
+
n: int,
|
57
|
+
ignore_chars: set[str] | None = None,
|
58
|
+
) -> str:
|
59
|
+
"""
|
60
|
+
Return the prefix of `text` containing the first `n` non-ignored characters.
|
61
|
+
|
62
|
+
:param text: The full input string.
|
63
|
+
:param n: Number of content characters to include.
|
64
|
+
:param ignore_chars: Characters to ignore when counting content.
|
65
|
+
:return: Truncated string preserving original whitespace and line breaks.
|
66
|
+
"""
|
67
|
+
ignore = ignore_chars or set()
|
68
|
+
cnt = 0
|
69
|
+
|
70
|
+
for i, ch in enumerate(text):
|
71
|
+
if ch not in ignore:
|
72
|
+
cnt += 1
|
73
|
+
if cnt >= n:
|
74
|
+
return text[: i + 1]
|
75
|
+
|
76
|
+
return text
|
77
|
+
|
78
|
+
|
79
|
+
def truncate_half_lines(text: str) -> str:
    """
    Keep the first half of the lines (rounded up), preserving line breaks.

    Only lines with non-whitespace content count toward the half; blank
    lines that occur before the cut-off are kept in place.

    :param text: Full input text
    :return: Truncated text with first half of lines
    """
    all_lines = text.splitlines()
    quota = math.ceil(sum(1 for ln in all_lines if ln.strip()) / 2)

    seen = 0
    for idx, ln in enumerate(all_lines):
        if ln.strip():
            seen += 1
        if seen >= quota:
            # Cut right after the line that satisfied the quota.
            return "\n".join(all_lines[: idx + 1])

    return "\n".join(all_lines)
|
100
|
+
|
101
|
+
|
53
102
|
__all__ = [
|
54
103
|
"clean_chapter_title",
|
55
104
|
"is_promotional_line",
|
105
|
+
"content_prefix",
|
106
|
+
"truncate_half_lines",
|
56
107
|
]
|