novel-downloader 1.4.5__py3-none-any.whl → 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- novel_downloader/__init__.py +1 -1
- novel_downloader/cli/__init__.py +2 -4
- novel_downloader/cli/clean.py +21 -88
- novel_downloader/cli/config.py +27 -104
- novel_downloader/cli/download.py +78 -66
- novel_downloader/cli/export.py +20 -21
- novel_downloader/cli/main.py +3 -1
- novel_downloader/cli/search.py +120 -0
- novel_downloader/cli/ui.py +156 -0
- novel_downloader/config/__init__.py +10 -14
- novel_downloader/config/adapter.py +195 -99
- novel_downloader/config/{loader.py → file_io.py} +53 -27
- novel_downloader/core/__init__.py +14 -13
- novel_downloader/core/archived/deqixs/fetcher.py +115 -0
- novel_downloader/core/archived/deqixs/parser.py +132 -0
- novel_downloader/core/archived/deqixs/searcher.py +89 -0
- novel_downloader/core/archived/qidian/searcher.py +79 -0
- novel_downloader/core/archived/wanbengo/searcher.py +98 -0
- novel_downloader/core/archived/xshbook/searcher.py +93 -0
- novel_downloader/core/downloaders/__init__.py +8 -30
- novel_downloader/core/downloaders/base.py +182 -30
- novel_downloader/core/downloaders/common.py +217 -384
- novel_downloader/core/downloaders/qianbi.py +332 -4
- novel_downloader/core/downloaders/qidian.py +250 -290
- novel_downloader/core/downloaders/registry.py +69 -0
- novel_downloader/core/downloaders/signals.py +46 -0
- novel_downloader/core/exporters/__init__.py +8 -26
- novel_downloader/core/exporters/base.py +107 -31
- novel_downloader/core/exporters/common/__init__.py +3 -4
- novel_downloader/core/exporters/common/epub.py +92 -171
- novel_downloader/core/exporters/common/main_exporter.py +14 -67
- novel_downloader/core/exporters/common/txt.py +90 -86
- novel_downloader/core/exporters/epub_util.py +184 -1327
- novel_downloader/core/exporters/linovelib/__init__.py +3 -2
- novel_downloader/core/exporters/linovelib/epub.py +165 -222
- novel_downloader/core/exporters/linovelib/main_exporter.py +10 -71
- novel_downloader/core/exporters/linovelib/txt.py +76 -66
- novel_downloader/core/exporters/qidian.py +15 -11
- novel_downloader/core/exporters/registry.py +55 -0
- novel_downloader/core/exporters/txt_util.py +67 -0
- novel_downloader/core/fetchers/__init__.py +57 -56
- novel_downloader/core/fetchers/aaatxt.py +83 -0
- novel_downloader/core/fetchers/{biquge/session.py → b520.py} +10 -10
- novel_downloader/core/fetchers/{base/session.py → base.py} +63 -47
- novel_downloader/core/fetchers/biquyuedu.py +83 -0
- novel_downloader/core/fetchers/dxmwx.py +110 -0
- novel_downloader/core/fetchers/eightnovel.py +139 -0
- novel_downloader/core/fetchers/{esjzone/session.py → esjzone.py} +23 -11
- novel_downloader/core/fetchers/guidaye.py +85 -0
- novel_downloader/core/fetchers/hetushu.py +92 -0
- novel_downloader/core/fetchers/{qianbi/browser.py → i25zw.py} +22 -26
- novel_downloader/core/fetchers/ixdzs8.py +113 -0
- novel_downloader/core/fetchers/jpxs123.py +101 -0
- novel_downloader/core/fetchers/{biquge/browser.py → lewenn.py} +15 -15
- novel_downloader/core/fetchers/{linovelib/session.py → linovelib.py} +16 -12
- novel_downloader/core/fetchers/piaotia.py +105 -0
- novel_downloader/core/fetchers/qbtr.py +101 -0
- novel_downloader/core/fetchers/{qianbi/session.py → qianbi.py} +9 -9
- novel_downloader/core/fetchers/{qidian/session.py → qidian.py} +55 -40
- novel_downloader/core/fetchers/quanben5.py +92 -0
- novel_downloader/core/fetchers/{base/rate_limiter.py → rate_limiter.py} +2 -2
- novel_downloader/core/fetchers/registry.py +60 -0
- novel_downloader/core/fetchers/{sfacg/session.py → sfacg.py} +11 -9
- novel_downloader/core/fetchers/shencou.py +106 -0
- novel_downloader/core/fetchers/{common/browser.py → shuhaige.py} +24 -19
- novel_downloader/core/fetchers/tongrenquan.py +84 -0
- novel_downloader/core/fetchers/ttkan.py +95 -0
- novel_downloader/core/fetchers/{common/session.py → wanbengo.py} +21 -17
- novel_downloader/core/fetchers/xiaoshuowu.py +106 -0
- novel_downloader/core/fetchers/xiguashuwu.py +177 -0
- novel_downloader/core/fetchers/xs63b.py +171 -0
- novel_downloader/core/fetchers/xshbook.py +85 -0
- novel_downloader/core/fetchers/{yamibo/session.py → yamibo.py} +23 -11
- novel_downloader/core/fetchers/yibige.py +114 -0
- novel_downloader/core/interfaces/__init__.py +8 -14
- novel_downloader/core/interfaces/downloader.py +6 -2
- novel_downloader/core/interfaces/exporter.py +7 -7
- novel_downloader/core/interfaces/fetcher.py +4 -17
- novel_downloader/core/interfaces/parser.py +5 -6
- novel_downloader/core/interfaces/searcher.py +26 -0
- novel_downloader/core/parsers/__init__.py +58 -22
- novel_downloader/core/parsers/aaatxt.py +132 -0
- novel_downloader/core/parsers/b520.py +116 -0
- novel_downloader/core/parsers/base.py +63 -12
- novel_downloader/core/parsers/biquyuedu.py +133 -0
- novel_downloader/core/parsers/dxmwx.py +162 -0
- novel_downloader/core/parsers/eightnovel.py +224 -0
- novel_downloader/core/parsers/{esjzone/main_parser.py → esjzone.py} +67 -67
- novel_downloader/core/parsers/guidaye.py +128 -0
- novel_downloader/core/parsers/hetushu.py +139 -0
- novel_downloader/core/parsers/i25zw.py +137 -0
- novel_downloader/core/parsers/ixdzs8.py +186 -0
- novel_downloader/core/parsers/jpxs123.py +137 -0
- novel_downloader/core/parsers/lewenn.py +142 -0
- novel_downloader/core/parsers/{linovelib/main_parser.py → linovelib.py} +54 -65
- novel_downloader/core/parsers/piaotia.py +189 -0
- novel_downloader/core/parsers/qbtr.py +136 -0
- novel_downloader/core/parsers/{qianbi/main_parser.py → qianbi.py} +54 -51
- novel_downloader/core/parsers/qidian/__init__.py +2 -2
- novel_downloader/core/parsers/qidian/book_info_parser.py +58 -59
- novel_downloader/core/parsers/qidian/chapter_encrypted.py +290 -346
- novel_downloader/core/parsers/qidian/chapter_normal.py +25 -56
- novel_downloader/core/parsers/qidian/main_parser.py +19 -57
- novel_downloader/core/parsers/qidian/utils/__init__.py +12 -11
- novel_downloader/core/parsers/qidian/utils/decryptor_fetcher.py +6 -7
- novel_downloader/core/parsers/qidian/utils/fontmap_recover.py +143 -0
- novel_downloader/core/parsers/qidian/utils/helpers.py +0 -4
- novel_downloader/core/parsers/qidian/utils/node_decryptor.py +2 -2
- novel_downloader/core/parsers/quanben5.py +103 -0
- novel_downloader/core/parsers/registry.py +57 -0
- novel_downloader/core/parsers/{sfacg/main_parser.py → sfacg.py} +46 -48
- novel_downloader/core/parsers/shencou.py +215 -0
- novel_downloader/core/parsers/shuhaige.py +111 -0
- novel_downloader/core/parsers/tongrenquan.py +116 -0
- novel_downloader/core/parsers/ttkan.py +132 -0
- novel_downloader/core/parsers/wanbengo.py +191 -0
- novel_downloader/core/parsers/xiaoshuowu.py +173 -0
- novel_downloader/core/parsers/xiguashuwu.py +435 -0
- novel_downloader/core/parsers/xs63b.py +161 -0
- novel_downloader/core/parsers/xshbook.py +134 -0
- novel_downloader/core/parsers/yamibo.py +155 -0
- novel_downloader/core/parsers/yibige.py +166 -0
- novel_downloader/core/searchers/__init__.py +51 -0
- novel_downloader/core/searchers/aaatxt.py +107 -0
- novel_downloader/core/searchers/b520.py +84 -0
- novel_downloader/core/searchers/base.py +168 -0
- novel_downloader/core/searchers/dxmwx.py +105 -0
- novel_downloader/core/searchers/eightnovel.py +84 -0
- novel_downloader/core/searchers/esjzone.py +102 -0
- novel_downloader/core/searchers/hetushu.py +92 -0
- novel_downloader/core/searchers/i25zw.py +93 -0
- novel_downloader/core/searchers/ixdzs8.py +107 -0
- novel_downloader/core/searchers/jpxs123.py +107 -0
- novel_downloader/core/searchers/piaotia.py +100 -0
- novel_downloader/core/searchers/qbtr.py +106 -0
- novel_downloader/core/searchers/qianbi.py +165 -0
- novel_downloader/core/searchers/quanben5.py +144 -0
- novel_downloader/core/searchers/registry.py +79 -0
- novel_downloader/core/searchers/shuhaige.py +124 -0
- novel_downloader/core/searchers/tongrenquan.py +110 -0
- novel_downloader/core/searchers/ttkan.py +92 -0
- novel_downloader/core/searchers/xiaoshuowu.py +122 -0
- novel_downloader/core/searchers/xiguashuwu.py +95 -0
- novel_downloader/core/searchers/xs63b.py +104 -0
- novel_downloader/locales/en.json +36 -79
- novel_downloader/locales/zh.json +37 -80
- novel_downloader/models/__init__.py +23 -50
- novel_downloader/models/book.py +44 -0
- novel_downloader/models/config.py +16 -43
- novel_downloader/models/login.py +1 -1
- novel_downloader/models/search.py +21 -0
- novel_downloader/resources/config/settings.toml +39 -74
- novel_downloader/resources/css_styles/intro.css +83 -0
- novel_downloader/resources/css_styles/main.css +30 -89
- novel_downloader/resources/json/xiguashuwu.json +718 -0
- novel_downloader/utils/__init__.py +43 -0
- novel_downloader/utils/chapter_storage.py +247 -226
- novel_downloader/utils/constants.py +5 -50
- novel_downloader/utils/cookies.py +6 -18
- novel_downloader/utils/crypto_utils/__init__.py +13 -0
- novel_downloader/utils/crypto_utils/aes_util.py +90 -0
- novel_downloader/utils/crypto_utils/aes_v1.py +619 -0
- novel_downloader/utils/crypto_utils/aes_v2.py +1143 -0
- novel_downloader/utils/{crypto_utils.py → crypto_utils/rc4.py} +3 -10
- novel_downloader/utils/epub/__init__.py +34 -0
- novel_downloader/utils/epub/builder.py +377 -0
- novel_downloader/utils/epub/constants.py +118 -0
- novel_downloader/utils/epub/documents.py +297 -0
- novel_downloader/utils/epub/models.py +120 -0
- novel_downloader/utils/epub/utils.py +179 -0
- novel_downloader/utils/file_utils/__init__.py +5 -30
- novel_downloader/utils/file_utils/io.py +9 -150
- novel_downloader/utils/file_utils/normalize.py +2 -2
- novel_downloader/utils/file_utils/sanitize.py +2 -7
- novel_downloader/utils/fontocr.py +207 -0
- novel_downloader/utils/i18n.py +2 -0
- novel_downloader/utils/logger.py +10 -16
- novel_downloader/utils/network.py +111 -252
- novel_downloader/utils/state.py +5 -90
- novel_downloader/utils/text_utils/__init__.py +16 -21
- novel_downloader/utils/text_utils/diff_display.py +6 -9
- novel_downloader/utils/text_utils/numeric_conversion.py +253 -0
- novel_downloader/utils/text_utils/text_cleaner.py +179 -0
- novel_downloader/utils/text_utils/truncate_utils.py +62 -0
- novel_downloader/utils/time_utils/__init__.py +6 -12
- novel_downloader/utils/time_utils/datetime_utils.py +23 -33
- novel_downloader/utils/time_utils/sleep_utils.py +5 -10
- novel_downloader/web/__init__.py +13 -0
- novel_downloader/web/components/__init__.py +11 -0
- novel_downloader/web/components/navigation.py +35 -0
- novel_downloader/web/main.py +66 -0
- novel_downloader/web/pages/__init__.py +17 -0
- novel_downloader/web/pages/download.py +78 -0
- novel_downloader/web/pages/progress.py +147 -0
- novel_downloader/web/pages/search.py +329 -0
- novel_downloader/web/services/__init__.py +17 -0
- novel_downloader/web/services/client_dialog.py +164 -0
- novel_downloader/web/services/cred_broker.py +113 -0
- novel_downloader/web/services/cred_models.py +35 -0
- novel_downloader/web/services/task_manager.py +264 -0
- novel_downloader-2.0.0.dist-info/METADATA +171 -0
- novel_downloader-2.0.0.dist-info/RECORD +210 -0
- {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/entry_points.txt +1 -1
- novel_downloader/config/site_rules.py +0 -94
- novel_downloader/core/downloaders/biquge.py +0 -25
- novel_downloader/core/downloaders/esjzone.py +0 -25
- novel_downloader/core/downloaders/linovelib.py +0 -25
- novel_downloader/core/downloaders/sfacg.py +0 -25
- novel_downloader/core/downloaders/yamibo.py +0 -25
- novel_downloader/core/exporters/biquge.py +0 -25
- novel_downloader/core/exporters/esjzone.py +0 -25
- novel_downloader/core/exporters/qianbi.py +0 -25
- novel_downloader/core/exporters/sfacg.py +0 -25
- novel_downloader/core/exporters/yamibo.py +0 -25
- novel_downloader/core/factory/__init__.py +0 -20
- novel_downloader/core/factory/downloader.py +0 -73
- novel_downloader/core/factory/exporter.py +0 -58
- novel_downloader/core/factory/fetcher.py +0 -96
- novel_downloader/core/factory/parser.py +0 -86
- novel_downloader/core/fetchers/base/__init__.py +0 -14
- novel_downloader/core/fetchers/base/browser.py +0 -403
- novel_downloader/core/fetchers/biquge/__init__.py +0 -14
- novel_downloader/core/fetchers/common/__init__.py +0 -14
- novel_downloader/core/fetchers/esjzone/__init__.py +0 -14
- novel_downloader/core/fetchers/esjzone/browser.py +0 -204
- novel_downloader/core/fetchers/linovelib/__init__.py +0 -14
- novel_downloader/core/fetchers/linovelib/browser.py +0 -193
- novel_downloader/core/fetchers/qianbi/__init__.py +0 -14
- novel_downloader/core/fetchers/qidian/__init__.py +0 -14
- novel_downloader/core/fetchers/qidian/browser.py +0 -318
- novel_downloader/core/fetchers/sfacg/__init__.py +0 -14
- novel_downloader/core/fetchers/sfacg/browser.py +0 -189
- novel_downloader/core/fetchers/yamibo/__init__.py +0 -14
- novel_downloader/core/fetchers/yamibo/browser.py +0 -229
- novel_downloader/core/parsers/biquge/__init__.py +0 -10
- novel_downloader/core/parsers/biquge/main_parser.py +0 -134
- novel_downloader/core/parsers/common/__init__.py +0 -13
- novel_downloader/core/parsers/common/helper.py +0 -323
- novel_downloader/core/parsers/common/main_parser.py +0 -106
- novel_downloader/core/parsers/esjzone/__init__.py +0 -10
- novel_downloader/core/parsers/linovelib/__init__.py +0 -10
- novel_downloader/core/parsers/qianbi/__init__.py +0 -10
- novel_downloader/core/parsers/sfacg/__init__.py +0 -10
- novel_downloader/core/parsers/yamibo/__init__.py +0 -10
- novel_downloader/core/parsers/yamibo/main_parser.py +0 -194
- novel_downloader/models/browser.py +0 -21
- novel_downloader/models/chapter.py +0 -25
- novel_downloader/models/site_rules.py +0 -99
- novel_downloader/models/tasks.py +0 -33
- novel_downloader/models/types.py +0 -15
- novel_downloader/resources/css_styles/volume-intro.css +0 -56
- novel_downloader/resources/json/replace_word_map.json +0 -4
- novel_downloader/resources/text/blacklist.txt +0 -22
- novel_downloader/tui/__init__.py +0 -7
- novel_downloader/tui/app.py +0 -32
- novel_downloader/tui/main.py +0 -17
- novel_downloader/tui/screens/__init__.py +0 -14
- novel_downloader/tui/screens/home.py +0 -198
- novel_downloader/tui/screens/login.py +0 -74
- novel_downloader/tui/styles/home_layout.tcss +0 -79
- novel_downloader/tui/widgets/richlog_handler.py +0 -24
- novel_downloader/utils/cache.py +0 -24
- novel_downloader/utils/fontocr/__init__.py +0 -22
- novel_downloader/utils/fontocr/model_loader.py +0 -69
- novel_downloader/utils/fontocr/ocr_v1.py +0 -303
- novel_downloader/utils/fontocr/ocr_v2.py +0 -752
- novel_downloader/utils/hash_store.py +0 -279
- novel_downloader/utils/hash_utils.py +0 -103
- novel_downloader/utils/text_utils/chapter_formatting.py +0 -46
- novel_downloader/utils/text_utils/font_mapping.py +0 -28
- novel_downloader/utils/text_utils/text_cleaning.py +0 -107
- novel_downloader-1.4.5.dist-info/METADATA +0 -196
- novel_downloader-1.4.5.dist-info/RECORD +0 -165
- {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/WHEEL +0 -0
- {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/licenses/LICENSE +0 -0
- {novel_downloader-1.4.5.dist-info → novel_downloader-2.0.0.dist-info}/top_level.txt +0 -0
@@ -3,32 +3,47 @@
|
|
3
3
|
novel_downloader.core.downloaders.base
|
4
4
|
--------------------------------------
|
5
5
|
|
6
|
-
|
7
|
-
common interface and reusable logic for all downloader implementations.
|
6
|
+
Abstract base class providing common workflow and utilities for novel downloaders
|
8
7
|
"""
|
9
8
|
|
10
9
|
import abc
|
10
|
+
import asyncio
|
11
|
+
import json
|
11
12
|
import logging
|
12
|
-
from collections.abc import Awaitable, Callable
|
13
|
+
from collections.abc import AsyncIterator, Awaitable, Callable, Sequence
|
13
14
|
from pathlib import Path
|
14
|
-
from typing import Any
|
15
|
+
from typing import Any, cast
|
15
16
|
|
16
17
|
from novel_downloader.core.interfaces import (
|
17
18
|
DownloaderProtocol,
|
18
19
|
FetcherProtocol,
|
19
20
|
ParserProtocol,
|
20
21
|
)
|
21
|
-
from novel_downloader.models import
|
22
|
+
from novel_downloader.models import (
|
23
|
+
BookConfig,
|
24
|
+
BookInfoDict,
|
25
|
+
DownloaderConfig,
|
26
|
+
VolumeInfoDict,
|
27
|
+
)
|
28
|
+
from novel_downloader.utils import time_diff
|
22
29
|
|
23
30
|
|
24
31
|
class BaseDownloader(DownloaderProtocol, abc.ABC):
|
25
32
|
"""
|
26
|
-
Abstract
|
27
|
-
|
33
|
+
Abstract base class for novel downloaders.
|
34
|
+
|
35
|
+
Defines the general interface and batch download workflow,
|
36
|
+
while delegating book-specific downloading logic to subclasses.
|
28
37
|
|
29
|
-
Subclasses
|
38
|
+
Subclasses are required to implement methods for downloading
|
39
|
+
a single book, using the provided fetcher and parser components.
|
30
40
|
"""
|
31
41
|
|
42
|
+
DEFAULT_SOURCE_ID = 0
|
43
|
+
PRIORITIES_MAP = {
|
44
|
+
DEFAULT_SOURCE_ID: 0,
|
45
|
+
}
|
46
|
+
|
32
47
|
def __init__(
|
33
48
|
self,
|
34
49
|
fetcher: FetcherProtocol,
|
@@ -36,15 +51,23 @@ class BaseDownloader(DownloaderProtocol, abc.ABC):
|
|
36
51
|
config: DownloaderConfig,
|
37
52
|
site: str,
|
38
53
|
):
|
54
|
+
"""
|
55
|
+
Initialize the downloader for a specific site.
|
56
|
+
|
57
|
+
:param fetcher: Fetcher component for retrieving raw chapter data.
|
58
|
+
:param parser: Parser component for extracting chapter content.
|
59
|
+
:param config: Downloader configuration settings.
|
60
|
+
:param site: Identifier for the target website or source.
|
61
|
+
"""
|
39
62
|
self._fetcher = fetcher
|
40
63
|
self._parser = parser
|
41
64
|
self._config = config
|
42
65
|
self._site = site
|
43
66
|
|
44
67
|
self._raw_data_dir = Path(config.raw_data_dir) / site
|
45
|
-
self._cache_dir = Path(config.cache_dir) / site
|
46
68
|
self._raw_data_dir.mkdir(parents=True, exist_ok=True)
|
47
|
-
self.
|
69
|
+
self._debug_dir = Path.cwd() / "debug" / site
|
70
|
+
self._debug_dir.mkdir(parents=True, exist_ok=True)
|
48
71
|
|
49
72
|
self.logger = logging.getLogger(f"{self.__class__.__name__}")
|
50
73
|
|
@@ -53,6 +76,7 @@ class BaseDownloader(DownloaderProtocol, abc.ABC):
|
|
53
76
|
books: list[BookConfig],
|
54
77
|
*,
|
55
78
|
progress_hook: Callable[[int, int], Awaitable[None]] | None = None,
|
79
|
+
cancel_event: asyncio.Event | None = None,
|
56
80
|
**kwargs: Any,
|
57
81
|
) -> None:
|
58
82
|
"""
|
@@ -61,6 +85,7 @@ class BaseDownloader(DownloaderProtocol, abc.ABC):
|
|
61
85
|
:param books: List of BookConfig entries.
|
62
86
|
:param progress_hook: Optional async callback after each chapter.
|
63
87
|
args: completed_count, total_count.
|
88
|
+
:param cancel_event: Optional asyncio.Event to allow cancellation.
|
64
89
|
"""
|
65
90
|
if not await self._ensure_ready():
|
66
91
|
book_ids = [b["book_id"] for b in books]
|
@@ -72,10 +97,20 @@ class BaseDownloader(DownloaderProtocol, abc.ABC):
|
|
72
97
|
return
|
73
98
|
|
74
99
|
for book in books:
|
100
|
+
# stop early if cancellation requested
|
101
|
+
if cancel_event and cancel_event.is_set():
|
102
|
+
self.logger.info(
|
103
|
+
"[%s] download cancelled before book: %s",
|
104
|
+
self._site,
|
105
|
+
book["book_id"],
|
106
|
+
)
|
107
|
+
break
|
108
|
+
|
75
109
|
try:
|
76
110
|
await self._download_one(
|
77
111
|
book,
|
78
112
|
progress_hook=progress_hook,
|
113
|
+
cancel_event=cancel_event,
|
79
114
|
**kwargs,
|
80
115
|
)
|
81
116
|
except Exception as e:
|
@@ -88,6 +123,7 @@ class BaseDownloader(DownloaderProtocol, abc.ABC):
|
|
88
123
|
book: BookConfig,
|
89
124
|
*,
|
90
125
|
progress_hook: Callable[[int, int], Awaitable[None]] | None = None,
|
126
|
+
cancel_event: asyncio.Event | None = None,
|
91
127
|
**kwargs: Any,
|
92
128
|
) -> None:
|
93
129
|
"""
|
@@ -96,6 +132,7 @@ class BaseDownloader(DownloaderProtocol, abc.ABC):
|
|
96
132
|
:param book: BookConfig with at least 'book_id'.
|
97
133
|
:param progress_hook: Optional async callback after each chapter.
|
98
134
|
args: completed_count, total_count.
|
135
|
+
:param cancel_event: Optional asyncio.Event to allow cancellation.
|
99
136
|
"""
|
100
137
|
if not await self._ensure_ready():
|
101
138
|
self.logger.warning(
|
@@ -106,10 +143,20 @@ class BaseDownloader(DownloaderProtocol, abc.ABC):
|
|
106
143
|
book.get("end_id", "-"),
|
107
144
|
)
|
108
145
|
|
146
|
+
# if already cancelled before starting
|
147
|
+
if cancel_event and cancel_event.is_set():
|
148
|
+
self.logger.info(
|
149
|
+
"[%s] download cancelled before start of book: %s",
|
150
|
+
self._site,
|
151
|
+
book["book_id"],
|
152
|
+
)
|
153
|
+
return
|
154
|
+
|
109
155
|
try:
|
110
156
|
await self._download_one(
|
111
157
|
book,
|
112
158
|
progress_hook=progress_hook,
|
159
|
+
cancel_event=cancel_event,
|
113
160
|
**kwargs,
|
114
161
|
)
|
115
162
|
except Exception as e:
|
@@ -117,12 +164,35 @@ class BaseDownloader(DownloaderProtocol, abc.ABC):
|
|
117
164
|
|
118
165
|
await self._finalize()
|
119
166
|
|
167
|
+
async def load_book_info(
|
168
|
+
self,
|
169
|
+
book_id: str,
|
170
|
+
html_dir: Path,
|
171
|
+
) -> BookInfoDict | None:
|
172
|
+
book_info = self._load_book_info(
|
173
|
+
book_id=book_id,
|
174
|
+
max_age_days=1,
|
175
|
+
)
|
176
|
+
if book_info:
|
177
|
+
return book_info
|
178
|
+
|
179
|
+
info_html = await self.fetcher.get_book_info(book_id)
|
180
|
+
self._save_html_pages(html_dir, "info", info_html)
|
181
|
+
book_info = self.parser.parse_book_info(info_html)
|
182
|
+
|
183
|
+
if book_info:
|
184
|
+
self._save_book_info(book_id, book_info)
|
185
|
+
return book_info
|
186
|
+
|
187
|
+
return self._load_book_info(book_id)
|
188
|
+
|
120
189
|
@abc.abstractmethod
|
121
190
|
async def _download_one(
|
122
191
|
self,
|
123
192
|
book: BookConfig,
|
124
193
|
*,
|
125
194
|
progress_hook: Callable[[int, int], Awaitable[None]] | None = None,
|
195
|
+
cancel_event: asyncio.Event | None = None,
|
126
196
|
**kwargs: Any,
|
127
197
|
) -> None:
|
128
198
|
"""
|
@@ -147,29 +217,111 @@ class BaseDownloader(DownloaderProtocol, abc.ABC):
|
|
147
217
|
"""
|
148
218
|
return
|
149
219
|
|
150
|
-
|
151
|
-
|
152
|
-
|
220
|
+
def _load_book_info(
|
221
|
+
self,
|
222
|
+
book_id: str,
|
223
|
+
*,
|
224
|
+
max_age_days: int | None = None,
|
225
|
+
) -> BookInfoDict | None:
|
226
|
+
"""
|
227
|
+
Attempt to read and parse the book_info.json for a given book_id.
|
153
228
|
|
154
|
-
|
155
|
-
|
156
|
-
return
|
229
|
+
:param book_id: identifier of the book
|
230
|
+
:param max_age_days: if set, only return the cached info when its 'update_time' is no more than this many days old
|
231
|
+
:return: dict of book info if the file exists, is valid JSON, and is not older than max_age_days; otherwise None
|
232
|
+
"""
|
233
|
+
info_path = self._raw_data_dir / book_id / "book_info.json"
|
234
|
+
if not info_path.is_file():
|
235
|
+
return None
|
157
236
|
|
158
|
-
|
159
|
-
|
160
|
-
|
237
|
+
try:
|
238
|
+
raw: dict[str, Any] = json.loads(info_path.read_text(encoding="utf-8"))
|
239
|
+
except json.JSONDecodeError:
|
240
|
+
return None
|
241
|
+
|
242
|
+
if max_age_days is not None:
|
243
|
+
days, *_ = time_diff(
|
244
|
+
raw.get("update_time", ""),
|
245
|
+
"UTC+8",
|
246
|
+
)
|
247
|
+
if days > max_age_days:
|
248
|
+
return None
|
161
249
|
|
162
|
-
|
163
|
-
|
164
|
-
|
250
|
+
# return data
|
251
|
+
return cast(BookInfoDict, raw)
|
252
|
+
|
253
|
+
def _save_book_info(
|
254
|
+
self,
|
255
|
+
book_id: str,
|
256
|
+
book_info: BookInfoDict,
|
257
|
+
) -> None:
|
258
|
+
"""
|
259
|
+
Serialize and save the book_info dict as json.
|
260
|
+
|
261
|
+
:param book_id: identifier of the book
|
262
|
+
:param book_info: dict containing metadata about the book
|
263
|
+
"""
|
264
|
+
target_dir = self._raw_data_dir / book_id
|
265
|
+
target_dir.mkdir(parents=True, exist_ok=True)
|
266
|
+
(target_dir / "book_info.json").write_text(
|
267
|
+
json.dumps(book_info, ensure_ascii=False, indent=2),
|
268
|
+
encoding="utf-8",
|
269
|
+
)
|
270
|
+
|
271
|
+
def _save_html_pages(
|
272
|
+
self,
|
273
|
+
html_dir: Path,
|
274
|
+
filename: str,
|
275
|
+
html_list: Sequence[str],
|
276
|
+
) -> None:
|
277
|
+
"""
|
278
|
+
If save_html is enabled, write each HTML snippet to a file.
|
279
|
+
|
280
|
+
Filenames will be {filename}_{index}.html in html_dir.
|
281
|
+
|
282
|
+
:param html_dir: directory in which to write HTML files
|
283
|
+
:param filename: used as filename prefix
|
284
|
+
:param html_list: list of HTML strings to save
|
285
|
+
"""
|
286
|
+
if not self.save_html:
|
287
|
+
return
|
288
|
+
|
289
|
+
html_dir.mkdir(parents=True, exist_ok=True)
|
290
|
+
for i, html in enumerate(html_list):
|
291
|
+
file_path = html_dir / f"{filename}_{i}.html"
|
292
|
+
file_path.write_text(html, encoding="utf-8")
|
293
|
+
|
294
|
+
@staticmethod
|
295
|
+
async def _chapter_ids(
|
296
|
+
volumes: list[VolumeInfoDict],
|
297
|
+
start_id: str | None,
|
298
|
+
end_id: str | None,
|
299
|
+
) -> AsyncIterator[str]:
|
300
|
+
"""
|
301
|
+
Yield each chapterId in order, respecting start/end bounds.
|
302
|
+
"""
|
303
|
+
seen_start = start_id is None
|
304
|
+
for vol in volumes:
|
305
|
+
for chap in vol["chapters"]:
|
306
|
+
cid = chap.get("chapterId")
|
307
|
+
if not cid:
|
308
|
+
continue
|
309
|
+
if not seen_start:
|
310
|
+
if cid == start_id:
|
311
|
+
seen_start = True
|
312
|
+
else:
|
313
|
+
continue
|
314
|
+
yield cid
|
315
|
+
if end_id is not None and cid == end_id:
|
316
|
+
return
|
165
317
|
|
166
318
|
@property
|
167
|
-
def
|
168
|
-
return self.
|
319
|
+
def fetcher(self) -> FetcherProtocol:
|
320
|
+
return self._fetcher
|
169
321
|
|
170
322
|
@property
|
171
|
-
def
|
172
|
-
return self.
|
323
|
+
def parser(self) -> ParserProtocol:
|
324
|
+
return self._parser
|
173
325
|
|
174
326
|
@property
|
175
327
|
def save_html(self) -> bool:
|
@@ -196,12 +348,12 @@ class BaseDownloader(DownloaderProtocol, abc.ABC):
|
|
196
348
|
return self._config.backoff_factor
|
197
349
|
|
198
350
|
@property
|
199
|
-
def
|
200
|
-
return self._config.
|
351
|
+
def workers(self) -> int:
|
352
|
+
return self._config.workers
|
201
353
|
|
202
354
|
@property
|
203
|
-
def
|
204
|
-
return self._config.
|
355
|
+
def storage_batch_size(self) -> int:
|
356
|
+
return max(1, self._config.storage_batch_size)
|
205
357
|
|
206
358
|
def _handle_download_exception(self, book: BookConfig, error: Exception) -> None:
|
207
359
|
"""
|