novel-downloader 1.3.3__py3-none-any.whl → 1.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- novel_downloader/__init__.py +1 -1
- novel_downloader/cli/clean.py +97 -78
- novel_downloader/cli/config.py +177 -0
- novel_downloader/cli/download.py +132 -87
- novel_downloader/cli/export.py +77 -0
- novel_downloader/cli/main.py +21 -28
- novel_downloader/config/__init__.py +1 -25
- novel_downloader/config/adapter.py +32 -31
- novel_downloader/config/loader.py +3 -3
- novel_downloader/config/site_rules.py +1 -2
- novel_downloader/core/__init__.py +3 -6
- novel_downloader/core/downloaders/__init__.py +10 -13
- novel_downloader/core/downloaders/base.py +233 -0
- novel_downloader/core/downloaders/biquge.py +27 -0
- novel_downloader/core/downloaders/common.py +414 -0
- novel_downloader/core/downloaders/esjzone.py +27 -0
- novel_downloader/core/downloaders/linovelib.py +27 -0
- novel_downloader/core/downloaders/qianbi.py +27 -0
- novel_downloader/core/downloaders/qidian.py +352 -0
- novel_downloader/core/downloaders/sfacg.py +27 -0
- novel_downloader/core/downloaders/yamibo.py +27 -0
- novel_downloader/core/exporters/__init__.py +37 -0
- novel_downloader/core/{savers → exporters}/base.py +73 -39
- novel_downloader/core/exporters/biquge.py +25 -0
- novel_downloader/core/exporters/common/__init__.py +12 -0
- novel_downloader/core/{savers → exporters}/common/epub.py +22 -22
- novel_downloader/core/{savers/common/main_saver.py → exporters/common/main_exporter.py} +35 -40
- novel_downloader/core/{savers → exporters}/common/txt.py +20 -23
- novel_downloader/core/{savers → exporters}/epub_utils/__init__.py +8 -3
- novel_downloader/core/{savers → exporters}/epub_utils/css_builder.py +2 -2
- novel_downloader/core/{savers → exporters}/epub_utils/image_loader.py +46 -4
- novel_downloader/core/{savers → exporters}/epub_utils/initializer.py +6 -4
- novel_downloader/core/{savers → exporters}/epub_utils/text_to_html.py +3 -3
- novel_downloader/core/{savers → exporters}/epub_utils/volume_intro.py +2 -2
- novel_downloader/core/exporters/esjzone.py +25 -0
- novel_downloader/core/exporters/linovelib/__init__.py +10 -0
- novel_downloader/core/exporters/linovelib/epub.py +449 -0
- novel_downloader/core/exporters/linovelib/main_exporter.py +127 -0
- novel_downloader/core/exporters/linovelib/txt.py +129 -0
- novel_downloader/core/exporters/qianbi.py +25 -0
- novel_downloader/core/{savers → exporters}/qidian.py +8 -8
- novel_downloader/core/exporters/sfacg.py +25 -0
- novel_downloader/core/exporters/yamibo.py +25 -0
- novel_downloader/core/factory/__init__.py +5 -17
- novel_downloader/core/factory/downloader.py +24 -126
- novel_downloader/core/factory/exporter.py +58 -0
- novel_downloader/core/factory/fetcher.py +96 -0
- novel_downloader/core/factory/parser.py +17 -12
- novel_downloader/core/{requesters → fetchers}/__init__.py +22 -15
- novel_downloader/core/{requesters → fetchers}/base/__init__.py +2 -4
- novel_downloader/core/fetchers/base/browser.py +383 -0
- novel_downloader/core/fetchers/base/rate_limiter.py +86 -0
- novel_downloader/core/fetchers/base/session.py +419 -0
- novel_downloader/core/fetchers/biquge/__init__.py +14 -0
- novel_downloader/core/{requesters/biquge/async_session.py → fetchers/biquge/browser.py} +18 -6
- novel_downloader/core/{requesters → fetchers}/biquge/session.py +23 -30
- novel_downloader/core/fetchers/common/__init__.py +14 -0
- novel_downloader/core/fetchers/common/browser.py +79 -0
- novel_downloader/core/{requesters/common/async_session.py → fetchers/common/session.py} +8 -25
- novel_downloader/core/fetchers/esjzone/__init__.py +14 -0
- novel_downloader/core/fetchers/esjzone/browser.py +202 -0
- novel_downloader/core/{requesters/esjzone/async_session.py → fetchers/esjzone/session.py} +62 -42
- novel_downloader/core/fetchers/linovelib/__init__.py +14 -0
- novel_downloader/core/fetchers/linovelib/browser.py +193 -0
- novel_downloader/core/fetchers/linovelib/session.py +193 -0
- novel_downloader/core/fetchers/qianbi/__init__.py +14 -0
- novel_downloader/core/{requesters/qianbi/session.py → fetchers/qianbi/browser.py} +30 -48
- novel_downloader/core/{requesters/qianbi/async_session.py → fetchers/qianbi/session.py} +18 -6
- novel_downloader/core/fetchers/qidian/__init__.py +14 -0
- novel_downloader/core/fetchers/qidian/browser.py +266 -0
- novel_downloader/core/fetchers/qidian/session.py +326 -0
- novel_downloader/core/fetchers/sfacg/__init__.py +14 -0
- novel_downloader/core/fetchers/sfacg/browser.py +189 -0
- novel_downloader/core/{requesters/sfacg/async_session.py → fetchers/sfacg/session.py} +43 -73
- novel_downloader/core/fetchers/yamibo/__init__.py +14 -0
- novel_downloader/core/fetchers/yamibo/browser.py +229 -0
- novel_downloader/core/{requesters/yamibo/async_session.py → fetchers/yamibo/session.py} +62 -44
- novel_downloader/core/interfaces/__init__.py +8 -12
- novel_downloader/core/interfaces/downloader.py +54 -0
- novel_downloader/core/interfaces/{saver.py → exporter.py} +12 -12
- novel_downloader/core/interfaces/fetcher.py +162 -0
- novel_downloader/core/interfaces/parser.py +6 -7
- novel_downloader/core/parsers/__init__.py +5 -6
- novel_downloader/core/parsers/base.py +9 -13
- novel_downloader/core/parsers/biquge/main_parser.py +12 -13
- novel_downloader/core/parsers/common/helper.py +3 -3
- novel_downloader/core/parsers/common/main_parser.py +39 -34
- novel_downloader/core/parsers/esjzone/main_parser.py +20 -14
- novel_downloader/core/parsers/linovelib/__init__.py +10 -0
- novel_downloader/core/parsers/linovelib/main_parser.py +210 -0
- novel_downloader/core/parsers/qianbi/main_parser.py +21 -15
- novel_downloader/core/parsers/qidian/__init__.py +2 -11
- novel_downloader/core/parsers/qidian/book_info_parser.py +113 -0
- novel_downloader/core/parsers/qidian/{browser/chapter_encrypted.py → chapter_encrypted.py} +162 -135
- novel_downloader/core/parsers/qidian/chapter_normal.py +150 -0
- novel_downloader/core/parsers/qidian/{session/chapter_router.py → chapter_router.py} +15 -15
- novel_downloader/core/parsers/qidian/{browser/main_parser.py → main_parser.py} +49 -40
- novel_downloader/core/parsers/qidian/utils/__init__.py +27 -0
- novel_downloader/core/parsers/qidian/utils/decryptor_fetcher.py +145 -0
- novel_downloader/core/parsers/qidian/{shared → utils}/helpers.py +41 -68
- novel_downloader/core/parsers/qidian/{session → utils}/node_decryptor.py +64 -50
- novel_downloader/core/parsers/sfacg/main_parser.py +12 -12
- novel_downloader/core/parsers/yamibo/main_parser.py +10 -10
- novel_downloader/locales/en.json +18 -2
- novel_downloader/locales/zh.json +18 -2
- novel_downloader/models/__init__.py +64 -0
- novel_downloader/models/browser.py +21 -0
- novel_downloader/models/chapter.py +25 -0
- novel_downloader/models/config.py +100 -0
- novel_downloader/models/login.py +20 -0
- novel_downloader/models/site_rules.py +99 -0
- novel_downloader/models/tasks.py +33 -0
- novel_downloader/models/types.py +15 -0
- novel_downloader/resources/config/settings.toml +31 -25
- novel_downloader/resources/json/linovelib_font_map.json +3573 -0
- novel_downloader/tui/__init__.py +7 -0
- novel_downloader/tui/app.py +32 -0
- novel_downloader/tui/main.py +17 -0
- novel_downloader/tui/screens/__init__.py +14 -0
- novel_downloader/tui/screens/home.py +191 -0
- novel_downloader/tui/screens/login.py +74 -0
- novel_downloader/tui/styles/home_layout.tcss +79 -0
- novel_downloader/tui/widgets/richlog_handler.py +24 -0
- novel_downloader/utils/__init__.py +6 -0
- novel_downloader/utils/chapter_storage.py +25 -38
- novel_downloader/utils/constants.py +11 -5
- novel_downloader/utils/cookies.py +66 -0
- novel_downloader/utils/crypto_utils.py +1 -74
- novel_downloader/utils/fontocr/ocr_v1.py +2 -1
- novel_downloader/utils/fontocr/ocr_v2.py +2 -2
- novel_downloader/utils/hash_store.py +10 -18
- novel_downloader/utils/hash_utils.py +3 -2
- novel_downloader/utils/logger.py +2 -3
- novel_downloader/utils/network.py +2 -1
- novel_downloader/utils/text_utils/chapter_formatting.py +6 -1
- novel_downloader/utils/text_utils/font_mapping.py +1 -1
- novel_downloader/utils/text_utils/text_cleaning.py +1 -1
- novel_downloader/utils/time_utils/datetime_utils.py +3 -3
- novel_downloader/utils/time_utils/sleep_utils.py +1 -1
- {novel_downloader-1.3.3.dist-info → novel_downloader-1.4.1.dist-info}/METADATA +69 -35
- novel_downloader-1.4.1.dist-info/RECORD +170 -0
- {novel_downloader-1.3.3.dist-info → novel_downloader-1.4.1.dist-info}/WHEEL +1 -1
- {novel_downloader-1.3.3.dist-info → novel_downloader-1.4.1.dist-info}/entry_points.txt +1 -0
- novel_downloader/cli/interactive.py +0 -66
- novel_downloader/cli/settings.py +0 -177
- novel_downloader/config/models.py +0 -187
- novel_downloader/core/downloaders/base/__init__.py +0 -14
- novel_downloader/core/downloaders/base/base_async.py +0 -153
- novel_downloader/core/downloaders/base/base_sync.py +0 -208
- novel_downloader/core/downloaders/biquge/__init__.py +0 -14
- novel_downloader/core/downloaders/biquge/biquge_async.py +0 -27
- novel_downloader/core/downloaders/biquge/biquge_sync.py +0 -27
- novel_downloader/core/downloaders/common/__init__.py +0 -14
- novel_downloader/core/downloaders/common/common_async.py +0 -210
- novel_downloader/core/downloaders/common/common_sync.py +0 -202
- novel_downloader/core/downloaders/esjzone/__init__.py +0 -14
- novel_downloader/core/downloaders/esjzone/esjzone_async.py +0 -27
- novel_downloader/core/downloaders/esjzone/esjzone_sync.py +0 -27
- novel_downloader/core/downloaders/qianbi/__init__.py +0 -14
- novel_downloader/core/downloaders/qianbi/qianbi_async.py +0 -27
- novel_downloader/core/downloaders/qianbi/qianbi_sync.py +0 -27
- novel_downloader/core/downloaders/qidian/__init__.py +0 -10
- novel_downloader/core/downloaders/qidian/qidian_sync.py +0 -219
- novel_downloader/core/downloaders/sfacg/__init__.py +0 -14
- novel_downloader/core/downloaders/sfacg/sfacg_async.py +0 -27
- novel_downloader/core/downloaders/sfacg/sfacg_sync.py +0 -27
- novel_downloader/core/downloaders/yamibo/__init__.py +0 -14
- novel_downloader/core/downloaders/yamibo/yamibo_async.py +0 -27
- novel_downloader/core/downloaders/yamibo/yamibo_sync.py +0 -27
- novel_downloader/core/factory/requester.py +0 -144
- novel_downloader/core/factory/saver.py +0 -56
- novel_downloader/core/interfaces/async_downloader.py +0 -36
- novel_downloader/core/interfaces/async_requester.py +0 -84
- novel_downloader/core/interfaces/sync_downloader.py +0 -36
- novel_downloader/core/interfaces/sync_requester.py +0 -82
- novel_downloader/core/parsers/qidian/browser/__init__.py +0 -12
- novel_downloader/core/parsers/qidian/browser/chapter_normal.py +0 -93
- novel_downloader/core/parsers/qidian/browser/chapter_router.py +0 -71
- novel_downloader/core/parsers/qidian/session/__init__.py +0 -12
- novel_downloader/core/parsers/qidian/session/chapter_encrypted.py +0 -443
- novel_downloader/core/parsers/qidian/session/chapter_normal.py +0 -115
- novel_downloader/core/parsers/qidian/session/main_parser.py +0 -128
- novel_downloader/core/parsers/qidian/shared/__init__.py +0 -37
- novel_downloader/core/parsers/qidian/shared/book_info_parser.py +0 -150
- novel_downloader/core/requesters/base/async_session.py +0 -410
- novel_downloader/core/requesters/base/browser.py +0 -337
- novel_downloader/core/requesters/base/session.py +0 -378
- novel_downloader/core/requesters/biquge/__init__.py +0 -14
- novel_downloader/core/requesters/common/__init__.py +0 -17
- novel_downloader/core/requesters/common/session.py +0 -113
- novel_downloader/core/requesters/esjzone/__init__.py +0 -13
- novel_downloader/core/requesters/esjzone/session.py +0 -235
- novel_downloader/core/requesters/qianbi/__init__.py +0 -13
- novel_downloader/core/requesters/qidian/__init__.py +0 -21
- novel_downloader/core/requesters/qidian/broswer.py +0 -307
- novel_downloader/core/requesters/qidian/session.py +0 -290
- novel_downloader/core/requesters/sfacg/__init__.py +0 -13
- novel_downloader/core/requesters/sfacg/session.py +0 -242
- novel_downloader/core/requesters/yamibo/__init__.py +0 -13
- novel_downloader/core/requesters/yamibo/session.py +0 -237
- novel_downloader/core/savers/__init__.py +0 -34
- novel_downloader/core/savers/biquge.py +0 -25
- novel_downloader/core/savers/common/__init__.py +0 -12
- novel_downloader/core/savers/esjzone.py +0 -25
- novel_downloader/core/savers/qianbi.py +0 -25
- novel_downloader/core/savers/sfacg.py +0 -25
- novel_downloader/core/savers/yamibo.py +0 -25
- novel_downloader/resources/config/rules.toml +0 -196
- novel_downloader-1.3.3.dist-info/RECORD +0 -166
- {novel_downloader-1.3.3.dist-info → novel_downloader-1.4.1.dist-info}/licenses/LICENSE +0 -0
- {novel_downloader-1.3.3.dist-info → novel_downloader-1.4.1.dist-info}/top_level.txt +0 -0
@@ -1,153 +0,0 @@
|
|
1
|
-
#!/usr/bin/env python3
|
2
|
-
"""
|
3
|
-
novel_downloader.core.downloaders.base.base_async
|
4
|
-
-------------------------------------------------
|
5
|
-
|
6
|
-
Defines the abstract base class `BaseAsyncDownloader`, which provides a
|
7
|
-
common interface and reusable logic for all downloader implementations.
|
8
|
-
"""
|
9
|
-
|
10
|
-
import abc
|
11
|
-
import logging
|
12
|
-
from pathlib import Path
|
13
|
-
|
14
|
-
from novel_downloader.config import DownloaderConfig
|
15
|
-
from novel_downloader.core.interfaces import (
|
16
|
-
AsyncDownloaderProtocol,
|
17
|
-
AsyncRequesterProtocol,
|
18
|
-
ParserProtocol,
|
19
|
-
SaverProtocol,
|
20
|
-
)
|
21
|
-
|
22
|
-
|
23
|
-
class BaseAsyncDownloader(AsyncDownloaderProtocol, abc.ABC):
|
24
|
-
"""
|
25
|
-
Abstract downloader that defines the initialization interface
|
26
|
-
and the general batch download flow.
|
27
|
-
|
28
|
-
Subclasses must implement the logic for downloading a single book.
|
29
|
-
"""
|
30
|
-
|
31
|
-
def __init__(
|
32
|
-
self,
|
33
|
-
requester: AsyncRequesterProtocol,
|
34
|
-
parser: ParserProtocol,
|
35
|
-
saver: SaverProtocol,
|
36
|
-
config: DownloaderConfig,
|
37
|
-
site: str,
|
38
|
-
):
|
39
|
-
self._requester = requester
|
40
|
-
self._parser = parser
|
41
|
-
self._saver = saver
|
42
|
-
self._config = config
|
43
|
-
self._site = site
|
44
|
-
|
45
|
-
self._raw_data_dir = Path(config.raw_data_dir) / site
|
46
|
-
self._cache_dir = Path(config.cache_dir) / site
|
47
|
-
self._raw_data_dir.mkdir(parents=True, exist_ok=True)
|
48
|
-
self._cache_dir.mkdir(parents=True, exist_ok=True)
|
49
|
-
|
50
|
-
self.logger = logging.getLogger(f"{__name__}.{self.__class__.__name__}")
|
51
|
-
|
52
|
-
async def download(self, book_ids: list[str]) -> None:
|
53
|
-
"""
|
54
|
-
The general batch download process:
|
55
|
-
1. Iterate over all book IDs
|
56
|
-
2. For each ID, call `download_one()`
|
57
|
-
|
58
|
-
:param book_ids: A list of book identifiers to download.
|
59
|
-
"""
|
60
|
-
await self.prepare()
|
61
|
-
|
62
|
-
# 2) batch download
|
63
|
-
for idx, book_id in enumerate(book_ids, start=1):
|
64
|
-
self.logger.debug(
|
65
|
-
"[%s] Starting download for %r (%s/%s)",
|
66
|
-
self.__class__.__name__,
|
67
|
-
book_id,
|
68
|
-
idx,
|
69
|
-
len(book_ids),
|
70
|
-
)
|
71
|
-
try:
|
72
|
-
await self.download_one(book_id)
|
73
|
-
except Exception as e:
|
74
|
-
self._handle_download_exception(book_id, e)
|
75
|
-
|
76
|
-
@abc.abstractmethod
|
77
|
-
async def download_one(self, book_id: str) -> None:
|
78
|
-
"""
|
79
|
-
The full download logic for a single book.
|
80
|
-
|
81
|
-
Subclasses must implement this method.
|
82
|
-
|
83
|
-
:param book_id: The identifier of the book to download.
|
84
|
-
"""
|
85
|
-
...
|
86
|
-
|
87
|
-
async def prepare(self) -> None:
|
88
|
-
"""
|
89
|
-
Optional hook called before downloading each book.
|
90
|
-
|
91
|
-
Subclasses can override this method to perform pre-download setup.
|
92
|
-
"""
|
93
|
-
return
|
94
|
-
|
95
|
-
@property
|
96
|
-
def requester(self) -> AsyncRequesterProtocol:
|
97
|
-
return self._requester
|
98
|
-
|
99
|
-
@property
|
100
|
-
def parser(self) -> ParserProtocol:
|
101
|
-
return self._parser
|
102
|
-
|
103
|
-
@property
|
104
|
-
def saver(self) -> SaverProtocol:
|
105
|
-
return self._saver
|
106
|
-
|
107
|
-
@property
|
108
|
-
def config(self) -> DownloaderConfig:
|
109
|
-
return self._config
|
110
|
-
|
111
|
-
@property
|
112
|
-
def raw_data_dir(self) -> Path:
|
113
|
-
return self._raw_data_dir
|
114
|
-
|
115
|
-
@property
|
116
|
-
def cache_dir(self) -> Path:
|
117
|
-
return self._cache_dir
|
118
|
-
|
119
|
-
@property
|
120
|
-
def site(self) -> str:
|
121
|
-
return self._site
|
122
|
-
|
123
|
-
@property
|
124
|
-
def save_html(self) -> bool:
|
125
|
-
return self._config.save_html
|
126
|
-
|
127
|
-
@property
|
128
|
-
def skip_existing(self) -> bool:
|
129
|
-
return self._config.skip_existing
|
130
|
-
|
131
|
-
@property
|
132
|
-
def login_required(self) -> bool:
|
133
|
-
return self._config.login_required
|
134
|
-
|
135
|
-
@property
|
136
|
-
def request_interval(self) -> float:
|
137
|
-
return self._config.request_interval
|
138
|
-
|
139
|
-
def _handle_download_exception(self, book_id: str, error: Exception) -> None:
|
140
|
-
"""
|
141
|
-
Handle download errors in a consistent way.
|
142
|
-
|
143
|
-
This method can be overridden or extended to implement retry logic, etc.
|
144
|
-
|
145
|
-
:param book_id: The ID of the book that failed.
|
146
|
-
:param error: The exception raised during download.
|
147
|
-
"""
|
148
|
-
self.logger.warning(
|
149
|
-
"[%s] Failed to download %r: %s",
|
150
|
-
self.__class__.__name__,
|
151
|
-
book_id,
|
152
|
-
error,
|
153
|
-
)
|
@@ -1,208 +0,0 @@
|
|
1
|
-
#!/usr/bin/env python3
|
2
|
-
"""
|
3
|
-
novel_downloader.core.downloaders.base.base_sync
|
4
|
-
------------------------------------------------
|
5
|
-
|
6
|
-
Defines the abstract base class `BaseDownloader`, which provides a
|
7
|
-
common interface and reusable logic for all downloader implementations.
|
8
|
-
"""
|
9
|
-
|
10
|
-
import abc
|
11
|
-
import logging
|
12
|
-
from pathlib import Path
|
13
|
-
|
14
|
-
from novel_downloader.config import DownloaderConfig
|
15
|
-
from novel_downloader.core.interfaces import (
|
16
|
-
ParserProtocol,
|
17
|
-
SaverProtocol,
|
18
|
-
SyncDownloaderProtocol,
|
19
|
-
SyncRequesterProtocol,
|
20
|
-
)
|
21
|
-
|
22
|
-
|
23
|
-
class BaseDownloader(SyncDownloaderProtocol, abc.ABC):
|
24
|
-
"""
|
25
|
-
Abstract downloader that defines the initialization interface
|
26
|
-
and the general batch download flow.
|
27
|
-
|
28
|
-
Subclasses must implement the logic for downloading a single book.
|
29
|
-
"""
|
30
|
-
|
31
|
-
def __init__(
|
32
|
-
self,
|
33
|
-
requester: SyncRequesterProtocol,
|
34
|
-
parser: ParserProtocol,
|
35
|
-
saver: SaverProtocol,
|
36
|
-
config: DownloaderConfig,
|
37
|
-
site: str,
|
38
|
-
):
|
39
|
-
"""
|
40
|
-
Initialize the downloader with its components.
|
41
|
-
|
42
|
-
:param requester: Object implementing RequesterProtocol, used to fetch raw data.
|
43
|
-
:param parser: Object implementing ParserProtocol, used to parse page content.
|
44
|
-
:param saver: Object implementing SaverProtocol, used to save final output.
|
45
|
-
:param config: Downloader configuration object.
|
46
|
-
"""
|
47
|
-
self._requester = requester
|
48
|
-
self._parser = parser
|
49
|
-
self._saver = saver
|
50
|
-
self._config = config
|
51
|
-
self._site = site
|
52
|
-
|
53
|
-
self._raw_data_dir = Path(config.raw_data_dir) / site
|
54
|
-
self._cache_dir = Path(config.cache_dir) / site
|
55
|
-
self._raw_data_dir.mkdir(parents=True, exist_ok=True)
|
56
|
-
self._cache_dir.mkdir(parents=True, exist_ok=True)
|
57
|
-
|
58
|
-
self.logger = logging.getLogger(f"{__name__}.{self.__class__.__name__}")
|
59
|
-
|
60
|
-
def download(self, book_ids: list[str]) -> None:
|
61
|
-
"""
|
62
|
-
The general batch download process:
|
63
|
-
1. Iterate over all book IDs
|
64
|
-
2. For each ID, call `download_one()`
|
65
|
-
|
66
|
-
:param book_ids: A list of book identifiers to download.
|
67
|
-
"""
|
68
|
-
self.prepare()
|
69
|
-
|
70
|
-
for idx, book_id in enumerate(book_ids, start=1):
|
71
|
-
self.logger.debug(
|
72
|
-
"[downloader] Starting download for book_id: %s (%s/%s)",
|
73
|
-
book_id,
|
74
|
-
idx,
|
75
|
-
len(book_ids),
|
76
|
-
)
|
77
|
-
try:
|
78
|
-
self.download_one(book_id)
|
79
|
-
except Exception as e:
|
80
|
-
self._handle_download_exception(book_id, e)
|
81
|
-
|
82
|
-
@abc.abstractmethod
|
83
|
-
def download_one(self, book_id: str) -> None:
|
84
|
-
"""
|
85
|
-
The full download logic for a single book.
|
86
|
-
|
87
|
-
Subclasses must implement this method.
|
88
|
-
|
89
|
-
:param book_id: The identifier of the book to download.
|
90
|
-
"""
|
91
|
-
...
|
92
|
-
|
93
|
-
def prepare(self) -> None:
|
94
|
-
"""
|
95
|
-
Optional hook called before downloading each book.
|
96
|
-
|
97
|
-
Subclasses can override this method to perform pre-download setup.
|
98
|
-
"""
|
99
|
-
return
|
100
|
-
|
101
|
-
@property
|
102
|
-
def requester(self) -> SyncRequesterProtocol:
|
103
|
-
"""
|
104
|
-
Access the current requester.
|
105
|
-
|
106
|
-
:return: The internal requester instance.
|
107
|
-
"""
|
108
|
-
return self._requester
|
109
|
-
|
110
|
-
@property
|
111
|
-
def parser(self) -> ParserProtocol:
|
112
|
-
"""
|
113
|
-
Access the current parser.
|
114
|
-
|
115
|
-
:return: The internal parser instance.
|
116
|
-
"""
|
117
|
-
return self._parser
|
118
|
-
|
119
|
-
@property
|
120
|
-
def saver(self) -> SaverProtocol:
|
121
|
-
"""
|
122
|
-
Access the current saver.
|
123
|
-
|
124
|
-
:return: The internal saver instance.
|
125
|
-
"""
|
126
|
-
return self._saver
|
127
|
-
|
128
|
-
@property
|
129
|
-
def config(self) -> DownloaderConfig:
|
130
|
-
"""
|
131
|
-
Access the downloader configuration.
|
132
|
-
|
133
|
-
:return: The internal DownloaderConfig object.
|
134
|
-
"""
|
135
|
-
return self._config
|
136
|
-
|
137
|
-
@property
|
138
|
-
def raw_data_dir(self) -> Path:
|
139
|
-
"""
|
140
|
-
Access the root directory for storing raw downloaded data.
|
141
|
-
|
142
|
-
:return: Path to the raw data directory.
|
143
|
-
"""
|
144
|
-
return self._raw_data_dir
|
145
|
-
|
146
|
-
@property
|
147
|
-
def cache_dir(self) -> Path:
|
148
|
-
"""
|
149
|
-
Access the directory used for temporary caching during download.
|
150
|
-
|
151
|
-
:return: Path to the cache directory.
|
152
|
-
"""
|
153
|
-
return self._cache_dir
|
154
|
-
|
155
|
-
@property
|
156
|
-
def site(self) -> str:
|
157
|
-
return self._site
|
158
|
-
|
159
|
-
@property
|
160
|
-
def save_html(self) -> bool:
|
161
|
-
return self._config.save_html
|
162
|
-
|
163
|
-
@property
|
164
|
-
def skip_existing(self) -> bool:
|
165
|
-
return self._config.skip_existing
|
166
|
-
|
167
|
-
@property
|
168
|
-
def login_required(self) -> bool:
|
169
|
-
return self._config.login_required
|
170
|
-
|
171
|
-
@property
|
172
|
-
def request_interval(self) -> float:
|
173
|
-
return self._config.request_interval
|
174
|
-
|
175
|
-
def set_requester(self, requester: SyncRequesterProtocol) -> None:
|
176
|
-
"""
|
177
|
-
Replace the requester instance with a new one.
|
178
|
-
|
179
|
-
:param requester: The new requester to be used.
|
180
|
-
"""
|
181
|
-
self._requester = requester
|
182
|
-
|
183
|
-
def set_parser(self, parser: ParserProtocol) -> None:
|
184
|
-
"""
|
185
|
-
Replace the parser instance with a new one.
|
186
|
-
|
187
|
-
:param parser: The new parser to be used.
|
188
|
-
"""
|
189
|
-
self._parser = parser
|
190
|
-
|
191
|
-
def set_saver(self, saver: SaverProtocol) -> None:
|
192
|
-
"""
|
193
|
-
Replace the saver instance with a new one.
|
194
|
-
|
195
|
-
:param saver: The new saver to be used.
|
196
|
-
"""
|
197
|
-
self._saver = saver
|
198
|
-
|
199
|
-
def _handle_download_exception(self, book_id: str, error: Exception) -> None:
|
200
|
-
"""
|
201
|
-
Handle download errors in a consistent way.
|
202
|
-
|
203
|
-
This method can be overridden or extended to implement retry logic, etc.
|
204
|
-
|
205
|
-
:param book_id: The ID of the book that failed.
|
206
|
-
:param error: The exception raised during download.
|
207
|
-
"""
|
208
|
-
self.logger.warning("[downloader] Failed to download %s: %s", book_id, error)
|
@@ -1,14 +0,0 @@
|
|
1
|
-
#!/usr/bin/env python3
|
2
|
-
"""
|
3
|
-
novel_downloader.core.downloaders.biquge
|
4
|
-
----------------------------------------
|
5
|
-
|
6
|
-
"""
|
7
|
-
|
8
|
-
from .biquge_async import BiqugeAsyncDownloader
|
9
|
-
from .biquge_sync import BiqugeDownloader
|
10
|
-
|
11
|
-
__all__ = [
|
12
|
-
"BiqugeAsyncDownloader",
|
13
|
-
"BiqugeDownloader",
|
14
|
-
]
|
@@ -1,27 +0,0 @@
|
|
1
|
-
#!/usr/bin/env python3
|
2
|
-
"""
|
3
|
-
novel_downloader.core.downloaders.biquge.biquge_async
|
4
|
-
-----------------------------------------------------
|
5
|
-
|
6
|
-
"""
|
7
|
-
|
8
|
-
from novel_downloader.config.models import DownloaderConfig
|
9
|
-
from novel_downloader.core.downloaders.common import CommonAsyncDownloader
|
10
|
-
from novel_downloader.core.interfaces import (
|
11
|
-
AsyncRequesterProtocol,
|
12
|
-
ParserProtocol,
|
13
|
-
SaverProtocol,
|
14
|
-
)
|
15
|
-
|
16
|
-
|
17
|
-
class BiqugeAsyncDownloader(CommonAsyncDownloader):
|
18
|
-
""""""
|
19
|
-
|
20
|
-
def __init__(
|
21
|
-
self,
|
22
|
-
requester: AsyncRequesterProtocol,
|
23
|
-
parser: ParserProtocol,
|
24
|
-
saver: SaverProtocol,
|
25
|
-
config: DownloaderConfig,
|
26
|
-
):
|
27
|
-
super().__init__(requester, parser, saver, config, "biquge")
|
@@ -1,27 +0,0 @@
|
|
1
|
-
#!/usr/bin/env python3
|
2
|
-
"""
|
3
|
-
novel_downloader.core.downloaders.biquge.biquge_sync
|
4
|
-
----------------------------------------------------
|
5
|
-
|
6
|
-
"""
|
7
|
-
|
8
|
-
from novel_downloader.config.models import DownloaderConfig
|
9
|
-
from novel_downloader.core.downloaders.common import CommonDownloader
|
10
|
-
from novel_downloader.core.interfaces import (
|
11
|
-
ParserProtocol,
|
12
|
-
SaverProtocol,
|
13
|
-
SyncRequesterProtocol,
|
14
|
-
)
|
15
|
-
|
16
|
-
|
17
|
-
class BiqugeDownloader(CommonDownloader):
|
18
|
-
""""""
|
19
|
-
|
20
|
-
def __init__(
|
21
|
-
self,
|
22
|
-
requester: SyncRequesterProtocol,
|
23
|
-
parser: ParserProtocol,
|
24
|
-
saver: SaverProtocol,
|
25
|
-
config: DownloaderConfig,
|
26
|
-
):
|
27
|
-
super().__init__(requester, parser, saver, config, "biquge")
|
@@ -1,14 +0,0 @@
|
|
1
|
-
#!/usr/bin/env python3
|
2
|
-
"""
|
3
|
-
novel_downloader.core.downloaders.common
|
4
|
-
----------------------------------------
|
5
|
-
|
6
|
-
"""
|
7
|
-
|
8
|
-
from .common_async import CommonAsyncDownloader
|
9
|
-
from .common_sync import CommonDownloader
|
10
|
-
|
11
|
-
__all__ = [
|
12
|
-
"CommonAsyncDownloader",
|
13
|
-
"CommonDownloader",
|
14
|
-
]
|
@@ -1,210 +0,0 @@
|
|
1
|
-
#!/usr/bin/env python3
|
2
|
-
"""
|
3
|
-
novel_downloader.core.downloaders.common.common_async
|
4
|
-
-----------------------------------------------------
|
5
|
-
|
6
|
-
"""
|
7
|
-
|
8
|
-
import asyncio
|
9
|
-
import json
|
10
|
-
import logging
|
11
|
-
from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor
|
12
|
-
from typing import Any
|
13
|
-
|
14
|
-
from novel_downloader.config import DownloaderConfig
|
15
|
-
from novel_downloader.core.downloaders.base import BaseAsyncDownloader
|
16
|
-
from novel_downloader.core.interfaces import (
|
17
|
-
AsyncRequesterProtocol,
|
18
|
-
ParserProtocol,
|
19
|
-
SaverProtocol,
|
20
|
-
)
|
21
|
-
from novel_downloader.utils.chapter_storage import ChapterDict, ChapterStorage
|
22
|
-
from novel_downloader.utils.file_utils import save_as_json, save_as_txt
|
23
|
-
from novel_downloader.utils.time_utils import calculate_time_difference
|
24
|
-
|
25
|
-
logger = logging.getLogger(__name__)
|
26
|
-
|
27
|
-
|
28
|
-
class CommonAsyncDownloader(BaseAsyncDownloader):
|
29
|
-
"""
|
30
|
-
Specialized Async downloader for common novels.
|
31
|
-
"""
|
32
|
-
|
33
|
-
def __init__(
|
34
|
-
self,
|
35
|
-
requester: AsyncRequesterProtocol,
|
36
|
-
parser: ParserProtocol,
|
37
|
-
saver: SaverProtocol,
|
38
|
-
config: DownloaderConfig,
|
39
|
-
site: str,
|
40
|
-
):
|
41
|
-
""" """
|
42
|
-
super().__init__(requester, parser, saver, config, site)
|
43
|
-
self._is_logged_in = False
|
44
|
-
|
45
|
-
async def prepare(self) -> None:
|
46
|
-
"""
|
47
|
-
Perform login
|
48
|
-
"""
|
49
|
-
if self.login_required and not self._is_logged_in:
|
50
|
-
success = await self.requester.login()
|
51
|
-
if not success:
|
52
|
-
raise RuntimeError("Login failed")
|
53
|
-
self._is_logged_in = True
|
54
|
-
|
55
|
-
async def download_one(self, book_id: str) -> None:
|
56
|
-
"""
|
57
|
-
The full download logic for a single book.
|
58
|
-
|
59
|
-
:param book_id: The identifier of the book to download.
|
60
|
-
"""
|
61
|
-
assert isinstance(self.requester, AsyncRequesterProtocol)
|
62
|
-
await self.prepare()
|
63
|
-
|
64
|
-
TAG = "[AsyncDownloader]"
|
65
|
-
wait_time = self.config.request_interval
|
66
|
-
|
67
|
-
raw_base = self.raw_data_dir / book_id
|
68
|
-
cache_base = self.cache_dir / book_id
|
69
|
-
info_path = raw_base / "book_info.json"
|
70
|
-
chapters_html_dir = cache_base / "html"
|
71
|
-
|
72
|
-
raw_base.mkdir(parents=True, exist_ok=True)
|
73
|
-
if self.save_html:
|
74
|
-
chapters_html_dir.mkdir(parents=True, exist_ok=True)
|
75
|
-
normal_cs = ChapterStorage(
|
76
|
-
raw_base=raw_base,
|
77
|
-
namespace="chapters",
|
78
|
-
backend_type=self._config.storage_backend,
|
79
|
-
batch_size=self._config.storage_batch_size,
|
80
|
-
)
|
81
|
-
|
82
|
-
# load or fetch book_info
|
83
|
-
book_info: dict[str, Any]
|
84
|
-
re_fetch = True
|
85
|
-
if info_path.exists():
|
86
|
-
try:
|
87
|
-
data = json.loads(info_path.read_text("utf-8"))
|
88
|
-
days, *_ = calculate_time_difference(
|
89
|
-
data.get("update_time", ""), "UTC+8"
|
90
|
-
)
|
91
|
-
re_fetch = days > 1
|
92
|
-
except Exception:
|
93
|
-
re_fetch = True
|
94
|
-
|
95
|
-
if re_fetch:
|
96
|
-
info_html = await self.requester.get_book_info(book_id)
|
97
|
-
if self.save_html:
|
98
|
-
for i, html in enumerate(info_html):
|
99
|
-
save_as_txt(html, chapters_html_dir / f"info_{i}.html")
|
100
|
-
book_info = self.parser.parse_book_info(info_html)
|
101
|
-
if book_info.get("book_name") != "未找到书名":
|
102
|
-
save_as_json(book_info, info_path)
|
103
|
-
else:
|
104
|
-
logger.warning("%s 书籍信息未找到, book_id = %s", TAG, book_id)
|
105
|
-
await asyncio.sleep(wait_time)
|
106
|
-
else:
|
107
|
-
book_info = json.loads(info_path.read_text("utf-8"))
|
108
|
-
|
109
|
-
# setup queue, semaphore, executor
|
110
|
-
semaphore = asyncio.Semaphore(self.download_workers)
|
111
|
-
queue: asyncio.Queue[tuple[str, list[str]]] = asyncio.Queue()
|
112
|
-
save_queue: asyncio.Queue[ChapterDict] = asyncio.Queue()
|
113
|
-
loop = asyncio.get_running_loop()
|
114
|
-
executor = (
|
115
|
-
ProcessPoolExecutor() if self.use_process_pool else ThreadPoolExecutor()
|
116
|
-
)
|
117
|
-
|
118
|
-
async def parser_worker(worker_id: int) -> None:
|
119
|
-
while True:
|
120
|
-
cid, html = await queue.get()
|
121
|
-
try:
|
122
|
-
chap_json = await loop.run_in_executor(
|
123
|
-
executor, self.parser.parse_chapter, html, cid
|
124
|
-
)
|
125
|
-
if chap_json:
|
126
|
-
await save_queue.put(chap_json)
|
127
|
-
logger.info(
|
128
|
-
"%s [Parser-%d] saved chapter %s", TAG, worker_id, cid
|
129
|
-
)
|
130
|
-
except Exception as e:
|
131
|
-
logger.error(
|
132
|
-
"%s [Parser-%d] error on chapter %s: %s", TAG, worker_id, cid, e
|
133
|
-
)
|
134
|
-
finally:
|
135
|
-
queue.task_done()
|
136
|
-
|
137
|
-
async def saver_loop(
|
138
|
-
cs: ChapterStorage,
|
139
|
-
queue: asyncio.Queue[ChapterDict],
|
140
|
-
) -> None:
|
141
|
-
while True:
|
142
|
-
data = await queue.get()
|
143
|
-
try:
|
144
|
-
cs.save(data)
|
145
|
-
except Exception as e:
|
146
|
-
logger.error(
|
147
|
-
"[saver] Error saving chapter %s: %s",
|
148
|
-
data.get("id"),
|
149
|
-
e,
|
150
|
-
)
|
151
|
-
finally:
|
152
|
-
queue.task_done()
|
153
|
-
|
154
|
-
async def download_worker(chap: dict[str, Any]) -> None:
|
155
|
-
cid = str(chap.get("chapterId") or "")
|
156
|
-
if not cid:
|
157
|
-
return
|
158
|
-
if normal_cs.exists(cid) and self.skip_existing:
|
159
|
-
logger.info("%s skipping existing chapter %s", TAG, cid)
|
160
|
-
return
|
161
|
-
|
162
|
-
try:
|
163
|
-
async with semaphore:
|
164
|
-
html = await self.requester.get_book_chapter(book_id, cid)
|
165
|
-
await queue.put((cid, html))
|
166
|
-
logger.info("%s downloaded chapter %s", TAG, cid)
|
167
|
-
except Exception as e:
|
168
|
-
logger.error("%s error downloading %s: %s", TAG, cid, e)
|
169
|
-
|
170
|
-
# start parser workers
|
171
|
-
parsers = [
|
172
|
-
asyncio.create_task(parser_worker(i)) for i in range(self.parser_workers)
|
173
|
-
]
|
174
|
-
chapter_saver = asyncio.create_task(saver_loop(normal_cs, save_queue))
|
175
|
-
|
176
|
-
# enqueue + run downloads
|
177
|
-
download_tasks = []
|
178
|
-
for vol in book_info.get("volumes", []):
|
179
|
-
for chap in vol.get("chapters", []):
|
180
|
-
download_tasks.append(asyncio.create_task(download_worker(chap)))
|
181
|
-
|
182
|
-
await asyncio.gather(*download_tasks)
|
183
|
-
await queue.join() # wait until all parsed
|
184
|
-
await save_queue.join()
|
185
|
-
for p in parsers:
|
186
|
-
p.cancel() # stop parser loops
|
187
|
-
chapter_saver.cancel()
|
188
|
-
|
189
|
-
# final save
|
190
|
-
await loop.run_in_executor(executor, self.saver.save, book_id)
|
191
|
-
executor.shutdown(wait=True)
|
192
|
-
|
193
|
-
logger.info(
|
194
|
-
"%s Novel '%s' download completed.",
|
195
|
-
TAG,
|
196
|
-
book_info.get("book_name", "unknown"),
|
197
|
-
)
|
198
|
-
return
|
199
|
-
|
200
|
-
@property
|
201
|
-
def parser_workers(self) -> int:
|
202
|
-
return self.config.parser_workers
|
203
|
-
|
204
|
-
@property
|
205
|
-
def download_workers(self) -> int:
|
206
|
-
return self.config.download_workers
|
207
|
-
|
208
|
-
@property
|
209
|
-
def use_process_pool(self) -> bool:
|
210
|
-
return self.config.use_process_pool
|