novel-downloader 1.3.2__py3-none-any.whl → 1.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- novel_downloader/__init__.py +1 -1
- novel_downloader/cli/clean.py +97 -78
- novel_downloader/cli/config.py +177 -0
- novel_downloader/cli/download.py +132 -87
- novel_downloader/cli/export.py +77 -0
- novel_downloader/cli/main.py +21 -28
- novel_downloader/config/__init__.py +1 -25
- novel_downloader/config/adapter.py +32 -31
- novel_downloader/config/loader.py +3 -3
- novel_downloader/config/site_rules.py +1 -2
- novel_downloader/core/__init__.py +3 -6
- novel_downloader/core/downloaders/__init__.py +10 -13
- novel_downloader/core/downloaders/base.py +233 -0
- novel_downloader/core/downloaders/biquge.py +27 -0
- novel_downloader/core/downloaders/common.py +414 -0
- novel_downloader/core/downloaders/esjzone.py +27 -0
- novel_downloader/core/downloaders/linovelib.py +27 -0
- novel_downloader/core/downloaders/qianbi.py +27 -0
- novel_downloader/core/downloaders/qidian.py +352 -0
- novel_downloader/core/downloaders/sfacg.py +27 -0
- novel_downloader/core/downloaders/yamibo.py +27 -0
- novel_downloader/core/exporters/__init__.py +37 -0
- novel_downloader/core/{savers → exporters}/base.py +73 -44
- novel_downloader/core/exporters/biquge.py +25 -0
- novel_downloader/core/exporters/common/__init__.py +12 -0
- novel_downloader/core/{savers → exporters}/common/epub.py +40 -52
- novel_downloader/core/{savers/common/main_saver.py → exporters/common/main_exporter.py} +36 -39
- novel_downloader/core/{savers → exporters}/common/txt.py +20 -24
- novel_downloader/core/exporters/epub_utils/__init__.py +40 -0
- novel_downloader/core/{savers → exporters}/epub_utils/css_builder.py +2 -1
- novel_downloader/core/exporters/epub_utils/image_loader.py +131 -0
- novel_downloader/core/{savers → exporters}/epub_utils/initializer.py +6 -3
- novel_downloader/core/{savers → exporters}/epub_utils/text_to_html.py +49 -2
- novel_downloader/core/{savers → exporters}/epub_utils/volume_intro.py +2 -1
- novel_downloader/core/exporters/esjzone.py +25 -0
- novel_downloader/core/exporters/linovelib/__init__.py +10 -0
- novel_downloader/core/exporters/linovelib/epub.py +449 -0
- novel_downloader/core/exporters/linovelib/main_exporter.py +127 -0
- novel_downloader/core/exporters/linovelib/txt.py +129 -0
- novel_downloader/core/exporters/qianbi.py +25 -0
- novel_downloader/core/{savers → exporters}/qidian.py +8 -8
- novel_downloader/core/exporters/sfacg.py +25 -0
- novel_downloader/core/exporters/yamibo.py +25 -0
- novel_downloader/core/factory/__init__.py +5 -17
- novel_downloader/core/factory/downloader.py +24 -126
- novel_downloader/core/factory/exporter.py +58 -0
- novel_downloader/core/factory/fetcher.py +96 -0
- novel_downloader/core/factory/parser.py +17 -12
- novel_downloader/core/{requesters → fetchers}/__init__.py +22 -15
- novel_downloader/core/{requesters → fetchers}/base/__init__.py +2 -4
- novel_downloader/core/fetchers/base/browser.py +383 -0
- novel_downloader/core/fetchers/base/rate_limiter.py +86 -0
- novel_downloader/core/fetchers/base/session.py +419 -0
- novel_downloader/core/fetchers/biquge/__init__.py +14 -0
- novel_downloader/core/{requesters/biquge/async_session.py → fetchers/biquge/browser.py} +18 -6
- novel_downloader/core/{requesters → fetchers}/biquge/session.py +23 -30
- novel_downloader/core/fetchers/common/__init__.py +14 -0
- novel_downloader/core/fetchers/common/browser.py +79 -0
- novel_downloader/core/{requesters/common/async_session.py → fetchers/common/session.py} +8 -25
- novel_downloader/core/fetchers/esjzone/__init__.py +14 -0
- novel_downloader/core/fetchers/esjzone/browser.py +202 -0
- novel_downloader/core/{requesters/esjzone/async_session.py → fetchers/esjzone/session.py} +62 -42
- novel_downloader/core/fetchers/linovelib/__init__.py +14 -0
- novel_downloader/core/fetchers/linovelib/browser.py +178 -0
- novel_downloader/core/fetchers/linovelib/session.py +178 -0
- novel_downloader/core/fetchers/qianbi/__init__.py +14 -0
- novel_downloader/core/{requesters/qianbi/session.py → fetchers/qianbi/browser.py} +30 -48
- novel_downloader/core/{requesters/qianbi/async_session.py → fetchers/qianbi/session.py} +18 -6
- novel_downloader/core/fetchers/qidian/__init__.py +14 -0
- novel_downloader/core/fetchers/qidian/browser.py +266 -0
- novel_downloader/core/fetchers/qidian/session.py +326 -0
- novel_downloader/core/fetchers/sfacg/__init__.py +14 -0
- novel_downloader/core/fetchers/sfacg/browser.py +189 -0
- novel_downloader/core/{requesters/sfacg/async_session.py → fetchers/sfacg/session.py} +43 -73
- novel_downloader/core/fetchers/yamibo/__init__.py +14 -0
- novel_downloader/core/fetchers/yamibo/browser.py +229 -0
- novel_downloader/core/{requesters/yamibo/async_session.py → fetchers/yamibo/session.py} +62 -44
- novel_downloader/core/interfaces/__init__.py +8 -12
- novel_downloader/core/interfaces/downloader.py +54 -0
- novel_downloader/core/interfaces/{saver.py → exporter.py} +12 -12
- novel_downloader/core/interfaces/fetcher.py +162 -0
- novel_downloader/core/interfaces/parser.py +6 -7
- novel_downloader/core/parsers/__init__.py +5 -6
- novel_downloader/core/parsers/base.py +9 -13
- novel_downloader/core/parsers/biquge/main_parser.py +12 -13
- novel_downloader/core/parsers/common/helper.py +3 -3
- novel_downloader/core/parsers/common/main_parser.py +39 -34
- novel_downloader/core/parsers/esjzone/main_parser.py +24 -17
- novel_downloader/core/parsers/linovelib/__init__.py +10 -0
- novel_downloader/core/parsers/linovelib/main_parser.py +210 -0
- novel_downloader/core/parsers/qianbi/main_parser.py +21 -15
- novel_downloader/core/parsers/qidian/__init__.py +2 -11
- novel_downloader/core/parsers/qidian/book_info_parser.py +113 -0
- novel_downloader/core/parsers/qidian/{browser/chapter_encrypted.py → chapter_encrypted.py} +162 -135
- novel_downloader/core/parsers/qidian/chapter_normal.py +150 -0
- novel_downloader/core/parsers/qidian/{session/chapter_router.py → chapter_router.py} +15 -15
- novel_downloader/core/parsers/qidian/{browser/main_parser.py → main_parser.py} +49 -40
- novel_downloader/core/parsers/qidian/utils/__init__.py +27 -0
- novel_downloader/core/parsers/qidian/utils/decryptor_fetcher.py +145 -0
- novel_downloader/core/parsers/qidian/{shared → utils}/helpers.py +41 -68
- novel_downloader/core/parsers/qidian/{session → utils}/node_decryptor.py +64 -50
- novel_downloader/core/parsers/sfacg/main_parser.py +12 -12
- novel_downloader/core/parsers/yamibo/main_parser.py +10 -10
- novel_downloader/locales/en.json +18 -2
- novel_downloader/locales/zh.json +18 -2
- novel_downloader/models/__init__.py +64 -0
- novel_downloader/models/browser.py +21 -0
- novel_downloader/models/chapter.py +25 -0
- novel_downloader/models/config.py +100 -0
- novel_downloader/models/login.py +20 -0
- novel_downloader/models/site_rules.py +99 -0
- novel_downloader/models/tasks.py +33 -0
- novel_downloader/models/types.py +15 -0
- novel_downloader/resources/config/settings.toml +31 -25
- novel_downloader/resources/json/linovelib_font_map.json +3573 -0
- novel_downloader/tui/__init__.py +7 -0
- novel_downloader/tui/app.py +32 -0
- novel_downloader/tui/main.py +17 -0
- novel_downloader/tui/screens/__init__.py +14 -0
- novel_downloader/tui/screens/home.py +191 -0
- novel_downloader/tui/screens/login.py +74 -0
- novel_downloader/tui/styles/home_layout.tcss +79 -0
- novel_downloader/tui/widgets/richlog_handler.py +24 -0
- novel_downloader/utils/__init__.py +6 -0
- novel_downloader/utils/chapter_storage.py +25 -38
- novel_downloader/utils/constants.py +15 -5
- novel_downloader/utils/cookies.py +66 -0
- novel_downloader/utils/crypto_utils.py +1 -74
- novel_downloader/utils/file_utils/io.py +1 -1
- novel_downloader/utils/fontocr/ocr_v1.py +2 -1
- novel_downloader/utils/fontocr/ocr_v2.py +2 -2
- novel_downloader/utils/hash_store.py +10 -18
- novel_downloader/utils/hash_utils.py +3 -2
- novel_downloader/utils/logger.py +2 -3
- novel_downloader/utils/network.py +53 -39
- novel_downloader/utils/text_utils/chapter_formatting.py +6 -1
- novel_downloader/utils/text_utils/font_mapping.py +1 -1
- novel_downloader/utils/text_utils/text_cleaning.py +1 -1
- novel_downloader/utils/time_utils/datetime_utils.py +3 -3
- novel_downloader/utils/time_utils/sleep_utils.py +3 -3
- {novel_downloader-1.3.2.dist-info → novel_downloader-1.4.0.dist-info}/METADATA +72 -38
- novel_downloader-1.4.0.dist-info/RECORD +170 -0
- {novel_downloader-1.3.2.dist-info → novel_downloader-1.4.0.dist-info}/WHEEL +1 -1
- {novel_downloader-1.3.2.dist-info → novel_downloader-1.4.0.dist-info}/entry_points.txt +1 -0
- novel_downloader/cli/interactive.py +0 -66
- novel_downloader/cli/settings.py +0 -177
- novel_downloader/config/models.py +0 -187
- novel_downloader/core/downloaders/base/__init__.py +0 -14
- novel_downloader/core/downloaders/base/base_async.py +0 -153
- novel_downloader/core/downloaders/base/base_sync.py +0 -208
- novel_downloader/core/downloaders/biquge/__init__.py +0 -14
- novel_downloader/core/downloaders/biquge/biquge_async.py +0 -27
- novel_downloader/core/downloaders/biquge/biquge_sync.py +0 -27
- novel_downloader/core/downloaders/common/__init__.py +0 -14
- novel_downloader/core/downloaders/common/common_async.py +0 -218
- novel_downloader/core/downloaders/common/common_sync.py +0 -210
- novel_downloader/core/downloaders/esjzone/__init__.py +0 -14
- novel_downloader/core/downloaders/esjzone/esjzone_async.py +0 -27
- novel_downloader/core/downloaders/esjzone/esjzone_sync.py +0 -27
- novel_downloader/core/downloaders/qianbi/__init__.py +0 -14
- novel_downloader/core/downloaders/qianbi/qianbi_async.py +0 -27
- novel_downloader/core/downloaders/qianbi/qianbi_sync.py +0 -27
- novel_downloader/core/downloaders/qidian/__init__.py +0 -10
- novel_downloader/core/downloaders/qidian/qidian_sync.py +0 -227
- novel_downloader/core/downloaders/sfacg/__init__.py +0 -14
- novel_downloader/core/downloaders/sfacg/sfacg_async.py +0 -27
- novel_downloader/core/downloaders/sfacg/sfacg_sync.py +0 -27
- novel_downloader/core/downloaders/yamibo/__init__.py +0 -14
- novel_downloader/core/downloaders/yamibo/yamibo_async.py +0 -27
- novel_downloader/core/downloaders/yamibo/yamibo_sync.py +0 -27
- novel_downloader/core/factory/requester.py +0 -144
- novel_downloader/core/factory/saver.py +0 -56
- novel_downloader/core/interfaces/async_downloader.py +0 -36
- novel_downloader/core/interfaces/async_requester.py +0 -84
- novel_downloader/core/interfaces/sync_downloader.py +0 -36
- novel_downloader/core/interfaces/sync_requester.py +0 -82
- novel_downloader/core/parsers/qidian/browser/__init__.py +0 -12
- novel_downloader/core/parsers/qidian/browser/chapter_normal.py +0 -93
- novel_downloader/core/parsers/qidian/browser/chapter_router.py +0 -71
- novel_downloader/core/parsers/qidian/session/__init__.py +0 -12
- novel_downloader/core/parsers/qidian/session/chapter_encrypted.py +0 -443
- novel_downloader/core/parsers/qidian/session/chapter_normal.py +0 -115
- novel_downloader/core/parsers/qidian/session/main_parser.py +0 -128
- novel_downloader/core/parsers/qidian/shared/__init__.py +0 -37
- novel_downloader/core/parsers/qidian/shared/book_info_parser.py +0 -150
- novel_downloader/core/requesters/base/async_session.py +0 -410
- novel_downloader/core/requesters/base/browser.py +0 -337
- novel_downloader/core/requesters/base/session.py +0 -378
- novel_downloader/core/requesters/biquge/__init__.py +0 -14
- novel_downloader/core/requesters/common/__init__.py +0 -17
- novel_downloader/core/requesters/common/session.py +0 -113
- novel_downloader/core/requesters/esjzone/__init__.py +0 -13
- novel_downloader/core/requesters/esjzone/session.py +0 -235
- novel_downloader/core/requesters/qianbi/__init__.py +0 -13
- novel_downloader/core/requesters/qidian/__init__.py +0 -21
- novel_downloader/core/requesters/qidian/broswer.py +0 -307
- novel_downloader/core/requesters/qidian/session.py +0 -290
- novel_downloader/core/requesters/sfacg/__init__.py +0 -13
- novel_downloader/core/requesters/sfacg/session.py +0 -242
- novel_downloader/core/requesters/yamibo/__init__.py +0 -13
- novel_downloader/core/requesters/yamibo/session.py +0 -237
- novel_downloader/core/savers/__init__.py +0 -34
- novel_downloader/core/savers/biquge.py +0 -25
- novel_downloader/core/savers/common/__init__.py +0 -12
- novel_downloader/core/savers/epub_utils/__init__.py +0 -26
- novel_downloader/core/savers/esjzone.py +0 -25
- novel_downloader/core/savers/qianbi.py +0 -25
- novel_downloader/core/savers/sfacg.py +0 -25
- novel_downloader/core/savers/yamibo.py +0 -25
- novel_downloader/resources/config/rules.toml +0 -196
- novel_downloader-1.3.2.dist-info/RECORD +0 -165
- {novel_downloader-1.3.2.dist-info → novel_downloader-1.4.0.dist-info}/licenses/LICENSE +0 -0
- {novel_downloader-1.3.2.dist-info → novel_downloader-1.4.0.dist-info}/top_level.txt +0 -0
@@ -1,218 +0,0 @@
|
|
1
|
-
#!/usr/bin/env python3
|
2
|
-
"""
|
3
|
-
novel_downloader.core.downloaders.common.common_async
|
4
|
-
-----------------------------------------------------
|
5
|
-
|
6
|
-
"""
|
7
|
-
|
8
|
-
import asyncio
|
9
|
-
import json
|
10
|
-
import logging
|
11
|
-
from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor
|
12
|
-
from typing import Any
|
13
|
-
|
14
|
-
from novel_downloader.config import DownloaderConfig
|
15
|
-
from novel_downloader.core.downloaders.base import BaseAsyncDownloader
|
16
|
-
from novel_downloader.core.interfaces import (
|
17
|
-
AsyncRequesterProtocol,
|
18
|
-
ParserProtocol,
|
19
|
-
SaverProtocol,
|
20
|
-
)
|
21
|
-
from novel_downloader.utils.chapter_storage import ChapterDict, ChapterStorage
|
22
|
-
from novel_downloader.utils.file_utils import save_as_json, save_as_txt
|
23
|
-
from novel_downloader.utils.network import download_image_as_bytes
|
24
|
-
from novel_downloader.utils.time_utils import calculate_time_difference
|
25
|
-
|
26
|
-
logger = logging.getLogger(__name__)
|
27
|
-
|
28
|
-
|
29
|
-
class CommonAsyncDownloader(BaseAsyncDownloader):
    """
    Asynchronous downloader for sites handled by the common pipeline.

    Chapter pages are fetched concurrently (bounded by ``download_workers``),
    parsed in an executor by ``parser_workers`` tasks, and written to a
    ``ChapterStorage`` by a single saver task.
    """

    _TAG = "[AsyncDownloader]"

    def __init__(
        self,
        requester: AsyncRequesterProtocol,
        parser: ParserProtocol,
        saver: SaverProtocol,
        config: DownloaderConfig,
        site: str,
    ):
        """
        Initialize the downloader.

        :param requester: Async requester used to fetch raw pages.
        :param parser: Parser used to turn pages into structured data.
        :param saver: Saver invoked once all chapters have been stored.
        :param config: Downloader configuration object.
        :param site: Identifier of the site the downloader is targeting.
        """
        super().__init__(requester, parser, saver, config, site)
        self._is_logged_in = False

    async def prepare(self) -> None:
        """
        Log in through the requester when login is required and not yet done.

        :raises RuntimeError: If the requester reports a failed login.
        """
        if self.login_required and not self._is_logged_in:
            success = await self.requester.login()
            if not success:
                raise RuntimeError("Login failed")
            self._is_logged_in = True

    async def _load_book_info(self, book_id: str) -> dict[str, Any]:
        """
        Return the book metadata, preferring a recent on-disk copy.

        The cached ``book_info.json`` is reused when its ``update_time`` is at
        most one day old; otherwise the info pages are fetched and parsed
        again. The caller is expected to have created the target directories.

        :param book_id: The identifier of the book.
        :return: The parsed book-info mapping.
        """
        info_path = self.raw_data_dir / book_id / "book_info.json"
        chapters_html_dir = self.cache_dir / book_id / "html"

        if info_path.exists():
            try:
                cached = json.loads(info_path.read_text("utf-8"))
                days, *_ = calculate_time_difference(
                    cached.get("update_time", ""), "UTC+8"
                )
                fresh = days <= 1
            except Exception as e:
                # An unreadable or malformed cache simply triggers a re-fetch.
                logger.debug(
                    "%s cached book info unusable, re-fetching: %s", self._TAG, e
                )
                fresh = False
            if fresh:
                # Reuse the mapping parsed above instead of reading it again.
                return cached

        info_html = await self.requester.get_book_info(book_id)
        if self.save_html:
            for i, html in enumerate(info_html):
                save_as_txt(html, chapters_html_dir / f"info_{i}.html")
        book_info: dict[str, Any] = self.parser.parse_book_info(info_html)
        if book_info.get("book_name") != "未找到书名":
            save_as_json(book_info, info_path)
        else:
            logger.warning("%s 书籍信息未找到, book_id = %s", self._TAG, book_id)
        await asyncio.sleep(self.config.request_interval)
        return book_info

    async def download_one(self, book_id: str) -> None:
        """
        The full download logic for a single book.

        Worker tasks, the executor and the chapter storage are always
        released, including when the download fails or is cancelled. The
        saver runs only after every queued chapter has been stored and the
        storage has been closed.

        :param book_id: The identifier of the book to download.
        """
        assert isinstance(self.requester, AsyncRequesterProtocol)
        await self.prepare()

        TAG = self._TAG
        raw_base = self.raw_data_dir / book_id
        chapters_html_dir = self.cache_dir / book_id / "html"

        raw_base.mkdir(parents=True, exist_ok=True)
        if self.save_html:
            chapters_html_dir.mkdir(parents=True, exist_ok=True)

        book_info = await self._load_book_info(book_id)
        loop = asyncio.get_running_loop()

        # download cover (blocking helper, so run it in the default executor)
        cover_url = book_info.get("cover_url", "")
        if cover_url:
            cover_bytes = await loop.run_in_executor(
                None, download_image_as_bytes, cover_url, raw_base
            )
            if not cover_bytes:
                logger.warning("%s Failed to download cover: %s", TAG, cover_url)

        # setup queues, semaphore, storage
        semaphore = asyncio.Semaphore(self.download_workers)
        queue: asyncio.Queue[tuple[str, list[str]]] = asyncio.Queue()
        save_queue: asyncio.Queue[ChapterDict] = asyncio.Queue()
        normal_cs = ChapterStorage(
            raw_base=raw_base,
            namespace="chapters",
            backend_type=self._config.storage_backend,
            batch_size=self._config.storage_batch_size,
        )

        async def parser_worker(worker_id: int) -> None:
            # Runs until cancelled: parse fetched pages off the event loop.
            while True:
                cid, html = await queue.get()
                try:
                    chap_json = await loop.run_in_executor(
                        executor, self.parser.parse_chapter, html, cid
                    )
                    if chap_json:
                        await save_queue.put(chap_json)
                        logger.info(
                            "%s [Parser-%d] saved chapter %s", TAG, worker_id, cid
                        )
                except Exception as e:
                    logger.error(
                        "%s [Parser-%d] error on chapter %s: %s", TAG, worker_id, cid, e
                    )
                finally:
                    queue.task_done()

        async def saver_loop() -> None:
            # Runs until cancelled: single writer for the chapter storage.
            while True:
                data = await save_queue.get()
                try:
                    normal_cs.save(data)
                except Exception as e:
                    logger.error(
                        "[saver] Error saving chapter %s: %s",
                        data.get("id"),
                        e,
                    )
                finally:
                    save_queue.task_done()

        async def download_worker(chap: dict[str, Any]) -> None:
            cid = str(chap.get("chapterId") or "")
            if not cid:
                return
            if normal_cs.exists(cid) and self.skip_existing:
                logger.info("%s skipping existing chapter %s", TAG, cid)
                return

            try:
                async with semaphore:
                    html = await self.requester.get_book_chapter(book_id, cid)
                if self.save_html:
                    # Keep the raw pages, as the synchronous downloader does.
                    for i, page in enumerate(html):
                        html_path = chapters_html_dir / f"{cid}_{i}.html"
                        save_as_txt(page, html_path, on_exist="skip")
                await queue.put((cid, html))
                logger.info("%s downloaded chapter %s", TAG, cid)
            except Exception as e:
                logger.error("%s error downloading %s: %s", TAG, cid, e)

        executor = (
            ProcessPoolExecutor() if self.use_process_pool else ThreadPoolExecutor()
        )
        # The context manager shuts the executor down (waiting) on every path.
        with executor:
            background = [
                asyncio.create_task(parser_worker(i))
                for i in range(self.parser_workers)
            ]
            background.append(asyncio.create_task(saver_loop()))

            download_tasks = []
            for vol in book_info.get("volumes", []):
                for chap in vol.get("chapters", []):
                    download_tasks.append(asyncio.create_task(download_worker(chap)))

            try:
                await asyncio.gather(*download_tasks)
                await queue.join()  # wait until all parsed
                await save_queue.join()  # wait until all stored
            finally:
                # Stop the endless worker loops and wait for the cancellation
                # to finish so no task is left pending.
                pending = [*download_tasks, *background]
                for task in pending:
                    task.cancel()
                await asyncio.gather(*pending, return_exceptions=True)
                # Close the storage before the saver reads from it.
                normal_cs.close()

            # final save
            await loop.run_in_executor(executor, self.saver.save, book_id)

        logger.info(
            "%s Novel '%s' download completed.",
            TAG,
            book_info.get("book_name", "unknown"),
        )
        return

    @property
    def parser_workers(self) -> int:
        """Number of concurrent parser tasks, from the configuration."""
        return self.config.parser_workers

    @property
    def download_workers(self) -> int:
        """Maximum number of concurrent chapter requests, from the configuration."""
        return self.config.download_workers

    @property
    def use_process_pool(self) -> bool:
        """Whether parsing uses a process pool instead of a thread pool."""
        return self.config.use_process_pool
|
@@ -1,210 +0,0 @@
|
|
1
|
-
#!/usr/bin/env python3
|
2
|
-
"""
|
3
|
-
novel_downloader.core.downloaders.common.common_sync
|
4
|
-
----------------------------------------------------
|
5
|
-
|
6
|
-
This module defines `CommonDownloader`.
|
7
|
-
"""
|
8
|
-
|
9
|
-
import json
|
10
|
-
import logging
|
11
|
-
from typing import Any
|
12
|
-
|
13
|
-
from novel_downloader.config import DownloaderConfig
|
14
|
-
from novel_downloader.core.downloaders.base import BaseDownloader
|
15
|
-
from novel_downloader.core.interfaces import (
|
16
|
-
ParserProtocol,
|
17
|
-
SaverProtocol,
|
18
|
-
SyncRequesterProtocol,
|
19
|
-
)
|
20
|
-
from novel_downloader.utils.chapter_storage import ChapterStorage
|
21
|
-
from novel_downloader.utils.file_utils import save_as_json, save_as_txt
|
22
|
-
from novel_downloader.utils.network import download_image_as_bytes
|
23
|
-
from novel_downloader.utils.time_utils import (
|
24
|
-
calculate_time_difference,
|
25
|
-
sleep_with_random_delay,
|
26
|
-
)
|
27
|
-
|
28
|
-
logger = logging.getLogger(__name__)
|
29
|
-
|
30
|
-
|
31
|
-
class CommonDownloader(BaseDownloader):
    """
    Synchronous downloader for sites handled by the common pipeline.

    Chapters are fetched one at a time, parsed, and written to a
    ``ChapterStorage`` before the saver produces the final output.
    """

    _TAG = "[Downloader]"

    def __init__(
        self,
        requester: SyncRequesterProtocol,
        parser: ParserProtocol,
        saver: SaverProtocol,
        config: DownloaderConfig,
        site: str,
    ):
        """
        Initialize the common novel downloader with site information.

        :param requester: Object implementing RequesterProtocol, used to fetch raw data.
        :param parser: Object implementing ParserProtocol, used to parse page content.
        :param saver: Object implementing SaverProtocol, used to save final output.
        :param config: Downloader configuration object.
        :param site: Identifier for the site the downloader is targeting.
        """
        super().__init__(requester, parser, saver, config, site)
        self._site = site
        self._is_logged_in = False

    def prepare(self) -> None:
        """
        Log in through the requester when login is required and not yet done.

        :raises RuntimeError: If the requester reports a failed login.
        """
        if self.login_required and not self._is_logged_in:
            success = self.requester.login()
            if not success:
                raise RuntimeError("Login failed")
            self._is_logged_in = True

    def _load_book_info(self, book_id: str) -> dict[str, Any]:
        """
        Return the book metadata, preferring a recent on-disk copy.

        The cached ``book_info.json`` is reused when its ``update_time`` is at
        most one day old; otherwise the info pages are fetched and parsed
        again. The caller is expected to have created the target directories.

        :param book_id: The identifier of the book.
        :return: The parsed book-info mapping.
        """
        TAG = self._TAG
        wait_time = self.config.request_interval
        info_path = self.raw_data_dir / book_id / "book_info.json"
        chapters_html_dir = self.cache_dir / book_id / "html"

        if info_path.exists():
            try:
                cached = json.loads(info_path.read_text(encoding="utf-8"))
                days, hrs, mins, secs = calculate_time_difference(
                    cached.get("update_time", ""), "UTC+8"
                )
                logger.info(
                    "%s Last updated %dd %dh %dm %ds ago", TAG, days, hrs, mins, secs
                )
                fresh = days <= 1
            except Exception as e:
                # An unreadable or malformed cache simply triggers a re-fetch.
                logger.debug("%s cached book info unusable, re-fetching: %s", TAG, e)
                fresh = False
            if fresh:
                return cached

        info_html = self.requester.get_book_info(book_id)
        if self.save_html:
            for i, html in enumerate(info_html):
                save_as_txt(html, chapters_html_dir / f"info_{i}.html")
        book_info: dict[str, Any] = self.parser.parse_book_info(info_html)
        if (
            book_info.get("book_name", "") != "未找到书名"
            and book_info.get("update_time", "") != "未找到更新时间"
        ):
            save_as_json(book_info, info_path)
        else:
            # Placeholder values are not cached, so the next run re-fetches.
            logger.warning("%s 书籍信息未找到, book_id = %s", TAG, book_id)
        sleep_with_random_delay(wait_time, mul_spread=1.1, max_sleep=wait_time + 2)
        return book_info

    def download_one(self, book_id: str) -> None:
        """
        The full download logic for a single book.

        The chapter storage is closed even when the chapter loop raises; the
        saver runs only after the loop has completed.

        :param book_id: The identifier of the book to download.
        """
        self.prepare()

        TAG = self._TAG
        skip_existing = self.config.skip_existing
        wait_time = self.config.request_interval

        raw_base = self.raw_data_dir / book_id
        chapters_html_dir = self.cache_dir / book_id / "html"

        raw_base.mkdir(parents=True, exist_ok=True)
        if self.save_html:
            chapters_html_dir.mkdir(parents=True, exist_ok=True)

        book_info = self._load_book_info(book_id)

        # download cover
        cover_url = book_info.get("cover_url", "")
        if cover_url:
            cover_bytes = download_image_as_bytes(cover_url, raw_base)
            if not cover_bytes:
                logger.warning("%s Failed to download cover: %s", TAG, cover_url)

        normal_cs = ChapterStorage(
            raw_base=raw_base,
            namespace="chapters",
            backend_type=self._config.storage_backend,
            batch_size=self._config.storage_batch_size,
        )
        try:
            # enqueue chapters
            for vol in book_info.get("volumes", []):
                vol_name = vol.get("volume_name", "")
                logger.info("%s Enqueuing volume: %s", TAG, vol_name)

                for chap in vol.get("chapters", []):
                    cid = chap.get("chapterId")
                    if not cid:
                        logger.warning("%s Skipping chapter without chapterId", TAG)
                        continue

                    if normal_cs.exists(cid) and skip_existing:
                        logger.debug(
                            "%s Chapter already exists, skipping: %s",
                            TAG,
                            cid,
                        )
                        continue

                    chap_title = chap.get("title", "")
                    logger.info("%s Fetching chapter: %s (%s)", TAG, chap_title, cid)
                    try:
                        chap_html = self.requester.get_book_chapter(book_id, cid)

                        if self.save_html:
                            for i, html in enumerate(chap_html):
                                html_path = chapters_html_dir / f"{cid}_{i}.html"
                                save_as_txt(html, html_path, on_exist="skip")

                        chap_json = self.parser.parse_chapter(chap_html, cid)

                        sleep_with_random_delay(
                            wait_time, mul_spread=1.1, max_sleep=wait_time + 2
                        )
                    except Exception as e:
                        # One bad chapter must not abort the whole book.
                        logger.warning(
                            "%s Error while processing chapter %s (%s): %s",
                            TAG,
                            chap_title,
                            cid,
                            str(e),
                        )
                        continue

                    if not chap_json:
                        logger.warning(
                            "%s Parsed chapter json is empty, skipping: %s (%s)",
                            TAG,
                            chap_title,
                            cid,
                        )
                        continue

                    normal_cs.save(chap_json)
                    logger.info("%s Saved chapter: %s (%s)", TAG, chap_title, cid)
        finally:
            # Always close the storage, including when the loop is interrupted.
            normal_cs.close()

        self.saver.save(book_id)

        logger.info(
            "%s Novel '%s' download completed.",
            TAG,
            book_info.get("book_name", "unknown"),
        )
        return

    @property
    def site(self) -> str:
        """
        Get the site identifier.

        :return: The site string.
        """
        return self._site

    @site.setter
    def site(self, value: str) -> None:
        """
        Set the site identifier.

        :param value: New site string to set.
        """
        self._site = value
|
@@ -1,14 +0,0 @@
|
|
1
|
-
#!/usr/bin/env python3
|
2
|
-
"""
|
3
|
-
novel_downloader.core.downloaders.esjzone
|
4
|
-
-----------------------------------------
|
5
|
-
|
6
|
-
"""
|
7
|
-
|
8
|
-
from .esjzone_async import EsjzoneAsyncDownloader
|
9
|
-
from .esjzone_sync import EsjzoneDownloader
|
10
|
-
|
11
|
-
__all__ = [
|
12
|
-
"EsjzoneAsyncDownloader",
|
13
|
-
"EsjzoneDownloader",
|
14
|
-
]
|
@@ -1,27 +0,0 @@
|
|
1
|
-
#!/usr/bin/env python3
|
2
|
-
"""
|
3
|
-
novel_downloader.core.downloaders.esjzone.esjzone_async
|
4
|
-
-------------------------------------------------------
|
5
|
-
|
6
|
-
"""
|
7
|
-
|
8
|
-
from novel_downloader.config.models import DownloaderConfig
|
9
|
-
from novel_downloader.core.downloaders.common import CommonAsyncDownloader
|
10
|
-
from novel_downloader.core.interfaces import (
|
11
|
-
AsyncRequesterProtocol,
|
12
|
-
ParserProtocol,
|
13
|
-
SaverProtocol,
|
14
|
-
)
|
15
|
-
|
16
|
-
|
17
|
-
class EsjzoneAsyncDownloader(CommonAsyncDownloader):
    """
    Async downloader bound to the ``esjzone`` site.

    Only the constructor is defined here; everything else is inherited
    from ``CommonAsyncDownloader``.
    """

    def __init__(
        self,
        requester: AsyncRequesterProtocol,
        parser: ParserProtocol,
        saver: SaverProtocol,
        config: DownloaderConfig,
    ):
        """
        Forward the collaborators to the common downloader for "esjzone".

        :param requester: Async requester passed through to the base class.
        :param parser: Parser passed through to the base class.
        :param saver: Saver passed through to the base class.
        :param config: Downloader configuration passed through to the base class.
        """
        site_key = "esjzone"
        super().__init__(requester, parser, saver, config, site_key)
|
@@ -1,27 +0,0 @@
|
|
1
|
-
#!/usr/bin/env python3
|
2
|
-
"""
|
3
|
-
novel_downloader.core.downloaders.esjzone.esjzone_sync
|
4
|
-
------------------------------------------------------
|
5
|
-
|
6
|
-
"""
|
7
|
-
|
8
|
-
from novel_downloader.config.models import DownloaderConfig
|
9
|
-
from novel_downloader.core.downloaders.common import CommonDownloader
|
10
|
-
from novel_downloader.core.interfaces import (
|
11
|
-
ParserProtocol,
|
12
|
-
SaverProtocol,
|
13
|
-
SyncRequesterProtocol,
|
14
|
-
)
|
15
|
-
|
16
|
-
|
17
|
-
class EsjzoneDownloader(CommonDownloader):
    """
    Synchronous downloader bound to the ``esjzone`` site.

    Only the constructor is defined here; everything else is inherited
    from ``CommonDownloader``.
    """

    def __init__(
        self,
        requester: SyncRequesterProtocol,
        parser: ParserProtocol,
        saver: SaverProtocol,
        config: DownloaderConfig,
    ):
        """
        Forward the collaborators to the common downloader for "esjzone".

        :param requester: Sync requester passed through to the base class.
        :param parser: Parser passed through to the base class.
        :param saver: Saver passed through to the base class.
        :param config: Downloader configuration passed through to the base class.
        """
        site_key = "esjzone"
        super().__init__(requester, parser, saver, config, site_key)
|
@@ -1,14 +0,0 @@
|
|
1
|
-
#!/usr/bin/env python3
|
2
|
-
"""
|
3
|
-
novel_downloader.core.downloaders.qianbi
|
4
|
-
----------------------------------------
|
5
|
-
|
6
|
-
"""
|
7
|
-
|
8
|
-
from .qianbi_async import QianbiAsyncDownloader
|
9
|
-
from .qianbi_sync import QianbiDownloader
|
10
|
-
|
11
|
-
__all__ = [
|
12
|
-
"QianbiAsyncDownloader",
|
13
|
-
"QianbiDownloader",
|
14
|
-
]
|
@@ -1,27 +0,0 @@
|
|
1
|
-
#!/usr/bin/env python3
|
2
|
-
"""
|
3
|
-
novel_downloader.core.downloaders.qianbi.qianbi_async
|
4
|
-
-----------------------------------------------------
|
5
|
-
|
6
|
-
"""
|
7
|
-
|
8
|
-
from novel_downloader.config.models import DownloaderConfig
|
9
|
-
from novel_downloader.core.downloaders.common import CommonAsyncDownloader
|
10
|
-
from novel_downloader.core.interfaces import (
|
11
|
-
AsyncRequesterProtocol,
|
12
|
-
ParserProtocol,
|
13
|
-
SaverProtocol,
|
14
|
-
)
|
15
|
-
|
16
|
-
|
17
|
-
class QianbiAsyncDownloader(CommonAsyncDownloader):
    """
    Async downloader bound to the ``qianbi`` site.

    Only the constructor is defined here; everything else is inherited
    from ``CommonAsyncDownloader``.
    """

    def __init__(
        self,
        requester: AsyncRequesterProtocol,
        parser: ParserProtocol,
        saver: SaverProtocol,
        config: DownloaderConfig,
    ):
        """
        Forward the collaborators to the common downloader for "qianbi".

        :param requester: Async requester passed through to the base class.
        :param parser: Parser passed through to the base class.
        :param saver: Saver passed through to the base class.
        :param config: Downloader configuration passed through to the base class.
        """
        site_key = "qianbi"
        super().__init__(requester, parser, saver, config, site_key)
|
@@ -1,27 +0,0 @@
|
|
1
|
-
#!/usr/bin/env python3
|
2
|
-
"""
|
3
|
-
novel_downloader.core.downloaders.qianbi.qianbi_sync
|
4
|
-
----------------------------------------------------
|
5
|
-
|
6
|
-
"""
|
7
|
-
|
8
|
-
from novel_downloader.config.models import DownloaderConfig
|
9
|
-
from novel_downloader.core.downloaders.common import CommonDownloader
|
10
|
-
from novel_downloader.core.interfaces import (
|
11
|
-
ParserProtocol,
|
12
|
-
SaverProtocol,
|
13
|
-
SyncRequesterProtocol,
|
14
|
-
)
|
15
|
-
|
16
|
-
|
17
|
-
class QianbiDownloader(CommonDownloader):
    """
    Synchronous downloader bound to the ``qianbi`` site.

    Only the constructor is defined here; everything else is inherited
    from ``CommonDownloader``.
    """

    def __init__(
        self,
        requester: SyncRequesterProtocol,
        parser: ParserProtocol,
        saver: SaverProtocol,
        config: DownloaderConfig,
    ):
        """
        Forward the collaborators to the common downloader for "qianbi".

        :param requester: Sync requester passed through to the base class.
        :param parser: Parser passed through to the base class.
        :param saver: Saver passed through to the base class.
        :param config: Downloader configuration passed through to the base class.
        """
        site_key = "qianbi"
        super().__init__(requester, parser, saver, config, site_key)
|