novel-downloader 1.3.2__py3-none-any.whl → 1.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- novel_downloader/__init__.py +1 -1
- novel_downloader/cli/clean.py +97 -78
- novel_downloader/cli/config.py +177 -0
- novel_downloader/cli/download.py +132 -87
- novel_downloader/cli/export.py +77 -0
- novel_downloader/cli/main.py +21 -28
- novel_downloader/config/__init__.py +1 -25
- novel_downloader/config/adapter.py +32 -31
- novel_downloader/config/loader.py +3 -3
- novel_downloader/config/site_rules.py +1 -2
- novel_downloader/core/__init__.py +3 -6
- novel_downloader/core/downloaders/__init__.py +10 -13
- novel_downloader/core/downloaders/base.py +233 -0
- novel_downloader/core/downloaders/biquge.py +27 -0
- novel_downloader/core/downloaders/common.py +414 -0
- novel_downloader/core/downloaders/esjzone.py +27 -0
- novel_downloader/core/downloaders/linovelib.py +27 -0
- novel_downloader/core/downloaders/qianbi.py +27 -0
- novel_downloader/core/downloaders/qidian.py +352 -0
- novel_downloader/core/downloaders/sfacg.py +27 -0
- novel_downloader/core/downloaders/yamibo.py +27 -0
- novel_downloader/core/exporters/__init__.py +37 -0
- novel_downloader/core/{savers → exporters}/base.py +73 -44
- novel_downloader/core/exporters/biquge.py +25 -0
- novel_downloader/core/exporters/common/__init__.py +12 -0
- novel_downloader/core/{savers → exporters}/common/epub.py +40 -52
- novel_downloader/core/{savers/common/main_saver.py → exporters/common/main_exporter.py} +36 -39
- novel_downloader/core/{savers → exporters}/common/txt.py +20 -24
- novel_downloader/core/exporters/epub_utils/__init__.py +40 -0
- novel_downloader/core/{savers → exporters}/epub_utils/css_builder.py +2 -1
- novel_downloader/core/exporters/epub_utils/image_loader.py +131 -0
- novel_downloader/core/{savers → exporters}/epub_utils/initializer.py +6 -3
- novel_downloader/core/{savers → exporters}/epub_utils/text_to_html.py +49 -2
- novel_downloader/core/{savers → exporters}/epub_utils/volume_intro.py +2 -1
- novel_downloader/core/exporters/esjzone.py +25 -0
- novel_downloader/core/exporters/linovelib/__init__.py +10 -0
- novel_downloader/core/exporters/linovelib/epub.py +449 -0
- novel_downloader/core/exporters/linovelib/main_exporter.py +127 -0
- novel_downloader/core/exporters/linovelib/txt.py +129 -0
- novel_downloader/core/exporters/qianbi.py +25 -0
- novel_downloader/core/{savers → exporters}/qidian.py +8 -8
- novel_downloader/core/exporters/sfacg.py +25 -0
- novel_downloader/core/exporters/yamibo.py +25 -0
- novel_downloader/core/factory/__init__.py +5 -17
- novel_downloader/core/factory/downloader.py +24 -126
- novel_downloader/core/factory/exporter.py +58 -0
- novel_downloader/core/factory/fetcher.py +96 -0
- novel_downloader/core/factory/parser.py +17 -12
- novel_downloader/core/{requesters → fetchers}/__init__.py +22 -15
- novel_downloader/core/{requesters → fetchers}/base/__init__.py +2 -4
- novel_downloader/core/fetchers/base/browser.py +383 -0
- novel_downloader/core/fetchers/base/rate_limiter.py +86 -0
- novel_downloader/core/fetchers/base/session.py +419 -0
- novel_downloader/core/fetchers/biquge/__init__.py +14 -0
- novel_downloader/core/{requesters/biquge/async_session.py → fetchers/biquge/browser.py} +18 -6
- novel_downloader/core/{requesters → fetchers}/biquge/session.py +23 -30
- novel_downloader/core/fetchers/common/__init__.py +14 -0
- novel_downloader/core/fetchers/common/browser.py +79 -0
- novel_downloader/core/{requesters/common/async_session.py → fetchers/common/session.py} +8 -25
- novel_downloader/core/fetchers/esjzone/__init__.py +14 -0
- novel_downloader/core/fetchers/esjzone/browser.py +202 -0
- novel_downloader/core/{requesters/esjzone/async_session.py → fetchers/esjzone/session.py} +62 -42
- novel_downloader/core/fetchers/linovelib/__init__.py +14 -0
- novel_downloader/core/fetchers/linovelib/browser.py +178 -0
- novel_downloader/core/fetchers/linovelib/session.py +178 -0
- novel_downloader/core/fetchers/qianbi/__init__.py +14 -0
- novel_downloader/core/{requesters/qianbi/session.py → fetchers/qianbi/browser.py} +30 -48
- novel_downloader/core/{requesters/qianbi/async_session.py → fetchers/qianbi/session.py} +18 -6
- novel_downloader/core/fetchers/qidian/__init__.py +14 -0
- novel_downloader/core/fetchers/qidian/browser.py +266 -0
- novel_downloader/core/fetchers/qidian/session.py +326 -0
- novel_downloader/core/fetchers/sfacg/__init__.py +14 -0
- novel_downloader/core/fetchers/sfacg/browser.py +189 -0
- novel_downloader/core/{requesters/sfacg/async_session.py → fetchers/sfacg/session.py} +43 -73
- novel_downloader/core/fetchers/yamibo/__init__.py +14 -0
- novel_downloader/core/fetchers/yamibo/browser.py +229 -0
- novel_downloader/core/{requesters/yamibo/async_session.py → fetchers/yamibo/session.py} +62 -44
- novel_downloader/core/interfaces/__init__.py +8 -12
- novel_downloader/core/interfaces/downloader.py +54 -0
- novel_downloader/core/interfaces/{saver.py → exporter.py} +12 -12
- novel_downloader/core/interfaces/fetcher.py +162 -0
- novel_downloader/core/interfaces/parser.py +6 -7
- novel_downloader/core/parsers/__init__.py +5 -6
- novel_downloader/core/parsers/base.py +9 -13
- novel_downloader/core/parsers/biquge/main_parser.py +12 -13
- novel_downloader/core/parsers/common/helper.py +3 -3
- novel_downloader/core/parsers/common/main_parser.py +39 -34
- novel_downloader/core/parsers/esjzone/main_parser.py +24 -17
- novel_downloader/core/parsers/linovelib/__init__.py +10 -0
- novel_downloader/core/parsers/linovelib/main_parser.py +210 -0
- novel_downloader/core/parsers/qianbi/main_parser.py +21 -15
- novel_downloader/core/parsers/qidian/__init__.py +2 -11
- novel_downloader/core/parsers/qidian/book_info_parser.py +113 -0
- novel_downloader/core/parsers/qidian/{browser/chapter_encrypted.py → chapter_encrypted.py} +162 -135
- novel_downloader/core/parsers/qidian/chapter_normal.py +150 -0
- novel_downloader/core/parsers/qidian/{session/chapter_router.py → chapter_router.py} +15 -15
- novel_downloader/core/parsers/qidian/{browser/main_parser.py → main_parser.py} +49 -40
- novel_downloader/core/parsers/qidian/utils/__init__.py +27 -0
- novel_downloader/core/parsers/qidian/utils/decryptor_fetcher.py +145 -0
- novel_downloader/core/parsers/qidian/{shared → utils}/helpers.py +41 -68
- novel_downloader/core/parsers/qidian/{session → utils}/node_decryptor.py +64 -50
- novel_downloader/core/parsers/sfacg/main_parser.py +12 -12
- novel_downloader/core/parsers/yamibo/main_parser.py +10 -10
- novel_downloader/locales/en.json +18 -2
- novel_downloader/locales/zh.json +18 -2
- novel_downloader/models/__init__.py +64 -0
- novel_downloader/models/browser.py +21 -0
- novel_downloader/models/chapter.py +25 -0
- novel_downloader/models/config.py +100 -0
- novel_downloader/models/login.py +20 -0
- novel_downloader/models/site_rules.py +99 -0
- novel_downloader/models/tasks.py +33 -0
- novel_downloader/models/types.py +15 -0
- novel_downloader/resources/config/settings.toml +31 -25
- novel_downloader/resources/json/linovelib_font_map.json +3573 -0
- novel_downloader/tui/__init__.py +7 -0
- novel_downloader/tui/app.py +32 -0
- novel_downloader/tui/main.py +17 -0
- novel_downloader/tui/screens/__init__.py +14 -0
- novel_downloader/tui/screens/home.py +191 -0
- novel_downloader/tui/screens/login.py +74 -0
- novel_downloader/tui/styles/home_layout.tcss +79 -0
- novel_downloader/tui/widgets/richlog_handler.py +24 -0
- novel_downloader/utils/__init__.py +6 -0
- novel_downloader/utils/chapter_storage.py +25 -38
- novel_downloader/utils/constants.py +15 -5
- novel_downloader/utils/cookies.py +66 -0
- novel_downloader/utils/crypto_utils.py +1 -74
- novel_downloader/utils/file_utils/io.py +1 -1
- novel_downloader/utils/fontocr/ocr_v1.py +2 -1
- novel_downloader/utils/fontocr/ocr_v2.py +2 -2
- novel_downloader/utils/hash_store.py +10 -18
- novel_downloader/utils/hash_utils.py +3 -2
- novel_downloader/utils/logger.py +2 -3
- novel_downloader/utils/network.py +53 -39
- novel_downloader/utils/text_utils/chapter_formatting.py +6 -1
- novel_downloader/utils/text_utils/font_mapping.py +1 -1
- novel_downloader/utils/text_utils/text_cleaning.py +1 -1
- novel_downloader/utils/time_utils/datetime_utils.py +3 -3
- novel_downloader/utils/time_utils/sleep_utils.py +3 -3
- {novel_downloader-1.3.2.dist-info → novel_downloader-1.4.0.dist-info}/METADATA +72 -38
- novel_downloader-1.4.0.dist-info/RECORD +170 -0
- {novel_downloader-1.3.2.dist-info → novel_downloader-1.4.0.dist-info}/WHEEL +1 -1
- {novel_downloader-1.3.2.dist-info → novel_downloader-1.4.0.dist-info}/entry_points.txt +1 -0
- novel_downloader/cli/interactive.py +0 -66
- novel_downloader/cli/settings.py +0 -177
- novel_downloader/config/models.py +0 -187
- novel_downloader/core/downloaders/base/__init__.py +0 -14
- novel_downloader/core/downloaders/base/base_async.py +0 -153
- novel_downloader/core/downloaders/base/base_sync.py +0 -208
- novel_downloader/core/downloaders/biquge/__init__.py +0 -14
- novel_downloader/core/downloaders/biquge/biquge_async.py +0 -27
- novel_downloader/core/downloaders/biquge/biquge_sync.py +0 -27
- novel_downloader/core/downloaders/common/__init__.py +0 -14
- novel_downloader/core/downloaders/common/common_async.py +0 -218
- novel_downloader/core/downloaders/common/common_sync.py +0 -210
- novel_downloader/core/downloaders/esjzone/__init__.py +0 -14
- novel_downloader/core/downloaders/esjzone/esjzone_async.py +0 -27
- novel_downloader/core/downloaders/esjzone/esjzone_sync.py +0 -27
- novel_downloader/core/downloaders/qianbi/__init__.py +0 -14
- novel_downloader/core/downloaders/qianbi/qianbi_async.py +0 -27
- novel_downloader/core/downloaders/qianbi/qianbi_sync.py +0 -27
- novel_downloader/core/downloaders/qidian/__init__.py +0 -10
- novel_downloader/core/downloaders/qidian/qidian_sync.py +0 -227
- novel_downloader/core/downloaders/sfacg/__init__.py +0 -14
- novel_downloader/core/downloaders/sfacg/sfacg_async.py +0 -27
- novel_downloader/core/downloaders/sfacg/sfacg_sync.py +0 -27
- novel_downloader/core/downloaders/yamibo/__init__.py +0 -14
- novel_downloader/core/downloaders/yamibo/yamibo_async.py +0 -27
- novel_downloader/core/downloaders/yamibo/yamibo_sync.py +0 -27
- novel_downloader/core/factory/requester.py +0 -144
- novel_downloader/core/factory/saver.py +0 -56
- novel_downloader/core/interfaces/async_downloader.py +0 -36
- novel_downloader/core/interfaces/async_requester.py +0 -84
- novel_downloader/core/interfaces/sync_downloader.py +0 -36
- novel_downloader/core/interfaces/sync_requester.py +0 -82
- novel_downloader/core/parsers/qidian/browser/__init__.py +0 -12
- novel_downloader/core/parsers/qidian/browser/chapter_normal.py +0 -93
- novel_downloader/core/parsers/qidian/browser/chapter_router.py +0 -71
- novel_downloader/core/parsers/qidian/session/__init__.py +0 -12
- novel_downloader/core/parsers/qidian/session/chapter_encrypted.py +0 -443
- novel_downloader/core/parsers/qidian/session/chapter_normal.py +0 -115
- novel_downloader/core/parsers/qidian/session/main_parser.py +0 -128
- novel_downloader/core/parsers/qidian/shared/__init__.py +0 -37
- novel_downloader/core/parsers/qidian/shared/book_info_parser.py +0 -150
- novel_downloader/core/requesters/base/async_session.py +0 -410
- novel_downloader/core/requesters/base/browser.py +0 -337
- novel_downloader/core/requesters/base/session.py +0 -378
- novel_downloader/core/requesters/biquge/__init__.py +0 -14
- novel_downloader/core/requesters/common/__init__.py +0 -17
- novel_downloader/core/requesters/common/session.py +0 -113
- novel_downloader/core/requesters/esjzone/__init__.py +0 -13
- novel_downloader/core/requesters/esjzone/session.py +0 -235
- novel_downloader/core/requesters/qianbi/__init__.py +0 -13
- novel_downloader/core/requesters/qidian/__init__.py +0 -21
- novel_downloader/core/requesters/qidian/broswer.py +0 -307
- novel_downloader/core/requesters/qidian/session.py +0 -290
- novel_downloader/core/requesters/sfacg/__init__.py +0 -13
- novel_downloader/core/requesters/sfacg/session.py +0 -242
- novel_downloader/core/requesters/yamibo/__init__.py +0 -13
- novel_downloader/core/requesters/yamibo/session.py +0 -237
- novel_downloader/core/savers/__init__.py +0 -34
- novel_downloader/core/savers/biquge.py +0 -25
- novel_downloader/core/savers/common/__init__.py +0 -12
- novel_downloader/core/savers/epub_utils/__init__.py +0 -26
- novel_downloader/core/savers/esjzone.py +0 -25
- novel_downloader/core/savers/qianbi.py +0 -25
- novel_downloader/core/savers/sfacg.py +0 -25
- novel_downloader/core/savers/yamibo.py +0 -25
- novel_downloader/resources/config/rules.toml +0 -196
- novel_downloader-1.3.2.dist-info/RECORD +0 -165
- {novel_downloader-1.3.2.dist-info → novel_downloader-1.4.0.dist-info}/licenses/LICENSE +0 -0
- {novel_downloader-1.3.2.dist-info → novel_downloader-1.4.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,352 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""
|
3
|
+
novel_downloader.core.downloaders.qidian
|
4
|
+
----------------------------------------
|
5
|
+
|
6
|
+
"""
|
7
|
+
|
8
|
+
import asyncio
|
9
|
+
import json
|
10
|
+
from collections.abc import Awaitable, Callable
|
11
|
+
from contextlib import suppress
|
12
|
+
from typing import Any, cast
|
13
|
+
|
14
|
+
from novel_downloader.core.downloaders.base import BaseDownloader
|
15
|
+
from novel_downloader.core.interfaces import (
|
16
|
+
ExporterProtocol,
|
17
|
+
FetcherProtocol,
|
18
|
+
ParserProtocol,
|
19
|
+
)
|
20
|
+
from novel_downloader.models import (
|
21
|
+
ChapterDict,
|
22
|
+
CidTask,
|
23
|
+
DownloaderConfig,
|
24
|
+
HtmlTask,
|
25
|
+
)
|
26
|
+
from novel_downloader.utils.chapter_storage import ChapterStorage
|
27
|
+
from novel_downloader.utils.file_utils import save_as_json, save_as_txt
|
28
|
+
from novel_downloader.utils.time_utils import (
|
29
|
+
async_sleep_with_random_delay,
|
30
|
+
calculate_time_difference,
|
31
|
+
)
|
32
|
+
|
33
|
+
|
34
|
+
class QidianDownloader(BaseDownloader):
    """
    Specialized downloader for Qidian novels.

    Chapters flow through three asyncio queues (chapter ids -> raw HTML ->
    parsed chapters), each served by a single worker task. Parsed chapters
    are written to the ``chapters`` or ``encrypted_chapters`` storage
    namespace depending on their ``extra["encrypted"]`` flag.
    """

    def __init__(
        self,
        fetcher: FetcherProtocol,
        parser: ParserProtocol,
        exporter: ExporterProtocol,
        config: DownloaderConfig,
    ):
        """
        Initialize the downloader for the ``"qidian"`` site.

        :param fetcher: Fetcher used to retrieve book info and chapter pages.
        :param parser: Parser used to turn raw pages into structured data.
        :param exporter: Exporter invoked once the download has finished.
        :param config: Downloader configuration.
        """
        # Enforce a floor of 1.0 on the request interval.
        # NOTE: this writes the clamped value back into the caller's config.
        config.request_interval = max(1.0, config.request_interval)
        super().__init__(fetcher, parser, exporter, config, "qidian")

    async def _download_one(
        self,
        book_id: str,
        *,
        progress_hook: Callable[[int, int], Awaitable[None]] | None = None,
        **kwargs: Any,
    ) -> None:
        """
        The full download logic for a single book.

        Loads (or re-fetches) the book info, downloads and parses every
        chapter that is not skipped, stores the results and finally runs
        the exporter. Returns early, without exporting, when the book info
        lists no chapters.

        :param book_id: The identifier of the book to download.
        :param progress_hook: Optional coroutine function awaited with
            ``(completed_count, total_chapters)`` each time a chapter is
            stored or skipped because it already exists.
        :param kwargs: Extra keyword arguments; not used by this method.
        """
        TAG = "[Downloader]"

        raw_base = self.raw_data_dir / book_id
        cache_base = self.cache_dir / book_id
        info_path = raw_base / "book_info.json"
        chapters_html_dir = cache_base / "html"

        raw_base.mkdir(parents=True, exist_ok=True)
        if self.save_html:
            chapters_html_dir.mkdir(parents=True, exist_ok=True)
        normal_cs = ChapterStorage(
            raw_base=raw_base,
            namespace="chapters",
            backend_type=self._config.storage_backend,
            batch_size=self._config.storage_batch_size,
        )
        encrypted_cs = ChapterStorage(
            raw_base=raw_base,
            namespace="encrypted_chapters",
            backend_type=self._config.storage_backend,
            batch_size=self._config.storage_batch_size,
        )

        # Everything that touches the storages runs inside this try so that
        # they are closed on every exit path (early return, error, cancel).
        try:
            # load or fetch book_info
            book_info: dict[str, Any]
            re_fetch = True
            old_data: dict[str, Any] = {}

            if info_path.exists():
                try:
                    old_data = json.loads(info_path.read_text("utf-8"))
                    days, *_ = calculate_time_difference(
                        old_data.get("update_time", ""), "UTC+8"
                    )
                    # Cached info is reused unless it is more than a day old.
                    re_fetch = days > 1
                except Exception as e:
                    # Best effort: any problem with the cached file simply
                    # forces a re-fetch.
                    self.logger.debug(
                        "%s Could not use cached book info for %s: %s",
                        TAG,
                        book_id,
                        e,
                    )
                    re_fetch = True

            if re_fetch:
                info_html = await self.fetcher.get_book_info(book_id)
                if self.save_html:
                    for i, html in enumerate(info_html):
                        save_as_txt(html, chapters_html_dir / f"info_{i}.html")
                book_info = self.parser.parse_book_info(info_html)

                if book_info.get("book_name") != "未找到书名":
                    save_as_json(book_info, info_path)
                else:
                    self.logger.warning(
                        "%s 书籍信息未找到, book_id = %s", TAG, book_id
                    )
                    # Fall back to whatever was cached, if anything.
                    book_info = old_data or {"book_name": "未找到书名"}
            else:
                book_info = old_data

            vols = book_info.get("volumes", [])
            total_chapters = sum(len(vol.get("chapters", [])) for vol in vols)
            if total_chapters == 0:
                self.logger.warning(
                    "%s 书籍没有章节可下载: book_id=%s", TAG, book_id
                )
                return

            completed_count = 0
            # Number of times the parser pushed a chapter back onto the cid
            # queue; used below to detect work added after a join returned.
            parser_requeues = 0

            # setup queue
            cid_queue: asyncio.Queue[CidTask] = asyncio.Queue()
            html_queue: asyncio.Queue[HtmlTask] = asyncio.Queue()
            save_queue: asyncio.Queue[ChapterDict] = asyncio.Queue()

            async def fetcher_worker(
                book_id: str,
                cid_queue: asyncio.Queue[CidTask],
                html_queue: asyncio.Queue[HtmlTask],
                retry_times: int,
            ) -> None:
                """Fetch chapter pages for queued cids, retrying on failure."""
                while True:
                    task = await cid_queue.get()
                    cid = task.cid
                    if not cid:
                        self.logger.warning(
                            "[Fetcher] Skipped empty cid task: %s", task
                        )
                        cid_queue.task_done()
                        continue

                    try:
                        html_list = await self.fetcher.get_book_chapter(
                            book_id, cid
                        )
                        await html_queue.put(
                            HtmlTask(cid=cid, retry=task.retry, html_list=html_list)
                        )
                        self.logger.info("[Fetcher] Downloaded chapter %s", cid)
                        await async_sleep_with_random_delay(
                            self.request_interval,
                            mul_spread=1.1,
                            max_sleep=self.request_interval + 2,
                        )

                    except Exception as e:
                        if task.retry < retry_times:
                            await cid_queue.put(
                                CidTask(
                                    prev_cid=task.prev_cid,
                                    cid=cid,
                                    retry=task.retry + 1,
                                )
                            )
                            self.logger.info(
                                "[Fetcher] Re-queued chapter %s for retry #%d: %s",
                                cid,
                                task.retry + 1,
                                e,
                            )
                            # Exponential backoff before taking the next task.
                            backoff = self.backoff_factor * (2**task.retry)
                            await async_sleep_with_random_delay(
                                base=backoff,
                                mul_spread=1.2,
                                max_sleep=backoff + 3,
                            )
                        else:
                            self.logger.warning(
                                "[Fetcher] Max retries reached for chapter %s: %s",
                                cid,
                                e,
                            )

                    finally:
                        cid_queue.task_done()

            async def parser_worker(
                cid_queue: asyncio.Queue[CidTask],
                html_queue: asyncio.Queue[HtmlTask],
                save_queue: asyncio.Queue[ChapterDict],
                retry_times: int,
            ) -> None:
                """Parse fetched pages and hand the results to storage."""
                nonlocal parser_requeues
                while True:
                    task = await html_queue.get()
                    skip_retry = False
                    try:
                        chap_json: ChapterDict | None = None
                        if self.is_restricted_page(task.html_list):
                            self.logger.info(
                                "[Parser] Skipped restricted page for cid %s",
                                task.cid,
                            )
                            skip_retry = True
                        else:
                            # Parsing is synchronous; run it off the event loop.
                            chap_json = await asyncio.to_thread(
                                self.parser.parse_chapter,
                                task.html_list,
                                task.cid,
                            )
                            # Pages flagged as encrypted are never re-fetched
                            # when parsing yields nothing.
                            if self.check_encrypted(task.html_list):
                                skip_retry = True
                        if chap_json:
                            await save_queue.put(chap_json)
                            self.logger.info(
                                "[Parser] saved chapter %s",
                                task.cid,
                            )
                            if self.save_html:
                                is_encrypted = chap_json.get("extra", {}).get(
                                    "encrypted", False
                                )
                                folder = chapters_html_dir / (
                                    "html_encrypted" if is_encrypted else "html_plain"
                                )
                                html_path = folder / f"{task.cid}.html"
                                save_as_txt(
                                    task.html_list[0], html_path, on_exist="skip"
                                )
                                self.logger.debug(
                                    "%s Saved raw HTML for chapter %s to %s",
                                    TAG,
                                    task.cid,
                                    html_path,
                                )
                        else:
                            raise ValueError("Empty parse result")
                    except Exception as e:
                        if skip_retry:
                            # Previously dropped without a trace; keep the
                            # no-retry behavior but leave a debug record.
                            self.logger.debug(
                                "[Parser] Not retrying cid %s: %s", task.cid, e
                            )
                        elif task.retry < retry_times:
                            # Count the re-queue BEFORE this html task is
                            # marked done so the drain loop can see it.
                            parser_requeues += 1
                            await cid_queue.put(
                                CidTask(
                                    prev_cid=None,
                                    cid=task.cid,
                                    retry=task.retry + 1,
                                )
                            )
                            self.logger.info(
                                "[Parser] Re-queued cid %s for retry #%d: %s",
                                task.cid,
                                task.retry + 1,
                                e,
                            )
                        else:
                            self.logger.warning(
                                "[Parser] Max retries reached for cid %s: %s",
                                task.cid,
                                e,
                            )
                    finally:
                        html_queue.task_done()

            async def storage_worker(
                normal_cs: ChapterStorage,
                encrypted_cs: ChapterStorage,
                save_queue: asyncio.Queue[ChapterDict],
            ) -> None:
                """Persist parsed chapters and report progress."""
                nonlocal completed_count
                while True:
                    item = await save_queue.get()
                    try:
                        is_encrypted = item.get("extra", {}).get("encrypted", False)
                        cs = encrypted_cs if is_encrypted else normal_cs
                        cs.save(cast(ChapterDict, item))
                        completed_count += 1
                        if progress_hook:
                            await progress_hook(completed_count, total_chapters)
                    except Exception as e:
                        self.logger.error("[storage_worker] Failed to save: %s", e)
                    finally:
                        save_queue.task_done()

            workers = [
                asyncio.create_task(
                    fetcher_worker(
                        book_id,
                        cid_queue,
                        html_queue,
                        self.retry_times,
                    )
                ),
                asyncio.create_task(
                    parser_worker(
                        cid_queue,
                        html_queue,
                        save_queue,
                        self.retry_times,
                    )
                ),
                asyncio.create_task(
                    storage_worker(
                        normal_cs=normal_cs,
                        encrypted_cs=encrypted_cs,
                        save_queue=save_queue,
                    )
                ),
            ]

            try:
                last_cid: str | None = None
                for vol in book_info.get("volumes", []):
                    chapters = vol.get("chapters", [])
                    for chap in chapters:
                        cid = chap.get("chapterId")
                        # NOTE(review): only the plain-chapter store is
                        # consulted, so chapters saved to the encrypted store
                        # are fetched again — confirm this is intended.
                        if cid and normal_cs.exists(cid) and self.skip_existing:
                            completed_count += 1
                            if progress_hook:
                                await progress_hook(completed_count, total_chapters)
                            last_cid = cid
                            continue

                        await cid_queue.put(CidTask(cid=cid, prev_cid=last_cid))
                        last_cid = cid

                # Drain the pipeline. The parser can push a chapter back onto
                # cid_queue after cid_queue.join() has already returned, so a
                # single join pass may finish while a retry is still being
                # fetched. Repeat until a whole pass sees no parser re-queue.
                while True:
                    seen_requeues = parser_requeues
                    await cid_queue.join()
                    await html_queue.join()
                    await save_queue.join()
                    if parser_requeues == seen_requeues:
                        break
            finally:
                # Workers loop forever; stop them on success and on failure.
                for worker in workers:
                    worker.cancel()
                    with suppress(asyncio.CancelledError):
                        await worker
        finally:
            normal_cs.close()
            encrypted_cs.close()

        await asyncio.to_thread(self.exporter.export, book_id)

        self.logger.info(
            "%s Novel '%s' download completed.",
            TAG,
            book_info.get("book_name", "unknown"),
        )
        return

    @staticmethod
    def is_restricted_page(html_list: list[str]) -> bool:
        """
        Return True if page content indicates access restriction
        (e.g. not subscribed/purchased).

        An empty ``html_list`` is treated as restricted. Only the first
        entry is inspected.

        :param html_list: List of raw HTML strings for one chapter.
        :return: True if the list is empty or its first entry contains one
            of the restriction markers.
        """
        if not html_list:
            return True
        markers = ["这是VIP章节", "需要订阅", "订阅后才能阅读"]
        return any(m in html_list[0] for m in markers)

    @staticmethod
    def check_encrypted(html_list: list[str]) -> bool:
        """
        Return True if the chapter page is flagged as encrypted.

        An empty ``html_list`` is treated as encrypted. Only the first
        entry is inspected.

        :param html_list: List of raw HTML strings for one chapter.
        :return: True if the list is empty or its first entry contains
            the literal ``"cES":2``.
        """
        if not html_list:
            return True
        return '"cES":2' in html_list[0]
|
@@ -0,0 +1,27 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""
|
3
|
+
novel_downloader.core.downloaders.sfacg
|
4
|
+
---------------------------------------
|
5
|
+
|
6
|
+
"""
|
7
|
+
|
8
|
+
from novel_downloader.core.downloaders.common import CommonDownloader
|
9
|
+
from novel_downloader.core.interfaces import (
|
10
|
+
ExporterProtocol,
|
11
|
+
FetcherProtocol,
|
12
|
+
ParserProtocol,
|
13
|
+
)
|
14
|
+
from novel_downloader.models import DownloaderConfig
|
15
|
+
|
16
|
+
|
17
|
+
class SfacgDownloader(CommonDownloader):
    """
    Downloader for the ``"sfacg"`` site.

    Adds no behavior of its own: it only binds the site name and defers
    everything else to :class:`CommonDownloader`.
    """

    def __init__(
        self,
        fetcher: FetcherProtocol,
        parser: ParserProtocol,
        exporter: ExporterProtocol,
        config: DownloaderConfig,
    ) -> None:
        """
        Initialize the downloader with the site fixed to ``"sfacg"``.

        :param fetcher: Fetcher passed through to the base downloader.
        :param parser: Parser passed through to the base downloader.
        :param exporter: Exporter passed through to the base downloader.
        :param config: Downloader configuration passed through unchanged.
        """
        super().__init__(fetcher, parser, exporter, config, "sfacg")
|
@@ -0,0 +1,27 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""
|
3
|
+
novel_downloader.core.downloaders.yamibo
|
4
|
+
----------------------------------------
|
5
|
+
|
6
|
+
"""
|
7
|
+
|
8
|
+
from novel_downloader.core.downloaders.common import CommonDownloader
|
9
|
+
from novel_downloader.core.interfaces import (
|
10
|
+
ExporterProtocol,
|
11
|
+
FetcherProtocol,
|
12
|
+
ParserProtocol,
|
13
|
+
)
|
14
|
+
from novel_downloader.models import DownloaderConfig
|
15
|
+
|
16
|
+
|
17
|
+
class YamiboDownloader(CommonDownloader):
    """
    Downloader for the ``"yamibo"`` site.

    Adds no behavior of its own: it only binds the site name and defers
    everything else to :class:`CommonDownloader`.
    """

    def __init__(
        self,
        fetcher: FetcherProtocol,
        parser: ParserProtocol,
        exporter: ExporterProtocol,
        config: DownloaderConfig,
    ) -> None:
        """
        Initialize the downloader with the site fixed to ``"yamibo"``.

        :param fetcher: Fetcher passed through to the base downloader.
        :param parser: Parser passed through to the base downloader.
        :param exporter: Exporter passed through to the base downloader.
        :param config: Downloader configuration passed through unchanged.
        """
        super().__init__(fetcher, parser, exporter, config, "yamibo")
|
@@ -0,0 +1,37 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""
|
3
|
+
novel_downloader.core.exporters
|
4
|
+
-------------------------------
|
5
|
+
|
6
|
+
This module defines exporter classes for different novel platforms.
|
7
|
+
|
8
|
+
Currently supported platforms:
|
9
|
+
- biquge (笔趣阁)
|
10
|
+
- esjzone (ESJ Zone)
|
11
|
+
- linovelib (哔哩轻小说)
|
12
|
+
- qianbi (铅笔小说)
|
13
|
+
- qidian (起点中文网)
|
14
|
+
- sfacg (SF轻小说)
|
15
|
+
- yamibo (百合会)
|
16
|
+
- common (通用架构)
|
17
|
+
"""
|
18
|
+
|
19
|
+
from .biquge import BiqugeExporter
|
20
|
+
from .common import CommonExporter
|
21
|
+
from .esjzone import EsjzoneExporter
|
22
|
+
from .linovelib import LinovelibExporter
|
23
|
+
from .qianbi import QianbiExporter
|
24
|
+
from .qidian import QidianExporter
|
25
|
+
from .sfacg import SfacgExporter
|
26
|
+
from .yamibo import YamiboExporter
|
27
|
+
|
28
|
+
__all__ = [
|
29
|
+
"BiqugeExporter",
|
30
|
+
"EsjzoneExporter",
|
31
|
+
"LinovelibExporter",
|
32
|
+
"QianbiExporter",
|
33
|
+
"QidianExporter",
|
34
|
+
"SfacgExporter",
|
35
|
+
"YamiboExporter",
|
36
|
+
"CommonExporter",
|
37
|
+
]
|
@@ -1,21 +1,22 @@
|
|
1
1
|
#!/usr/bin/env python3
|
2
2
|
"""
|
3
|
-
novel_downloader.core.
|
4
|
-
|
3
|
+
novel_downloader.core.exporters.base
|
4
|
+
------------------------------------
|
5
5
|
|
6
|
-
This module provides an abstract base class `
|
7
|
-
common interface and reusable logic for saving assembled novel
|
8
|
-
into various output formats.
|
6
|
+
This module provides an abstract base class `BaseExporter` that defines
|
7
|
+
the common interface and reusable logic for saving assembled novel
|
8
|
+
content into various output formats.
|
9
9
|
"""
|
10
10
|
|
11
11
|
import abc
|
12
12
|
import logging
|
13
|
+
import types
|
13
14
|
from datetime import datetime
|
14
15
|
from pathlib import Path
|
15
|
-
from typing import Any
|
16
|
+
from typing import Any, Self
|
16
17
|
|
17
|
-
from novel_downloader.
|
18
|
-
from novel_downloader.
|
18
|
+
from novel_downloader.core.interfaces import ExporterProtocol
|
19
|
+
from novel_downloader.models import ExporterConfig
|
19
20
|
|
20
21
|
|
21
22
|
class SafeDict(dict[str, Any]):
|
@@ -23,61 +24,67 @@ class SafeDict(dict[str, Any]):
|
|
23
24
|
return f"{{{key}}}"
|
24
25
|
|
25
26
|
|
26
|
-
class
|
27
|
+
class BaseExporter(ExporterProtocol, abc.ABC):
|
27
28
|
"""
|
28
|
-
|
29
|
+
BaseExporter defines the interface and common structure for
|
29
30
|
saving assembled book content into various formats
|
30
31
|
such as TXT, EPUB, Markdown, or PDF.
|
31
32
|
"""
|
32
33
|
|
33
|
-
def __init__(
|
34
|
+
def __init__(
|
35
|
+
self,
|
36
|
+
config: ExporterConfig,
|
37
|
+
site: str,
|
38
|
+
):
|
34
39
|
"""
|
35
|
-
Initialize the
|
40
|
+
Initialize the exporter with given configuration.
|
36
41
|
|
37
|
-
:param config: A
|
42
|
+
:param config: A ExporterConfig object that defines
|
38
43
|
save paths, formats, and options.
|
39
44
|
"""
|
40
45
|
self._config = config
|
46
|
+
self._site = site
|
41
47
|
|
42
|
-
self.
|
43
|
-
self._raw_data_dir = Path(config.raw_data_dir)
|
48
|
+
self._cache_dir = Path(config.cache_dir) / site
|
49
|
+
self._raw_data_dir = Path(config.raw_data_dir) / site
|
44
50
|
self._output_dir = Path(config.output_dir)
|
45
|
-
self.
|
51
|
+
self._cache_dir.mkdir(parents=True, exist_ok=True)
|
46
52
|
self._output_dir.mkdir(parents=True, exist_ok=True)
|
47
53
|
|
48
|
-
self.
|
49
|
-
|
50
|
-
self.logger = logging.getLogger(f"{__name__}.{self.__class__.__name__}")
|
54
|
+
self.logger = logging.getLogger(f"{self.__class__.__name__}")
|
51
55
|
|
52
|
-
def
|
56
|
+
def export(
|
57
|
+
self,
|
58
|
+
book_id: str,
|
59
|
+
) -> None:
|
53
60
|
"""
|
54
|
-
|
61
|
+
Export the book in the formats specified in config.
|
55
62
|
If a method is not implemented or fails, log the error and continue.
|
56
63
|
|
57
64
|
:param book_id: The book identifier (used for filename, lookup, etc.)
|
58
65
|
"""
|
59
|
-
TAG = "[
|
66
|
+
TAG = "[Exporter]"
|
60
67
|
actions = [
|
61
|
-
("make_txt", self.
|
62
|
-
("make_epub", self.
|
63
|
-
("make_md", self.
|
64
|
-
("make_pdf", self.
|
68
|
+
("make_txt", self.export_as_txt),
|
69
|
+
("make_epub", self.export_as_epub),
|
70
|
+
("make_md", self.export_as_md),
|
71
|
+
("make_pdf", self.export_as_pdf),
|
65
72
|
]
|
66
73
|
|
67
|
-
for flag_name,
|
74
|
+
for flag_name, export_method in actions:
|
68
75
|
if getattr(self._config, flag_name, False):
|
69
76
|
try:
|
70
77
|
self.logger.info(
|
71
|
-
"%s Attempting to
|
78
|
+
"%s Attempting to export book_id '%s' as %s...",
|
72
79
|
TAG,
|
73
80
|
book_id,
|
74
81
|
flag_name,
|
75
82
|
)
|
76
|
-
|
83
|
+
export_method(book_id)
|
77
84
|
self.logger.info("%s Successfully saved as %s.", TAG, flag_name)
|
78
85
|
except NotImplementedError as e:
|
79
86
|
self.logger.warning(
|
80
|
-
"%s
|
87
|
+
"%s Export method for %s not implemented: %s",
|
81
88
|
TAG,
|
82
89
|
flag_name,
|
83
90
|
str(e),
|
@@ -89,7 +96,7 @@ class BaseSaver(SaverProtocol, abc.ABC):
|
|
89
96
|
return
|
90
97
|
|
91
98
|
@abc.abstractmethod
|
92
|
-
def
|
99
|
+
def export_as_txt(self, book_id: str) -> None:
|
93
100
|
"""
|
94
101
|
Persist the assembled book as a .txt file.
|
95
102
|
|
@@ -99,32 +106,32 @@ class BaseSaver(SaverProtocol, abc.ABC):
|
|
99
106
|
"""
|
100
107
|
...
|
101
108
|
|
102
|
-
def
|
109
|
+
def export_as_epub(self, book_id: str) -> None:
|
103
110
|
"""
|
104
111
|
Optional: Persist the assembled book as a EPUB (.epub) file.
|
105
112
|
|
106
113
|
:param book_id: The book identifier.
|
107
114
|
:raises NotImplementedError: If the method is not overridden.
|
108
115
|
"""
|
109
|
-
raise NotImplementedError("EPUB export not supported by this
|
116
|
+
raise NotImplementedError("EPUB export not supported by this Exporter.")
|
110
117
|
|
111
|
-
def
|
118
|
+
def export_as_md(self, book_id: str) -> None:
|
112
119
|
"""
|
113
120
|
Optional: Persist the assembled book as a Markdown file.
|
114
121
|
|
115
122
|
:param book_id: The book identifier.
|
116
123
|
:raises NotImplementedError: If the method is not overridden.
|
117
124
|
"""
|
118
|
-
raise NotImplementedError("Markdown export not supported by this
|
125
|
+
raise NotImplementedError("Markdown export not supported by this Exporter.")
|
119
126
|
|
120
|
-
def
|
127
|
+
def export_as_pdf(self, book_id: str) -> None:
|
121
128
|
"""
|
122
129
|
Optional: Persist the assembled book as a PDF file.
|
123
130
|
|
124
131
|
:param book_id: The book identifier.
|
125
132
|
:raises NotImplementedError: If the method is not overridden.
|
126
133
|
"""
|
127
|
-
raise NotImplementedError("PDF export not supported by this
|
134
|
+
raise NotImplementedError("PDF export not supported by this Exporter.")
|
128
135
|
|
129
136
|
def get_filename(
|
130
137
|
self,
|
@@ -146,7 +153,7 @@ class BaseSaver(SaverProtocol, abc.ABC):
|
|
146
153
|
# Merge all fields with defaults
|
147
154
|
context = SafeDict(title=title, author=author or "", **extra_fields)
|
148
155
|
|
149
|
-
name = self.
|
156
|
+
name = self.filename_template.format_map(context)
|
150
157
|
|
151
158
|
if self._config.append_timestamp:
|
152
159
|
name += f"_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
|
@@ -158,12 +165,34 @@ class BaseSaver(SaverProtocol, abc.ABC):
|
|
158
165
|
"""Access the output directory for saving files."""
|
159
166
|
return self._output_dir
|
160
167
|
|
161
|
-
@property
|
162
|
-
def raw_data_dir(self) -> Path:
|
163
|
-
"""Access the raw data directory."""
|
164
|
-
return self._raw_data_dir
|
165
|
-
|
166
168
|
@property
|
167
169
|
def filename_template(self) -> str:
|
168
170
|
"""Access the filename template."""
|
169
|
-
return self.
|
171
|
+
return self._config.filename_template
|
172
|
+
|
173
|
+
def _on_close(self) -> None:
|
174
|
+
"""
|
175
|
+
Hook method called at the beginning of close().
|
176
|
+
Override in subclass if needed.
|
177
|
+
"""
|
178
|
+
pass
|
179
|
+
|
180
|
+
def close(self) -> None:
|
181
|
+
"""
|
182
|
+
Shutdown and clean up the exporter.
|
183
|
+
"""
|
184
|
+
self._on_close()
|
185
|
+
|
186
|
+
def __enter__(self) -> Self:
|
187
|
+
return self
|
188
|
+
|
189
|
+
def __exit__(
|
190
|
+
self,
|
191
|
+
exc_type: type[BaseException] | None,
|
192
|
+
exc_val: BaseException | None,
|
193
|
+
tb: types.TracebackType | None,
|
194
|
+
) -> None:
|
195
|
+
self.close()
|
196
|
+
|
197
|
+
def __del__(self) -> None:
|
198
|
+
self.close()
|
@@ -0,0 +1,25 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""
|
3
|
+
novel_downloader.core.exporters.biquge
|
4
|
+
--------------------------------------
|
5
|
+
|
6
|
+
"""
|
7
|
+
|
8
|
+
from novel_downloader.models import ExporterConfig
|
9
|
+
|
10
|
+
from .common import CommonExporter
|
11
|
+
|
12
|
+
|
13
|
+
class BiqugeExporter(CommonExporter):
    """
    Exporter for the ``"biquge"`` site.

    Binds the site name and the chapter folder list, leaving all export
    logic to :class:`CommonExporter`.
    """

    def __init__(self, config: ExporterConfig) -> None:
        """
        Initialize the exporter for ``"biquge"``.

        :param config: Exporter configuration passed through unchanged.
        """
        super().__init__(config, site="biquge", chap_folders=["chapters"])
|
23
|
+
|
24
|
+
|
25
|
+
__all__ = ["BiqugeExporter"]
|