novel-downloader 2.0.1__py3-none-any.whl → 2.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- novel_downloader/__init__.py +1 -1
- novel_downloader/cli/download.py +11 -8
- novel_downloader/cli/export.py +17 -17
- novel_downloader/cli/ui.py +28 -1
- novel_downloader/config/adapter.py +27 -1
- novel_downloader/core/archived/deqixs/fetcher.py +1 -28
- novel_downloader/core/downloaders/__init__.py +2 -0
- novel_downloader/core/downloaders/base.py +34 -85
- novel_downloader/core/downloaders/common.py +147 -171
- novel_downloader/core/downloaders/qianbi.py +30 -64
- novel_downloader/core/downloaders/qidian.py +157 -184
- novel_downloader/core/downloaders/qqbook.py +292 -0
- novel_downloader/core/downloaders/registry.py +2 -2
- novel_downloader/core/exporters/__init__.py +2 -0
- novel_downloader/core/exporters/base.py +37 -59
- novel_downloader/core/exporters/common.py +620 -0
- novel_downloader/core/exporters/linovelib.py +47 -0
- novel_downloader/core/exporters/qidian.py +41 -12
- novel_downloader/core/exporters/qqbook.py +28 -0
- novel_downloader/core/exporters/registry.py +2 -2
- novel_downloader/core/fetchers/__init__.py +4 -2
- novel_downloader/core/fetchers/aaatxt.py +2 -22
- novel_downloader/core/fetchers/b520.py +3 -23
- novel_downloader/core/fetchers/base.py +80 -105
- novel_downloader/core/fetchers/biquyuedu.py +2 -22
- novel_downloader/core/fetchers/dxmwx.py +10 -22
- novel_downloader/core/fetchers/esjzone.py +6 -29
- novel_downloader/core/fetchers/guidaye.py +2 -22
- novel_downloader/core/fetchers/hetushu.py +9 -29
- novel_downloader/core/fetchers/i25zw.py +2 -16
- novel_downloader/core/fetchers/ixdzs8.py +2 -16
- novel_downloader/core/fetchers/jpxs123.py +2 -16
- novel_downloader/core/fetchers/lewenn.py +2 -22
- novel_downloader/core/fetchers/linovelib.py +4 -20
- novel_downloader/core/fetchers/{eightnovel.py → n8novel.py} +12 -40
- novel_downloader/core/fetchers/piaotia.py +2 -16
- novel_downloader/core/fetchers/qbtr.py +2 -16
- novel_downloader/core/fetchers/qianbi.py +1 -20
- novel_downloader/core/fetchers/qidian.py +7 -33
- novel_downloader/core/fetchers/qqbook.py +177 -0
- novel_downloader/core/fetchers/quanben5.py +9 -29
- novel_downloader/core/fetchers/rate_limiter.py +22 -53
- novel_downloader/core/fetchers/sfacg.py +3 -16
- novel_downloader/core/fetchers/shencou.py +2 -16
- novel_downloader/core/fetchers/shuhaige.py +2 -22
- novel_downloader/core/fetchers/tongrenquan.py +2 -22
- novel_downloader/core/fetchers/ttkan.py +3 -14
- novel_downloader/core/fetchers/wanbengo.py +2 -22
- novel_downloader/core/fetchers/xiaoshuowu.py +2 -16
- novel_downloader/core/fetchers/xiguashuwu.py +4 -20
- novel_downloader/core/fetchers/xs63b.py +3 -15
- novel_downloader/core/fetchers/xshbook.py +2 -22
- novel_downloader/core/fetchers/yamibo.py +4 -28
- novel_downloader/core/fetchers/yibige.py +13 -26
- novel_downloader/core/interfaces/exporter.py +19 -7
- novel_downloader/core/interfaces/fetcher.py +21 -47
- novel_downloader/core/parsers/__init__.py +4 -2
- novel_downloader/core/parsers/b520.py +2 -2
- novel_downloader/core/parsers/base.py +4 -39
- novel_downloader/core/parsers/{eightnovel.py → n8novel.py} +5 -5
- novel_downloader/core/parsers/{qidian/main_parser.py → qidian.py} +147 -266
- novel_downloader/core/parsers/qqbook.py +709 -0
- novel_downloader/core/parsers/xiguashuwu.py +3 -4
- novel_downloader/core/searchers/__init__.py +2 -2
- novel_downloader/core/searchers/b520.py +1 -1
- novel_downloader/core/searchers/base.py +2 -2
- novel_downloader/core/searchers/{eightnovel.py → n8novel.py} +5 -5
- novel_downloader/models/__init__.py +2 -0
- novel_downloader/models/book.py +1 -0
- novel_downloader/models/config.py +12 -0
- novel_downloader/resources/config/settings.toml +23 -5
- novel_downloader/resources/js_scripts/expr_to_json.js +14 -0
- novel_downloader/resources/js_scripts/qidian_decrypt_node.js +21 -16
- novel_downloader/resources/js_scripts/qq_decrypt_node.js +92 -0
- novel_downloader/utils/constants.py +6 -0
- novel_downloader/utils/crypto_utils/aes_util.py +1 -1
- novel_downloader/utils/epub/constants.py +1 -6
- novel_downloader/utils/fontocr/core.py +2 -0
- novel_downloader/utils/fontocr/loader.py +10 -8
- novel_downloader/utils/node_decryptor/__init__.py +13 -0
- novel_downloader/utils/node_decryptor/decryptor.py +342 -0
- novel_downloader/{core/parsers/qidian/utils → utils/node_decryptor}/decryptor_fetcher.py +5 -6
- novel_downloader/web/pages/download.py +1 -1
- novel_downloader/web/pages/search.py +1 -1
- novel_downloader/web/services/task_manager.py +2 -0
- {novel_downloader-2.0.1.dist-info → novel_downloader-2.0.2.dist-info}/METADATA +4 -1
- {novel_downloader-2.0.1.dist-info → novel_downloader-2.0.2.dist-info}/RECORD +91 -94
- novel_downloader/core/exporters/common/__init__.py +0 -11
- novel_downloader/core/exporters/common/epub.py +0 -198
- novel_downloader/core/exporters/common/main_exporter.py +0 -64
- novel_downloader/core/exporters/common/txt.py +0 -146
- novel_downloader/core/exporters/epub_util.py +0 -215
- novel_downloader/core/exporters/linovelib/__init__.py +0 -11
- novel_downloader/core/exporters/linovelib/epub.py +0 -349
- novel_downloader/core/exporters/linovelib/main_exporter.py +0 -66
- novel_downloader/core/exporters/linovelib/txt.py +0 -139
- novel_downloader/core/exporters/txt_util.py +0 -67
- novel_downloader/core/parsers/qidian/__init__.py +0 -10
- novel_downloader/core/parsers/qidian/utils/__init__.py +0 -11
- novel_downloader/core/parsers/qidian/utils/node_decryptor.py +0 -175
- {novel_downloader-2.0.1.dist-info → novel_downloader-2.0.2.dist-info}/WHEEL +0 -0
- {novel_downloader-2.0.1.dist-info → novel_downloader-2.0.2.dist-info}/entry_points.txt +0 -0
- {novel_downloader-2.0.1.dist-info → novel_downloader-2.0.2.dist-info}/licenses/LICENSE +0 -0
- {novel_downloader-2.0.1.dist-info → novel_downloader-2.0.2.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,292 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""
|
3
|
+
novel_downloader.core.downloaders.qqbook
|
4
|
+
----------------------------------------
|
5
|
+
|
6
|
+
Downloader implementation for QQ novels, with unpurchased chapter ID skip logic.
|
7
|
+
"""
|
8
|
+
|
9
|
+
import asyncio
|
10
|
+
from collections.abc import Awaitable, Callable
|
11
|
+
from pathlib import Path
|
12
|
+
from typing import Any, ClassVar
|
13
|
+
|
14
|
+
from novel_downloader.core.downloaders.base import BaseDownloader
|
15
|
+
from novel_downloader.core.downloaders.registry import register_downloader
|
16
|
+
from novel_downloader.core.downloaders.signals import STOP, Progress, StopToken
|
17
|
+
from novel_downloader.models import (
|
18
|
+
BookConfig,
|
19
|
+
ChapterDict,
|
20
|
+
VolumeInfoDict,
|
21
|
+
)
|
22
|
+
from novel_downloader.utils import ChapterStorage, async_jitter_sleep
|
23
|
+
|
24
|
+
|
25
|
+
@register_downloader(site_keys=["qqbook", "qq"])
class QqbookDownloader(BaseDownloader):
    """
    Specialized downloader for QQ 阅读 (QQ Reading) novels.

    Processes chapters with a single worker that skips non-accessible
    chapters and handles fetch -> parse -> enqueue-to-storage.
    """

    # Storage "source" slots: plain chapters vs. font-encrypted ones.
    DEFAULT_SOURCE_ID: ClassVar[int] = 0
    ENCRYPTED_SOURCE_ID: ClassVar[int] = 1
    # Source-id -> priority mapping handed to ChapterStorage
    # (priority semantics are defined by ChapterStorage itself).
    PRIORITIES_MAP: ClassVar[dict[int, int]] = {
        DEFAULT_SOURCE_ID: 0,
        ENCRYPTED_SOURCE_ID: 1,
    }

    async def _download_one(
        self,
        book: BookConfig,
        *,
        progress_hook: Callable[[int, int], Awaitable[None]] | None = None,
        cancel_event: asyncio.Event | None = None,
        **kwargs: Any,
    ) -> None:
        """
        The full download logic for a single book.

        Pipeline: ``producer`` -> ``chapter_worker`` -> ``storage_worker``,
        connected by bounded asyncio queues and shut down via STOP tokens
        (one per chapter worker, forwarded to the storage worker).

        :param book: BookConfig with at least 'book_id'; may also carry
            'start_id', 'end_id' and 'ignore_ids' to bound the chapter range.
        :param progress_hook: Optional async callback invoked by Progress.
        :param cancel_event: Cooperative cancellation flag checked between steps.
        """
        TAG = "[Downloader]"
        # Exactly one fetch worker -- presumably to keep requests strictly
        # sequential for this site; TODO confirm rate-limit rationale.
        NUM_WORKERS = 1

        book_id = book["book_id"]
        start_id = book.get("start_id")
        end_id = book.get("end_id")
        ignore_set = set(book.get("ignore_ids", []))

        raw_base = self._raw_data_dir / book_id
        raw_base.mkdir(parents=True, exist_ok=True)
        html_dir = self._debug_dir / book_id / "html"

        def cancelled() -> bool:
            # True only when a cancel_event was supplied and has been set.
            return bool(cancel_event and cancel_event.is_set())

        # ---- metadata ---
        book_info = await self.load_book_info(book_id=book_id, html_dir=html_dir)
        if not book_info:
            return

        vols = book_info["volumes"]
        plan = self._planned_chapter_ids(vols, start_id, end_id, ignore_set)
        if not plan:
            self.logger.info("%s nothing to do after filtering: %s", TAG, book_id)
            return

        progress = Progress(total=len(plan), hook=progress_hook)

        # ---- queues & batching ---
        # Bounded queues provide backpressure between producer, fetch worker,
        # and the storage worker.
        cid_q: asyncio.Queue[str | StopToken] = asyncio.Queue(maxsize=self._workers * 2)
        save_q: asyncio.Queue[ChapterDict | StopToken] = asyncio.Queue(
            maxsize=self._workers * 2
        )
        # Chapters are staged per source id and written in batches.
        default_batch: list[ChapterDict] = []
        encrypted_batch: list[ChapterDict] = []

        def select_batch(chap: ChapterDict) -> tuple[list[ChapterDict], int]:
            # set extra.encrypted (by parser); default to plain if absent.
            if chap.get("extra", {}).get("font_encrypt", False):
                return encrypted_batch, self.ENCRYPTED_SOURCE_ID
            return default_batch, self.DEFAULT_SOURCE_ID

        async def flush_batch(batch: list[ChapterDict], src: int) -> None:
            # Persist one batch. Progress is bumped only on success, but the
            # batch is always cleared so a failed flush is never retried with
            # the same (possibly poisoned) payload.
            # NOTE: `storage` is the name bound by the `with ChapterStorage`
            # block below; closures resolve it at call time, and all calls
            # happen inside that block.
            if not batch:
                return
            try:
                storage.upsert_chapters(batch, src)
            except Exception as e:
                self.logger.error(
                    "[Storage] batch upsert failed (size=%d, src=%d): %s",
                    len(batch),
                    src,
                    e,
                    exc_info=True,
                )
            else:
                await progress.bump(len(batch))
            finally:
                batch.clear()

        async def flush_all() -> None:
            # Flush both staging batches, regardless of fill level.
            await flush_batch(default_batch, self.DEFAULT_SOURCE_ID)
            await flush_batch(encrypted_batch, self.ENCRYPTED_SOURCE_ID)

        # ---- workers ---
        async def storage_worker() -> None:
            """
            Consumes parsed chapters, batches by source, flushes on threshold.

            Terminates after receiving STOP from each chapter worker.

            On cancel: drains queue, flushes once, then waits for remaining STOPs.
            """
            stop_count = 0
            while True:
                chap = await save_q.get()
                if isinstance(chap, StopToken):
                    stop_count += 1
                    if stop_count == NUM_WORKERS:
                        # Last worker finished: final flush, then exit.
                        await flush_all()
                        return
                    continue

                batch, src = select_batch(chap)
                batch.append(chap)
                if len(batch) >= self._storage_batch_size:
                    await flush_batch(batch, src)

                if cancelled():
                    # Drain whatever is already parsed
                    try:
                        while True:
                            nxt = save_q.get_nowait()
                            if isinstance(nxt, StopToken):
                                stop_count += 1
                            else:
                                # nsrc is unused here: drained items are only
                                # staged, then written by flush_all() below.
                                nbatch, nsrc = select_batch(nxt)
                                nbatch.append(nxt)
                    except asyncio.QueueEmpty:
                        pass
                    await flush_all()
                    # Wait for remaining STOPs to arrive
                    while stop_count < NUM_WORKERS:
                        nxt = await save_q.get()
                        # NOTE(review): this identity check (`is STOP`) differs
                        # from the isinstance(StopToken) checks above; workers
                        # enqueue the STOP singleton, so both match -- confirm.
                        if nxt is STOP:
                            stop_count += 1
                    return

        async def chapter_worker() -> None:
            """
            Single worker: fetch + parse with retry, then enqueue ChapterDict.

            Exits on STOP. If cancelled, does not start a new fetch; signals STOP.
            """
            while True:
                cid = await cid_q.get()
                if isinstance(cid, StopToken):
                    # Forward shutdown signal to the storage worker.
                    await save_q.put(STOP)
                    return

                if cancelled():
                    await save_q.put(STOP)
                    return

                chap = await self._process_chapter(book_id, cid, html_dir)
                if chap and not cancelled():
                    await save_q.put(chap)

                # Politeness delay between chapter requests (jittered).
                await async_jitter_sleep(
                    self._request_interval,
                    mul_spread=1.1,
                    max_sleep=self._request_interval + 2,
                )

        async def producer() -> None:
            """
            Enqueue chapter IDs respecting start/end/skip_existing.

            Always emits STOP x NUM_WORKERS at the end (even if cancelled early).
            """
            try:
                for cid in plan:
                    if cancelled():
                        break
                    if self._skip_existing and storage.exists(
                        cid, self.DEFAULT_SOURCE_ID
                    ):
                        # Already have not-encrypted; count as done.
                        await progress.bump(1)
                    else:
                        await cid_q.put(cid)
            finally:
                # Guaranteed shutdown signal for every chapter worker.
                for _ in range(NUM_WORKERS):
                    await cid_q.put(STOP)

        # ---- run tasks ---
        with ChapterStorage(raw_base, priorities=self.PRIORITIES_MAP) as storage:
            # TaskGroup waits for all three coroutines before leaving the
            # storage context, so every flush happens while storage is open.
            async with asyncio.TaskGroup() as tg:
                tg.create_task(storage_worker())
                for _ in range(NUM_WORKERS):
                    tg.create_task(chapter_worker())
                tg.create_task(producer())

        # ---- done ---
        if cancelled():
            self.logger.info(
                "%s Novel '%s' cancelled: flushed %d/%d chapters.",
                TAG,
                book_info.get("book_name", "unknown"),
                progress.done,
                progress.total,
            )
        else:
            self.logger.info(
                "%s Novel '%s' download completed.",
                TAG,
                book_info.get("book_name", "unknown"),
            )

    @staticmethod
    def _planned_chapter_ids(
        vols: list[VolumeInfoDict],
        start_id: str | None,
        end_id: str | None,
        ignore: set[str],
    ) -> list[str]:
        """
        Flatten volumes into the ordered list of chapter ids to download.

        The range starts at ``start_id`` (inclusive; from the beginning when
        None) and ends at ``end_id`` (inclusive). Ids in ``ignore`` and
        chapters not marked accessible are skipped.

        :param vols: Volume dicts, each carrying a "chapters" list.
        :return: Chapter ids in book order.
        """
        seen_start = start_id is None
        out: list[str] = []
        for vol in vols:
            for chap in vol["chapters"]:
                cid = chap.get("chapterId")
                if not cid:
                    continue
                if not seen_start:
                    if cid == start_id:
                        seen_start = True
                    else:
                        continue
                if cid not in ignore and chap.get("accessible", True):
                    out.append(cid)
                    # NOTE(review): if end_id is in `ignore` or non-accessible,
                    # this early return is never reached and the scan continues
                    # to the end of the book -- confirm that is intended.
                    if end_id is not None and cid == end_id:
                        return out
        return out

    async def _process_chapter(
        self,
        book_id: str,
        cid: str,
        html_dir: Path,
    ) -> ChapterDict | None:
        """
        Fetch, debug-save, parse a single chapter with retries.

        Retries up to ``self._retry_times`` with exponential, jittered backoff.

        :param book_id: Book identifier passed to the fetcher.
        :param cid: Chapter identifier.
        :param html_dir: Directory for debug HTML snapshots.
        :return: ChapterDict on success, or None on failure.
        """
        for attempt in range(self._retry_times + 1):
            try:
                html_list = await self.fetcher.get_book_chapter(book_id, cid)
                self._save_html_pages(html_dir, cid, html_list)
                # Parsing is CPU-bound; run off the event loop thread.
                chap = await asyncio.to_thread(
                    self.parser.parse_chapter, html_list, cid
                )
                if not chap:
                    raise ValueError("Empty parse result")
                return chap
            except Exception as e:
                if attempt < self._retry_times:
                    self.logger.info(
                        "[ChapterWorker] Retry %s (%s): %s", cid, attempt + 1, e
                    )
                    # Exponential backoff: factor * 2^attempt, with jitter.
                    backoff = self._backoff_factor * (2**attempt)
                    await async_jitter_sleep(
                        base=backoff,
                        mul_spread=1.2,
                        max_sleep=backoff + 3,
                    )
                else:
                    self.logger.warning("[ChapterWorker] Failed %s: %s", cid, e)
                    return None
@@ -20,7 +20,7 @@ from novel_downloader.core.interfaces import (
|
|
20
20
|
from novel_downloader.models import DownloaderConfig
|
21
21
|
|
22
22
|
DownloaderBuilder = Callable[
|
23
|
-
[FetcherProtocol, ParserProtocol, DownloaderConfig],
|
23
|
+
[FetcherProtocol, ParserProtocol, DownloaderConfig, str],
|
24
24
|
DownloaderProtocol,
|
25
25
|
]
|
26
26
|
D = TypeVar("D", bound=DownloaderProtocol)
|
@@ -66,4 +66,4 @@ def get_downloader(
|
|
66
66
|
downloader_cls = _DOWNLOADER_MAP[site_key]
|
67
67
|
except KeyError:
|
68
68
|
return CommonDownloader(fetcher, parser, config, site_key)
|
69
|
-
return downloader_cls(fetcher, parser, config)
|
69
|
+
return downloader_cls(fetcher, parser, config, site_key)
|
@@ -11,9 +11,11 @@ __all__ = [
|
|
11
11
|
"CommonExporter",
|
12
12
|
"LinovelibExporter",
|
13
13
|
"QidianExporter",
|
14
|
+
"QqbookExporter",
|
14
15
|
]
|
15
16
|
|
16
17
|
from .common import CommonExporter
|
17
18
|
from .linovelib import LinovelibExporter
|
18
19
|
from .qidian import QidianExporter
|
20
|
+
from .qqbook import QqbookExporter
|
19
21
|
from .registry import get_exporter
|
@@ -7,16 +7,16 @@ Abstract base class providing common structure and utilities for book exporters
|
|
7
7
|
"""
|
8
8
|
|
9
9
|
import abc
|
10
|
+
import contextlib
|
10
11
|
import json
|
11
12
|
import logging
|
12
13
|
import types
|
13
14
|
from datetime import datetime
|
14
15
|
from pathlib import Path
|
15
|
-
from typing import Any, Self, cast
|
16
|
+
from typing import Any, ClassVar, Self, cast
|
16
17
|
|
17
|
-
from novel_downloader.core.interfaces import ExporterProtocol
|
18
18
|
from novel_downloader.models import BookInfoDict, ChapterDict, ExporterConfig
|
19
|
-
from novel_downloader.utils import ChapterStorage
|
19
|
+
from novel_downloader.utils import ChapterStorage, get_cleaner
|
20
20
|
|
21
21
|
|
22
22
|
class SafeDict(dict[str, Any]):
|
@@ -24,15 +24,15 @@ class SafeDict(dict[str, Any]):
|
|
24
24
|
return f"{{{key}}}"
|
25
25
|
|
26
26
|
|
27
|
-
class BaseExporter(
|
27
|
+
class BaseExporter(abc.ABC):
|
28
28
|
"""
|
29
29
|
BaseExporter defines the interface and common structure for
|
30
30
|
saving assembled book content into various formats
|
31
31
|
such as TXT, EPUB, Markdown, or PDF.
|
32
32
|
"""
|
33
33
|
|
34
|
-
DEFAULT_SOURCE_ID = 0
|
35
|
-
PRIORITIES_MAP = {
|
34
|
+
DEFAULT_SOURCE_ID: ClassVar[int] = 0
|
35
|
+
PRIORITIES_MAP: ClassVar[dict[int, int]] = {
|
36
36
|
DEFAULT_SOURCE_ID: 0,
|
37
37
|
}
|
38
38
|
|
@@ -47,15 +47,30 @@ class BaseExporter(ExporterProtocol, abc.ABC):
|
|
47
47
|
:param config: Exporter configuration settings.
|
48
48
|
:param site: Identifier for the target website or source.
|
49
49
|
"""
|
50
|
-
self._config = config
|
51
50
|
self._site = site
|
52
51
|
self._storage_cache: dict[str, ChapterStorage] = {}
|
53
52
|
|
53
|
+
self._make_txt = config.make_txt
|
54
|
+
self._make_epub = config.make_epub
|
55
|
+
self._make_md = config.make_md
|
56
|
+
self._make_pdf = config.make_pdf
|
57
|
+
|
58
|
+
self._include_cover = config.include_cover
|
59
|
+
self._include_picture = config.include_picture
|
60
|
+
self._split_mode = config.split_mode
|
61
|
+
self._filename_template = config.filename_template
|
62
|
+
self._append_timestamp = config.append_timestamp
|
63
|
+
|
54
64
|
self._raw_data_dir = Path(config.raw_data_dir) / site
|
55
65
|
self._output_dir = Path(config.output_dir)
|
56
66
|
self._output_dir.mkdir(parents=True, exist_ok=True)
|
57
67
|
|
58
|
-
self.
|
68
|
+
self._cleaner = get_cleaner(
|
69
|
+
enabled=config.clean_text,
|
70
|
+
config=config.cleaner_cfg,
|
71
|
+
)
|
72
|
+
|
73
|
+
self.logger = logging.getLogger(f"{__name__}.{self.__class__.__name__}")
|
59
74
|
|
60
75
|
def export(self, book_id: str) -> dict[str, Path]:
|
61
76
|
"""
|
@@ -67,14 +82,14 @@ class BaseExporter(ExporterProtocol, abc.ABC):
|
|
67
82
|
results: dict[str, Path] = {}
|
68
83
|
|
69
84
|
actions = [
|
70
|
-
(
|
71
|
-
(
|
72
|
-
(
|
73
|
-
(
|
85
|
+
(self._make_txt, "txt", self.export_as_txt),
|
86
|
+
(self._make_epub, "epub", self.export_as_epub),
|
87
|
+
(self._make_md, "md", self.export_as_md),
|
88
|
+
(self._make_pdf, "pdf", self.export_as_pdf),
|
74
89
|
]
|
75
90
|
|
76
|
-
for
|
77
|
-
if
|
91
|
+
for enabled, fmt_key, export_method in actions:
|
92
|
+
if enabled:
|
78
93
|
try:
|
79
94
|
self.logger.info(
|
80
95
|
"%s Attempting to export book_id '%s' as %s...",
|
@@ -93,16 +108,13 @@ class BaseExporter(ExporterProtocol, abc.ABC):
|
|
93
108
|
"%s Export method for %s not implemented: %s",
|
94
109
|
TAG,
|
95
110
|
fmt_key,
|
96
|
-
|
97
|
-
)
|
98
|
-
except Exception as e:
|
99
|
-
self.logger.error(
|
100
|
-
"%s Error while saving as %s: %s", TAG, fmt_key, str(e)
|
111
|
+
e,
|
101
112
|
)
|
113
|
+
except Exception:
|
114
|
+
self.logger.exception("%s Error while saving as %s", TAG, fmt_key)
|
102
115
|
|
103
116
|
return results
|
104
117
|
|
105
|
-
@abc.abstractmethod
|
106
118
|
def export_as_txt(self, book_id: str) -> Path | None:
|
107
119
|
"""
|
108
120
|
Persist the assembled book as a .txt file.
|
@@ -111,7 +123,7 @@ class BaseExporter(ExporterProtocol, abc.ABC):
|
|
111
123
|
|
112
124
|
:param book_id: The book identifier (used for filename, lookup, etc.)
|
113
125
|
"""
|
114
|
-
|
126
|
+
raise NotImplementedError("TXT export not supported by this Exporter.")
|
115
127
|
|
116
128
|
def export_as_epub(self, book_id: str) -> Path | None:
|
117
129
|
"""
|
@@ -157,39 +169,12 @@ class BaseExporter(ExporterProtocol, abc.ABC):
|
|
157
169
|
:param extra_fields: Any additional fields used in the filename template.
|
158
170
|
:return: Formatted filename with extension.
|
159
171
|
"""
|
160
|
-
# Merge all fields with defaults
|
161
172
|
context = SafeDict(title=title, author=author or "", **extra_fields)
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
if self._config.append_timestamp:
|
173
|
+
name = self._filename_template.format_map(context)
|
174
|
+
if self._append_timestamp:
|
166
175
|
name += f"_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
|
167
|
-
|
168
176
|
return f"{name}.{ext}"
|
169
177
|
|
170
|
-
@property
|
171
|
-
def site(self) -> str:
|
172
|
-
"""
|
173
|
-
Get the site identifier.
|
174
|
-
|
175
|
-
:return: The site string.
|
176
|
-
"""
|
177
|
-
return self._site
|
178
|
-
|
179
|
-
@property
|
180
|
-
def output_dir(self) -> Path:
|
181
|
-
"""
|
182
|
-
Access the output directory for saving files.
|
183
|
-
"""
|
184
|
-
return self._output_dir
|
185
|
-
|
186
|
-
@property
|
187
|
-
def filename_template(self) -> str:
|
188
|
-
"""
|
189
|
-
Access the filename template.
|
190
|
-
"""
|
191
|
-
return self._config.filename_template
|
192
|
-
|
193
178
|
def _get_chapter(
|
194
179
|
self,
|
195
180
|
book_id: str,
|
@@ -245,18 +230,10 @@ class BaseExporter(ExporterProtocol, abc.ABC):
|
|
245
230
|
self.logger.warning("Failed to close storage %s: %s", storage, e)
|
246
231
|
self._storage_cache.clear()
|
247
232
|
|
248
|
-
def _on_close(self) -> None:
|
249
|
-
"""
|
250
|
-
Hook method called at the beginning of close().
|
251
|
-
Override in subclass if needed.
|
252
|
-
"""
|
253
|
-
pass
|
254
|
-
|
255
233
|
def close(self) -> None:
|
256
234
|
"""
|
257
235
|
Shutdown and clean up the exporter.
|
258
236
|
"""
|
259
|
-
self._on_close()
|
260
237
|
self._close_chapter_storages()
|
261
238
|
|
262
239
|
def __enter__(self) -> Self:
|
@@ -271,4 +248,5 @@ class BaseExporter(ExporterProtocol, abc.ABC):
|
|
271
248
|
self.close()
|
272
249
|
|
273
250
|
def __del__(self) -> None:
|
274
|
-
|
251
|
+
with contextlib.suppress(Exception):
|
252
|
+
self.close()
|