novel-downloader 1.5.0__py3-none-any.whl → 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- novel_downloader/__init__.py +1 -1
- novel_downloader/cli/__init__.py +1 -3
- novel_downloader/cli/clean.py +21 -88
- novel_downloader/cli/config.py +26 -21
- novel_downloader/cli/download.py +77 -64
- novel_downloader/cli/export.py +16 -20
- novel_downloader/cli/main.py +1 -1
- novel_downloader/cli/search.py +62 -65
- novel_downloader/cli/ui.py +156 -0
- novel_downloader/config/__init__.py +8 -5
- novel_downloader/config/adapter.py +65 -105
- novel_downloader/config/{loader.py → file_io.py} +53 -26
- novel_downloader/core/__init__.py +1 -0
- novel_downloader/core/archived/deqixs/fetcher.py +115 -0
- novel_downloader/core/archived/deqixs/parser.py +132 -0
- novel_downloader/core/archived/deqixs/searcher.py +89 -0
- novel_downloader/core/{searchers/qidian.py → archived/qidian/searcher.py} +12 -20
- novel_downloader/core/archived/wanbengo/searcher.py +98 -0
- novel_downloader/core/archived/xshbook/searcher.py +93 -0
- novel_downloader/core/downloaders/__init__.py +3 -24
- novel_downloader/core/downloaders/base.py +49 -23
- novel_downloader/core/downloaders/common.py +191 -137
- novel_downloader/core/downloaders/qianbi.py +187 -146
- novel_downloader/core/downloaders/qidian.py +187 -141
- novel_downloader/core/downloaders/registry.py +4 -2
- novel_downloader/core/downloaders/signals.py +46 -0
- novel_downloader/core/exporters/__init__.py +3 -20
- novel_downloader/core/exporters/base.py +33 -37
- novel_downloader/core/exporters/common/__init__.py +1 -2
- novel_downloader/core/exporters/common/epub.py +15 -10
- novel_downloader/core/exporters/common/main_exporter.py +19 -12
- novel_downloader/core/exporters/common/txt.py +14 -9
- novel_downloader/core/exporters/epub_util.py +59 -29
- novel_downloader/core/exporters/linovelib/__init__.py +1 -0
- novel_downloader/core/exporters/linovelib/epub.py +23 -25
- novel_downloader/core/exporters/linovelib/main_exporter.py +8 -12
- novel_downloader/core/exporters/linovelib/txt.py +17 -11
- novel_downloader/core/exporters/qidian.py +2 -8
- novel_downloader/core/exporters/registry.py +4 -2
- novel_downloader/core/exporters/txt_util.py +7 -7
- novel_downloader/core/fetchers/__init__.py +54 -48
- novel_downloader/core/fetchers/aaatxt.py +83 -0
- novel_downloader/core/fetchers/{biquge/session.py → b520.py} +6 -11
- novel_downloader/core/fetchers/{base/session.py → base.py} +37 -46
- novel_downloader/core/fetchers/{biquge/browser.py → biquyuedu.py} +12 -17
- novel_downloader/core/fetchers/dxmwx.py +110 -0
- novel_downloader/core/fetchers/eightnovel.py +139 -0
- novel_downloader/core/fetchers/{esjzone/session.py → esjzone.py} +19 -12
- novel_downloader/core/fetchers/guidaye.py +85 -0
- novel_downloader/core/fetchers/hetushu.py +92 -0
- novel_downloader/core/fetchers/{qianbi/browser.py → i25zw.py} +19 -28
- novel_downloader/core/fetchers/ixdzs8.py +113 -0
- novel_downloader/core/fetchers/jpxs123.py +101 -0
- novel_downloader/core/fetchers/lewenn.py +83 -0
- novel_downloader/core/fetchers/{linovelib/session.py → linovelib.py} +12 -13
- novel_downloader/core/fetchers/piaotia.py +105 -0
- novel_downloader/core/fetchers/qbtr.py +101 -0
- novel_downloader/core/fetchers/{qianbi/session.py → qianbi.py} +5 -10
- novel_downloader/core/fetchers/{qidian/session.py → qidian.py} +46 -39
- novel_downloader/core/fetchers/quanben5.py +92 -0
- novel_downloader/core/fetchers/{base/rate_limiter.py → rate_limiter.py} +2 -2
- novel_downloader/core/fetchers/registry.py +5 -16
- novel_downloader/core/fetchers/{sfacg/session.py → sfacg.py} +7 -10
- novel_downloader/core/fetchers/shencou.py +106 -0
- novel_downloader/core/fetchers/shuhaige.py +84 -0
- novel_downloader/core/fetchers/tongrenquan.py +84 -0
- novel_downloader/core/fetchers/ttkan.py +95 -0
- novel_downloader/core/fetchers/wanbengo.py +83 -0
- novel_downloader/core/fetchers/xiaoshuowu.py +106 -0
- novel_downloader/core/fetchers/xiguashuwu.py +177 -0
- novel_downloader/core/fetchers/xs63b.py +171 -0
- novel_downloader/core/fetchers/xshbook.py +85 -0
- novel_downloader/core/fetchers/{yamibo/session.py → yamibo.py} +19 -12
- novel_downloader/core/fetchers/yibige.py +114 -0
- novel_downloader/core/interfaces/__init__.py +1 -9
- novel_downloader/core/interfaces/downloader.py +6 -2
- novel_downloader/core/interfaces/exporter.py +7 -7
- novel_downloader/core/interfaces/fetcher.py +4 -17
- novel_downloader/core/interfaces/parser.py +5 -6
- novel_downloader/core/interfaces/searcher.py +9 -1
- novel_downloader/core/parsers/__init__.py +49 -12
- novel_downloader/core/parsers/aaatxt.py +132 -0
- novel_downloader/core/parsers/b520.py +116 -0
- novel_downloader/core/parsers/base.py +63 -12
- novel_downloader/core/parsers/biquyuedu.py +133 -0
- novel_downloader/core/parsers/dxmwx.py +162 -0
- novel_downloader/core/parsers/eightnovel.py +224 -0
- novel_downloader/core/parsers/esjzone.py +61 -66
- novel_downloader/core/parsers/guidaye.py +128 -0
- novel_downloader/core/parsers/hetushu.py +139 -0
- novel_downloader/core/parsers/i25zw.py +137 -0
- novel_downloader/core/parsers/ixdzs8.py +186 -0
- novel_downloader/core/parsers/jpxs123.py +137 -0
- novel_downloader/core/parsers/lewenn.py +142 -0
- novel_downloader/core/parsers/linovelib.py +48 -64
- novel_downloader/core/parsers/piaotia.py +189 -0
- novel_downloader/core/parsers/qbtr.py +136 -0
- novel_downloader/core/parsers/qianbi.py +48 -50
- novel_downloader/core/parsers/qidian/book_info_parser.py +58 -59
- novel_downloader/core/parsers/qidian/chapter_encrypted.py +272 -330
- novel_downloader/core/parsers/qidian/chapter_normal.py +24 -55
- novel_downloader/core/parsers/qidian/main_parser.py +11 -38
- novel_downloader/core/parsers/qidian/utils/__init__.py +1 -0
- novel_downloader/core/parsers/qidian/utils/decryptor_fetcher.py +1 -1
- novel_downloader/core/parsers/qidian/utils/fontmap_recover.py +143 -0
- novel_downloader/core/parsers/qidian/utils/helpers.py +0 -4
- novel_downloader/core/parsers/quanben5.py +103 -0
- novel_downloader/core/parsers/registry.py +5 -16
- novel_downloader/core/parsers/sfacg.py +38 -45
- novel_downloader/core/parsers/shencou.py +215 -0
- novel_downloader/core/parsers/shuhaige.py +111 -0
- novel_downloader/core/parsers/tongrenquan.py +116 -0
- novel_downloader/core/parsers/ttkan.py +132 -0
- novel_downloader/core/parsers/wanbengo.py +191 -0
- novel_downloader/core/parsers/xiaoshuowu.py +173 -0
- novel_downloader/core/parsers/xiguashuwu.py +435 -0
- novel_downloader/core/parsers/xs63b.py +161 -0
- novel_downloader/core/parsers/xshbook.py +134 -0
- novel_downloader/core/parsers/yamibo.py +87 -131
- novel_downloader/core/parsers/yibige.py +166 -0
- novel_downloader/core/searchers/__init__.py +34 -3
- novel_downloader/core/searchers/aaatxt.py +107 -0
- novel_downloader/core/searchers/{biquge.py → b520.py} +29 -28
- novel_downloader/core/searchers/base.py +112 -36
- novel_downloader/core/searchers/dxmwx.py +105 -0
- novel_downloader/core/searchers/eightnovel.py +84 -0
- novel_downloader/core/searchers/esjzone.py +43 -25
- novel_downloader/core/searchers/hetushu.py +92 -0
- novel_downloader/core/searchers/i25zw.py +93 -0
- novel_downloader/core/searchers/ixdzs8.py +107 -0
- novel_downloader/core/searchers/jpxs123.py +107 -0
- novel_downloader/core/searchers/piaotia.py +100 -0
- novel_downloader/core/searchers/qbtr.py +106 -0
- novel_downloader/core/searchers/qianbi.py +74 -40
- novel_downloader/core/searchers/quanben5.py +144 -0
- novel_downloader/core/searchers/registry.py +24 -8
- novel_downloader/core/searchers/shuhaige.py +124 -0
- novel_downloader/core/searchers/tongrenquan.py +110 -0
- novel_downloader/core/searchers/ttkan.py +92 -0
- novel_downloader/core/searchers/xiaoshuowu.py +122 -0
- novel_downloader/core/searchers/xiguashuwu.py +95 -0
- novel_downloader/core/searchers/xs63b.py +104 -0
- novel_downloader/locales/en.json +31 -82
- novel_downloader/locales/zh.json +32 -83
- novel_downloader/models/__init__.py +21 -22
- novel_downloader/models/book.py +44 -0
- novel_downloader/models/config.py +4 -37
- novel_downloader/models/login.py +1 -1
- novel_downloader/models/search.py +5 -0
- novel_downloader/resources/config/settings.toml +8 -70
- novel_downloader/resources/json/xiguashuwu.json +718 -0
- novel_downloader/utils/__init__.py +13 -22
- novel_downloader/utils/chapter_storage.py +3 -2
- novel_downloader/utils/constants.py +4 -29
- novel_downloader/utils/cookies.py +6 -18
- novel_downloader/utils/crypto_utils/__init__.py +13 -0
- novel_downloader/utils/crypto_utils/aes_util.py +90 -0
- novel_downloader/utils/crypto_utils/aes_v1.py +619 -0
- novel_downloader/utils/crypto_utils/aes_v2.py +1143 -0
- novel_downloader/utils/{crypto_utils.py → crypto_utils/rc4.py} +3 -10
- novel_downloader/utils/epub/__init__.py +1 -1
- novel_downloader/utils/epub/constants.py +57 -16
- novel_downloader/utils/epub/documents.py +88 -194
- novel_downloader/utils/epub/models.py +0 -14
- novel_downloader/utils/epub/utils.py +63 -96
- novel_downloader/utils/file_utils/__init__.py +2 -23
- novel_downloader/utils/file_utils/io.py +3 -113
- novel_downloader/utils/file_utils/sanitize.py +0 -4
- novel_downloader/utils/fontocr.py +207 -0
- novel_downloader/utils/logger.py +8 -16
- novel_downloader/utils/network.py +2 -2
- novel_downloader/utils/state.py +4 -90
- novel_downloader/utils/text_utils/__init__.py +1 -7
- novel_downloader/utils/text_utils/diff_display.py +5 -7
- novel_downloader/utils/time_utils/__init__.py +5 -11
- novel_downloader/utils/time_utils/datetime_utils.py +20 -29
- novel_downloader/utils/time_utils/sleep_utils.py +4 -8
- novel_downloader/web/__init__.py +13 -0
- novel_downloader/web/components/__init__.py +11 -0
- novel_downloader/web/components/navigation.py +35 -0
- novel_downloader/web/main.py +66 -0
- novel_downloader/web/pages/__init__.py +17 -0
- novel_downloader/web/pages/download.py +78 -0
- novel_downloader/web/pages/progress.py +147 -0
- novel_downloader/web/pages/search.py +329 -0
- novel_downloader/web/services/__init__.py +17 -0
- novel_downloader/web/services/client_dialog.py +164 -0
- novel_downloader/web/services/cred_broker.py +113 -0
- novel_downloader/web/services/cred_models.py +35 -0
- novel_downloader/web/services/task_manager.py +264 -0
- novel_downloader-2.0.0.dist-info/METADATA +171 -0
- novel_downloader-2.0.0.dist-info/RECORD +210 -0
- {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.0.dist-info}/entry_points.txt +1 -1
- novel_downloader/core/downloaders/biquge.py +0 -29
- novel_downloader/core/downloaders/esjzone.py +0 -29
- novel_downloader/core/downloaders/linovelib.py +0 -29
- novel_downloader/core/downloaders/sfacg.py +0 -29
- novel_downloader/core/downloaders/yamibo.py +0 -29
- novel_downloader/core/exporters/biquge.py +0 -22
- novel_downloader/core/exporters/esjzone.py +0 -22
- novel_downloader/core/exporters/qianbi.py +0 -22
- novel_downloader/core/exporters/sfacg.py +0 -22
- novel_downloader/core/exporters/yamibo.py +0 -22
- novel_downloader/core/fetchers/base/__init__.py +0 -14
- novel_downloader/core/fetchers/base/browser.py +0 -422
- novel_downloader/core/fetchers/biquge/__init__.py +0 -14
- novel_downloader/core/fetchers/esjzone/__init__.py +0 -14
- novel_downloader/core/fetchers/esjzone/browser.py +0 -209
- novel_downloader/core/fetchers/linovelib/__init__.py +0 -14
- novel_downloader/core/fetchers/linovelib/browser.py +0 -198
- novel_downloader/core/fetchers/qianbi/__init__.py +0 -14
- novel_downloader/core/fetchers/qidian/__init__.py +0 -14
- novel_downloader/core/fetchers/qidian/browser.py +0 -326
- novel_downloader/core/fetchers/sfacg/__init__.py +0 -14
- novel_downloader/core/fetchers/sfacg/browser.py +0 -194
- novel_downloader/core/fetchers/yamibo/__init__.py +0 -14
- novel_downloader/core/fetchers/yamibo/browser.py +0 -234
- novel_downloader/core/parsers/biquge.py +0 -139
- novel_downloader/models/chapter.py +0 -25
- novel_downloader/models/types.py +0 -13
- novel_downloader/tui/__init__.py +0 -7
- novel_downloader/tui/app.py +0 -32
- novel_downloader/tui/main.py +0 -17
- novel_downloader/tui/screens/__init__.py +0 -14
- novel_downloader/tui/screens/home.py +0 -198
- novel_downloader/tui/screens/login.py +0 -74
- novel_downloader/tui/styles/home_layout.tcss +0 -79
- novel_downloader/tui/widgets/richlog_handler.py +0 -24
- novel_downloader/utils/cache.py +0 -24
- novel_downloader/utils/fontocr/__init__.py +0 -22
- novel_downloader/utils/fontocr/hash_store.py +0 -280
- novel_downloader/utils/fontocr/hash_utils.py +0 -103
- novel_downloader/utils/fontocr/model_loader.py +0 -69
- novel_downloader/utils/fontocr/ocr_v1.py +0 -315
- novel_downloader/utils/fontocr/ocr_v2.py +0 -764
- novel_downloader/utils/fontocr/ocr_v3.py +0 -744
- novel_downloader-1.5.0.dist-info/METADATA +0 -196
- novel_downloader-1.5.0.dist-info/RECORD +0 -164
- {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.0.dist-info}/WHEEL +0 -0
- {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.0.dist-info}/licenses/LICENSE +0 -0
- {novel_downloader-1.5.0.dist-info → novel_downloader-2.0.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,171 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""
|
3
|
+
novel_downloader.core.fetchers.xs63b
|
4
|
+
------------------------------------
|
5
|
+
|
6
|
+
"""
|
7
|
+
|
8
|
+
import asyncio
|
9
|
+
import base64
|
10
|
+
import re
|
11
|
+
from typing import Any
|
12
|
+
|
13
|
+
from novel_downloader.core.fetchers.base import BaseSession
|
14
|
+
from novel_downloader.core.fetchers.registry import register_fetcher
|
15
|
+
from novel_downloader.models import FetcherConfig
|
16
|
+
from novel_downloader.utils import async_jitter_sleep
|
17
|
+
|
18
|
+
|
19
|
+
@register_fetcher(
    site_keys=["xs63b"],
)
class Xs63bSession(BaseSession):
    """
    A session class for interacting with the Xiaoshuo Lushang
    (小说路上, m.xs63b.com) novel website.

    Book info and chapter pages are requested from the mobile host, while the
    catalog is requested from the ``www`` host. A chapter may be split over
    several pages; the address of each follow-up page is rebuilt from the
    ``jsarr`` / ``jsstr`` JavaScript variables embedded in the current page.
    """

    BOOK_INFO_URL = "https://m.xs63b.com/{book_id}/"
    BOOK_CATALOG_URL = "https://www.xs63b.com/{book_id}/"
    CHAPTER_URL = "https://m.xs63b.com/{book_id}/{chapter_id}.html"

    # Inline JavaScript variables that encode the next page of a chapter.
    _JSARR_PATTERN = re.compile(r"var\s+jsarr\s*=\s*\[([^\]]+)\]")
    _JSSTR_PATTERN = re.compile(r"var\s+jsstr\s*=\s*\"([^\"]+)\";")

    def __init__(
        self,
        config: FetcherConfig,
        cookies: dict[str, str] | None = None,
        **kwargs: Any,
    ) -> None:
        """
        Initialise the session under the ``xs63b`` site key.

        :param config: Fetcher configuration forwarded to the base session.
        :param cookies: Optional initial cookies forwarded to the base session.
        """
        super().__init__("xs63b", config, cookies, **kwargs)

    async def get_book_info(
        self,
        book_id: str,
        **kwargs: Any,
    ) -> list[str]:
        """
        Fetch the raw HTML of the book info page asynchronously.

        Order: [info, catalog]

        :param book_id: The book identifier; dashes are converted to slashes
            before the URLs are built.
        :return: The page content as string list.
        """
        book_id = book_id.replace("-", "/")
        info_url = self.book_info_url(book_id=book_id)
        catalog_url = self.book_catalog_url(book_id=book_id)

        # Both pages are independent, so they are requested concurrently.
        info_html, catalog_html = await asyncio.gather(
            self.fetch(info_url, ssl=False, **kwargs),
            self.fetch(catalog_url, ssl=False, **kwargs),
        )
        return [info_html, catalog_html]

    async def get_book_chapter(
        self,
        book_id: str,
        chapter_id: str,
        **kwargs: Any,
    ) -> list[str]:
        """
        Fetch the raw HTML of a single chapter asynchronously.

        Order: [page1, ..., pageN]

        Pages are followed for as long as the fetched HTML contains the
        "next page" image marker. When a page request fails, the failure is
        logged and the pages collected so far are returned.

        :param book_id: The book identifier; dashes are converted to slashes
            before the URL is built.
        :param chapter_id: The chapter identifier.
        :return: The page content as string list.
        :raises ValueError: If a page advertises a next page but its
            ``jsarr`` / ``jsstr`` variables cannot be found.
        """
        book_id = book_id.replace("-", "/")
        html_pages: list[str] = []
        chapter_url = self.chapter_url(book_id=book_id, chapter_id=chapter_id)
        page = 1  # 1-based page counter, used for diagnostics only

        while True:
            try:
                # NOTE(review): get_book_info passes ssl=False but this call
                # does not - confirm whether that difference is intended.
                html = await self.fetch(chapter_url, **kwargs)
            except Exception as exc:
                # Best effort: keep whatever pages were already collected.
                # The format string has three placeholders, so the page
                # number must be supplied alongside the URL and the error.
                self.logger.warning(
                    "[async] get_book_chapter(%s page %d) failed: %s",
                    chapter_url,
                    page,
                    exc,
                )
                break

            html_pages.append(html)
            if "/xs635/mobile/images/nextpage.png" not in html:
                # No "next page" marker: this was the last page.
                break

            jsarr = self._parse_jsarr(html)
            jsstr = self._parse_jsstr(html)
            chapter_url = self._build_chapter_url(book_id, jsarr, jsstr)
            page += 1

            # Pause between consecutive page requests.
            await async_jitter_sleep(
                self.request_interval,
                mul_spread=1.1,
                max_sleep=self.request_interval + 2,
            )

        return html_pages

    @classmethod
    def book_info_url(cls, book_id: str) -> str:
        """
        Construct the URL for fetching a book's info page.

        :param book_id: The identifier of the book.
        :return: Fully qualified URL for the book info page.
        """
        return cls.BOOK_INFO_URL.format(book_id=book_id)

    @classmethod
    def book_catalog_url(cls, book_id: str) -> str:
        """
        Construct the URL for fetching a book's catalog page.

        :param book_id: The identifier of the book.
        :return: Fully qualified catalog page URL.
        """
        return cls.BOOK_CATALOG_URL.format(book_id=book_id)

    @classmethod
    def chapter_url(cls, book_id: str, chapter_id: str) -> str:
        """
        Construct the URL for fetching a specific chapter.

        :param book_id: The identifier of the book.
        :param chapter_id: The identifier of the chapter.
        :return: Fully qualified chapter URL.
        """
        return cls.CHAPTER_URL.format(book_id=book_id, chapter_id=chapter_id)

    @classmethod
    def _parse_jsarr(cls, text: str) -> list[int]:
        """
        Extract jsarr from `var jsarr = [...];`.

        Empty entries (for example after a trailing comma) are ignored.

        :param text: Page HTML to search.
        :return: The array entries as integers.
        :raises ValueError: If the variable is not found or an entry is not
            an integer.
        """
        m = cls._JSARR_PATTERN.search(text)
        if not m:
            raise ValueError("jsarr not found")
        return [int(token) for token in m.group(1).split(",") if token.strip()]

    @classmethod
    def _parse_jsstr(cls, text: str) -> str:
        """
        Extract jsstr from `var jsstr = "...";`.

        :param text: Page HTML to search.
        :return: The quoted string value, still encoded.
        :raises ValueError: If the variable is not found.
        """
        m = cls._JSSTR_PATTERN.search(text)
        if not m:
            raise ValueError("jsstr not found")
        return m.group(1)

    @staticmethod
    def _build_chapter_url(book_id: str, jsarr: list[int], jsstr: str) -> str:
        """
        Rebuild the URL of the next chapter page.

        ``jsstr`` is Base64-decoded to text, and the characters at the
        positions listed in ``jsarr`` are joined to form the page name.

        :param book_id: The book identifier (already in slash form).
        :param jsarr: Character positions to pick from the decoded string.
        :param jsstr: Base64-encoded source string.
        :return: Fully qualified URL of the next page.
        :raises IndexError: If a position lies outside the decoded string.
        """
        decoded = base64.b64decode(jsstr).decode("utf-8")
        # A str is directly indexable, so no intermediate list is needed.
        nnstr = "".join(decoded[i] for i in jsarr)
        return f"https://m.xs63b.com/{book_id}/{nnstr}.html"
|
@@ -0,0 +1,85 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""
|
3
|
+
novel_downloader.core.fetchers.xshbook
|
4
|
+
--------------------------------------
|
5
|
+
|
6
|
+
"""
|
7
|
+
|
8
|
+
from typing import Any
|
9
|
+
|
10
|
+
from novel_downloader.core.fetchers.base import BaseSession
|
11
|
+
from novel_downloader.core.fetchers.registry import register_fetcher
|
12
|
+
from novel_downloader.models import FetcherConfig
|
13
|
+
|
14
|
+
|
15
|
+
@register_fetcher(
    site_keys=["xshbook"],
)
class XshbookSession(BaseSession):
    """
    A session class for interacting with the Xiaoshuo Hu
    (小说虎, www.xshbook.com) novel website.
    """

    BOOK_INFO_URL = "https://www.xshbook.com/{book_id}/"
    CHAPTER_URL = "https://www.xshbook.com/{book_id}/{chapter_id}.html"

    def __init__(
        self,
        config: FetcherConfig,
        cookies: dict[str, str] | None = None,
        **kwargs: Any,
    ) -> None:
        """
        Initialise the session under the ``xshbook`` site key.

        :param config: Fetcher configuration forwarded to the base session.
        :param cookies: Optional initial cookies forwarded to the base session.
        """
        super().__init__("xshbook", config, cookies, **kwargs)

    async def get_book_info(
        self,
        book_id: str,
        **kwargs: Any,
    ) -> list[str]:
        """
        Download the book info page and hand back its raw HTML.

        :param book_id: The book identifier; dashes are converted to slashes
            before the URL is built.
        :return: A one-element list holding the page content.
        """
        slashed_id = book_id.replace("-", "/")
        info_page = await self.fetch(
            self.book_info_url(book_id=slashed_id),
            **kwargs,
        )
        return [info_page]

    async def get_book_chapter(
        self,
        book_id: str,
        chapter_id: str,
        **kwargs: Any,
    ) -> list[str]:
        """
        Download a single chapter page and hand back its raw HTML.

        :param book_id: The book identifier; dashes are converted to slashes
            before the URL is built.
        :param chapter_id: The chapter identifier.
        :return: A one-element list holding the page content.
        """
        slashed_id = book_id.replace("-", "/")
        chapter_page = await self.fetch(
            self.chapter_url(book_id=slashed_id, chapter_id=chapter_id),
            **kwargs,
        )
        return [chapter_page]

    @classmethod
    def book_info_url(cls, book_id: str) -> str:
        """
        Build the address of a book's info page.

        :param book_id: The identifier of the book.
        :return: Fully qualified URL for the book info page.
        """
        template = cls.BOOK_INFO_URL
        return template.format(book_id=book_id)

    @classmethod
    def chapter_url(cls, book_id: str, chapter_id: str) -> str:
        """
        Build the address of a specific chapter.

        :param book_id: The identifier of the book.
        :param chapter_id: The identifier of the chapter.
        :return: Fully qualified chapter URL.
        """
        template = cls.CHAPTER_URL
        return template.format(book_id=book_id, chapter_id=chapter_id)
|
@@ -1,10 +1,11 @@
|
|
1
1
|
#!/usr/bin/env python3
|
2
2
|
"""
|
3
|
-
novel_downloader.core.fetchers.yamibo
|
4
|
-
|
3
|
+
novel_downloader.core.fetchers.yamibo
|
4
|
+
-------------------------------------
|
5
5
|
|
6
6
|
"""
|
7
7
|
|
8
|
+
from collections.abc import Mapping
|
8
9
|
from typing import Any
|
9
10
|
|
10
11
|
from lxml import html
|
@@ -12,16 +13,15 @@ from lxml import html
|
|
12
13
|
from novel_downloader.core.fetchers.base import BaseSession
|
13
14
|
from novel_downloader.core.fetchers.registry import register_fetcher
|
14
15
|
from novel_downloader.models import FetcherConfig, LoginField
|
15
|
-
from novel_downloader.utils import
|
16
|
+
from novel_downloader.utils import async_jitter_sleep
|
16
17
|
|
17
18
|
|
18
19
|
@register_fetcher(
|
19
20
|
site_keys=["yamibo"],
|
20
|
-
backends=["session"],
|
21
21
|
)
|
22
22
|
class YamiboSession(BaseSession):
|
23
23
|
"""
|
24
|
-
A session class for interacting with the
|
24
|
+
A session class for interacting with the 百合会 (www.yamibo.com) novel website.
|
25
25
|
"""
|
26
26
|
|
27
27
|
BASE_URL = "https://www.yamibo.com"
|
@@ -69,7 +69,7 @@ class YamiboSession(BaseSession):
|
|
69
69
|
):
|
70
70
|
self._is_logged_in = True
|
71
71
|
return True
|
72
|
-
await
|
72
|
+
await async_jitter_sleep(
|
73
73
|
self.backoff_factor,
|
74
74
|
mul_spread=1.1,
|
75
75
|
max_sleep=self.backoff_factor + 2,
|
@@ -87,7 +87,7 @@ class YamiboSession(BaseSession):
|
|
87
87
|
Fetch the raw HTML of the book info page asynchronously.
|
88
88
|
|
89
89
|
:param book_id: The book identifier.
|
90
|
-
:return: The page content as
|
90
|
+
:return: The page content as string list.
|
91
91
|
"""
|
92
92
|
url = self.book_info_url(book_id=book_id)
|
93
93
|
return [await self.fetch(url, **kwargs)]
|
@@ -103,7 +103,7 @@ class YamiboSession(BaseSession):
|
|
103
103
|
|
104
104
|
:param book_id: The book identifier.
|
105
105
|
:param chapter_id: The chapter identifier.
|
106
|
-
:return: The
|
106
|
+
:return: The page content as string list.
|
107
107
|
"""
|
108
108
|
url = self.chapter_url(book_id=book_id, chapter_id=chapter_id)
|
109
109
|
return [await self.fetch(url, **kwargs)]
|
@@ -171,10 +171,6 @@ class YamiboSession(BaseSession):
|
|
171
171
|
"""
|
172
172
|
return cls.CHAPTER_URL.format(book_id=book_id, chapter_id=chapter_id)
|
173
173
|
|
174
|
-
@property
|
175
|
-
def hostname(self) -> str:
|
176
|
-
return "www.yamibo.com"
|
177
|
-
|
178
174
|
async def _api_login(self, username: str, password: str) -> bool:
|
179
175
|
"""
|
180
176
|
Login to the API using a 2-step token-based process.
|
@@ -232,3 +228,14 @@ class YamiboSession(BaseSession):
|
|
232
228
|
if not resp_text:
|
233
229
|
return False
|
234
230
|
return not any(kw in resp_text[0] for kw in keywords)
|
231
|
+
|
232
|
+
@staticmethod
|
233
|
+
def _filter_cookies(
|
234
|
+
raw_cookies: list[Mapping[str, Any]],
|
235
|
+
) -> dict[str, str]:
|
236
|
+
ALLOWED_DOMAINS = {"www.yamibo.com", "bbs.yamibo.com", ""}
|
237
|
+
return {
|
238
|
+
c["name"]: c["value"]
|
239
|
+
for c in raw_cookies
|
240
|
+
if c.get("domain", "") in ALLOWED_DOMAINS
|
241
|
+
}
|
@@ -0,0 +1,114 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""
|
3
|
+
novel_downloader.core.fetchers.yibige
|
4
|
+
-------------------------------------
|
5
|
+
|
6
|
+
"""
|
7
|
+
|
8
|
+
import asyncio
|
9
|
+
from typing import Any
|
10
|
+
|
11
|
+
from novel_downloader.core.fetchers.base import BaseSession
|
12
|
+
from novel_downloader.core.fetchers.registry import register_fetcher
|
13
|
+
from novel_downloader.models import FetcherConfig
|
14
|
+
|
15
|
+
|
16
|
+
@register_fetcher(
    site_keys=["yibige"],
)
class YibigeSession(BaseSession):
    """
    A session class for interacting with the Yibige
    (一笔阁, www.yibige.org) novel website.
    """

    BOOK_INFO_URL = "https://{base_url}/{book_id}/"
    BOOK_CATALOG_URL = "https://{base_url}/{book_id}/index.html"
    CHAPTER_URL = "https://{base_url}/{book_id}/{chapter_id}.html"

    def __init__(
        self,
        config: FetcherConfig,
        cookies: dict[str, str] | None = None,
        **kwargs: Any,
    ) -> None:
        """
        Initialise the session under the ``yibige`` site key and pick the host.

        :param config: Fetcher configuration; its ``locale_style`` selects
            the host used for every request.
        :param cookies: Optional initial cookies forwarded to the base session.
        """
        super().__init__("yibige", config, cookies, **kwargs)
        # Known hosts:
        #   Main site:                  www.yibige.org
        #   Singapore:                  sg.yibige.org
        #   Taiwan (Traditional):       tw.yibige.org
        #   Hong Kong (Traditional):    hk.yibige.org
        if config.locale_style == "simplified":
            self.base_url = "www.yibige.org"
        else:
            self.base_url = "tw.yibige.org"

    async def get_book_info(
        self,
        book_id: str,
        **kwargs: Any,
    ) -> list[str]:
        """
        Download the book info and catalog pages concurrently.

        Order: [info, catalog]

        :param book_id: The book identifier.
        :return: The page content as string list.
        """
        host = self.base_url
        pages = await asyncio.gather(
            self.fetch(self.book_info_url(base_url=host, book_id=book_id), **kwargs),
            self.fetch(self.book_catalog_url(base_url=host, book_id=book_id), **kwargs),
        )
        return list(pages)

    async def get_book_chapter(
        self,
        book_id: str,
        chapter_id: str,
        **kwargs: Any,
    ) -> list[str]:
        """
        Download a single chapter page and hand back its raw HTML.

        :param book_id: The book identifier.
        :param chapter_id: The chapter identifier.
        :return: A one-element list holding the page content.
        """
        target = self.chapter_url(
            base_url=self.base_url,
            book_id=book_id,
            chapter_id=chapter_id,
        )
        chapter_page = await self.fetch(target, **kwargs)
        return [chapter_page]

    @classmethod
    def book_info_url(cls, base_url: str, book_id: str) -> str:
        """
        Build the address of a book's info page.

        :param base_url: Host name to place in the URL.
        :param book_id: The identifier of the book.
        :return: Fully qualified URL for the book info page.
        """
        template = cls.BOOK_INFO_URL
        return template.format(base_url=base_url, book_id=book_id)

    @classmethod
    def book_catalog_url(cls, base_url: str, book_id: str) -> str:
        """
        Build the address of a book's catalog page.

        :param base_url: Host name to place in the URL.
        :param book_id: The identifier of the book.
        :return: Fully qualified catalog page URL.
        """
        template = cls.BOOK_CATALOG_URL
        return template.format(base_url=base_url, book_id=book_id)

    @classmethod
    def chapter_url(cls, base_url: str, book_id: str, chapter_id: str) -> str:
        """
        Build the address of a specific chapter.

        :param base_url: Host name to place in the URL.
        :param book_id: The identifier of the book.
        :param chapter_id: The identifier of the chapter.
        :return: Fully qualified chapter URL.
        """
        template = cls.CHAPTER_URL
        return template.format(
            base_url=base_url,
            book_id=book_id,
            chapter_id=chapter_id,
        )
|
@@ -3,15 +3,7 @@
|
|
3
3
|
novel_downloader.core.interfaces
|
4
4
|
--------------------------------
|
5
5
|
|
6
|
-
|
7
|
-
system to promote interface-based design and type-safe dependency
|
8
|
-
injection.
|
9
|
-
|
10
|
-
Included protocols:
|
11
|
-
- DownloaderProtocol
|
12
|
-
- FetcherProtocol
|
13
|
-
- ParserProtocol
|
14
|
-
- ExporterProtocol
|
6
|
+
Protocol interfaces defining the contracts for core components.
|
15
7
|
"""
|
16
8
|
|
17
9
|
__all__ = [
|
@@ -3,10 +3,10 @@
|
|
3
3
|
novel_downloader.core.interfaces.downloader
|
4
4
|
-------------------------------------------
|
5
5
|
|
6
|
-
|
7
|
-
that outlines the expected behavior of any downloader class.
|
6
|
+
Protocol defining the interface for asynchronous book downloaders.
|
8
7
|
"""
|
9
8
|
|
9
|
+
import asyncio
|
10
10
|
from collections.abc import Awaitable, Callable
|
11
11
|
from typing import Any, Protocol, runtime_checkable
|
12
12
|
|
@@ -27,6 +27,7 @@ class DownloaderProtocol(Protocol):
|
|
27
27
|
book: BookConfig,
|
28
28
|
*,
|
29
29
|
progress_hook: Callable[[int, int], Awaitable[None]] | None = None,
|
30
|
+
cancel_event: asyncio.Event | None = None,
|
30
31
|
**kwargs: Any,
|
31
32
|
) -> None:
|
32
33
|
"""
|
@@ -35,6 +36,7 @@ class DownloaderProtocol(Protocol):
|
|
35
36
|
:param book: BookConfig with at least 'book_id'.
|
36
37
|
:param progress_hook: Optional async callback after each chapter.
|
37
38
|
args: completed_count, total_count.
|
39
|
+
:param cancel_event: Optional asyncio.Event to allow cancellation.
|
38
40
|
"""
|
39
41
|
...
|
40
42
|
|
@@ -43,6 +45,7 @@ class DownloaderProtocol(Protocol):
|
|
43
45
|
books: list[BookConfig],
|
44
46
|
*,
|
45
47
|
progress_hook: Callable[[int, int], Awaitable[None]] | None = None,
|
48
|
+
cancel_event: asyncio.Event | None = None,
|
46
49
|
**kwargs: Any,
|
47
50
|
) -> None:
|
48
51
|
"""
|
@@ -51,5 +54,6 @@ class DownloaderProtocol(Protocol):
|
|
51
54
|
:param books: List of BookConfig entries.
|
52
55
|
:param progress_hook: Optional async callback after each chapter.
|
53
56
|
args: completed_count, total_count.
|
57
|
+
:param cancel_event: Optional asyncio.Event to allow cancellation.
|
54
58
|
"""
|
55
59
|
...
|
@@ -3,10 +3,10 @@
|
|
3
3
|
novel_downloader.core.interfaces.exporter
|
4
4
|
-----------------------------------------
|
5
5
|
|
6
|
-
|
7
|
-
TXT, EPUB, Markdown, and PDF formats.
|
6
|
+
Protocol defining the interface for exporting books to text, EPUB, and other formats.
|
8
7
|
"""
|
9
8
|
|
9
|
+
from pathlib import Path
|
10
10
|
from typing import Protocol, runtime_checkable
|
11
11
|
|
12
12
|
|
@@ -18,7 +18,7 @@ class ExporterProtocol(Protocol):
|
|
18
18
|
It may also optionally implement an EPUB (or other format) exporter.
|
19
19
|
"""
|
20
20
|
|
21
|
-
def export(self, book_id: str) ->
|
21
|
+
def export(self, book_id: str) -> dict[str, Path]:
|
22
22
|
"""
|
23
23
|
Export the book in the formats specified in config.
|
24
24
|
If a method is not implemented or fails, log the error and continue.
|
@@ -27,7 +27,7 @@ class ExporterProtocol(Protocol):
|
|
27
27
|
"""
|
28
28
|
...
|
29
29
|
|
30
|
-
def export_as_txt(self, book_id: str) -> None:
|
30
|
+
def export_as_txt(self, book_id: str) -> Path | None:
|
31
31
|
"""
|
32
32
|
Persist the assembled book as a .txt file.
|
33
33
|
|
@@ -35,7 +35,7 @@ class ExporterProtocol(Protocol):
|
|
35
35
|
"""
|
36
36
|
...
|
37
37
|
|
38
|
-
def export_as_epub(self, book_id: str) -> None:
|
38
|
+
def export_as_epub(self, book_id: str) -> Path | None:
|
39
39
|
"""
|
40
40
|
Optional: Persist the assembled book as an .epub file.
|
41
41
|
|
@@ -43,7 +43,7 @@ class ExporterProtocol(Protocol):
|
|
43
43
|
"""
|
44
44
|
...
|
45
45
|
|
46
|
-
def export_as_md(self, book_id: str) -> None:
|
46
|
+
def export_as_md(self, book_id: str) -> Path | None:
|
47
47
|
"""
|
48
48
|
Optional: Persist the assembled book as a Markdown (.md) file.
|
49
49
|
|
@@ -51,7 +51,7 @@ class ExporterProtocol(Protocol):
|
|
51
51
|
"""
|
52
52
|
...
|
53
53
|
|
54
|
-
def export_as_pdf(self, book_id: str) -> None:
|
54
|
+
def export_as_pdf(self, book_id: str) -> Path | None:
|
55
55
|
"""
|
56
56
|
Optional: Persist the assembled book as a PDF file.
|
57
57
|
|
@@ -3,8 +3,7 @@
|
|
3
3
|
novel_downloader.core.interfaces.fetcher
|
4
4
|
----------------------------------------
|
5
5
|
|
6
|
-
|
7
|
-
for book info pages, individual chapters, managing request lifecycle
|
6
|
+
Protocol defining the interface for asynchronous fetching, login, and session management
|
8
7
|
"""
|
9
8
|
|
10
9
|
import types
|
@@ -32,6 +31,7 @@ class FetcherProtocol(Protocol):
|
|
32
31
|
) -> bool:
|
33
32
|
"""
|
34
33
|
Attempt to log in asynchronously.
|
34
|
+
|
35
35
|
:returns: True if login succeeded.
|
36
36
|
"""
|
37
37
|
...
|
@@ -45,7 +45,7 @@ class FetcherProtocol(Protocol):
|
|
45
45
|
Fetch the raw HTML (or JSON) of the book info page asynchronously.
|
46
46
|
|
47
47
|
:param book_id: The book identifier.
|
48
|
-
:return: The page content as
|
48
|
+
:return: The page content as string list.
|
49
49
|
"""
|
50
50
|
...
|
51
51
|
|
@@ -60,7 +60,7 @@ class FetcherProtocol(Protocol):
|
|
60
60
|
|
61
61
|
:param book_id: The book identifier.
|
62
62
|
:param chapter_id: The chapter identifier.
|
63
|
-
:return: The
|
63
|
+
:return: The page content as string list.
|
64
64
|
"""
|
65
65
|
...
|
66
66
|
|
@@ -126,19 +126,6 @@ class FetcherProtocol(Protocol):
|
|
126
126
|
"""
|
127
127
|
...
|
128
128
|
|
129
|
-
async def set_interactive_mode(self, enable: bool) -> bool:
|
130
|
-
"""
|
131
|
-
Enable or disable interactive mode for manual login.
|
132
|
-
|
133
|
-
:param enable: True to enable, False to disable interactive mode.
|
134
|
-
:return: True if operation or login check succeeded, False otherwise.
|
135
|
-
"""
|
136
|
-
...
|
137
|
-
|
138
|
-
@property
|
139
|
-
def requester_type(self) -> str:
|
140
|
-
...
|
141
|
-
|
142
129
|
@property
|
143
130
|
def is_logged_in(self) -> bool:
|
144
131
|
"""
|
@@ -3,13 +3,12 @@
|
|
3
3
|
novel_downloader.core.interfaces.parser
|
4
4
|
---------------------------------------
|
5
5
|
|
6
|
-
|
7
|
-
parsing individual chapter content, and setting parser context via book_id.
|
6
|
+
Protocol defining the interface for parsing book metadata and chapter content.
|
8
7
|
"""
|
9
8
|
|
10
9
|
from typing import Any, Protocol, runtime_checkable
|
11
10
|
|
12
|
-
from novel_downloader.models import ChapterDict
|
11
|
+
from novel_downloader.models import BookInfoDict, ChapterDict
|
13
12
|
|
14
13
|
|
15
14
|
@runtime_checkable
|
@@ -24,7 +23,7 @@ class ParserProtocol(Protocol):
|
|
24
23
|
self,
|
25
24
|
html_list: list[str],
|
26
25
|
**kwargs: Any,
|
27
|
-
) ->
|
26
|
+
) -> BookInfoDict | None:
|
28
27
|
"""
|
29
28
|
Parse and return a dictionary of book information from the raw HTML.
|
30
29
|
|
@@ -40,10 +39,10 @@ class ParserProtocol(Protocol):
|
|
40
39
|
**kwargs: Any,
|
41
40
|
) -> ChapterDict | None:
|
42
41
|
"""
|
43
|
-
Parse and
|
42
|
+
Parse chapter page and extract the content of one chapter.
|
44
43
|
|
45
44
|
:param html_list: The HTML list of the chapter pages.
|
46
45
|
:param chapter_id: Identifier of the chapter being parsed.
|
47
|
-
:return: The chapter's
|
46
|
+
:return: The chapter's data.
|
48
47
|
"""
|
49
48
|
...
|
@@ -3,10 +3,13 @@
|
|
3
3
|
novel_downloader.core.interfaces.searcher
|
4
4
|
-----------------------------------------
|
5
5
|
|
6
|
+
Protocol defining the interface for site search implementations.
|
6
7
|
"""
|
7
8
|
|
8
9
|
from typing import Protocol
|
9
10
|
|
11
|
+
import aiohttp
|
12
|
+
|
10
13
|
from novel_downloader.models import SearchResult
|
11
14
|
|
12
15
|
|
@@ -14,5 +17,10 @@ class SearcherProtocol(Protocol):
|
|
14
17
|
site_name: str
|
15
18
|
|
16
19
|
@classmethod
|
17
|
-
def
|
20
|
+
def configure(cls, session: aiohttp.ClientSession) -> None:
|
21
|
+
"""Configure the shared session"""
|
22
|
+
...
|
23
|
+
|
24
|
+
@classmethod
|
25
|
+
async def search(cls, keyword: str, limit: int | None = None) -> list[SearchResult]:
|
18
26
|
...
|