novel-downloader 1.3.3__py3-none-any.whl → 1.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- novel_downloader/__init__.py +1 -1
- novel_downloader/cli/clean.py +97 -78
- novel_downloader/cli/config.py +177 -0
- novel_downloader/cli/download.py +132 -87
- novel_downloader/cli/export.py +77 -0
- novel_downloader/cli/main.py +21 -28
- novel_downloader/config/__init__.py +1 -25
- novel_downloader/config/adapter.py +32 -31
- novel_downloader/config/loader.py +3 -3
- novel_downloader/config/site_rules.py +1 -2
- novel_downloader/core/__init__.py +3 -6
- novel_downloader/core/downloaders/__init__.py +10 -13
- novel_downloader/core/downloaders/base.py +233 -0
- novel_downloader/core/downloaders/biquge.py +27 -0
- novel_downloader/core/downloaders/common.py +414 -0
- novel_downloader/core/downloaders/esjzone.py +27 -0
- novel_downloader/core/downloaders/linovelib.py +27 -0
- novel_downloader/core/downloaders/qianbi.py +27 -0
- novel_downloader/core/downloaders/qidian.py +352 -0
- novel_downloader/core/downloaders/sfacg.py +27 -0
- novel_downloader/core/downloaders/yamibo.py +27 -0
- novel_downloader/core/exporters/__init__.py +37 -0
- novel_downloader/core/{savers → exporters}/base.py +73 -39
- novel_downloader/core/exporters/biquge.py +25 -0
- novel_downloader/core/exporters/common/__init__.py +12 -0
- novel_downloader/core/{savers → exporters}/common/epub.py +22 -22
- novel_downloader/core/{savers/common/main_saver.py → exporters/common/main_exporter.py} +35 -40
- novel_downloader/core/{savers → exporters}/common/txt.py +20 -23
- novel_downloader/core/{savers → exporters}/epub_utils/__init__.py +8 -3
- novel_downloader/core/{savers → exporters}/epub_utils/css_builder.py +2 -2
- novel_downloader/core/{savers → exporters}/epub_utils/image_loader.py +46 -4
- novel_downloader/core/{savers → exporters}/epub_utils/initializer.py +6 -4
- novel_downloader/core/{savers → exporters}/epub_utils/text_to_html.py +3 -3
- novel_downloader/core/{savers → exporters}/epub_utils/volume_intro.py +2 -2
- novel_downloader/core/exporters/esjzone.py +25 -0
- novel_downloader/core/exporters/linovelib/__init__.py +10 -0
- novel_downloader/core/exporters/linovelib/epub.py +449 -0
- novel_downloader/core/exporters/linovelib/main_exporter.py +127 -0
- novel_downloader/core/exporters/linovelib/txt.py +129 -0
- novel_downloader/core/exporters/qianbi.py +25 -0
- novel_downloader/core/{savers → exporters}/qidian.py +8 -8
- novel_downloader/core/exporters/sfacg.py +25 -0
- novel_downloader/core/exporters/yamibo.py +25 -0
- novel_downloader/core/factory/__init__.py +5 -17
- novel_downloader/core/factory/downloader.py +24 -126
- novel_downloader/core/factory/exporter.py +58 -0
- novel_downloader/core/factory/fetcher.py +96 -0
- novel_downloader/core/factory/parser.py +17 -12
- novel_downloader/core/{requesters → fetchers}/__init__.py +22 -15
- novel_downloader/core/{requesters → fetchers}/base/__init__.py +2 -4
- novel_downloader/core/fetchers/base/browser.py +383 -0
- novel_downloader/core/fetchers/base/rate_limiter.py +86 -0
- novel_downloader/core/fetchers/base/session.py +419 -0
- novel_downloader/core/fetchers/biquge/__init__.py +14 -0
- novel_downloader/core/{requesters/biquge/async_session.py → fetchers/biquge/browser.py} +18 -6
- novel_downloader/core/{requesters → fetchers}/biquge/session.py +23 -30
- novel_downloader/core/fetchers/common/__init__.py +14 -0
- novel_downloader/core/fetchers/common/browser.py +79 -0
- novel_downloader/core/{requesters/common/async_session.py → fetchers/common/session.py} +8 -25
- novel_downloader/core/fetchers/esjzone/__init__.py +14 -0
- novel_downloader/core/fetchers/esjzone/browser.py +202 -0
- novel_downloader/core/{requesters/esjzone/async_session.py → fetchers/esjzone/session.py} +62 -42
- novel_downloader/core/fetchers/linovelib/__init__.py +14 -0
- novel_downloader/core/fetchers/linovelib/browser.py +178 -0
- novel_downloader/core/fetchers/linovelib/session.py +178 -0
- novel_downloader/core/fetchers/qianbi/__init__.py +14 -0
- novel_downloader/core/{requesters/qianbi/session.py → fetchers/qianbi/browser.py} +30 -48
- novel_downloader/core/{requesters/qianbi/async_session.py → fetchers/qianbi/session.py} +18 -6
- novel_downloader/core/fetchers/qidian/__init__.py +14 -0
- novel_downloader/core/fetchers/qidian/browser.py +266 -0
- novel_downloader/core/fetchers/qidian/session.py +326 -0
- novel_downloader/core/fetchers/sfacg/__init__.py +14 -0
- novel_downloader/core/fetchers/sfacg/browser.py +189 -0
- novel_downloader/core/{requesters/sfacg/async_session.py → fetchers/sfacg/session.py} +43 -73
- novel_downloader/core/fetchers/yamibo/__init__.py +14 -0
- novel_downloader/core/fetchers/yamibo/browser.py +229 -0
- novel_downloader/core/{requesters/yamibo/async_session.py → fetchers/yamibo/session.py} +62 -44
- novel_downloader/core/interfaces/__init__.py +8 -12
- novel_downloader/core/interfaces/downloader.py +54 -0
- novel_downloader/core/interfaces/{saver.py → exporter.py} +12 -12
- novel_downloader/core/interfaces/fetcher.py +162 -0
- novel_downloader/core/interfaces/parser.py +6 -7
- novel_downloader/core/parsers/__init__.py +5 -6
- novel_downloader/core/parsers/base.py +9 -13
- novel_downloader/core/parsers/biquge/main_parser.py +12 -13
- novel_downloader/core/parsers/common/helper.py +3 -3
- novel_downloader/core/parsers/common/main_parser.py +39 -34
- novel_downloader/core/parsers/esjzone/main_parser.py +20 -14
- novel_downloader/core/parsers/linovelib/__init__.py +10 -0
- novel_downloader/core/parsers/linovelib/main_parser.py +210 -0
- novel_downloader/core/parsers/qianbi/main_parser.py +21 -15
- novel_downloader/core/parsers/qidian/__init__.py +2 -11
- novel_downloader/core/parsers/qidian/book_info_parser.py +113 -0
- novel_downloader/core/parsers/qidian/{browser/chapter_encrypted.py → chapter_encrypted.py} +162 -135
- novel_downloader/core/parsers/qidian/chapter_normal.py +150 -0
- novel_downloader/core/parsers/qidian/{session/chapter_router.py → chapter_router.py} +15 -15
- novel_downloader/core/parsers/qidian/{browser/main_parser.py → main_parser.py} +49 -40
- novel_downloader/core/parsers/qidian/utils/__init__.py +27 -0
- novel_downloader/core/parsers/qidian/utils/decryptor_fetcher.py +145 -0
- novel_downloader/core/parsers/qidian/{shared → utils}/helpers.py +41 -68
- novel_downloader/core/parsers/qidian/{session → utils}/node_decryptor.py +64 -50
- novel_downloader/core/parsers/sfacg/main_parser.py +12 -12
- novel_downloader/core/parsers/yamibo/main_parser.py +10 -10
- novel_downloader/locales/en.json +18 -2
- novel_downloader/locales/zh.json +18 -2
- novel_downloader/models/__init__.py +64 -0
- novel_downloader/models/browser.py +21 -0
- novel_downloader/models/chapter.py +25 -0
- novel_downloader/models/config.py +100 -0
- novel_downloader/models/login.py +20 -0
- novel_downloader/models/site_rules.py +99 -0
- novel_downloader/models/tasks.py +33 -0
- novel_downloader/models/types.py +15 -0
- novel_downloader/resources/config/settings.toml +31 -25
- novel_downloader/resources/json/linovelib_font_map.json +3573 -0
- novel_downloader/tui/__init__.py +7 -0
- novel_downloader/tui/app.py +32 -0
- novel_downloader/tui/main.py +17 -0
- novel_downloader/tui/screens/__init__.py +14 -0
- novel_downloader/tui/screens/home.py +191 -0
- novel_downloader/tui/screens/login.py +74 -0
- novel_downloader/tui/styles/home_layout.tcss +79 -0
- novel_downloader/tui/widgets/richlog_handler.py +24 -0
- novel_downloader/utils/__init__.py +6 -0
- novel_downloader/utils/chapter_storage.py +25 -38
- novel_downloader/utils/constants.py +11 -5
- novel_downloader/utils/cookies.py +66 -0
- novel_downloader/utils/crypto_utils.py +1 -74
- novel_downloader/utils/fontocr/ocr_v1.py +2 -1
- novel_downloader/utils/fontocr/ocr_v2.py +2 -2
- novel_downloader/utils/hash_store.py +10 -18
- novel_downloader/utils/hash_utils.py +3 -2
- novel_downloader/utils/logger.py +2 -3
- novel_downloader/utils/network.py +2 -1
- novel_downloader/utils/text_utils/chapter_formatting.py +6 -1
- novel_downloader/utils/text_utils/font_mapping.py +1 -1
- novel_downloader/utils/text_utils/text_cleaning.py +1 -1
- novel_downloader/utils/time_utils/datetime_utils.py +3 -3
- novel_downloader/utils/time_utils/sleep_utils.py +1 -1
- {novel_downloader-1.3.3.dist-info → novel_downloader-1.4.0.dist-info}/METADATA +69 -35
- novel_downloader-1.4.0.dist-info/RECORD +170 -0
- {novel_downloader-1.3.3.dist-info → novel_downloader-1.4.0.dist-info}/WHEEL +1 -1
- {novel_downloader-1.3.3.dist-info → novel_downloader-1.4.0.dist-info}/entry_points.txt +1 -0
- novel_downloader/cli/interactive.py +0 -66
- novel_downloader/cli/settings.py +0 -177
- novel_downloader/config/models.py +0 -187
- novel_downloader/core/downloaders/base/__init__.py +0 -14
- novel_downloader/core/downloaders/base/base_async.py +0 -153
- novel_downloader/core/downloaders/base/base_sync.py +0 -208
- novel_downloader/core/downloaders/biquge/__init__.py +0 -14
- novel_downloader/core/downloaders/biquge/biquge_async.py +0 -27
- novel_downloader/core/downloaders/biquge/biquge_sync.py +0 -27
- novel_downloader/core/downloaders/common/__init__.py +0 -14
- novel_downloader/core/downloaders/common/common_async.py +0 -210
- novel_downloader/core/downloaders/common/common_sync.py +0 -202
- novel_downloader/core/downloaders/esjzone/__init__.py +0 -14
- novel_downloader/core/downloaders/esjzone/esjzone_async.py +0 -27
- novel_downloader/core/downloaders/esjzone/esjzone_sync.py +0 -27
- novel_downloader/core/downloaders/qianbi/__init__.py +0 -14
- novel_downloader/core/downloaders/qianbi/qianbi_async.py +0 -27
- novel_downloader/core/downloaders/qianbi/qianbi_sync.py +0 -27
- novel_downloader/core/downloaders/qidian/__init__.py +0 -10
- novel_downloader/core/downloaders/qidian/qidian_sync.py +0 -219
- novel_downloader/core/downloaders/sfacg/__init__.py +0 -14
- novel_downloader/core/downloaders/sfacg/sfacg_async.py +0 -27
- novel_downloader/core/downloaders/sfacg/sfacg_sync.py +0 -27
- novel_downloader/core/downloaders/yamibo/__init__.py +0 -14
- novel_downloader/core/downloaders/yamibo/yamibo_async.py +0 -27
- novel_downloader/core/downloaders/yamibo/yamibo_sync.py +0 -27
- novel_downloader/core/factory/requester.py +0 -144
- novel_downloader/core/factory/saver.py +0 -56
- novel_downloader/core/interfaces/async_downloader.py +0 -36
- novel_downloader/core/interfaces/async_requester.py +0 -84
- novel_downloader/core/interfaces/sync_downloader.py +0 -36
- novel_downloader/core/interfaces/sync_requester.py +0 -82
- novel_downloader/core/parsers/qidian/browser/__init__.py +0 -12
- novel_downloader/core/parsers/qidian/browser/chapter_normal.py +0 -93
- novel_downloader/core/parsers/qidian/browser/chapter_router.py +0 -71
- novel_downloader/core/parsers/qidian/session/__init__.py +0 -12
- novel_downloader/core/parsers/qidian/session/chapter_encrypted.py +0 -443
- novel_downloader/core/parsers/qidian/session/chapter_normal.py +0 -115
- novel_downloader/core/parsers/qidian/session/main_parser.py +0 -128
- novel_downloader/core/parsers/qidian/shared/__init__.py +0 -37
- novel_downloader/core/parsers/qidian/shared/book_info_parser.py +0 -150
- novel_downloader/core/requesters/base/async_session.py +0 -410
- novel_downloader/core/requesters/base/browser.py +0 -337
- novel_downloader/core/requesters/base/session.py +0 -378
- novel_downloader/core/requesters/biquge/__init__.py +0 -14
- novel_downloader/core/requesters/common/__init__.py +0 -17
- novel_downloader/core/requesters/common/session.py +0 -113
- novel_downloader/core/requesters/esjzone/__init__.py +0 -13
- novel_downloader/core/requesters/esjzone/session.py +0 -235
- novel_downloader/core/requesters/qianbi/__init__.py +0 -13
- novel_downloader/core/requesters/qidian/__init__.py +0 -21
- novel_downloader/core/requesters/qidian/broswer.py +0 -307
- novel_downloader/core/requesters/qidian/session.py +0 -290
- novel_downloader/core/requesters/sfacg/__init__.py +0 -13
- novel_downloader/core/requesters/sfacg/session.py +0 -242
- novel_downloader/core/requesters/yamibo/__init__.py +0 -13
- novel_downloader/core/requesters/yamibo/session.py +0 -237
- novel_downloader/core/savers/__init__.py +0 -34
- novel_downloader/core/savers/biquge.py +0 -25
- novel_downloader/core/savers/common/__init__.py +0 -12
- novel_downloader/core/savers/esjzone.py +0 -25
- novel_downloader/core/savers/qianbi.py +0 -25
- novel_downloader/core/savers/sfacg.py +0 -25
- novel_downloader/core/savers/yamibo.py +0 -25
- novel_downloader/resources/config/rules.toml +0 -196
- novel_downloader-1.3.3.dist-info/RECORD +0 -166
- {novel_downloader-1.3.3.dist-info → novel_downloader-1.4.0.dist-info}/licenses/LICENSE +0 -0
- {novel_downloader-1.3.3.dist-info → novel_downloader-1.4.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,178 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""
|
3
|
+
novel_downloader.core.fetchers.linovelib.session
|
4
|
+
------------------------------------------------
|
5
|
+
|
6
|
+
"""
|
7
|
+
|
8
|
+
import re
|
9
|
+
from typing import Any
|
10
|
+
|
11
|
+
from novel_downloader.core.fetchers.base import BaseSession
|
12
|
+
from novel_downloader.models import FetcherConfig
|
13
|
+
from novel_downloader.utils.time_utils import async_sleep_with_random_delay
|
14
|
+
|
15
|
+
|
16
|
+
class LinovelibSession(BaseSession):
    """
    A session class for interacting with Linovelib (www.linovelib.com) novel website.

    The book info page only links to the individual volume pages, so
    :meth:`get_book_info` follows those links, and chapters may be split over
    several HTML pages, which :meth:`get_book_chapter` collects in order.
    """

    BASE_URL = "https://www.linovelib.com"
    BOOK_INFO_URL = "https://www.linovelib.com/novel/{book_id}.html"
    BOOK_VOL_URL = "https://www.linovelib.com/novel/{book_id}/{vol_id}.html"
    CHAPTER_URL = "https://www.linovelib.com/novel/{book_id}/{chapter_id}.html"

    # Captures the ``vol_<digits>`` part of links such as /novel/123/vol_456.html
    _VOL_ID_PATTERN: re.Pattern[str] = re.compile(r"/novel/\d+/(vol_\d+)\.html")

    def __init__(
        self,
        config: FetcherConfig,
        cookies: dict[str, str] | None = None,
        **kwargs: Any,
    ) -> None:
        """
        Initialize the session for the ``linovelib`` site.

        :param config: Fetcher configuration forwarded to the base session.
        :param cookies: Optional initial cookies forwarded to the base session.
        :param kwargs: Extra keyword arguments forwarded to the base session.
        """
        super().__init__("linovelib", config, cookies, **kwargs)

    async def get_book_info(
        self,
        book_id: str,
        **kwargs: Any,
    ) -> list[str]:
        """
        Fetch the raw HTML of the book info page and of every linked volume page.

        Volume IDs are taken from the info page in reversed order of appearance,
        and a randomized delay is awaited before each volume request.
        Volume pages that come back empty are omitted from the result.

        :param book_id: The book identifier.
        :param kwargs: Extra keyword arguments forwarded to ``fetch``.
        :return: A list of HTML strings: [info_html, vol1_html, ..., volN_html]
        """
        url = self.book_info_url(book_id=book_id)
        info_html = await self.fetch(url, **kwargs)

        vol_ids = self._extract_vol_ids(info_html)
        vol_ids.reverse()

        vol_htmls: list[str] = []
        for vol_id in vol_ids:
            await async_sleep_with_random_delay(
                self.request_interval,
                mul_spread=1.1,
                max_sleep=self.request_interval + 2,
            )
            html = await self.get_book_volume(book_id, vol_id, **kwargs)
            if html:
                vol_htmls.append(html)

        return [info_html] + vol_htmls

    async def get_book_volume(
        self,
        book_id: str,
        vol_id: str,
        **kwargs: Any,
    ) -> str:
        """
        Fetch the HTML content of a specific volume.

        :param book_id: The book identifier.
        :param vol_id: The volume identifier.
        :param kwargs: Extra keyword arguments forwarded to ``fetch``.
        :return: The volume page HTML as a string.
        """
        url = self.volume_url(book_id=book_id, vol_id=vol_id)
        return await self.fetch(url, **kwargs)

    async def get_book_chapter(
        self,
        book_id: str,
        chapter_id: str,
        **kwargs: Any,
    ) -> list[str]:
        """
        Fetch the raw HTML of every page of a single chapter asynchronously.

        The first page lives at ``{chapter_id}.html`` and later pages at
        ``{chapter_id}_{n}.html``. A following page is only requested when the
        previously fetched page contains a link to it.

        Fetching is best-effort: if a request fails, a warning is logged and the
        pages collected so far are returned (an empty list when the first page
        fails).

        :param book_id: The book identifier.
        :param chapter_id: The chapter identifier.
        :param kwargs: Extra keyword arguments forwarded to ``fetch``.
        :return: The chapter pages as a list of HTML strings, in page order.
        """
        html_pages: list[str] = []
        idx = 1

        while True:
            chapter_suffix = chapter_id if idx == 1 else f"{chapter_id}_{idx}"
            relative_path = self.relative_chapter_url(book_id, chapter_suffix)

            # Stop once the last fetched page no longer links to a next page.
            if idx > 1 and relative_path not in html_pages[-1]:
                break

            # Same URL as BASE_URL + relative_path, built via the shared helper.
            full_url = self.chapter_url(book_id=book_id, chapter_id=chapter_suffix)

            try:
                html = await self.fetch(full_url, **kwargs)
            except Exception as exc:
                self.logger.warning(
                    "[async] get_book_chapter(%s page %d) failed: %s",
                    chapter_id,
                    idx,
                    exc,
                )
                break

            html_pages.append(html)
            idx += 1
            await async_sleep_with_random_delay(
                self.request_interval,
                mul_spread=1.1,
                max_sleep=self.request_interval + 2,
            )

        return html_pages

    @classmethod
    def book_info_url(cls, book_id: str) -> str:
        """
        Construct the URL for fetching a book's info page.

        :param book_id: The identifier of the book.
        :return: Fully qualified URL for the book info page.
        """
        return cls.BOOK_INFO_URL.format(book_id=book_id)

    @classmethod
    def volume_url(cls, book_id: str, vol_id: str) -> str:
        """
        Construct the URL for fetching a specific volume.

        :param book_id: The identifier of the book.
        :param vol_id: The identifier of the volume.
        :return: Fully qualified volume URL.
        """
        return cls.BOOK_VOL_URL.format(book_id=book_id, vol_id=vol_id)

    @classmethod
    def chapter_url(cls, book_id: str, chapter_id: str) -> str:
        """
        Construct the URL for fetching a specific chapter.

        :param book_id: The identifier of the book.
        :param chapter_id: The identifier of the chapter.
        :return: Fully qualified chapter URL.
        """
        return cls.CHAPTER_URL.format(book_id=book_id, chapter_id=chapter_id)

    @classmethod
    def relative_chapter_url(cls, book_id: str, chapter_id: str) -> str:
        """
        Return the relative URL path for a given chapter.

        :param book_id: The identifier of the book.
        :param chapter_id: The identifier of the chapter (or chapter page).
        :return: Site-relative path of the chapter page.
        """
        return f"/novel/{book_id}/{chapter_id}.html"

    @property
    def hostname(self) -> str:
        """Return the host name of the Linovelib site."""
        return "www.linovelib.com"

    def _extract_vol_ids(self, html_str: str) -> list[str]:
        """
        Extract volume IDs (like 'vol_12345') from the info HTML.

        IDs are returned in order of appearance; repeated links yield
        repeated IDs.

        :param html_str: Raw HTML of the info page.
        :return: List of volume ID strings.
        """
        # /novel/{book_id}/{vol_id}.html
        return self._VOL_ID_PATTERN.findall(html_str)
|
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""
|
3
|
+
novel_downloader.core.fetchers.qianbi
|
4
|
+
-------------------------------------
|
5
|
+
|
6
|
+
"""
|
7
|
+
|
8
|
+
from .browser import QianbiBrowser
|
9
|
+
from .session import QianbiSession
|
10
|
+
|
11
|
+
__all__ = [
|
12
|
+
"QianbiBrowser",
|
13
|
+
"QianbiSession",
|
14
|
+
]
|
@@ -1,18 +1,19 @@
|
|
1
|
+
#!/usr/bin/env python3
|
1
2
|
"""
|
2
|
-
novel_downloader.core.
|
3
|
-
|
3
|
+
novel_downloader.core.fetchers.qianbi.browser
|
4
|
+
---------------------------------------------
|
4
5
|
|
5
6
|
"""
|
6
7
|
|
7
8
|
from typing import Any
|
8
9
|
|
9
|
-
from novel_downloader.core.
|
10
|
+
from novel_downloader.core.fetchers.base import BaseBrowser
|
11
|
+
from novel_downloader.models import FetcherConfig
|
10
12
|
|
11
13
|
|
12
|
-
class
|
14
|
+
class QianbiBrowser(BaseBrowser):
|
13
15
|
"""
|
14
|
-
A
|
15
|
-
Qianbi (www.23qb.com) novel website.
|
16
|
+
A browser class for interacting with the Qianbi (www.23qb.com) novel website.
|
16
17
|
"""
|
17
18
|
|
18
19
|
BASE_URLS = [
|
@@ -24,13 +25,21 @@ class QianbiSession(BaseSession):
|
|
24
25
|
BOOK_CATALOG_URL = "https://www.23qb.com/book/{book_id}/catalog"
|
25
26
|
CHAPTER_URL = "https://www.23qb.com/book/{book_id}/{chapter_id}.html"
|
26
27
|
|
27
|
-
def
|
28
|
+
def __init__(
|
29
|
+
self,
|
30
|
+
config: FetcherConfig,
|
31
|
+
reuse_page: bool = False,
|
32
|
+
**kwargs: Any,
|
33
|
+
) -> None:
|
34
|
+
super().__init__("qianbi", config, reuse_page, **kwargs)
|
35
|
+
|
36
|
+
async def get_book_info(
|
28
37
|
self,
|
29
38
|
book_id: str,
|
30
39
|
**kwargs: Any,
|
31
40
|
) -> list[str]:
|
32
41
|
"""
|
33
|
-
Fetch the raw HTML of the book info
|
42
|
+
Fetch the raw HTML of the book info page asynchronously.
|
34
43
|
|
35
44
|
Order: [info, catalog]
|
36
45
|
|
@@ -40,58 +49,27 @@ class QianbiSession(BaseSession):
|
|
40
49
|
info_url = self.book_info_url(book_id=book_id)
|
41
50
|
catalog_url = self.book_catalog_url(book_id=book_id)
|
42
51
|
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
self.logger.warning(
|
50
|
-
"[session] get_book_info(info:%s) failed: %s",
|
51
|
-
book_id,
|
52
|
-
exc,
|
53
|
-
)
|
54
|
-
pages.append("")
|
55
|
-
|
56
|
-
try:
|
57
|
-
resp = self.get(catalog_url, **kwargs)
|
58
|
-
resp.raise_for_status()
|
59
|
-
pages.append(resp.text)
|
60
|
-
except Exception as exc:
|
61
|
-
self.logger.warning(
|
62
|
-
"[session] get_book_info(catalog:%s) failed: %s",
|
63
|
-
book_id,
|
64
|
-
exc,
|
65
|
-
)
|
66
|
-
pages.append("")
|
67
|
-
|
68
|
-
return pages
|
69
|
-
|
70
|
-
def get_book_chapter(
|
52
|
+
info_html = await self.fetch(info_url, **kwargs)
|
53
|
+
catalog_html = await self.fetch(catalog_url, **kwargs)
|
54
|
+
|
55
|
+
return [info_html, catalog_html]
|
56
|
+
|
57
|
+
async def get_book_chapter(
|
71
58
|
self,
|
72
59
|
book_id: str,
|
73
60
|
chapter_id: str,
|
74
61
|
**kwargs: Any,
|
75
62
|
) -> list[str]:
|
76
63
|
"""
|
77
|
-
Fetch the HTML of a single chapter.
|
64
|
+
Fetch the raw HTML of a single chapter asynchronously.
|
78
65
|
|
79
66
|
:param book_id: The book identifier.
|
80
67
|
:param chapter_id: The chapter identifier.
|
81
68
|
:return: The chapter content as a string.
|
82
69
|
"""
|
70
|
+
catalog_url = self.book_catalog_url(book_id=book_id)
|
83
71
|
url = self.chapter_url(book_id=book_id, chapter_id=chapter_id)
|
84
|
-
|
85
|
-
resp = self.get(url, **kwargs)
|
86
|
-
resp.raise_for_status()
|
87
|
-
return [resp.text]
|
88
|
-
except Exception as exc:
|
89
|
-
self.logger.warning(
|
90
|
-
"[session] get_book_chapter(%s) failed: %s",
|
91
|
-
book_id,
|
92
|
-
exc,
|
93
|
-
)
|
94
|
-
return []
|
72
|
+
return [await self.fetch(url, referer=catalog_url, **kwargs)]
|
95
73
|
|
96
74
|
@classmethod
|
97
75
|
def book_info_url(cls, book_id: str) -> str:
|
@@ -123,3 +101,7 @@ class QianbiSession(BaseSession):
|
|
123
101
|
:return: Fully qualified chapter URL.
|
124
102
|
"""
|
125
103
|
return cls.CHAPTER_URL.format(book_id=book_id, chapter_id=chapter_id)
|
104
|
+
|
105
|
+
@property
|
106
|
+
def hostname(self) -> str:
|
107
|
+
return "www.23qb.com"
|
@@ -1,20 +1,20 @@
|
|
1
1
|
#!/usr/bin/env python3
|
2
2
|
"""
|
3
|
-
novel_downloader.core.
|
4
|
-
|
3
|
+
novel_downloader.core.fetchers.qianbi.session
|
4
|
+
---------------------------------------------
|
5
5
|
|
6
6
|
"""
|
7
7
|
|
8
8
|
import asyncio
|
9
9
|
from typing import Any
|
10
10
|
|
11
|
-
from novel_downloader.core.
|
11
|
+
from novel_downloader.core.fetchers.base import BaseSession
|
12
|
+
from novel_downloader.models import FetcherConfig
|
12
13
|
|
13
14
|
|
14
|
-
class
|
15
|
+
class QianbiSession(BaseSession):
|
15
16
|
"""
|
16
|
-
A
|
17
|
-
Qianbi (www.23qb.com) novel website.
|
17
|
+
A session class for interacting with the Qianbi (www.23qb.com) novel website.
|
18
18
|
"""
|
19
19
|
|
20
20
|
BASE_URLS = [
|
@@ -26,6 +26,14 @@ class QianbiAsyncSession(BaseAsyncSession):
|
|
26
26
|
BOOK_CATALOG_URL = "https://www.23qb.com/book/{book_id}/catalog"
|
27
27
|
CHAPTER_URL = "https://www.23qb.com/book/{book_id}/{chapter_id}.html"
|
28
28
|
|
29
|
+
def __init__(
|
30
|
+
self,
|
31
|
+
config: FetcherConfig,
|
32
|
+
cookies: dict[str, str] | None = None,
|
33
|
+
**kwargs: Any,
|
34
|
+
) -> None:
|
35
|
+
super().__init__("qianbi", config, cookies, **kwargs)
|
36
|
+
|
29
37
|
async def get_book_info(
|
30
38
|
self,
|
31
39
|
book_id: str,
|
@@ -94,3 +102,7 @@ class QianbiAsyncSession(BaseAsyncSession):
|
|
94
102
|
:return: Fully qualified chapter URL.
|
95
103
|
"""
|
96
104
|
return cls.CHAPTER_URL.format(book_id=book_id, chapter_id=chapter_id)
|
105
|
+
|
106
|
+
@property
|
107
|
+
def hostname(self) -> str:
|
108
|
+
return "www.23qb.com"
|
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""
|
3
|
+
novel_downloader.core.fetchers.qidian
|
4
|
+
-------------------------------------
|
5
|
+
|
6
|
+
"""
|
7
|
+
|
8
|
+
from .browser import QidianBrowser
|
9
|
+
from .session import QidianSession
|
10
|
+
|
11
|
+
__all__ = [
|
12
|
+
"QidianBrowser",
|
13
|
+
"QidianSession",
|
14
|
+
]
|
@@ -0,0 +1,266 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
"""
|
3
|
+
novel_downloader.core.fetchers.qidian.browser
|
4
|
+
---------------------------------------------
|
5
|
+
|
6
|
+
"""
|
7
|
+
|
8
|
+
from typing import Any
|
9
|
+
|
10
|
+
from playwright.async_api import Page
|
11
|
+
|
12
|
+
from novel_downloader.core.fetchers.base import BaseBrowser
|
13
|
+
from novel_downloader.models import FetcherConfig, LoginField
|
14
|
+
from novel_downloader.utils.i18n import t
|
15
|
+
|
16
|
+
|
17
|
+
class QidianBrowser(BaseBrowser):
    """
    A browser class for interacting with the Qidian (www.qidian.com) novel website.
    """

    HOMEPAGE_URL = "https://www.qidian.com/"
    BOOKCASE_URL = "https://my.qidian.com/bookcase/"
    BOOK_INFO_URL = "https://book.qidian.com/info/{book_id}/"
    # BOOK_INFO_URL = "https://www.qidian.com/book/{book_id}/"
    CHAPTER_URL = "https://www.qidian.com/chapter/{book_id}/{chapter_id}/"

    LOGIN_URL = "https://passport.qidian.com/"

    def __init__(
        self,
        config: FetcherConfig,
        reuse_page: bool = False,
        **kwargs: Any,
    ) -> None:
        """
        Initialize the browser for the ``qidian`` site.

        :param config: Fetcher configuration forwarded to the base browser.
        :param reuse_page: Forwarded to the base browser.
        :param kwargs: Extra keyword arguments forwarded to the base browser.
        """
        super().__init__("qidian", config, reuse_page, **kwargs)

    async def login(
        self,
        username: str = "",
        password: str = "",
        cookies: dict[str, str] | None = None,
        attempt: int = 1,
        **kwargs: Any,
    ) -> bool:
        """
        Refresh and return the login state.

        No credentials are submitted here: ``username``, ``password``,
        ``cookies`` and ``attempt`` are accepted but not used by this
        implementation, which only re-checks the current login status.

        :return: True if the user appears to be logged in, False otherwise.
        """
        self._is_logged_in = await self._check_login_status()
        return self._is_logged_in

    async def get_book_info(
        self,
        book_id: str,
        **kwargs: Any,
    ) -> list[str]:
        """
        Fetch the raw HTML of the book info page asynchronously.

        :param book_id: The book identifier.
        :param kwargs: Extra keyword arguments forwarded to ``fetch``.
        :return: A single-element list containing the page HTML.
        """
        url = self.book_info_url(book_id=book_id)
        return [await self.fetch(url, **kwargs)]

    async def get_book_chapter(
        self,
        book_id: str,
        chapter_id: str,
        **kwargs: Any,
    ) -> list[str]:
        """
        Fetch the raw HTML of a single chapter asynchronously.

        The book info URL is passed to ``fetch`` as the referer.

        :param book_id: The book identifier.
        :param chapter_id: The chapter identifier.
        :param kwargs: Extra keyword arguments forwarded to ``fetch``.
        :return: A single-element list containing the chapter HTML.
        """
        catalog_url = self.book_info_url(book_id=book_id)
        url = self.chapter_url(book_id=book_id, chapter_id=chapter_id)
        return [await self.fetch(url, referer=catalog_url, **kwargs)]

    async def get_bookcase(
        self,
        **kwargs: Any,
    ) -> list[str]:
        """
        Retrieve the user's *bookcase* page.

        :param kwargs: Extra keyword arguments forwarded to ``fetch``.
        :return: A single-element list containing the bookcase page HTML.
        """
        url = self.bookcase_url()
        return [await self.fetch(url, **kwargs)]

    async def get_homepage(
        self,
        **kwargs: Any,
    ) -> list[str]:
        """
        Retrieve the site home page.

        :param kwargs: Extra keyword arguments forwarded to ``fetch``.
        :return: A single-element list containing the home page HTML.
        """
        url = self.homepage_url()
        return [await self.fetch(url, **kwargs)]

    async def set_interactive_mode(self, enable: bool) -> bool:
        """
        Enable or disable interactive mode for manual login.

        Enabling restarts the browser with ``headless=False`` when
        ``self.headless`` is set, and opens the login URL in a dedicated page.
        Disabling closes that page, restarts the browser with ``headless=True``
        when ``self.headless`` is set, and re-checks the login status.

        :param enable: True to enable, False to disable interactive mode.
        :return: True if operation or login check succeeded, False otherwise.
        """
        if enable:
            if self.headless:
                await self._restart_browser(headless=False)
            # NOTE(review): _manual_page is presumably initialised by the base
            # class -- confirm.
            if self._manual_page is None:
                self._manual_page = await self.context.new_page()
            await self._manual_page.goto(self.LOGIN_URL)
            return True

        # restore
        if self._manual_page:
            await self._manual_page.close()
        self._manual_page = None
        if self.headless:
            await self._restart_browser(headless=True)
        self._is_logged_in = await self._check_login_status()
        return self.is_logged_in

    @property
    def login_fields(self) -> list[LoginField]:
        """Return the login fields for this site: a single manual-login entry."""
        return [
            LoginField(
                name="manual_login",
                label="手动登录",
                type="manual_login",
                required=True,
                description=t("login_prompt_intro"),
            )
        ]

    @classmethod
    def homepage_url(cls) -> str:
        """
        Construct the URL for the site home page.

        :return: Fully qualified URL of the home page.
        """
        return cls.HOMEPAGE_URL

    @classmethod
    def bookcase_url(cls) -> str:
        """
        Construct the URL for the user's bookcase page.

        :return: Fully qualified URL of the bookcase.
        """
        return cls.BOOKCASE_URL

    @classmethod
    def book_info_url(cls, book_id: str) -> str:
        """
        Construct the URL for fetching a book's info page.

        :param book_id: The identifier of the book.
        :return: Fully qualified URL for the book info page.
        """
        return cls.BOOK_INFO_URL.format(book_id=book_id)

    @classmethod
    def chapter_url(cls, book_id: str, chapter_id: str) -> str:
        """
        Construct the URL for fetching a specific chapter.

        :param book_id: The identifier of the book.
        :param chapter_id: The identifier of the chapter.
        :return: Fully qualified chapter URL.
        """
        return cls.CHAPTER_URL.format(book_id=book_id, chapter_id=chapter_id)

    @property
    def hostname(self) -> str:
        """Return the host name of the Qidian site."""
        return "www.qidian.com"

    async def _check_login_status(self) -> bool:
        """
        Check whether the user is currently logged in by inspecting
        the visibility of the 'sign-in' element on the homepage.

        A temporary page is opened for the check and is always closed
        afterwards, including when navigation or a selector query fails.

        :return: True if the user appears to be logged in, False otherwise.
        """
        page: Page | None = None
        try:
            page = await self.context.new_page()
            await page.goto(self.HOMEPAGE_URL, wait_until="networkidle")
            await self._login_auto(page)
            await self._dismiss_overlay(page)
            sign_in_elem = await page.query_selector(".sign-in")
            if sign_in_elem and await sign_in_elem.is_visible():
                self.logger.debug("[auth] Sign-in element visible.")
                return False
            self.logger.debug("[auth] Sign-in element not found.")
            return True
        except Exception as e:
            self.logger.warning("[auth] Error while checking login status: %s", e)
            return False
        finally:
            # Close the temporary page on every path so failed checks do not
            # accumulate open pages in the browser context.
            if page is not None:
                try:
                    await page.close()
                except Exception as e:
                    self.logger.debug("[auth] Failed to close status page: %s", e)

    async def _dismiss_overlay(
        self,
        page: Page,
        timeout: float = 2.0,
    ) -> None:
        """
        Detect and close any full-page overlay mask that might block the login UI.

        Best-effort: any error (including the mask not appearing within
        ``timeout``) is logged at debug level and otherwise ignored.

        :param page: Playwright Page object to inspect.
        :param timeout: Seconds to wait for the overlay mask to appear.
        """
        try:
            mask = await page.wait_for_selector("div.mask", timeout=timeout * 1000)
            if not mask or not await mask.is_visible():
                return

            self.logger.debug("[auth] Overlay mask detected; attempting to close.")

            iframe_element = await page.query_selector('iframe[name="loginIfr"]')
            if iframe_element is None:
                self.logger.debug("[auth] Login iframe not found.")
                return

            iframe = await iframe_element.content_frame()
            if iframe is None:
                self.logger.debug("[auth] Unable to access iframe content.")
                return

            # Click the close button
            await iframe.click("#close", timeout=2000)
            self.logger.debug("[auth] Overlay mask closed.")

        except Exception as e:
            self.logger.debug("[auth] Error handling overlay mask: %s", e)

    async def _login_auto(
        self,
        page: Page,
        timeout: float = 5.0,
    ) -> None:
        """
        Attempt one automatic login interaction (a single click, no retry).

        Navigates ``page`` to the homepage, waits for the login box and, if the
        login button is present and visible, clicks it once. Failures are
        logged and swallowed; the outcome is not reported to the caller.

        :param page: Playwright Page object to interact with.
        :param timeout: Seconds to wait for login box to appear.
        """
        try:
            await page.goto(self.HOMEPAGE_URL, wait_until="networkidle")
            await page.wait_for_selector("#login-box", timeout=timeout * 1000)
        except Exception as e:
            self.logger.warning("[auth] Failed to load login box: %s", e)
            return

        self.logger.debug("[auth] Clicking login button once.")
        try:
            btn = await page.query_selector("#login-btn")
            if btn and await btn.is_visible():
                await btn.click()
        except Exception as e:
            self.logger.debug("[auth] Failed to click login button: %s", e)
        return
|