kmoe-manga-downloader 1.1.1__py3-none-any.whl → 1.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kmdr/core/__init__.py +5 -3
- kmdr/core/bases.py +53 -81
- kmdr/core/context.py +28 -0
- kmdr/core/defaults.py +64 -28
- kmdr/core/error.py +1 -1
- kmdr/core/session.py +16 -0
- kmdr/core/utils.py +33 -40
- kmdr/main.py +30 -17
- kmdr/module/authenticator/CookieAuthenticator.py +8 -4
- kmdr/module/authenticator/LoginAuthenticator.py +25 -21
- kmdr/module/authenticator/utils.py +47 -43
- kmdr/module/configurer/ConfigClearer.py +7 -2
- kmdr/module/configurer/ConfigUnsetter.py +2 -2
- kmdr/module/configurer/OptionLister.py +17 -3
- kmdr/module/configurer/OptionSetter.py +2 -2
- kmdr/module/configurer/option_validate.py +14 -12
- kmdr/module/downloader/DirectDownloader.py +7 -5
- kmdr/module/downloader/ReferViaDownloader.py +27 -24
- kmdr/module/downloader/utils.py +274 -101
- kmdr/module/lister/BookUrlLister.py +4 -3
- kmdr/module/lister/FollowedBookLister.py +59 -22
- kmdr/module/lister/utils.py +39 -28
- kmdr/module/picker/ArgsFilterPicker.py +1 -1
- kmdr/module/picker/DefaultVolPicker.py +34 -5
- {kmoe_manga_downloader-1.1.1.dist-info → kmoe_manga_downloader-1.2.0.dist-info}/METADATA +17 -11
- kmoe_manga_downloader-1.2.0.dist-info/RECORD +35 -0
- kmoe_manga_downloader-1.1.1.dist-info/RECORD +0 -33
- {kmoe_manga_downloader-1.1.1.dist-info → kmoe_manga_downloader-1.2.0.dist-info}/WHEEL +0 -0
- {kmoe_manga_downloader-1.1.1.dist-info → kmoe_manga_downloader-1.2.0.dist-info}/entry_points.txt +0 -0
- {kmoe_manga_downloader-1.1.1.dist-info → kmoe_manga_downloader-1.2.0.dist-info}/licenses/LICENSE +0 -0
- {kmoe_manga_downloader-1.1.1.dist-info → kmoe_manga_downloader-1.2.0.dist-info}/top_level.txt +0 -0
kmdr/module/downloader/utils.py
CHANGED
@@ -1,142 +1,315 @@
-
+import asyncio
 import os
-import time
-from functools import wraps
-
-from requests import Session, HTTPError
-from requests.exceptions import ChunkedEncodingError
-from tqdm import tqdm
 import re
+import math
+from typing import Callable, Optional, Union, Awaitable
+from enum import Enum
+
+from deprecation import deprecated
+import aiohttp
+import aiofiles
+import aiofiles.os as aio_os
+from rich.progress import Progress
+from aiohttp.client_exceptions import ClientPayloadError

 BLOCK_SIZE_REDUCTION_FACTOR = 0.75
 MIN_BLOCK_SIZE = 2048

-
-
-
-
-
-
-
-
-
-
+class STATUS(Enum):
+    WAITING='[blue]等待中[/blue]'
+    DOWNLOADING='[cyan]下载中[/cyan]'
+    RETRYING='[yellow]重试中[/yellow]'
+    MERGING='[magenta]合并中[/magenta]'
+    COMPLETED='[green]完成[/green]'
+    FAILED='[red]失败[/red]'
+
+@deprecated(details="请使用 'download_file_multipart'")
+async def download_file(
+    session: aiohttp.ClientSession,
+    semaphore: asyncio.Semaphore,
+    progress: Progress,
+    url: Union[str, Callable[[], str], Callable[[], Awaitable[str]]],
+    dest_path: str,
+    filename: str,
+    retry_times: int = 3,
+    headers: Optional[dict] = None,
+    callback: Optional[Callable] = None,
+):
     """
     下载文件

-    :param session:
+    :param session: aiohttp.ClientSession 对象
+    :param semaphore: 控制并发的信号量
+    :param progress: 进度条对象
     :param url: 下载链接或者其 Supplier
     :param dest_path: 目标路径
     :param filename: 文件名
     :param retry_times: 重试次数
     :param headers: 请求头
     :param callback: 下载完成后的回调函数
-    :param block_size: 块大小
     """
     if headers is None:
         headers = {}
-    filename_downloading = f'{filename}.downloading'

-    file_path =
-
-
-    if not
-
-
-    if
-
+    file_path = os.path.join(dest_path, filename)
+    filename_downloading = f'{file_path}.downloading'
+
+    if not await aio_os.path.exists(dest_path):
+        await aio_os.makedirs(dest_path, exist_ok=True)
+
+    if await aio_os.path.exists(file_path):
+        progress.console.print(f"[yellow]{filename} 已经存在[/yellow]")
         return
-
-    if callable(url):
-        url = url()

-
-
-
-    if os.path.exists(tmp_file_path):
-        resume_from = os.path.getsize(tmp_file_path)
-
-    if resume_from:
-        headers['Range'] = f'bytes={resume_from}-'
+    block_size = 8192
+    attempts_left = retry_times + 1
+    task_id = None

     try:
-
-
+        while attempts_left > 0:
+            attempts_left -= 1

-
+            resume_from = (await aio_os.stat(filename_downloading)).st_size if await aio_os.path.exists(filename_downloading) else 0

-
-
-
-
-
-
+            if resume_from:
+                headers['Range'] = f'bytes={resume_from}-'
+
+            try:
+                async with semaphore:
+                    current_url = await fetch_url(url)
+                    async with session.get(url=current_url, headers=headers) as r:
+                        r.raise_for_status()
+
+                        total_size_in_bytes = int(r.headers.get('content-length', 0)) + resume_from
+
+                        if task_id is None:
+                            task_id = progress.add_task("download", filename=filename, total=total_size_in_bytes, completed=resume_from, status=STATUS.DOWNLOADING.value)
+                        else:
+                            progress.update(task_id, total=total_size_in_bytes, completed=resume_from, status=STATUS.DOWNLOADING.value, refresh=True)
+
+                        async with aiofiles.open(filename_downloading, 'ab') as f:
+                            async for chunk in r.content.iter_chunked(block_size):
+                                if chunk:
+                                    await f.write(chunk)
+                                    progress.update(task_id, advance=len(chunk))
+
+                break

-
-
-
-
-
+            except Exception as e:
+                if attempts_left > 0:
+                    if task_id is not None:
+                        progress.update(task_id, status=STATUS.RETRYING.value, refresh=True)
+                    if isinstance(e, ClientPayloadError):
+                        new_block_size = max(int(block_size * BLOCK_SIZE_REDUCTION_FACTOR), MIN_BLOCK_SIZE)
+                        if new_block_size < block_size:
+                            block_size = new_block_size
+                    await asyncio.sleep(3)
+                else:
+                    raise e
+
+        else:
+            raise IOError(f"Failed to download {filename} after {retry_times} retries.")
+
+        os.rename(filename_downloading, file_path)
+
     except Exception as e:
-
-
-        new_block_size = block_size
-        if isinstance(e, ChunkedEncodingError):
-            new_block_size = max(int(block_size * BLOCK_SIZE_REDUCTION_FACTOR), MIN_BLOCK_SIZE)
-
-        if retry_times > 0:
-            # 重试下载
-            tqdm.write(f"{prefix} Retry after 3 seconds...")
-            time.sleep(3) # 等待3秒后重试,避免触发限流
-            download_file(session, url, dest_path, filename, retry_times - 1, headers, callback, new_block_size)
-        else:
-            tqdm.write(f"{prefix} Meet max retry times, download failed.")
-            raise e
+        if task_id is not None:
+            progress.update(task_id, status=STATUS.FAILED.value, visible=False)

-
+    finally:
+        if await aio_os.path.exists(file_path):
+            if task_id is not None:
+                progress.update(task_id, status=STATUS.COMPLETED.value, visible=False)
+
+        if callback:
+            callback()
+
+async def download_file_multipart(
+    session: aiohttp.ClientSession,
+    semaphore: asyncio.Semaphore,
+    progress: Progress,
+    url: Union[str, Callable[[], str], Callable[[], Awaitable[str]]],
+    dest_path: str,
+    filename: str,
+    retry_times: int = 3,
+    chunk_size_mb: int = 10,
+    headers: Optional[dict] = None,
+    callback: Optional[Callable] = None,
+):
     """
-
+    下载文件
+
+    :param session: aiohttp.ClientSession 对象
+    :param semaphore: 控制并发的信号量
+    :param progress: 进度条对象
+    :param url: 下载链接或者其 Supplier
+    :param dest_path: 目标路径
+    :param filename: 文件名
+    :param retry_times: 重试次数
+    :param headers: 请求头
+    :param callback: 下载完成后的回调函数
     """
-
+    if headers is None:
+        headers = {}
+
+    file_path = os.path.join(dest_path, filename)
+    filename_downloading = f'{file_path}.downloading'
+
+    if not await aio_os.path.exists(dest_path):
+        await aio_os.makedirs(dest_path, exist_ok=True)
+
+    if await aio_os.path.exists(file_path):
+        progress.console.print(f"[blue]{filename} 已经存在[/blue]")
+        return

+    part_paths = []
+    task_id = None
+    try:
+        current_url = await fetch_url(url)

-
+        async with session.head(current_url, headers=headers, allow_redirects=True) as response:
+            response.raise_for_status()
+            total_size = int(response.headers['Content-Length'])

-
-
-
-
-
-
-
-
-
-
-
-
+        chunk_size = chunk_size_mb * 1024 * 1024
+        num_chunks = math.ceil(total_size / chunk_size)
+
+        tasks = []
+
+        resumed_size = 0
+        for i in range(num_chunks):
+            part_path = os.path.join(dest_path, f"{filename}.{i + 1:03d}.downloading")
+            part_paths.append(part_path)
+            if await aio_os.path.exists(part_path):
+                resumed_size += (await aio_os.stat(part_path)).st_size
+
+        task_id = progress.add_task("download", filename=filename, status=STATUS.WAITING.value, total=total_size, completed=resumed_size)
+
+        for i, start in enumerate(range(0, total_size, chunk_size)):
+            end = min(start + chunk_size - 1, total_size - 1)
+
+            task = _download_part(
+                session=session,
+                semaphore=semaphore,
+                url=current_url,
+                start=start,
+                end=end,
+                part_path=part_paths[i],
+                progress=progress,
+                task_id=task_id,
+                headers=headers,
+                retry_times=retry_times
+            )
+            tasks.append(task)
+
+        await asyncio.gather(*tasks)
+
+        progress.update(task_id, status=STATUS.MERGING.value, refresh=True)
+        await _merge_parts(part_paths, filename_downloading)
+
+        os.rename(filename_downloading, file_path)
+    except Exception as e:
+        if task_id is not None:
+            progress.update(task_id, status=STATUS.FAILED.value, visible=False)

-
-
-
+    finally:
+        if await aio_os.path.exists(file_path):
+            if task_id is not None:
+                progress.update(task_id, status=STATUS.COMPLETED.value, completed=total_size, refresh=True)

-
-
-
-
+        cleanup_tasks = [aio_os.remove(p) for p in part_paths if await aio_os.path.exists(p)]
+        if cleanup_tasks:
+            await asyncio.gather(*cleanup_tasks)
+        if callback:
+            callback()

-
+async def _download_part(
+    session: aiohttp.ClientSession,
+    semaphore: asyncio.Semaphore,
+    url: str,
+    start: int,
+    end: int,
+    part_path: str,
+    progress: Progress,
+    task_id,
+    headers: Optional[dict] = None,
+    retry_times: int = 3
+):
+    if headers is None:
+        headers = {}
+
+    local_headers = headers.copy()
+    block_size = 8192
+    attempts_left = retry_times + 1
+
+    while attempts_left > 0:
+        attempts_left -= 1
+
+        try:
+            resume_from = (await aio_os.path.getsize(part_path)) if await aio_os.path.exists(part_path) else 0
+
+            if resume_from >= (end - start + 1):
+                return
+
+            current_start = start + resume_from
+            local_headers['Range'] = f'bytes={current_start}-{end}'
+
+            async with semaphore:
+                async with session.get(url, headers=local_headers) as response:
+                    response.raise_for_status()

-
-
-        return function_cache[func][key]
+                    if progress.tasks[task_id].fields.get("status") != STATUS.DOWNLOADING.value:
+                        progress.update(task_id, status=STATUS.DOWNLOADING.value, refresh=True)

-
+                    async with aiofiles.open(part_path, 'ab') as f:
+                        async for chunk in response.content.iter_chunked(block_size):
+                            if chunk:
+                                await f.write(chunk)
+                                progress.update(task_id, advance=len(chunk))
+            return
+        except Exception as e:
+            if attempts_left > 0:
+                await asyncio.sleep(3)
+            else:
+                # console.print(f"[red]分片 {os.path.basename(part_path)} 下载失败: {e}[/red]")
+                raise
+
+async def _merge_parts(part_paths: list[str], final_path: str):
+    async with aiofiles.open(final_path, 'wb') as final_file:
+        try:
+            for part_path in part_paths:
+                async with aiofiles.open(part_path, 'rb') as part_file:
+                    while True:
+                        chunk = await part_file.read(8192)
+                        if not chunk:
+                            break
+                        await final_file.write(chunk)
+        except Exception as e:
+            if aio_os.path.exists(final_path):
+                await aio_os.remove(final_path)
+            raise e

-def clear_cache(func):
-    assert hasattr(func, "__wrapped__"), "Function is not wrapped"
-    global function_cache

-    wrapped = func.__wrapped__

-
-
+def safe_filename(name: str) -> str:
+    """
+    替换非法文件名字符为下划线
+    """
+    return re.sub(r'[\\/:*?"<>|]', '_', name)
+
+async def fetch_url(url: Union[str, Callable[[], str], Callable[[], Awaitable[str]]], retry_times: int = 3) -> str:
+    while retry_times >= 0:
+        try:
+            if callable(url):
+                result = url()
+                if asyncio.iscoroutine(result) or isinstance(result, Awaitable):
+                    return await result
+                return result
+            elif isinstance(url, str):
+                return url
+        except Exception as e:
+            retry_times -= 1
+            if retry_times < 0:
+                raise e
+            await asyncio.sleep(2)
+    raise RuntimeError("Max retries exceeded")
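For reference, the new download_file_multipart coroutine leaves the session, concurrency limit, and progress display to the caller. The sketch below shows one way to drive it; the Progress column layout, the URL, and the paths are illustrative assumptions, not part of this diff. The url argument may also be a sync or async callable ("Supplier") that fetch_url resolves before the HEAD request.

    import asyncio

    import aiohttp
    from rich.progress import Progress, TextColumn, BarColumn, DownloadColumn

    from kmdr.module.downloader.utils import download_file_multipart

    async def main():
        # One semaphore shared by every _download_part task caps concurrency.
        semaphore = asyncio.Semaphore(4)

        # Task rows carry custom "filename" and "status" fields, so the
        # columns reference them explicitly (assumed layout).
        progress = Progress(
            TextColumn("{task.fields[filename]}"),
            BarColumn(),
            DownloadColumn(),
            TextColumn("{task.fields[status]}"),
        )

        async with aiohttp.ClientSession() as session:
            with progress:
                await download_file_multipart(
                    session=session,
                    semaphore=semaphore,
                    progress=progress,
                    url="https://example.com/file.zip",  # hypothetical URL
                    dest_path="./downloads",
                    filename="file.zip",
                    chunk_size_mb=10,
                )

    asyncio.run(main())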

kmdr/module/lister/BookUrlLister.py
CHANGED
@@ -10,6 +10,7 @@ class BookUrlLister(Lister):
         super().__init__(*args, **kwargs)
         self._book_url = book_url

-    def list(self) -> tuple[BookInfo, list[VolInfo]]:
-
-
+    async def list(self) -> tuple[BookInfo, list[VolInfo]]:
+        with self._console.status("获取书籍信息..."):
+            book_info, volumes = await extract_book_info_and_volumes(self._session, self._book_url)
+        return book_info, volumes

kmdr/module/lister/FollowedBookLister.py
CHANGED
@@ -1,6 +1,10 @@
+import asyncio
 from bs4 import BeautifulSoup
+from rich.table import Table
+from rich.prompt import IntPrompt

 from kmdr.core import Lister, LISTERS, BookInfo, VolInfo
+from kmdr.core.utils import async_retry

 from .utils import extract_book_info_and_volumes

@@ -12,27 +16,60 @@ class FollowedBookLister(Lister):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)

-    def list(self) -> tuple[BookInfo, list[VolInfo]]:
-
-
-
-
-
+    async def list(self) -> tuple[BookInfo, list[VolInfo]]:
+        books = []
+
+        with self._console.status("正在获取关注列表..."):
+            books = await self._list_followed_books()
+
+        if not books:
+            self._console.print("[yellow]关注列表为空。[/yellow]")
+            exit(0)
+
+        table = Table(title="关注的书籍列表", show_header=True, header_style="bold blue")
+        table.add_column("序号", style="dim", width=4, justify="center")
+        table.add_column("书名", style="cyan", no_wrap=True)
+        table.add_column("作者", style="green")
+        table.add_column("最后更新", style="yellow")
+        table.add_column("状态", style="blue")
+
+        for idx, book in enumerate(books):
+            table.add_row(
+                str(idx + 1),
+                book.name,
+                book.author,
+                book.last_update,
+                book.status
+            )
+
+        self._console.print(table)
+
+        valid_choices = [str(i) for i in range(1, len(books) + 1)]
+
+        chosen_idx = await asyncio.to_thread(
+            IntPrompt.ask,
+            "请选择要下载的书籍序号",
+            choices=valid_choices,
+            show_choices=False,
+            show_default=False
+        )
+
+        book_to_download = books[chosen_idx - 1]
+
+        with self._console.status(f"正在获取 '{book_to_download.name}' 的详细信息..."):
+            book_info, volumes = await extract_book_info_and_volumes(self._session, book_to_download.url, book_to_download)
+        return book_info, volumes
+
+    @async_retry()
+    async def _list_followed_books(self) -> 'list[BookInfo]':
+        async with self._session.get(MY_FOLLOW_URL) as response:
+            response.raise_for_status()
+            html_text = await response.text()

-
-
-
+        # 如果后续有性能问题,可以先考虑使用 lxml 进行解析
+        followed_rows = BeautifulSoup(html_text, 'html.parser').find_all('tr', style='height:36px;')
+        mapped = map(lambda x: x.find_all('td'), followed_rows)
+        filtered = filter(lambda x: '書名' not in x[1].text, mapped)
+        books = list(map(lambda x: BookInfo(name=x[1].text.strip(), url=x[1].find('a')['href'], author=x[2].text.strip(), status=x[-1].text.strip(), last_update=x[-2].text.strip(), id=''), filtered))

-
-    while not choosed.isdigit() or int(choosed) > len(books) or int(choosed) < 1:
-        choosed = input("choose a book to download: ")
-    choosed = int(choosed) - 1
-    book = books[choosed]
-
-    book_info, volumes = extract_book_info_and_volumes(self._session, book.url)
-    book_info.author = book.author
-    book_info.status = book.status
-    book_info.last_update = book.last_update
-
-    return book_info, volumes
-
+        return books
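A note on the prompt change above: IntPrompt.ask blocks on stdin, so the rewritten lister moves it off the event loop with asyncio.to_thread, keeping other coroutines responsive while the user types. A standalone sketch of the pattern (the prompt text and choices are illustrative):

    import asyncio

    from rich.prompt import IntPrompt

    async def demo():
        # The blocking prompt runs in a worker thread; coroutines on the
        # event loop keep running until the user answers.
        chosen = await asyncio.to_thread(
            IntPrompt.ask,
            "请选择要下载的书籍序号",
            choices=["1", "2", "3"],
            show_choices=False,
        )
        print(chosen)

    asyncio.run(demo())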
kmdr/module/lister/utils.py
CHANGED
@@ -1,25 +1,33 @@
-from requests import Session
 from bs4 import BeautifulSoup
 import re
+from typing import Optional
+
+from aiohttp import ClientSession as Session

 from kmdr.core import BookInfo, VolInfo, VolumeType
+from kmdr.core.utils import async_retry

-
+@async_retry()
+async def extract_book_info_and_volumes(session: Session, url: str, book_info: Optional[BookInfo] = None) -> tuple[BookInfo, list[VolInfo]]:
     """
     从指定的书籍页面 URL 中提取书籍信息和卷信息。

-    :param session: 已经建立的
+    :param session: 已经建立的 HTTP 会话。
     :param url: 书籍页面的 URL。
     :return: 包含书籍信息和卷信息的元组。
     """
-
+    async with session.get(url) as response:
+        response.raise_for_status()
+
+        # 如果后续有性能问题,可以先考虑使用 lxml 进行解析
+        book_page = BeautifulSoup(await response.text(), 'html.parser')

-
-
+    book_info = __extract_book_info(url, book_page, book_info)
+    volumes = await __extract_volumes(session, book_page)

-
+    return book_info, volumes

-def __extract_book_info(url: str, book_page: BeautifulSoup) -> BookInfo:
+def __extract_book_info(url: str, book_page: BeautifulSoup, book_info: Optional[BookInfo]) -> BookInfo:
     book_name = book_page.find('font', class_='text_bglight_big').text

     id = book_page.find('input', attrs={'name': 'bookid'})['value']

@@ -28,35 +36,38 @@ def __extract_book_info(url: str, book_page: BeautifulSoup) -> BookInfo:
         id = id,
         name = book_name,
         url = url,
-        author = '',
-        status = '',
-        last_update = ''
+        author = book_info.author if book_info else '',
+        status = book_info.status if book_info else '',
+        last_update = book_info.last_update if book_info else ''
     )


-def __extract_volumes(session: Session, book_page: BeautifulSoup) -> list[VolInfo]:
+async def __extract_volumes(session: Session, book_page: BeautifulSoup) -> list[VolInfo]:
     script = book_page.find_all('script', language="javascript")[-1].text

     pattern = re.compile(r'/book_data.php\?h=\w+')
     book_data_url = pattern.search(script).group(0)

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    async with session.get(url = f"https://kox.moe{book_data_url}") as response:
+        response.raise_for_status()
+
+        book_data = (await response.text()).split('\n')
+        book_data = filter(lambda x: 'volinfo' in x, book_data)
+        book_data = map(lambda x: x.split("\"")[1], book_data)
+        book_data = map(lambda x: x[8:].split(','), book_data)
+
+        volume_data = list(map(lambda x: VolInfo(
+            id = x[0],
+            extra_info = __extract_extra_info(x[1]),
+            is_last = x[2] == '1',
+            vol_type = __extract_volume_type(x[3]),
+            index = int(x[4]),
+            pages = int(x[6]),
+            name = x[5],
+            size = float(x[11])), book_data))
+        volume_data: list[VolInfo] = volume_data

-
+    return volume_data

 def __extract_extra_info(value: str) -> str:
     if value == '0':
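Both listers and the helper above now guard their network calls with @async_retry from kmdr/core/utils.py, whose body is not shown in this section. A minimal decorator consistent with how it is called here might look like the following; the parameter names and defaults are assumptions:

    import asyncio
    import functools

    def async_retry(retry_times: int = 3, delay: float = 2.0):
        # Hypothetical sketch: re-invoke an async callable on any exception,
        # sleeping between attempts and re-raising once attempts run out.
        def decorator(func):
            @functools.wraps(func)
            async def wrapper(*args, **kwargs):
                attempts_left = retry_times + 1
                while True:
                    attempts_left -= 1
                    try:
                        return await func(*args, **kwargs)
                    except Exception:
                        if attempts_left <= 0:
                            raise
                        await asyncio.sleep(delay)
            return wrapper
        return decorator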

kmdr/module/picker/ArgsFilterPicker.py
CHANGED
@@ -27,7 +27,7 @@ class ArgsFilterPicker(Picker):
         volume_data = filter(lambda x: x.index in choice, volume_data)

         if self._max_size is not None:
-            volume_data = filter(lambda x: x.size <= self._max_size, volume_data)
+            volume_data = filter(lambda x: self._max_size is None or x.size <= self._max_size, volume_data)

         if self._limit is not None:
             return list(volume_data)[:self._limit]