kmoe-manga-downloader 1.1.2__py3-none-any.whl → 1.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. kmdr/core/__init__.py +5 -3
  2. kmdr/core/bases.py +61 -87
  3. kmdr/core/constants.py +79 -0
  4. kmdr/core/context.py +40 -0
  5. kmdr/core/defaults.py +92 -33
  6. kmdr/core/error.py +10 -2
  7. kmdr/core/session.py +16 -0
  8. kmdr/core/structure.py +2 -0
  9. kmdr/core/utils.py +41 -40
  10. kmdr/main.py +24 -15
  11. kmdr/module/__init__.py +5 -5
  12. kmdr/module/authenticator/CookieAuthenticator.py +14 -7
  13. kmdr/module/authenticator/LoginAuthenticator.py +37 -23
  14. kmdr/module/authenticator/__init__.py +2 -0
  15. kmdr/module/authenticator/utils.py +60 -46
  16. kmdr/module/configurer/BaseUrlUpdator.py +16 -0
  17. kmdr/module/configurer/ConfigClearer.py +7 -2
  18. kmdr/module/configurer/ConfigUnsetter.py +2 -2
  19. kmdr/module/configurer/OptionLister.py +24 -5
  20. kmdr/module/configurer/OptionSetter.py +2 -2
  21. kmdr/module/configurer/__init__.py +5 -0
  22. kmdr/module/configurer/option_validate.py +14 -12
  23. kmdr/module/downloader/DirectDownloader.py +18 -6
  24. kmdr/module/downloader/ReferViaDownloader.py +36 -24
  25. kmdr/module/downloader/__init__.py +2 -0
  26. kmdr/module/downloader/download_utils.py +322 -0
  27. kmdr/module/downloader/misc.py +62 -0
  28. kmdr/module/lister/BookUrlLister.py +4 -3
  29. kmdr/module/lister/FollowedBookLister.py +62 -24
  30. kmdr/module/lister/__init__.py +2 -0
  31. kmdr/module/lister/utils.py +49 -29
  32. kmdr/module/picker/ArgsFilterPicker.py +1 -1
  33. kmdr/module/picker/DefaultVolPicker.py +34 -5
  34. kmdr/module/picker/__init__.py +2 -0
  35. {kmoe_manga_downloader-1.1.2.dist-info → kmoe_manga_downloader-1.2.1.dist-info}/METADATA +48 -23
  36. kmoe_manga_downloader-1.2.1.dist-info/RECORD +43 -0
  37. kmdr/module/downloader/utils.py +0 -157
  38. kmoe_manga_downloader-1.1.2.dist-info/RECORD +0 -33
  39. {kmoe_manga_downloader-1.1.2.dist-info → kmoe_manga_downloader-1.2.1.dist-info}/WHEEL +0 -0
  40. {kmoe_manga_downloader-1.1.2.dist-info → kmoe_manga_downloader-1.2.1.dist-info}/entry_points.txt +0 -0
  41. {kmoe_manga_downloader-1.1.2.dist-info → kmoe_manga_downloader-1.2.1.dist-info}/licenses/LICENSE +0 -0
  42. {kmoe_manga_downloader-1.1.2.dist-info → kmoe_manga_downloader-1.2.1.dist-info}/top_level.txt +0 -0
kmdr/module/downloader/download_utils.py (new file)
@@ -0,0 +1,322 @@
+ import asyncio
+ import os
+ import re
+ import math
+ from typing import Callable, Optional, Union, Awaitable
+
+ from typing_extensions import deprecated
+
+ import aiohttp
+ import aiofiles
+ import aiofiles.os as aio_os
+ from rich.progress import Progress
+ from aiohttp.client_exceptions import ClientPayloadError
+
+ from .misc import STATUS, StateManager
+
+ BLOCK_SIZE_REDUCTION_FACTOR = 0.75
+ MIN_BLOCK_SIZE = 2048
+
+
+ @deprecated("Use 'download_file_multipart' instead")
+ async def download_file(
+     session: aiohttp.ClientSession,
+     semaphore: asyncio.Semaphore,
+     progress: Progress,
+     url: Union[str, Callable[[], str], Callable[[], Awaitable[str]]],
+     dest_path: str,
+     filename: str,
+     retry_times: int = 3,
+     headers: Optional[dict] = None,
+     callback: Optional[Callable] = None,
+ ):
+     """
+     Download a file.
+
+     :param session: aiohttp.ClientSession instance
+     :param semaphore: semaphore limiting concurrency
+     :param progress: progress bar object
+     :param url: download URL, or a supplier that returns one
+     :param dest_path: destination directory
+     :param filename: target file name
+     :param retry_times: number of retries
+     :param headers: request headers
+     :param callback: callback invoked after the download completes
+     """
+     if headers is None:
+         headers = {}
+
+     file_path = os.path.join(dest_path, filename)
+     filename_downloading = f'{file_path}.downloading'
+
+     if not await aio_os.path.exists(dest_path):
+         await aio_os.makedirs(dest_path, exist_ok=True)
+
+     if await aio_os.path.exists(file_path):
+         progress.console.print(f"[yellow]{filename} already exists[/yellow]")
+         return
+
+     block_size = 8192
+     attempts_left = retry_times + 1
+     task_id = None
+
+     try:
+         while attempts_left > 0:
+             attempts_left -= 1
+
+             resume_from = (await aio_os.stat(filename_downloading)).st_size if await aio_os.path.exists(filename_downloading) else 0
+
+             if resume_from:
+                 headers['Range'] = f'bytes={resume_from}-'
+
+             try:
+                 async with semaphore:
+                     current_url = await fetch_url(url)
+                     async with session.get(url=current_url, headers=headers) as r:
+                         r.raise_for_status()
+
+                         total_size_in_bytes = int(r.headers.get('content-length', 0)) + resume_from
+
+                         if task_id is None:
+                             task_id = progress.add_task("download", filename=filename, total=total_size_in_bytes, completed=resume_from, status=STATUS.DOWNLOADING.value)
+                         else:
+                             progress.update(task_id, total=total_size_in_bytes, completed=resume_from, status=STATUS.DOWNLOADING.value, refresh=True)
+
+                         async with aiofiles.open(filename_downloading, 'ab') as f:
+                             async for chunk in r.content.iter_chunked(block_size):
+                                 if chunk:
+                                     await f.write(chunk)
+                                     progress.update(task_id, advance=len(chunk))
+
+                 break
+
+             except Exception as e:
+                 if attempts_left > 0:
+                     if task_id is not None:
+                         progress.update(task_id, status=STATUS.RETRYING.value, refresh=True)
+                     if isinstance(e, ClientPayloadError):
+                         new_block_size = max(int(block_size * BLOCK_SIZE_REDUCTION_FACTOR), MIN_BLOCK_SIZE)
+                         if new_block_size < block_size:
+                             block_size = new_block_size
+                     await asyncio.sleep(3)
+                 else:
+                     raise e
+
+         else:
+             raise IOError(f"Failed to download {filename} after {retry_times} retries.")
+
+         os.rename(filename_downloading, file_path)
+
+     except Exception:
+         if task_id is not None:
+             progress.update(task_id, status=STATUS.FAILED.value, visible=False)
+
+     finally:
+         if await aio_os.path.exists(file_path):
+             if task_id is not None:
+                 progress.update(task_id, status=STATUS.COMPLETED.value, visible=False)
+
+             if callback:
+                 callback()
+
+ async def download_file_multipart(
+     session: aiohttp.ClientSession,
+     semaphore: asyncio.Semaphore,
+     progress: Progress,
+     url: Union[str, Callable[[], str], Callable[[], Awaitable[str]]],
+     dest_path: str,
+     filename: str,
+     retry_times: int = 3,
+     chunk_size_mb: int = 10,
+     headers: Optional[dict] = None,
+     callback: Optional[Callable] = None,
+ ):
+     """
+     Download a file in multiple parts.
+
+     :param session: aiohttp.ClientSession instance
+     :param semaphore: semaphore limiting concurrency
+     :param progress: progress bar object
+     :param url: download URL, or a supplier that returns one
+     :param dest_path: destination directory
+     :param filename: target file name
+     :param retry_times: number of retries
+     :param chunk_size_mb: size of each part in MB
+     :param headers: request headers
+     :param callback: callback invoked after the download completes
+     """
+     if headers is None:
+         headers = {}
+
+     file_path = os.path.join(dest_path, filename)
+     filename_downloading = f'{file_path}.downloading'
+
+     if not await aio_os.path.exists(dest_path):
+         await aio_os.makedirs(dest_path, exist_ok=True)
+
+     if await aio_os.path.exists(file_path):
+         progress.console.print(f"[blue]{filename} already exists[/blue]")
+         return
+
+     part_paths = []
+     part_expected_sizes = []
+     task_id = None
+     try:
+         current_url = await fetch_url(url)
+
+         async with session.head(current_url, headers=headers, allow_redirects=True) as response:
+             response.raise_for_status()
+             total_size = int(response.headers['Content-Length'])
+
+         chunk_size = chunk_size_mb * 1024 * 1024
+         num_chunks = math.ceil(total_size / chunk_size)
+
+         tasks = []
+
+         resumed_size = 0
+         for i in range(num_chunks):
+             part_path = os.path.join(dest_path, f"{filename}.{i + 1:03d}.downloading")
+             part_paths.append(part_path)
+             if await aio_os.path.exists(part_path):
+                 resumed_size += (await aio_os.stat(part_path)).st_size
+
+         task_id = progress.add_task("download", filename=filename, status=STATUS.WAITING.value, total=total_size, completed=resumed_size)
+         state_manager = StateManager(progress=progress, task_id=task_id)
+
+         for i, start in enumerate(range(0, total_size, chunk_size)):
+             end = min(start + chunk_size - 1, total_size - 1)
+             part_expected_sizes.append(end - start + 1)
+
+             task = _download_part(
+                 session=session,
+                 semaphore=semaphore,
+                 url=current_url,
+                 start=start,
+                 end=end,
+                 part_path=part_paths[i],
+                 state_manager=state_manager,
+                 headers=headers,
+                 retry_times=retry_times
+             )
+             tasks.append(task)
+
+         await asyncio.gather(*tasks)
+
+         assert len(part_paths) == len(part_expected_sizes)
+         results = await asyncio.gather(*[_validate_part(part_paths[i], part_expected_sizes[i]) for i in range(num_chunks)])
+         if all(results):
+             await state_manager.request_status_update(part_id=StateManager.PARENT_ID, status=STATUS.MERGING)
+             await _merge_parts(part_paths, filename_downloading)
+             os.rename(filename_downloading, file_path)
+         else:
+             # If any part fails validation, treat the whole download as failed.
+             await state_manager.request_status_update(part_id=StateManager.PARENT_ID, status=STATUS.FAILED)
+
+     finally:
+         if await aio_os.path.exists(file_path):
+             if task_id is not None:
+                 await state_manager.request_status_update(part_id=StateManager.PARENT_ID, status=STATUS.COMPLETED)
+
+             cleanup_tasks = [aio_os.remove(p) for p in part_paths if await aio_os.path.exists(p)]
+             if cleanup_tasks:
+                 await asyncio.gather(*cleanup_tasks)
+             if callback:
+                 callback()
+         else:
+             if task_id is not None:
+                 await state_manager.request_status_update(part_id=StateManager.PARENT_ID, status=STATUS.FAILED)
+
+ async def _download_part(
+     session: aiohttp.ClientSession,
+     semaphore: asyncio.Semaphore,
+     url: str,
+     start: int,
+     end: int,
+     part_path: str,
+     state_manager: StateManager,
+     headers: Optional[dict] = None,
+     retry_times: int = 3
+ ):
+     if headers is None:
+         headers = {}
+
+     local_headers = headers.copy()
+     block_size = 8192
+     attempts_left = retry_times + 1
+
+     while attempts_left > 0:
+         attempts_left -= 1
+
+         try:
+             resume_from = (await aio_os.path.getsize(part_path)) if await aio_os.path.exists(part_path) else 0
+
+             if resume_from >= (end - start + 1):
+                 return
+
+             current_start = start + resume_from
+             local_headers['Range'] = f'bytes={current_start}-{end}'
+
+             async with semaphore:
+                 async with session.get(url, headers=local_headers) as response:
+                     response.raise_for_status()
+
+                     await state_manager.request_status_update(part_id=start, status=STATUS.DOWNLOADING)
+
+                     async with aiofiles.open(part_path, 'ab') as f:
+                         async for chunk in response.content.iter_chunked(block_size):
+                             if chunk:
+                                 await f.write(chunk)
+                                 state_manager.advance(len(chunk))
+             return
+         except Exception as e:
+             if attempts_left > 0:
+                 await asyncio.sleep(3)
+                 await state_manager.request_status_update(part_id=start, status=STATUS.WAITING)
+             else:
+                 # console.print(f"[red]Part {os.path.basename(part_path)} failed to download: {e}[/red]")
+                 await state_manager.request_status_update(part_id=start, status=STATUS.PARTIALLY_FAILED)
+
+ async def _validate_part(part_path: str, expected_size: int) -> bool:
+     if not await aio_os.path.exists(part_path):
+         return False
+     actual_size = await aio_os.path.getsize(part_path)
+     return actual_size == expected_size
+
+ async def _merge_parts(part_paths: list[str], final_path: str):
+     async with aiofiles.open(final_path, 'wb') as final_file:
+         try:
+             for part_path in part_paths:
+                 async with aiofiles.open(part_path, 'rb') as part_file:
+                     while True:
+                         chunk = await part_file.read(8192)
+                         if not chunk:
+                             break
+                         await final_file.write(chunk)
+         except Exception as e:
+             if await aio_os.path.exists(final_path):
+                 await aio_os.remove(final_path)
+             raise e
+
+
+
+ def safe_filename(name: str) -> str:
+     """
+     Replace characters that are illegal in file names with underscores.
+     """
+     return re.sub(r'[\\/:*?"<>|]', '_', name)
+
+ async def fetch_url(url: Union[str, Callable[[], str], Callable[[], Awaitable[str]]], retry_times: int = 3) -> str:
+     while retry_times >= 0:
+         try:
+             if callable(url):
+                 result = url()
+                 if asyncio.iscoroutine(result) or isinstance(result, Awaitable):
+                     return await result
+                 return result
+             elif isinstance(url, str):
+                 return url
+         except Exception as e:
+             retry_times -= 1
+             if retry_times < 0:
+                 raise e
+             await asyncio.sleep(2)
+     raise RuntimeError("Max retries exceeded")
kmdr/module/downloader/misc.py (new file)
@@ -0,0 +1,62 @@
+ from enum import Enum
+ import asyncio
+
+ from rich.progress import Progress, TaskID
+
+
+ class STATUS(Enum):
+     WAITING = '[blue]Waiting[/blue]'
+     RETRYING = '[yellow]Retrying[/yellow]'
+     DOWNLOADING = '[cyan]Downloading[/cyan]'
+     MERGING = '[magenta]Merging[/magenta]'
+     COMPLETED = '[green]Completed[/green]'
+     PARTIALLY_FAILED = '[red]Part failed[/red]'
+     FAILED = '[red]Failed[/red]'
+
+     @property
+     def order(self) -> int:
+         order_mapping = {
+             STATUS.WAITING: 1,
+             STATUS.RETRYING: 2,
+             STATUS.DOWNLOADING: 3,
+             STATUS.MERGING: 4,
+             STATUS.COMPLETED: 5,
+             STATUS.PARTIALLY_FAILED: 6,
+             STATUS.FAILED: 7,
+         }
+         return order_mapping[self]
+
+     def __lt__(self, other):
+         if not isinstance(other, STATUS):
+             return NotImplemented
+         return self.order < other.order
+
+
+ class StateManager:
+
+     def __init__(self, progress: Progress, task_id: TaskID):
+         self._part_states: dict[int, STATUS] = {}
+         self._progress = progress
+         self._task_id = task_id
+         self._current_status = STATUS.WAITING
+
+         self._lock = asyncio.Lock()
+
+     PARENT_ID: int = -1
+
+     def advance(self, advance: int):
+         self._progress.update(self._task_id, advance=advance)
+
+     def _update_status(self):
+         if not self._part_states:
+             return
+
+         highest_status = max(self._part_states.values())
+         if highest_status != self._current_status:
+             self._current_status = highest_status
+             self._progress.update(self._task_id, status=highest_status.value, refresh=True)
+
+     async def request_status_update(self, part_id: int, status: STATUS):
+         async with self._lock:
+             self._part_states[part_id] = status
+             self._update_status()
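The ordering is what lets StateManager collapse many part states into one display state: each _download_part reports under its start offset (the parent uses the sentinel PARENT_ID), and the row simply shows max() of everything reported, which works through __lt__ reflection. A quick illustration with hypothetical part offsets:

    from kmdr.module.downloader.misc import STATUS

    parts = {0: STATUS.WAITING, 10 * 1024 * 1024: STATUS.DOWNLOADING}
    print(max(parts.values()))  # STATUS.DOWNLOADING — the highest-ordered state wins

    parts[20 * 1024 * 1024] = STATUS.FAILED
    print(max(parts.values()))  # STATUS.FAILED now dominates the row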
kmdr/module/lister/BookUrlLister.py
@@ -10,6 +10,7 @@ class BookUrlLister(Lister):
          super().__init__(*args, **kwargs)
          self._book_url = book_url
 
-     def list(self) -> tuple[BookInfo, list[VolInfo]]:
-         book_info, volumes = extract_book_info_and_volumes(self._session, self._book_url)
-         return book_info, volumes
+     async def list(self) -> tuple[BookInfo, list[VolInfo]]:
+         with self._console.status("Fetching book info..."):
+             book_info, volumes = await extract_book_info_and_volumes(self._session, self._book_url)
+             return book_info, volumes
kmdr/module/lister/FollowedBookLister.py
@@ -1,38 +1,76 @@
+ import asyncio
+ from urllib.parse import urljoin
+
  from bs4 import BeautifulSoup
+ from rich.table import Table
+ from rich.prompt import IntPrompt
 
  from kmdr.core import Lister, LISTERS, BookInfo, VolInfo
+ from kmdr.core.utils import async_retry
+ from kmdr.core.constants import API_ROUTE
 
  from .utils import extract_book_info_and_volumes
 
- MY_FOLLOW_URL = 'https://kox.moe/myfollow.php'
-
  @LISTERS.register()
  class FollowedBookLister(Lister):
 
      def __init__(self, *args, **kwargs):
          super().__init__(*args, **kwargs)
 
-     def list(self) -> tuple[BookInfo, list[VolInfo]]:
-         followed_rows = BeautifulSoup(self._session.get(url = MY_FOLLOW_URL).text, 'html.parser').find_all('tr', style='height:36px;')
-         mapped = map(lambda x: x.find_all('td'), followed_rows)
-         filtered = filter(lambda x: '書名' not in x[1].text, mapped)
-         books = map(lambda x: BookInfo(name = x[1].text, url = x[1].find('a')['href'], author = x[2].text, status = x[-1].text, last_update = x[-2].text, id = ''), filtered)
-         books = list(books)
+     async def list(self) -> tuple[BookInfo, list[VolInfo]]:
+         books = []
+
+         with self._console.status("Fetching the follow list..."):
+             books = await self._list_followed_books()
+
+         if not books:
+             self._console.print("[yellow]Your follow list is empty.[/yellow]")
+             exit(0)
+
+         table = Table(title="Followed books", show_header=True, header_style="bold blue")
+         table.add_column("No.", style="dim", width=4, justify="center")
+         table.add_column("Title", style="cyan", no_wrap=True)
+         table.add_column("Author", style="green")
+         table.add_column("Last updated", style="yellow")
+         table.add_column("Status", style="blue")
+
+         for idx, book in enumerate(books):
+             table.add_row(
+                 str(idx + 1),
+                 book.name,
+                 book.author,
+                 book.last_update,
+                 book.status
+             )
+
+         self._console.print(table)
+
+         valid_choices = [str(i) for i in range(1, len(books) + 1)]
+
+         chosen_idx = await asyncio.to_thread(
+             IntPrompt.ask,
+             "Enter the number of the book to download",
+             choices=valid_choices,
+             show_choices=False,
+             show_default=False
+         )
+
+         book_to_download = books[chosen_idx - 1]
+
+         with self._console.status(f"Fetching details for '{book_to_download.name}'..."):
+             book_info, volumes = await extract_book_info_and_volumes(self._session, book_to_download.url, book_to_download)
+             return book_info, volumes
+
+     @async_retry()
+     async def _list_followed_books(self) -> 'list[BookInfo]':
+         async with self._session.get(urljoin(self._base_url, API_ROUTE.MY_FOLLOW)) as response:
+             response.raise_for_status()
+             html_text = await response.text()
 
-         print("\tLast updated\tTitle")
-         for v in range(len(books)):
-             print(f"[{v + 1}]\t{books[v].last_update}\t{books[v].name}")
+         # If parsing ever becomes a bottleneck, try switching to lxml first.
+         followed_rows = BeautifulSoup(html_text, 'html.parser').find_all('tr', style='height:36px;')
+         mapped = map(lambda x: x.find_all('td'), followed_rows)
+         filtered = filter(lambda x: '書名' not in x[1].text, mapped)
+         books = list(map(lambda x: BookInfo(name=x[1].text.strip(), url=x[1].find('a')['href'], author=x[2].text.strip(), status=x[-1].text.strip(), last_update=x[-2].text.strip(), id=''), filtered))
 
-         choosed = input("choose a book to download: ")
-         while not choosed.isdigit() or int(choosed) > len(books) or int(choosed) < 1:
-             choosed = input("choose a book to download: ")
-         choosed = int(choosed) - 1
-         book = books[choosed]
-
-         book_info, volumes = extract_book_info_and_volumes(self._session, book.url)
-         book_info.author = book.author
-         book_info.status = book.status
-         book_info.last_update = book.last_update
-
-         return book_info, volumes
-
+         return books
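Both listers now lean on @async_retry from kmdr.core.utils, whose body is not part of this excerpt (kmdr/core/utils.py shows +41 -40 in the file list above). As a rough mental model only, such a decorator typically has this shape; the signature, defaults, and retry policy below are assumptions, not the shipped implementation:

    import asyncio
    import functools


    def async_retry(times: int = 3, delay: float = 2.0):
        """Retry an async callable on any exception (illustrative sketch only)."""
        def decorator(func):
            @functools.wraps(func)
            async def wrapper(*args, **kwargs):
                for attempt in range(times + 1):
                    try:
                        return await func(*args, **kwargs)
                    except Exception:
                        if attempt == times:
                            raise  # out of attempts: surface the last error
                        await asyncio.sleep(delay)
            return wrapper
        return decorator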
kmdr/module/lister/__init__.py
@@ -0,0 +1,2 @@
+ from .BookUrlLister import BookUrlLister
+ from .FollowedBookLister import FollowedBookLister
kmdr/module/lister/utils.py
@@ -1,25 +1,42 @@
- from requests import Session
  from bs4 import BeautifulSoup
  import re
+ from typing import Optional
+ from urllib.parse import urljoin
+
+ from yarl import URL
+ from aiohttp import ClientSession as Session
 
  from kmdr.core import BookInfo, VolInfo, VolumeType
+ from kmdr.core.utils import async_retry
 
- def extract_book_info_and_volumes(session: Session, url: str) -> tuple[BookInfo, list[VolInfo]]:
+ @async_retry()
+ async def extract_book_info_and_volumes(session: Session, url: str, book_info: Optional[BookInfo] = None) -> tuple[BookInfo, list[VolInfo]]:
      """
      Extract book info and volume info from the given book page URL.
 
-     :param session: an established requests.Session instance.
+     :param session: an established HTTP session.
      :param url: URL of the book page.
      :return: a tuple of the book info and the volume list.
      """
-     book_page = BeautifulSoup(session.get(url).text, 'html.parser')
+     structured_url = URL(url)
+
+     if structured_url.path.startswith('/m/'):
+         # Strip the mobile path segment and normalize to the desktop URL,
+         # since the mobile page structure differs and can break parsing.
+         structured_url = structured_url.with_path(structured_url.path.replace('/m/', '', 1))
+
+     async with session.get(structured_url) as response:
+         response.raise_for_status()
+
+         # If parsing ever becomes a bottleneck, try switching to lxml first.
+         book_page = BeautifulSoup(await response.text(), 'html.parser')
 
-     book_info = __extract_book_info(url, book_page)
-     volumes = __extract_volumes(session, book_page)
+     book_info = __extract_book_info(url, book_page, book_info)
+     volumes = await __extract_volumes(session, url, book_page)
 
-     return book_info, volumes
+     return book_info, volumes
 
- def __extract_book_info(url: str, book_page: BeautifulSoup) -> BookInfo:
+ def __extract_book_info(url: str, book_page: BeautifulSoup, book_info: Optional[BookInfo]) -> BookInfo:
      book_name = book_page.find('font', class_='text_bglight_big').text
 
      id = book_page.find('input', attrs={'name': 'bookid'})['value']
@@ -28,35 +45,38 @@ def __extract_book_info(url: str, book_page: BeautifulSoup) -> BookInfo:
          id = id,
          name = book_name,
          url = url,
-         author = '',
-         status = '',
-         last_update = ''
+         author = book_info.author if book_info else '',
+         status = book_info.status if book_info else '',
+         last_update = book_info.last_update if book_info else ''
      )
 
 
- def __extract_volumes(session: Session, book_page: BeautifulSoup) -> list[VolInfo]:
+ async def __extract_volumes(session: Session, url: str, book_page: BeautifulSoup) -> list[VolInfo]:
      script = book_page.find_all('script', language="javascript")[-1].text
 
      pattern = re.compile(r'/book_data.php\?h=\w+')
      book_data_url = pattern.search(script).group(0)
 
-     book_data = session.get(url = f"https://kox.moe{book_data_url}").text.split('\n')
-     book_data = filter(lambda x: 'volinfo' in x, book_data)
-     book_data = map(lambda x: x.split("\"")[1], book_data)
-     book_data = map(lambda x: x[8:].split(','), book_data)
-
-     volume_data = list(map(lambda x: VolInfo(
-         id = x[0],
-         extra_info = __extract_extra_info(x[1]),
-         is_last = x[2] == '1',
-         vol_type = __extract_volume_type(x[3]),
-         index = int(x[4]),
-         pages = int(x[6]),
-         name = x[5],
-         size = float(x[11])), book_data))
-     volume_data: list[VolInfo] = volume_data
-
-     return volume_data
+     async with session.get(url = urljoin(url, book_data_url)) as response:
+         response.raise_for_status()
+
+         book_data = (await response.text()).split('\n')
+         book_data = filter(lambda x: 'volinfo' in x, book_data)
+         book_data = map(lambda x: x.split("\"")[1], book_data)
+         book_data = map(lambda x: x[8:].split(','), book_data)
+
+         volume_data = list(map(lambda x: VolInfo(
+             id = x[0],
+             extra_info = __extract_extra_info(x[1]),
+             is_last = x[2] == '1',
+             vol_type = __extract_volume_type(x[3]),
+             index = int(x[4]),
+             pages = int(x[6]),
+             name = x[5],
+             size = float(x[11])), book_data))
+         volume_data: list[VolInfo] = volume_data
+
+         return volume_data
 
  def __extract_extra_info(value: str) -> str:
      if value == '0':
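The mobile-to-desktop normalization above relies on yarl's path handling: str.replace drops the leading slash along with the '/m/' segment, and with_path() normalizes it back. For example (the concrete page path here is made up):

    from yarl import URL

    url = URL("https://kox.moe/m/c_12345.html")  # hypothetical mobile page URL
    print(url.path)                              # '/m/c_12345.html'

    desktop = url.with_path(url.path.replace('/m/', '', 1))
    print(desktop)                               # https://kox.moe/c_12345.html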
kmdr/module/picker/ArgsFilterPicker.py
@@ -27,7 +27,7 @@ class ArgsFilterPicker(Picker):
              volume_data = filter(lambda x: x.index in choice, volume_data)
 
          if self._max_size is not None:
-             volume_data = filter(lambda x: x.size <= self._max_size, volume_data)
+             volume_data = filter(lambda x: self._max_size is None or x.size <= self._max_size, volume_data)
 
          if self._limit is not None:
              return list(volume_data)[:self._limit]
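The re-check inside the lambda looks redundant next to the outer if, but filter() is lazy: the predicate runs when the iterator is consumed, and the lambda reads self._max_size at call time, not at creation. Presumably the guard protects against _max_size being cleared between the two moments; a minimal demonstration of the mechanism (illustrative, not from the diff):

    max_size = 5.0
    sizes = filter(lambda s: s <= max_size, [1.0, 9.0])

    max_size = None  # state changes before the filter is consumed...
    # list(sizes)    # ...would raise TypeError: '<=' not supported between 'float' and 'NoneType'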
kmdr/module/picker/DefaultVolPicker.py
@@ -1,3 +1,6 @@
+ from rich.table import Table
+ from rich.prompt import Prompt
+
  from kmdr.core import Picker, PICKERS, VolInfo
 
  from .utils import resolve_volume
@@ -9,13 +12,39 @@ class DefaultVolPicker(Picker):
          super().__init__(*args, **kwargs)
 
      def pick(self, volumes: list[VolInfo]) -> list[VolInfo]:
-         print("\tType\tPages\tSize(MB)\tName")
+         table = Table(title="Available volumes", show_header=True, header_style="bold blue")
+         table.add_column("No.", style="dim", width=4, justify="center")
+         table.add_column("Volume", style="cyan", no_wrap=True, min_width=20)
+         table.add_column("Index", style="blue", justify="center")
+         table.add_column("Type", style="green", justify="center")
+         table.add_column("Pages", style="blue", justify="right")
+         table.add_column("Size (MB)", style="yellow", justify="right")
+
+         last_vol_type = None
          for index, volume in enumerate(volumes):
-             print(f"[{index + 1}]\t{volume.vol_type.value}\t{volume.pages}\t{volume.size:.2f}\t\t{volume.name}")
+             if last_vol_type is not None and volume.vol_type != last_vol_type:
+                 table.add_section()
+             last_vol_type = volume.vol_type
+
+             table.add_row(
+                 str(index + 1),
+                 volume.name,
+                 str(volume.index),
+                 volume.vol_type.value,
+                 str(volume.pages),
+                 f"{volume.size:.2f}"
+             )
+
+         self._console.print(table)
+
+         choice_str = Prompt.ask(
+             "[green]Select volume numbers to download (e.g. 'all', '1,2,3', '1-3,4-6')[/green]",
+             default="all"
+         )
 
-         choosed = input("choose a volume to download (e.g. 'all', '1,2,3', '1-3,4-6'):\n")
+         chosen_indices = resolve_volume(choice_str)
 
-         if (chosen := resolve_volume(choosed)) is None:
+         if not chosen_indices:
              return volumes
 
-         return [volumes[i - 1] for i in chosen if 1 <= i <= len(volumes)]
+         return [volumes[i - 1] for i in chosen_indices if 1 <= i <= len(volumes)]
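resolve_volume lives in the pickers' utils module and is not shown in this diff; judging by the prompt text and the `if not chosen_indices` fallback, a parser with equivalent behavior might look like this (an illustrative sketch, not the shipped implementation):

    def resolve_volume_sketch(choice: str) -> list[int]:
        """Parse 'all', '1,2,3' or '1-3,4-6' into 1-based indices; [] means 'all'."""
        choice = choice.strip().lower()
        if not choice or choice == 'all':
            return []
        indices: list[int] = []
        for token in choice.split(','):
            if '-' in token:
                # expand an inclusive range like '1-3' to [1, 2, 3]
                lo, hi = token.split('-', 1)
                indices.extend(range(int(lo), int(hi) + 1))
            else:
                indices.append(int(token))
        return indices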
@@ -0,0 +1,2 @@
1
+ from .ArgsFilterPicker import ArgsFilterPicker
2
+ from .DefaultVolPicker import DefaultVolPicker