pdman 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pdman/__init__.py ADDED
@@ -0,0 +1,4 @@
1
from .manager import Manager

# Package-level Manager instance; it is constructed with default arguments
# as a side effect of importing the package.
manager = Manager()
__all__ = ["Manager", "manager"]
pdman/chunk.py ADDED
@@ -0,0 +1,198 @@
1
+ import random
2
+ import os
3
+ import time
4
+ import asyncio
5
+ import aiohttp
6
+ import aiofiles
7
+
8
+ from typing import TYPE_CHECKING
9
+
10
+ if TYPE_CHECKING:
11
+ from .downloader import Downloader
12
+
13
+
14
class Chunk:
    """One byte range of a download, backed by its own file on disk.

    Chunks form a doubly linked list: ``next`` is the following chunk and
    ``forward`` is the preceding one. ``start``/``end`` are byte offsets in
    the remote resource and ``end`` is inclusive. ``end is None`` means the
    total size is unknown, in which case no ``Range`` header is sent.
    """

    def __init__(
        self,
        parent: "Downloader",
        start: int,
        end: "int | None",
        chunk_path: str,
        forward: "Chunk | None" = None,
        next: "Chunk | None" = None,
    ):
        """Create a chunk covering ``start``..``end`` stored at ``chunk_path``.

        The annotations are strings on purpose: ``Downloader`` is only
        imported under ``TYPE_CHECKING`` and ``Chunk`` is not bound yet while
        the class body executes, so unquoted annotations would raise
        ``NameError`` at import time on interpreters that evaluate them
        eagerly.

        Args:
            parent: Owning downloader; this class reads its ``url``, ``lock``,
                ``_logger``, ``pdm_tmp``, ``filename`` and ``parent`` (the
                manager holding retry/timeout settings).
            start: Offset of the first byte of this chunk.
            end: Offset of the last byte (inclusive), or ``None`` if unknown.
            chunk_path: File that receives the chunk's data.
            forward: Preceding chunk in the list, if any.
            next: Following chunk in the list, if any.
        """
        self.parent = parent
        self.start = start
        self.end = end
        self.chunk_path = chunk_path
        # Bytes already on disk from an earlier run count as downloaded.
        self.size = os.path.getsize(chunk_path) if os.path.exists(chunk_path) else 0
        self.forward: "Chunk | None" = forward
        self.next: "Chunk | None" = next

    def __iter__(self):
        """Yield this chunk and every chunk reachable through ``next``."""
        current = self
        while current:
            yield current
            current = current.next

    def __str__(self):
        return f"Chunk(start={self.start}, end={self.end},target size={(self.end - self.start + 1) if self.end is not None else -1}, size={self.size}, chunk_path={self.chunk_path})"

    def __add__(self, other):
        """Return the combined downloaded size of two chunks."""
        if not isinstance(other, Chunk):
            return NotImplemented
        return self.size + other.size

    def __radd__(self, other):
        """Support ``sum(chunks)``: add this chunk's size to a running total."""
        if other == 0:
            return self.size
        if not isinstance(other, int):
            return NotImplemented
        return self.size + other

    def _expected_size(self) -> int:
        """Return the byte count of the range; only valid when ``end`` is set."""
        return self.end - self.start + 1

    def _is_complete(self) -> bool:
        """Return whether the chunk holds all the data it is expected to hold.

        For an unknown-size chunk any non-empty file counts as complete.
        """
        if self.end is None:
            # NOTE(review): the flag is raised even when nothing has been
            # downloaded yet (size == 0). Kept as-is because code outside
            # this class may rely on it - confirm against Downloader.
            self.parent._downloaded = True
            return self.size > 0
        return self.size == self._expected_size()

    def _needs_download(self) -> bool:
        """Return True when size is unknown or bytes are still missing."""
        return self.end is None or self.size < self._expected_size()

    def _apply_range_header(self, headers: dict):
        """Update ``headers`` in place for the next request.

        Sets ``Range`` to the still-missing part of the chunk. When nothing
        is missing, the marker ``headers["break"] = True`` is stored instead;
        ``download`` checks it and stops before sending a request. For an
        unknown-size chunk any existing ``Range`` entry is removed.
        """
        if self.end is None:
            headers.pop("Range", None)
            return
        offset = self.start + self.size
        if offset <= self.end:
            headers["Range"] = f"bytes={offset}-{self.end}"
        else:
            headers["break"] = True

    async def _stream_response(self, response, f, skip: int = 0) -> bool:
        """Append the response body to ``f`` and track progress.

        Args:
            response: Response object exposing ``content.iter_chunked``.
            f: Async file object opened on ``chunk_path``.
            skip: Number of leading body bytes to discard before writing.
                Used when the server answered a ranged request with the
                whole resource.

        Returns:
            True if any received piece arrived slower than the manager's
            ``chunk_retry_speed`` (when that setting is truthy).
        """
        threshold = self.parent.parent.chunk_retry_speed
        last_time = time.time()
        pos = await f.tell()
        low_speed = False
        async for data in response.content.iter_chunked(10240):
            if skip > 0:
                dropped = min(skip, len(data))
                skip -= dropped
                data = data[dropped:]
                if not data:
                    # Discarded bytes must not count towards the speed check.
                    last_time = time.time()
                    continue
            if self.end is not None:
                remaining = self._expected_size() - pos
                if remaining <= 0:
                    break
                # Never write past the end of this chunk's range.
                data = data[:remaining]
            await f.write(data)
            async with self.parent.lock:
                self.size += len(data)
            now = time.time()
            elapsed = max(now - last_time, 1e-6)  # guard against division by zero
            if threshold and len(data) / elapsed < threshold:
                low_speed = True
            last_time = now
            pos += len(data)
        return low_speed

    async def _split_incomplete(self):
        """Split off the missing tail of this chunk into a new chunk.

        This chunk is shrunk to the bytes it already holds and a new chunk
        covering the remainder is linked in directly after it. Nothing
        happens for unknown-size chunks or when the size already matches.
        """
        if self.end is None:
            # No range to split when the total size is unknown.
            return
        if self.size == self._expected_size():
            return
        self.parent._logger.debug(
            f"Chunk not fully downloaded, splitting chunk: {self}"
        )
        async with self.parent.lock:
            new_start = self.start + self.size
            new_chunk = Chunk(
                self.parent,
                new_start,
                self.end,
                os.path.join(
                    self.parent.pdm_tmp,
                    f"{self.parent.filename}.{new_start}",
                ),
                self,
                next=self.next,
            )
            # Keep the back-links consistent: the old successor now follows
            # the new chunk rather than this one.
            if new_chunk.next is not None:
                new_chunk.next.forward = new_chunk
            self.end = new_start - 1
            self.next = new_chunk

    async def download(self):
        """Download the missing part of this chunk, retrying on failure.

        Makes up to ``manager.retry`` attempts. If the chunk is still
        incomplete afterwards, the missing tail is split off into a new
        chunk via ``_split_incomplete``.

        Returns:
            This chunk.
        """
        manager = self.parent.parent
        logger = self.parent._logger
        headers = {}  # TODO: add any other required request headers
        file_mode = "ab" if os.path.exists(self.chunk_path) else "wb"
        # One timeout object for every request of this chunk. Passing a bare
        # number (or None) per request is deprecated in aiohttp and can
        # replace the session-level sock_read limit.
        timeout = aiohttp.ClientTimeout(total=manager.chunk_timeout, sock_read=30)
        async with (
            aiohttp.ClientSession(timeout=timeout) as session,
            aiofiles.open(self.chunk_path, file_mode) as f,
        ):
            for _ in range(manager.retry):
                if os.path.exists(self.chunk_path) and self._is_complete():
                    return self
                while True:
                    try:
                        self._apply_range_header(headers)
                        if headers.get("break"):
                            break
                        logger.debug(
                            f"Downloading chunk: {self}, with headers: {headers}"
                        )
                        if self._needs_download():
                            async with session.get(
                                self.parent.url, headers=headers
                            ) as response:
                                if response.status in (200, 206):
                                    # A 200 answer to a ranged request means the
                                    # server ignored Range and is sending the
                                    # resource from byte 0: drop the prefix that
                                    # precedes the wanted offset.
                                    skip = 0
                                    if response.status == 200 and "Range" in headers:
                                        skip = self.start + self.size
                                    low_speed = await self._stream_response(
                                        response, f, skip
                                    )
                                    # Restarting is only safe for ranged chunks;
                                    # an unknown-size chunk would fetch the whole
                                    # body again and append it to the same file.
                                    if low_speed and self.end is not None:
                                        await asyncio.sleep(manager.retry_wait)
                                        logger.debug("speed is low restarting...")
                                        continue
                                else:
                                    logger.debug(
                                        f"Unexpected HTTP status {response.status} downloading chunk {self}"
                                    )
                    except aiohttp.client_exceptions.ClientPayloadError:
                        logger.debug(
                            f"Payload error downloading chunk {self}, retrying..."
                        )
                        await asyncio.sleep(
                            manager.retry_wait + random.random() * 5
                        )
                    except asyncio.TimeoutError:
                        logger.debug(
                            f"Timeout downloading chunk {self}, retrying..."
                        )
                        await asyncio.sleep(
                            manager.retry_wait + random.random() * 5
                        )
                    except ConnectionResetError:
                        logger.debug(
                            f"Connection reset downloading chunk {self}, retrying..."
                        )
                        await asyncio.sleep(
                            manager.retry_wait + random.random() * 5
                        )
                    except Exception as e:
                        logger.debug(f"Error downloading chunk {self}: {e}")
                        await asyncio.sleep(manager.retry_wait)
                    # One request per attempt unless a low-speed restart
                    # explicitly continued the inner loop.
                    break
                if self._is_complete():
                    return self
                if self.end is None:
                    # NOTE(review): an unknown-size chunk is reported as done
                    # even if the request above failed - confirm intended.
                    logger.debug(
                        f"completed download chunk (unknown size): {self}"
                    )
                    self.parent._downloaded = True
                    return self
                elif self._needs_download():
                    logger.debug(f"retrying download chunk: {self}")
                else:
                    logger.debug(f"completed download chunk: {self}")
            await self._split_incomplete()
        return self
pdman/cli.py ADDED
@@ -0,0 +1,207 @@
1
+ #!/usr/bin/env python
2
+ # -*- encoding: utf-8 -*-
3
+ """
4
+ CLI 入口:提供命令行解析并调用 PDManager。
5
+ 安装后可通过 console_scripts 生成可执行命令。
6
+ """
7
+
8
+ import os
9
+ import sys
10
+ import argparse
11
+ import asyncio
12
+ from .manager import Manager
13
+
14
version = "0.2.0"


def _build_parser():
    """Build the argparse parser describing every pdman command-line option."""
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-v",
        "--version",
        action="version",
        version=f"PythonDownloadManager(PDMAN) version {version}",
        help="Print the version number and exit.",
    )
    parser.add_argument(
        "-l",
        "--log",
        type=str,
        required=False,
        default=None,
        help="The file name of the log file. If '-' is specified, log is written to stdout.",
    )
    parser.add_argument(
        "--debug",
        action="store_true",
        help="Enable debug mode with verbose logging.",
    )
    parser.add_argument(
        "-d",
        "--dir",
        type=str,
        default=os.path.join(os.getcwd(), "pdm"),
        help="The directory to store the downloaded file.",
    )
    parser.add_argument(
        "-o",
        "--out",
        type=str,
        default=None,
        help="The file name of the downloaded file. It is always relative to the directory given in -d option. When the -Z option is used, this option will be ignored.",
    )
    parser.add_argument(
        "-V",
        "--check-integrity",
        action="store_true",
        help="Check file integrity by validating piece hashes or a hash of the entire file.",
    )
    parser.add_argument(
        "-c",
        "--continue",
        dest="continue_download",
        action="store_true",
        help="Continue downloading a partially downloaded file.",
    )
    parser.add_argument(
        "-i",
        "--input-file",
        type=str,
        default=[],
        action="append",
        help="Downloads URIs found in FILE(s). Supports JSON, YAML, or plain text.",
    )
    parser.add_argument(
        "-x",
        "--max-concurrent-downloads",
        type=int,
        default=5,
        help="Set maximum number of parallel downloads for each URL or task.",
    )
    parser.add_argument(
        "--chunk-retry-speed",
        default="",
        help="If the chunk speed falls below SIZE bytes/second, restart that chunk. Append K/M.",
    )
    parser.add_argument(
        "-r",
        "--retry",
        type=int,
        default=3,
        help="Number of times to retry downloading a URL upon failure.",
    )
    parser.add_argument(
        "-W",
        "--retry-wait",
        type=int,
        default=5,
        help="Maximum wait time in seconds between retries.",
    )
    parser.add_argument(
        "--timeout",
        type=int,
        default=None,
        help="Timeout in seconds for each download request.",
    )
    parser.add_argument(
        "--chunk-timeout",
        type=int,
        default=None,
        help="Timeout in seconds for each chunk download request.",
    )
    parser.add_argument(
        "-N",
        "--max-downloads",
        type=int,
        default=4,
        help="The maximum number of concurrent downloads.",
    )
    parser.add_argument(
        "--no-auto-file-renaming",
        # Stored under the positive name: True by default, False when the
        # flag is given.
        dest="auto_file_renaming",
        action="store_false",
        help="Disable auto renaming when target file exists.",
    )
    parser.add_argument(
        "-Z",
        "--force-sequential",
        action="store_true",
        help="Fetch URIs sequentially.",
    )
    parser.add_argument(
        "-k",
        "--min-split-size",
        type=str,
        default="1M",
        help="Minimum split size. Append K/M.",
    )
    parser.add_argument(
        "--tmp",
        type=str,
        default=None,
        help="Temporary directory for chunk files.",
    )
    parser.add_argument(
        "-t",
        "--threads",
        type=int,
        # Real alias: writes to the same destination as -N/--max-downloads,
        # so the value is no longer parsed and then dropped. SUPPRESS keeps
        # the default owned by --max-downloads.
        dest="max_downloads",
        default=argparse.SUPPRESS,
        help="Alias of max-downloads (deprecated).",
    )
    parser.add_argument(
        "-ua",
        "--user-agent",
        type=str,
        default="PDMAN-Downloader/1.0",
        help="The User-Agent string to use for HTTP requests.",
    )
    parser.add_argument(
        "urls",
        type=str,
        nargs="*",
        default=None,
        help="The URL(s) to download.",
    )
    return parser


def main(argv=None):
    """Parse command-line arguments, queue the downloads and run them.

    Args:
        argv: Argument list to parse; ``None`` makes argparse read
            ``sys.argv[1:]``.
    """
    args = _build_parser().parse_args(argv)
    if args.log == "-":
        args.log = sys.stdout
    if args.force_sequential and args.out is not None:
        # Documented in the --out help text: -Z overrides --out.
        args.out = None

    pdm = Manager(
        max_downloads=args.max_downloads,
        log_path=args.log,
        debug=args.debug,
        continue_download=args.continue_download,
        max_concurrent_downloads=args.max_concurrent_downloads,
        min_split_size=args.min_split_size,
        force_sequential=args.force_sequential,
        tmp_dir=args.tmp,
        check_integrity=args.check_integrity,
        user_agent=args.user_agent,
        chunk_retry_speed=args.chunk_retry_speed,
        retry=args.retry,
        retry_wait=args.retry_wait,
        timeout=args.timeout,
        chunk_timeout=args.chunk_timeout,
        auto_file_renaming=args.auto_file_renaming,
        out_dir=args.dir,
    )

    urls = args.urls or []
    if len(urls) == 1 and args.out is not None:
        pdm.append(urls[0], file_name=args.out)
    else:
        if args.out is not None:
            # --out only makes sense for exactly one URL; say so instead of
            # dropping it silently.
            print(
                "pdman: --out is ignored unless exactly one URL is given",
                file=sys.stderr,
            )
        pdm.add_urls(urls)

    for file in args.input_file:
        if os.path.exists(file):
            pdm.load_input_file(file)
        else:
            # Still best-effort (keep going), but no longer silent.
            print(
                f"pdman: input file not found, skipping: {file}",
                file=sys.stderr,
            )

    asyncio.run(pdm.download())


if __name__ == "__main__":
    main()