pdman 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pdman/__init__.py +4 -0
- pdman/chunk.py +198 -0
- pdman/cli.py +207 -0
- pdman/downloader.py +498 -0
- pdman/manager.py +452 -0
- pdman/test.py +12 -0
- pdman/utils.py +22 -0
- pdman-0.2.0.dist-info/METADATA +909 -0
- pdman-0.2.0.dist-info/RECORD +13 -0
- pdman-0.2.0.dist-info/WHEEL +5 -0
- pdman-0.2.0.dist-info/entry_points.txt +2 -0
- pdman-0.2.0.dist-info/licenses/LICENSE +674 -0
- pdman-0.2.0.dist-info/top_level.txt +1 -0
pdman/__init__.py
ADDED
pdman/chunk.py
ADDED
|
@@ -0,0 +1,198 @@
|
|
|
1
|
+
import random
|
|
2
|
+
import os
|
|
3
|
+
import time
|
|
4
|
+
import asyncio
|
|
5
|
+
import aiohttp
|
|
6
|
+
import aiofiles
|
|
7
|
+
|
|
8
|
+
from typing import TYPE_CHECKING
|
|
9
|
+
|
|
10
|
+
if TYPE_CHECKING:
|
|
11
|
+
from .downloader import Downloader
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class Chunk:
    """One byte range of a download, stored in its own temporary file.

    Chunks are linked together: ``next`` is the chunk that follows this one
    and ``forward`` is the chunk that precedes it (``_split_incomplete``
    creates the following chunk with ``forward=self``). Iterating a chunk
    walks the ``next`` links starting at that chunk.

    Attributes:
        parent: The owning downloader. This class reads ``parent.url``,
            ``parent.lock``, ``parent._logger``, ``parent.pdm_tmp``,
            ``parent.filename`` and the settings on ``parent.parent``
            (``retry``, ``retry_wait``, ``chunk_timeout``,
            ``chunk_retry_speed``), and writes ``parent._downloaded``.
        start: Offset of the first byte of this chunk in the remote file.
        end: Offset of the last byte (inclusive), or ``None`` when the total
            size is unknown.
        chunk_path: Path of the file that receives this chunk's bytes.
        size: Number of bytes already stored for this chunk.
    """

    def __init__(
        self,
        parent: "Downloader",
        start: int,
        end: "int | None",
        chunk_path: str,
        forward: "Chunk | None" = None,
        next: "Chunk | None" = None,
    ):
        # The annotations above are deliberately strings: ``Downloader`` is
        # only imported under TYPE_CHECKING and ``Chunk`` is not bound yet
        # while its own class body executes, so unquoted names raise
        # NameError on interpreters that evaluate annotations eagerly.
        self.parent = parent
        self.start = start
        self.end = end
        self.chunk_path = chunk_path
        # Resume support: an existing chunk file counts as already downloaded.
        if os.path.exists(chunk_path):
            self.size = os.path.getsize(chunk_path)
        else:
            self.size = 0
        self.forward: "Chunk | None" = forward
        self.next: "Chunk | None" = next

    def __iter__(self):
        """Yield this chunk and every chunk reachable through ``next``."""
        current = self
        while current:
            yield current
            current = current.next

    def __str__(self):
        target = (self.end - self.start + 1) if self.end is not None else -1
        return (
            f"Chunk(start={self.start}, end={self.end},target size={target}, "
            f"size={self.size}, chunk_path={self.chunk_path})"
        )

    def __add__(self, other):
        """Return the combined downloaded size of two chunks as an int."""
        if not isinstance(other, Chunk):
            return NotImplemented
        return self.size + other.size

    def __radd__(self, other):
        """Support ``int + chunk`` so that ``sum(chunks)`` totals the sizes."""
        if other == 0:
            return self.size
        if not isinstance(other, int):
            return NotImplemented
        return self.size + other

    def _is_complete(self) -> bool:
        """Return True when every expected byte of this chunk is stored.

        For a chunk of unknown length (``end is None``) any non-empty file is
        treated as complete.
        """
        if self.end is None:
            # NOTE(review): this predicate flags the parent as downloaded even
            # when nothing has been written yet; kept as-is because the
            # meaning of ``_downloaded`` is defined outside this class.
            self.parent._downloaded = True
            return self.size > 0
        return self.size == self.end - self.start + 1

    def _needs_download(self) -> bool:
        """Return True when the chunk is open-ended or still short of bytes."""
        return self.end is None or self.size < self.end - self.start + 1

    def _apply_range_header(self, headers: dict):
        """Update ``headers`` in place for the next request of this chunk.

        Sets ``Range`` to the bytes that are still missing. When nothing is
        left to request, sets the internal ``"break"`` marker instead, which
        ``download`` checks before sending anything. For an open-ended chunk
        any ``Range`` entry is removed.
        """
        if self.end is not None:
            if self.start + self.size <= self.end:
                headers["Range"] = f"bytes={self.start + self.size}-{self.end}"
            else:
                headers["break"] = True
        else:
            headers.pop("Range", None)

    async def _stream_response(self, response, f) -> bool:
        """Append the response body to ``f`` and track progress.

        Data is truncated so that the chunk file never grows past the chunk's
        target length. Returns True when at least one block arrived slower
        than ``chunk_retry_speed`` (if that setting is truthy).
        """
        last_time = time.time()
        pos = await f.tell()
        slow = False
        async for data in response.content.iter_chunked(10240):
            if self.end is not None:
                remaining = self.end - self.start + 1 - pos
                if remaining <= 0:
                    break
                data = data[:remaining]
            await f.write(data)
            async with self.parent.lock:
                self.size += len(data)
            now = time.time()
            elapsed = max(now - last_time, 1e-6)  # avoid division by zero
            speed = len(data) / elapsed
            threshold = self.parent.parent.chunk_retry_speed
            if threshold and speed < threshold:
                # NOTE(review): the stream is not aborted here; the flag is
                # only reported after the whole response has been consumed.
                slow = True
            last_time = now
            pos += len(data)
        return slow

    async def _split_incomplete(self):
        """Hand the missing tail of this chunk over to a new following chunk.

        This chunk is shrunk to the bytes it already holds and a new chunk
        covering the remainder is linked in directly after it. Nothing
        happens for open-ended chunks or when the stored size already equals
        the target length.
        """
        if self.end is None or self.size == self.end - self.start + 1:
            return
        self.parent._logger.debug(
            f"Chunk not fully downloaded, splitting chunk: {self}"
        )
        async with self.parent.lock:
            new_start = self.start + self.size
            new_chunk = Chunk(
                self.parent,
                new_start,
                self.end,
                os.path.join(
                    self.parent.pdm_tmp,
                    f"{self.parent.filename}.{new_start}",
                ),
                forward=self,
                next=self.next,
            )
            # Keep the back-link of the old successor consistent with the
            # newly inserted chunk.
            if self.next is not None:
                self.next.forward = new_chunk
            self.end = new_start - 1
            self.next = new_chunk

    async def download(self):
        """Download this chunk into ``chunk_path`` and return ``self``.

        Makes up to ``parent.parent.retry`` attempts. Each attempt requests
        the bytes that are still missing; network errors are logged, followed
        by a pause of ``retry_wait`` seconds (plus random jitter for payload,
        timeout and connection-reset errors). If the chunk is still
        incomplete after all attempts, the missing tail is split off into a
        new chunk via ``_split_incomplete``.
        """
        assert self.end is not None or self.size >= 0
        headers = {}  # TODO: add any other required request headers
        file_mode = "ab" if os.path.exists(self.chunk_path) else "wb"
        manager = self.parent.parent
        log = self.parent._logger
        async with (
            aiohttp.ClientSession(
                timeout=aiohttp.ClientTimeout(sock_read=30)
            ) as session,
            aiofiles.open(self.chunk_path, file_mode) as f,
        ):
            for _ in range(manager.retry):
                if os.path.exists(self.chunk_path) and self._is_complete():
                    return self
                # Inner loop: only repeated (via ``continue``) after a
                # response that was flagged as too slow.
                while True:
                    try:
                        self._apply_range_header(headers)
                        if headers.get("break"):
                            break
                        log.debug(
                            f"Downloading chunk: {self}, with headers: {headers}"
                        )
                        if self._needs_download():
                            # NOTE(review): passing ``timeout`` per request
                            # may replace the session-level ``sock_read=30``
                            # setting - confirm against the aiohttp version
                            # in use.
                            async with session.get(
                                self.parent.url,
                                headers=headers,
                                timeout=manager.chunk_timeout,
                            ) as response:
                                if response.status in (200, 206):
                                    continue_flag = await self._stream_response(
                                        response, f
                                    )
                                    if continue_flag:
                                        await asyncio.sleep(manager.retry_wait)
                                        log.debug("speed is low restarting...")
                                        continue
                    except aiohttp.client_exceptions.ClientPayloadError:
                        await asyncio.sleep(
                            manager.retry_wait + random.random() * 5
                        )
                    except asyncio.TimeoutError:
                        log.debug(
                            f"Timeout downloading chunk {self}, retrying..."
                        )
                        await asyncio.sleep(
                            manager.retry_wait + random.random() * 5
                        )
                    except ConnectionResetError:
                        log.debug(
                            f"Connection reset downloading chunk {self}, retrying..."
                        )
                        await asyncio.sleep(
                            manager.retry_wait + random.random() * 5
                        )
                    except Exception as e:
                        log.debug(
                            f"Error downloading chunk {self}: {e}"
                        )
                        await asyncio.sleep(manager.retry_wait)
                    break
                if self._is_complete():
                    return self
                if self.end is None:
                    log.debug(
                        f"completed download chunk (unknown size): {self}"
                    )
                    self.parent._downloaded = True
                    return self
                elif self._needs_download():
                    log.debug(f"retrying download chunk: {self}")
                else:
                    log.debug(f"completed download chunk: {self}")
        # All attempts used up: let a new chunk take over whatever is missing.
        await self._split_incomplete()
        return self
|
pdman/cli.py
ADDED
|
@@ -0,0 +1,207 @@
|
|
|
1
|
+
#!/usr/bin/env python
|
|
2
|
+
# -*- encoding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
CLI 入口:提供命令行解析并调用 PDManager。
|
|
5
|
+
安装后可通过 console_scripts 生成可执行命令。
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import os
|
|
9
|
+
import sys
|
|
10
|
+
import argparse
|
|
11
|
+
import asyncio
|
|
12
|
+
from .manager import Manager
|
|
13
|
+
|
|
14
|
+
version = "0.2.0"
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def main(argv=None):
    """Parse command-line arguments, configure a Manager and run the downloads.

    Args:
        argv: Argument list to parse. ``None`` lets argparse read
            ``sys.argv[1:]``.

    The URLs given on the command line are queued first, then the entries of
    every existing ``--input-file``; finally ``Manager.download()`` is run to
    completion with ``asyncio.run``.
    """
    default_max_downloads = 4

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-v",
        "--version",
        action="version",
        version=f"PythonDownloadManager(PDMAN) version {version}",
        help="Print the version number and exit.",
    )
    parser.add_argument(
        "-l",
        "--log",
        type=str,
        required=False,
        default=None,
        help="The file name of the log file. If '-' is specified, log is written to stdout.",
    )
    parser.add_argument(
        "--debug",
        action="store_true",
        help="Enable debug mode with verbose logging.",
    )
    parser.add_argument(
        "-d",
        "--dir",
        type=str,
        default=os.path.join(os.getcwd(), "pdm"),
        help="The directory to store the downloaded file.",
    )
    parser.add_argument(
        "-o",
        "--out",
        type=str,
        default=None,
        help="The file name of the downloaded file. It is always relative to the directory given in -d option. When the -Z option is used, this option will be ignored.",
    )
    parser.add_argument(
        "-V",
        "--check-integrity",
        action="store_true",
        help="Check file integrity by validating piece hashes or a hash of the entire file.",
    )
    parser.add_argument(
        "-c",
        "--continue",
        dest="continue_download",
        action="store_true",
        help="Continue downloading a partially downloaded file.",
    )
    parser.add_argument(
        "-i",
        "--input-file",
        type=str,
        default=[],
        action="append",
        help="Downloads URIs found in FILE(s). Supports JSON, YAML, or plain text.",
    )
    parser.add_argument(
        "-x",
        "--max-concurrent-downloads",
        type=int,
        default=5,
        help="Set maximum number of parallel downloads for each URL or task.",
    )
    parser.add_argument(
        "--chunk-retry-speed",
        default="",
        help="If the chunk speed falls below SIZE bytes/second, restart that chunk. Append K/M.",
    )
    parser.add_argument(
        "-r",
        "--retry",
        type=int,
        default=3,
        help="Number of times to retry downloading a URL upon failure.",
    )
    parser.add_argument(
        "-W",
        "--retry-wait",
        type=int,
        default=5,
        help="Maximum wait time in seconds between retries.",
    )
    parser.add_argument(
        "--timeout",
        type=int,
        default=None,
        help="Timeout in seconds for each download request.",
    )
    parser.add_argument(
        "--chunk-timeout",
        type=int,
        default=None,
        help="Timeout in seconds for each chunk download request.",
    )
    # The default is None so that an explicit -N can be told apart from
    # "not given"; the effective default is applied after parsing.
    parser.add_argument(
        "-N",
        "--max-downloads",
        type=int,
        default=None,
        help="The maximum number of concurrent downloads.",
    )
    # store_false: the value is True unless the flag is present.
    parser.add_argument(
        "--no-auto-file-renaming",
        dest="auto_file_renaming",
        action="store_false",
        help="Disable auto renaming when target file exists.",
    )
    parser.add_argument(
        "-Z",
        "--force-sequential",
        action="store_true",
        help="Fetch URIs sequentially.",
    )
    parser.add_argument(
        "-k",
        "--min-split-size",
        type=str,
        default="1M",
        help="Minimum split size. Append K/M.",
    )
    parser.add_argument(
        "--tmp",
        type=str,
        default=None,
        help="Temporary directory for chunk files.",
    )
    parser.add_argument(
        "-t",
        "--threads",
        type=int,
        default=None,
        help="Alias of max-downloads (deprecated).",
    )
    parser.add_argument(
        "-ua",
        "--user-agent",
        type=str,
        default="PDMAN-Downloader/1.0",
        help="The User-Agent string to use for HTTP requests.",
    )
    parser.add_argument(
        "urls",
        type=str,
        nargs="*",
        default=None,
        help="The URL(s) to download.",
    )

    args = parser.parse_args(argv)
    if args.log == "-":
        args.log = sys.stdout
    if args.force_sequential and args.out is not None:
        # Documented behaviour: --out is ignored together with -Z.
        args.out = None

    # Honour the deprecated --threads alias, but let an explicit
    # --max-downloads win when both are given.
    if args.max_downloads is not None:
        max_downloads = args.max_downloads
    elif args.threads is not None:
        max_downloads = args.threads
    else:
        max_downloads = default_max_downloads

    pdm = Manager(
        max_downloads=max_downloads,
        log_path=args.log,
        debug=args.debug,
        continue_download=args.continue_download,
        max_concurrent_downloads=args.max_concurrent_downloads,
        min_split_size=args.min_split_size,
        force_sequential=args.force_sequential,
        tmp_dir=args.tmp,
        check_integrity=args.check_integrity,
        user_agent=args.user_agent,
        chunk_retry_speed=args.chunk_retry_speed,
        retry=args.retry,
        retry_wait=args.retry_wait,
        timeout=args.timeout,
        chunk_timeout=args.chunk_timeout,
        auto_file_renaming=args.auto_file_renaming,
        out_dir=args.dir,
    )

    urls = args.urls or []
    if len(urls) == 1 and args.out is not None:
        pdm.append(urls[0], file_name=args.out)
    else:
        if args.out is not None:
            # A single output name cannot apply to zero or several URLs.
            print(
                "warning: --out is ignored unless exactly one URL is given",
                file=sys.stderr,
            )
        pdm.add_urls(urls)

    for file in args.input_file:
        if os.path.exists(file):
            pdm.load_input_file(file)
        else:
            # Skip the file as before, but tell the user about it.
            print(f"warning: input file not found: {file}", file=sys.stderr)

    asyncio.run(pdm.download())
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
if __name__ == "__main__":
|
|
207
|
+
main()
|