python-download 0.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- python_download-0.0.1/LICENSE +21 -0
- python_download-0.0.1/PKG-INFO +50 -0
- python_download-0.0.1/download/__init__.py +444 -0
- python_download-0.0.1/download/__main__.py +99 -0
- python_download-0.0.1/download/py.typed +0 -0
- python_download-0.0.1/pyproject.toml +49 -0
- python_download-0.0.1/readme.md +15 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2024 ChenyangGao <https://github.com/ChenyangGao>
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: python-download
|
|
3
|
+
Version: 0.0.1
|
|
4
|
+
Summary: Python functions for download.
|
|
5
|
+
Home-page: https://github.com/ChenyangGao/web-mount-packs/tree/main/python-module/python-download
|
|
6
|
+
License: MIT
|
|
7
|
+
Keywords: python-download,posixpath
|
|
8
|
+
Author: ChenyangGao
|
|
9
|
+
Author-email: wosiwujm@gmail.com
|
|
10
|
+
Requires-Python: >=3.11,<4.0
|
|
11
|
+
Classifier: Development Status :: 5 - Production/Stable
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
14
|
+
Classifier: Operating System :: OS Independent
|
|
15
|
+
Classifier: Programming Language :: Python
|
|
16
|
+
Classifier: Programming Language :: Python :: 3
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
19
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
20
|
+
Classifier: Topic :: Software Development
|
|
21
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
22
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
23
|
+
Requires-Dist: aiohttp_client_request
|
|
24
|
+
Requires-Dist: http_response
|
|
25
|
+
Requires-Dist: python-asynctools
|
|
26
|
+
Requires-Dist: python-concurrenttools
|
|
27
|
+
Requires-Dist: python-filewrap
|
|
28
|
+
Requires-Dist: python-http_request
|
|
29
|
+
Requires-Dist: python-iterutils
|
|
30
|
+
Requires-Dist: python-urlopen
|
|
31
|
+
Requires-Dist: requests_request
|
|
32
|
+
Project-URL: Repository, https://github.com/ChenyangGao/web-mount-packs/tree/main/python-module/python-download
|
|
33
|
+
Description-Content-Type: text/markdown
|
|
34
|
+
|
|
35
|
+
# Python functions for download.
|
|
36
|
+
|
|
37
|
+
## Installation
|
|
38
|
+
|
|
39
|
+
You can install from [pypi](https://pypi.org/project/python-download/)
|
|
40
|
+
|
|
41
|
+
```console
|
|
42
|
+
pip install -U python-download
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
## Usage
|
|
46
|
+
|
|
47
|
+
```python
|
|
48
|
+
import download
|
|
49
|
+
```
|
|
50
|
+
|
|
@@ -0,0 +1,444 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# encoding: utf
|
|
3
|
+
|
|
4
|
+
__author__ = "ChenyangGao <https://chenyanggao.github.io>"
|
|
5
|
+
__version__ = (0, 0, 1)
|
|
6
|
+
__all__ = [
|
|
7
|
+
"DownloadTask", "download_iter", "download", "requests_download",
|
|
8
|
+
"download_async_iter", "async_download",
|
|
9
|
+
]
|
|
10
|
+
|
|
11
|
+
# NOTE https://developer.mozilla.org/en-US/docs/Web/HTTP/Range_requests
|
|
12
|
+
# NOTE https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Ranges
|
|
13
|
+
# NOTE https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Range
|
|
14
|
+
# NOTE https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/206
|
|
15
|
+
# NOTE https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/ETag
|
|
16
|
+
# NOTE https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Range
|
|
17
|
+
# NOTE https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/If-Range
|
|
18
|
+
# NOTE https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Encoding
|
|
19
|
+
# NOTE https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Encoding
|
|
20
|
+
|
|
21
|
+
import errno
|
|
22
|
+
|
|
23
|
+
from collections.abc import AsyncGenerator, AsyncIterator, Callable, Generator, Iterator
|
|
24
|
+
from inspect import isasyncgen, isgenerator
|
|
25
|
+
from os import fsdecode, fstat, makedirs, PathLike
|
|
26
|
+
from os.path import abspath, dirname, isdir, join as joinpath
|
|
27
|
+
from shutil import COPY_BUFSIZE # type: ignore
|
|
28
|
+
from threading import Event
|
|
29
|
+
from typing import cast, Any, NamedTuple, Optional, Self
|
|
30
|
+
|
|
31
|
+
from aiohttp_client_request import request as aiohttp_request
|
|
32
|
+
from asynctools import ensure_async, as_thread
|
|
33
|
+
from concurrenttools import run_as_thread
|
|
34
|
+
from filewrap import bio_skip_iter, bio_skip_async_iter, SupportsRead, SupportsWrite
|
|
35
|
+
from http_request import headers_str_to_dict
|
|
36
|
+
from http_response import get_filename, get_length, is_chunked, is_range_request
|
|
37
|
+
from iterutils import cut_iter
|
|
38
|
+
from requests_request import request as requests_request
|
|
39
|
+
from urlopen import urlopen
|
|
40
|
+
|
|
41
|
+
# `aiofile` is an optional dependency: record whether it could be imported.
try:
    from aiofile import async_open, FileIOWrapperBase
except ImportError:
    aiofile_installed = False
else:
    aiofile_installed = True
    # When the wrapper class defines no `__getattr__` of its own, install one
    # that forwards unknown attribute lookups to the wrapper's `file` attribute.
    if "__getattr__" not in vars(FileIOWrapperBase):
        FileIOWrapperBase.__getattr__ = lambda self, attr, /: getattr(self.file, attr)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
class DownloadProgress(NamedTuple):
    """Immutable snapshot of how far a download has progressed.

    The numeric fields are plain counters; the derived properties below only
    combine them arithmetically.
    """
    # Size that `completed` is measured against.
    total: int
    # Amount counted as downloaded.
    downloaded: int
    # Amount counted as skipped (added to `downloaded` to form `completed`).
    skipped: int
    # Size of the most recent increment.
    last_incr: int = 0
    # Free-form payload attached by whoever builds the snapshot.
    extra: Any = None

    @property
    def completed(self, /) -> int:
        """Return the sum of the downloaded and skipped amounts."""
        return self.downloaded + self.skipped

    @property
    def remaining(self, /) -> int:
        """Return how much is left until `total`, never less than 0."""
        return max(0, self.total - self.completed)

    @property
    def ratio(self, /) -> float:
        """Return `completed / total`.

        A snapshot with `total == 0` has nothing left to fetch (`task_done`
        is true for it), so it reports 1.0 instead of raising
        `ZeroDivisionError`.
        """
        total = self.total
        if total == 0:
            return 1.0
        return self.completed / total

    @property
    def task_done(self, /) -> bool:
        """Return True once `completed` has reached (or passed) `total`."""
        return self.completed >= self.total
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
class DownloadTask:
    """Run a progress-yielding generator under a small state machine.

    A task starts as ``"PENDING"``, alternates between ``"RUNNING"`` and
    ``"PAUSED"``, and ends in one of the closed states ``"CANCELED"``,
    ``"FAILED"`` or ``"FINISHED"``. Each value produced by the generator is
    stored as the latest :attr:`progress`.
    """

    def __init__(self, /, gen, submit=run_as_thread):
        """
        :param gen: generator to drive; every yielded value becomes the current progress
        :param submit: callable used to schedule the worker function, or an
            object exposing a ``submit`` method (that method is used instead)
        """
        if not callable(submit):
            submit = submit.submit
        self._submit = submit
        self._state = "PENDING"
        self._gen = gen
        # Set as soon as a result has been stored, see the `result` setter.
        self._done_event = Event()

    def __repr__(self, /) -> str:
        return f"<{type(self).__qualname__} :: state={self.state!r} progress={self.progress!r}>"

    @classmethod
    def create_task(
        cls,
        /,
        *args,
        submit=run_as_thread,
        **kwargs,
    ) -> Self:
        """Build a task around ``download_iter(*args, **kwargs)``."""
        return cls(download_iter(*args, **kwargs), submit=submit)

    @property
    def closed(self, /) -> bool:
        """True when the task is in a final state."""
        return self._state in ("CANCELED", "FAILED", "FINISHED")

    @property
    def progress(self, /) -> Optional[DownloadProgress]:
        """Latest value yielded by the generator, or None before the first one."""
        return self.__dict__.get("_progress")

    @property
    def result(self, /):
        """Block until a result was stored, then return it.

        The stored value is the generator's return value after normal
        completion, the exception object after a failure, and None after a
        cancellation.
        """
        self._done_event.wait()
        return self._result

    @result.setter
    def result(self, val, /):
        self._result = val
        self._done_event.set()

    @property
    def state(self, /) -> str:
        """Current state name."""
        return self._state

    def close(self, /):
        """Cancel the task unless it is already in a final state."""
        if self.closed:
            return
        state = self._state
        self._state = "CANCELED"
        if state != "RUNNING":
            # No worker loop is active, so nothing else would ever finalize the
            # generator. Scheduling `_run` here cannot work because `_run`
            # rejects the CANCELED state; close the generator directly and
            # publish the result so that waiters on `result` are released.
            try:
                self._gen.close()
            finally:
                self.result = None
        # When a worker is RUNNING, its loop in `_run` sees the state change
        # after the current step and performs the same cleanup itself.

    def pause(self, /):
        """Ask a running task to stop after its current step.

        :raise RuntimeError: if the task is neither running nor paused
        """
        if self._state in ("PAUSED", "RUNNING"):
            self._state = "PAUSED"
        else:
            raise RuntimeError(f"can't pause when state={self._state!r}")

    def _run(self, /):
        """Advance the generator for as long as the state stays ``"RUNNING"``.

        :raise RuntimeError: if the task is neither pending nor paused
        """
        if self._state in ("PENDING", "PAUSED"):
            self._state = "RUNNING"
        else:
            raise RuntimeError(f"can't run when state={self._state!r}")
        gen = self._gen
        try:
            while self._state == "RUNNING":
                self._progress = next(gen)
        except KeyboardInterrupt:
            raise
        except StopIteration as exc:
            self._state = "FINISHED"
            self.result = exc.value
        except BaseException as exc:
            # The failure is stored as the result rather than propagated.
            self._state = "FAILED"
            self.result = exc
        else:
            # The loop ended because the state changed: either paused (nothing
            # to do) or canceled (finalize the generator).
            if self._state == "CANCELED":
                try:
                    gen.close()
                finally:
                    self.result = None

    def run(self, /):
        """Schedule the worker through the configured ``submit`` callable."""
        return self._submit(self._run)

    def run_wait(self, /):
        """Run the task in the calling thread, or wait for a worker already running it."""
        if not self._done_event.is_set():
            if self._state == "RUNNING":
                self._done_event.wait()
            else:
                self._run()
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
def download_iter(
    url: str | Callable[[], str],
    file: bytes | str | PathLike | SupportsWrite[bytes] = "",
    resume: bool = False,
    chunksize: int = COPY_BUFSIZE,
    headers: None | dict[str, str] | Callable[[], dict[str, str]] = None,
    urlopen: Callable = urlopen,
) -> Generator[DownloadProgress, None, None]:
    """Download `url` into `file`, yielding a `DownloadProgress` after each step.

    :param url: the URL, or a callable returning it
    :param file: object with a ``write`` method, or a path; when the path is
        an existing directory a file name derived from the response is appended
    :param resume: open paths in append mode and treat the current file size
        as data that is already present
    :param chunksize: read size per step; non-positive values fall back to ``COPY_BUFSIZE``
    :param headers: request headers, or a callable returning them;
        ``Accept-Encoding: identity`` is always sent
    :param urlopen: called as ``urlopen(url, headers=headers)``; the returned
        response must offer ``read`` and ``close``

    :raise OSError: the existing file is larger than the remote content, or a
        ranged re-request did not produce a range response

    A destination opened by this function is closed when the generator ends;
    a caller-supplied writable object is left open.
    """
    if not isinstance(url, str):
        url = url()

    if callable(headers):
        headers = headers()
    if headers:
        headers = {**headers, "Accept-Encoding": "identity"}
    else:
        headers = {"Accept-Encoding": "identity"}

    if chunksize <= 0:
        chunksize = COPY_BUFSIZE

    resp = urlopen(url, headers=headers)
    # Stays None unless this function opens the destination itself.
    file_close: None | Callable = None
    try:
        length = get_length(resp)
        if length == 0 and is_chunked(resp):
            # A chunked body reported as length 0 is treated as "size unknown".
            length = None

        fdst: SupportsWrite[bytes]
        if hasattr(file, "write"):
            file = fdst = cast(SupportsWrite[bytes], file)
        else:
            file = abspath(fsdecode(file))
            if isdir(file):
                file = joinpath(file, get_filename(resp, "download"))
            mode = "ab" if resume else "wb"
            try:
                opened = open(file, mode)
            except FileNotFoundError:
                # Missing parent directories: create them and retry once.
                makedirs(dirname(file), exist_ok=True)
                opened = open(file, mode)
            fdst = opened
            file_close = opened.close

        extra = {"url": url, "file": file, "resume": resume}

        filesize = 0
        if resume:
            try:
                filesize = fstat(fdst.fileno()).st_size # type: ignore
            except (AttributeError, OSError):
                # Destination has no usable file descriptor; nothing to resume from.
                pass
            else:
                if filesize == length:
                    yield DownloadProgress(length, 0, length, length, extra)
                    return
                elif length is not None and filesize > length:
                    raise OSError(errno.EIO, f"file {file!r} is larger than url {url!r}: {filesize} > {length} (in bytes)")
        elif length == 0:
            yield DownloadProgress(0, 0, 0, 0, extra)
            return

        if filesize and is_range_request(resp):
            # Re-issue the request so that it starts at the current file size.
            resp.close()
            resp = urlopen(url, headers={**headers, "Range": "bytes=%d-" % filesize})
            if not is_range_request(resp):
                raise OSError(errno.EIO, f"range request failed: {url!r}")

        yield DownloadProgress(length or 0, 0, 0, 0, extra)

        length_downloaded = 0
        length_skipped = 0

        fsrc_read = resp.read
        fdst_write = fdst.write
        if filesize:
            if is_range_request(resp):
                length_skipped = filesize
                yield DownloadProgress(length or length_skipped, 0, length_skipped, length_skipped, extra)
            else:
                # No range response: consume the already-present prefix from the body.
                for skiplen in bio_skip_iter(resp, filesize):
                    length_skipped += skiplen
                    yield DownloadProgress(length or length_skipped, 0, length_skipped, skiplen, extra)

        while (chunk := fsrc_read(chunksize)):
            fdst_write(chunk)
            downlen = len(chunk)
            length_downloaded += downlen
            yield DownloadProgress(length or (length_skipped + length_downloaded), length_downloaded, length_skipped, downlen, extra)
    finally:
        try:
            resp.close()
        finally:
            # Close (and thereby flush) the destination only if it was opened here.
            if file_close is not None:
                file_close()
|
|
259
|
+
|
|
260
|
+
|
|
261
|
+
def download(
    url: str | Callable[[], str],
    file: bytes | str | PathLike | SupportsWrite[bytes] = "",
    resume: bool = False,
    chunksize: int = COPY_BUFSIZE,
    headers: None | dict[str, str] | Callable[[], dict[str, str]] = None,
    urlopen: Callable = urlopen,
    make_reporthook: None | Callable[[None | int], Callable[[int], Any] | Generator[int, Any, Any]] = None,
):
    """Run `download_iter` to completion, optionally reporting each increment.

    :param url: forwarded to `download_iter`
    :param file: forwarded to `download_iter`
    :param resume: forwarded to `download_iter`
    :param chunksize: forwarded to `download_iter`
    :param headers: forwarded to `download_iter`
    :param urlopen: forwarded to `download_iter`
    :param make_reporthook: called once with the total of the first progress
        value; it returns either a callable that receives each increment, or
        a generator that is primed with ``next`` and then fed increments via ``send``

    :return: the ``"file"`` entry of the last progress value's ``extra``
        (the destination path or object), or None if nothing was yielded
    """
    gen = download_iter(url, file, resume=resume, chunksize=chunksize, headers=headers, urlopen=urlopen)
    reporthook: None | Callable[[int], Any] = None
    reporthook_close: None | Callable = None
    progress = None
    try:
        if make_reporthook:
            # The first progress value is needed to tell the hook the total.
            progress = next(gen)
            hook = make_reporthook(progress.total)
            if isgenerator(hook):
                next(hook)
                reporthook_close = hook.close
                reporthook = hook.send
            else:
                reporthook = cast(Callable[[int], Any], hook)
            reporthook(progress.last_incr)

        for progress in gen:
            if reporthook:
                reporthook(progress.last_incr)
    finally:
        # Finalize the download generator even when the hook raised, so its
        # own cleanup runs now rather than at garbage collection.
        try:
            gen.close()
        finally:
            if reporthook_close:
                reporthook_close()
    if progress is None:
        return None
    return progress.extra["file"]
|
|
286
|
+
|
|
287
|
+
|
|
288
|
+
def requests_download(
    url: str | Callable[[], str],
    urlopen: Callable = requests_request,
    **kwargs,
):
    """Call `download` with an opener adapted for streamed responses.

    The opener passes ``stream=True`` to `urlopen`, calls
    ``raise_for_status()`` on the response and exposes the response's
    ``raw.read`` as ``read``. All other keyword arguments go to `download`
    unchanged.
    """
    def open_streaming(target, headers):
        response = urlopen(target, headers=headers, stream=True)
        response.raise_for_status()
        # `download` reads through `.read`, so alias it to the raw stream's reader.
        setattr(response, "read", response.raw.read)
        return response

    return download(url, urlopen=open_streaming, **kwargs)
|
|
301
|
+
|
|
302
|
+
|
|
303
|
+
async def download_async_iter(
    url: str | Callable[[], str],
    file: bytes | str | PathLike | SupportsWrite[bytes] = "",
    resume: bool = False,
    chunksize: int = COPY_BUFSIZE,
    headers: None | dict[str, str] | Callable[[], dict[str, str]] = None,
    urlopen: Callable = aiohttp_request,
) -> AsyncGenerator[DownloadProgress, None]:
    """Asynchronously download `url` into `file`, yielding progress snapshots.

    `url` and `headers` may be callables, `urlopen` is wrapped with
    ``ensure_async`` and awaited as ``urlopen(url, headers=headers)``, and
    ``Accept-Encoding: identity`` is always sent. With `resume`, paths are
    opened in append mode and the current file size counts as already present.
    A destination opened here is closed when the generator ends.
    """
    if not isinstance(url, str):
        url = await ensure_async(url)()

    if callable(headers):
        headers = await ensure_async(headers)()
    headers = {**headers, "Accept-Encoding": "identity"} if headers else {"Accept-Encoding": "identity"}

    if chunksize <= 0:
        chunksize = COPY_BUFSIZE

    urlopen = ensure_async(urlopen)

    resp = await urlopen(url, headers=headers)
    file_async_close: None | Callable = None
    try:
        length = get_length(resp)
        if length == 0 and is_chunked(resp):
            # Chunked body reported as length 0: treat the size as unknown.
            length = None

        fdst: SupportsWrite[bytes]
        if hasattr(file, "write"):
            file = fdst = cast(SupportsWrite[bytes], file)
        else:
            file = abspath(fsdecode(file))
            if isdir(file):
                file = joinpath(file, get_filename(resp, "download"))
            mode = "ab" if resume else "wb"
            if aiofile_installed:
                try:
                    fdst = await async_open(file, mode)
                except FileNotFoundError:
                    makedirs(dirname(file), exist_ok=True)
                    fdst = await async_open(file, mode)
                file_async_close = fdst.close
            else:
                try:
                    fdst = open(file, mode)
                except FileNotFoundError:
                    makedirs(dirname(file), exist_ok=True)
                    fdst = open(file, mode)
                # The blocking close is wrapped with `as_thread` so it can be awaited.
                file_async_close = as_thread(fdst.close)

        info = {"url": url, "file": file, "resume": resume}
        present = 0
        if resume:
            try:
                present = fstat(fdst.fileno()).st_size # type: ignore
            except (AttributeError, OSError):
                pass
            else:
                if present == length:
                    yield DownloadProgress(length, 0, length, length, info)
                    return
                elif length is not None and present > length:
                    raise OSError(errno.EIO, f"file {file!r} is larger than url {url!r}: {present} > {length} (in bytes)")
        elif length == 0:
            yield DownloadProgress(0, 0, 0, 0, info)
            return

        if present and is_range_request(resp):
            # Re-issue the request so that it starts at the current file size.
            await ensure_async(resp.close)()
            resp = await urlopen(url, headers={**headers, "Range": "bytes=%d-" % present})
            if not is_range_request(resp):
                raise OSError(errno.EIO, f"range request failed: {url!r}")

        yield DownloadProgress(length or 0, 0, 0, 0, info)

        fetched = 0
        skipped = 0

        read_chunk = ensure_async(resp.read)
        write_chunk = ensure_async(fdst.write)
        if present:
            if is_range_request(resp):
                skipped = present
                yield DownloadProgress(length or skipped, 0, skipped, skipped, info)
            else:
                # No range response: consume the already-present prefix from the body.
                async for step in bio_skip_async_iter(resp, present):
                    skipped += step
                    yield DownloadProgress(length or skipped, 0, skipped, step, info)

        while True:
            chunk = await read_chunk(chunksize)
            if not chunk:
                break
            await write_chunk(chunk)
            step = len(chunk)
            fetched += step
            yield DownloadProgress(length or (skipped + fetched), fetched, skipped, step, info)
    finally:
        await ensure_async(resp.close)()
        if file_async_close:
            await file_async_close()
|
|
405
|
+
|
|
406
|
+
|
|
407
|
+
async def async_download(
    url: str | Callable[[], str],
    file: bytes | str | PathLike | SupportsWrite[bytes] = "",
    resume: bool = False,
    chunksize: int = COPY_BUFSIZE,
    headers: None | dict[str, str] | Callable[[], dict[str, str]] = None,
    urlopen: Callable = urlopen,
    make_reporthook: None | Callable[[None | int], Callable[[int], Any] | Generator[int, Any, Any] | AsyncGenerator[int, Any]] = None,
):
    """Run `download_async_iter` to completion, optionally reporting each increment.

    :param url: forwarded to `download_async_iter`
    :param file: forwarded to `download_async_iter`
    :param resume: forwarded to `download_async_iter`
    :param chunksize: forwarded to `download_async_iter`
    :param headers: forwarded to `download_async_iter`
    :param urlopen: forwarded to `download_async_iter`
    :param make_reporthook: called once with the total of the first progress
        value; it may return an async generator (primed, then fed via
        ``asend``), a generator (primed and fed through ``as_thread``), or a
        callable (wrapped with ``ensure_async``)

    :return: the ``"file"`` entry of the last progress value's ``extra``
        (the destination path or object), or None if nothing was yielded

    NOTE(review): the default `urlopen` here is the module-level `urlopen`,
    while `download_async_iter` defaults to `aiohttp_request`; confirm that
    this difference is intended.
    """
    gen = download_async_iter(url, file, resume=resume, chunksize=chunksize, headers=headers, urlopen=urlopen)
    reporthook: None | Callable = None
    reporthook_close: None | Callable = None
    progress = None
    try:
        # Hook setup lives inside the `try` so a failing hook factory still
        # leads to the download generator being closed below.
        if make_reporthook:
            progress = await anext(gen)
            hook = make_reporthook(progress.total)
            if isasyncgen(hook):
                await anext(hook)
                reporthook_close = hook.aclose
                reporthook = hook.asend
            elif isgenerator(hook):
                await as_thread(next)(hook)
                reporthook_close = as_thread(hook.close)
                reporthook = as_thread(hook.send)
            else:
                reporthook = ensure_async(cast(Callable, hook))
            await reporthook(progress.last_incr)

        async for progress in gen:
            if reporthook:
                await reporthook(progress.last_incr)
    finally:
        try:
            await gen.aclose()
        finally:
            # Runs even if closing the download generator raised.
            if reporthook_close:
                await reporthook_close()
    if progress is None:
        return None
    return progress.extra["file"]
|
|
444
|
+
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# coding: utf-8
|
|
3
|
+
|
|
4
|
+
__author__ = "ChenyangGao <https://chenyanggao.github.io>"
|
|
5
|
+
__doc__ = "python url downloader"
|
|
6
|
+
|
|
7
|
+
from argparse import ArgumentParser, RawTextHelpFormatter
|
|
8
|
+
from collections import deque
|
|
9
|
+
from collections.abc import Callable
|
|
10
|
+
from os import makedirs
|
|
11
|
+
from time import perf_counter
|
|
12
|
+
|
|
13
|
+
from . import download, requests_download, __version__
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def parse_args():
    """Parse the command line and return the resulting namespace.

    When ``-v/--version`` is given, the version is printed and the process
    exits with status 0 instead of returning.
    """
    parser = ArgumentParser(description=__doc__, formatter_class=RawTextHelpFormatter)
    add = parser.add_argument
    add("urls", metavar="url", nargs="?", help="URL(s) to be downloaded (one URL per line), if omitted, read from stdin")
    add("-d", "--savedir", default="", help="path to the downloaded file")
    add("-r", "--resume", action="store_true", help="skip downloaded data")
    add("-hs", "--headers", help="dictionary of HTTP Headers to send with")
    add("-rq", "--use-requests", action="store_true", help="use `requests` module")
    add("-v", "--version", action="store_true", help="print the current version")
    namespace = parser.parse_args()
    if not namespace.version:
        return namespace
    print(".".join(str(part) for part in __version__))
    raise SystemExit(0)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def headers_str_to_dict(headers: str, /) -> dict[str, str]:
    """Parse ``Name: value`` lines into a dictionary.

    Blank lines are ignored, any line ending is accepted, and whitespace
    around the name and the value is stripped, so ``Name:value`` works too.
    Only the first colon separates name from value.

    :param headers: header block, one header per line
    :return: mapping of header name to value (a later duplicate wins)
    :raise ValueError: a non-blank line has no colon or an empty name
    """
    parsed: dict[str, str] = {}
    for line in headers.splitlines():
        if not line.strip():
            continue
        name, sep, value = line.partition(":")
        name = name.strip()
        if not sep or not name:
            raise ValueError(f"malformed header line: {line!r}")
        parsed[name] = value.strip()
    return parsed
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def progress(total=None):
    """Generator report hook that redraws a one-line progress display.

    Prime it with ``next()``, then ``send()`` the size of each increment. The
    speed is measured against the oldest of the last 64 recorded samples.
    """
    samples: deque[tuple[int, float]] = deque([(0, perf_counter())], maxlen=64)
    received = 0
    while True:
        received += yield
        now = perf_counter()
        base_count, base_time = samples[0]
        speed = (received - base_count) / 1024 / 1024 / (now - base_time)
        if total:
            percentage = received / total * 100
            line = f"\r\x1b[K{received} / {total} | {speed:.2f} MB/s | {percentage:.2f} %"
        else:
            line = f"\r\x1b[K{received} | {speed:.2f} MB/s"
        print(line, end="", flush=True)
        samples.append((received, now))
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def main():
    """Command-line entry point: download every given URL and report the outcome.

    URLs come from the positional argument (one per line) or, when it is
    omitted, from standard input. A failing download is reported and the next
    URL is tried; Ctrl-C stops the whole run.
    """
    args = parse_args()

    if args.urls:
        urls = args.urls.splitlines()
    else:
        from sys import stdin
        urls = (l.removesuffix("\n") for l in stdin)

    savedir = args.savedir
    if savedir:
        makedirs(savedir, exist_ok=True)

    if args.use_requests:
        downloader: Callable = requests_download
    else:
        downloader = download

    try:
        headers = args.headers
        if headers is not None:
            headers = headers_str_to_dict(headers)
        for url in urls:
            if not url:
                continue
            try:
                file = downloader(
                    url,
                    file=savedir,
                    resume=args.resume,
                    make_reporthook=progress,
                    headers=headers,
                )
                print(f"\r\x1b[K\x1b[1;32mDOWNLOADED\x1b[0m \x1b[4;34m{url!r}\x1b[0m\n |_ ⏬ \x1b[4;34m{file!r}\x1b[0m")
            except KeyboardInterrupt:
                # Let Ctrl-C reach the outer handler so the whole batch stops,
                # instead of being reported as a per-URL error.
                raise
            except BaseException as e:
                print(f"\r\x1b[K\x1b[1;31mERROR\x1b[0m \x1b[4;34m{url!r}\x1b[0m\n |_ 🙅 \x1b[1;31m{type(e).__qualname__}\x1b[0m: {e}")
    except (EOFError, KeyboardInterrupt):
        pass
    except BrokenPipeError:
        from sys import stderr
        stderr.close()
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
# Run the command-line interface only when executed as a script or via `python -m`.
if __name__ == "__main__":
    main()
|
|
99
|
+
|
|
File without changes
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
[tool.poetry]
|
|
2
|
+
name = "python-download"
|
|
3
|
+
version = "0.0.1"
|
|
4
|
+
description = "Python functions for download."
|
|
5
|
+
authors = ["ChenyangGao <wosiwujm@gmail.com>"]
|
|
6
|
+
license = "MIT"
|
|
7
|
+
readme = "readme.md"
|
|
8
|
+
homepage = "https://github.com/ChenyangGao/web-mount-packs/tree/main/python-module/python-download"
|
|
9
|
+
repository = "https://github.com/ChenyangGao/web-mount-packs/tree/main/python-module/python-download"
|
|
10
|
+
keywords = ["python-download", "posixpath"]
|
|
11
|
+
classifiers = [
|
|
12
|
+
"License :: OSI Approved :: MIT License",
|
|
13
|
+
"Development Status :: 5 - Production/Stable",
|
|
14
|
+
"Programming Language :: Python",
|
|
15
|
+
"Programming Language :: Python :: 3",
|
|
16
|
+
"Programming Language :: Python :: 3.11",
|
|
17
|
+
"Programming Language :: Python :: 3 :: Only",
|
|
18
|
+
"Operating System :: OS Independent",
|
|
19
|
+
"Intended Audience :: Developers",
|
|
20
|
+
"Topic :: Software Development",
|
|
21
|
+
"Topic :: Software Development :: Libraries",
|
|
22
|
+
"Topic :: Software Development :: Libraries :: Python Modules",
|
|
23
|
+
]
|
|
24
|
+
include = [
|
|
25
|
+
"LICENSE",
|
|
26
|
+
]
|
|
27
|
+
|
|
28
|
+
[tool.poetry.dependencies]
|
|
29
|
+
python = "^3.11"
|
|
30
|
+
aiohttp_client_request = "*"
|
|
31
|
+
http_response = "*"
|
|
32
|
+
python-asynctools = "*"
|
|
33
|
+
python-concurrenttools = "*"
|
|
34
|
+
python-filewrap = "*"
|
|
35
|
+
python-iterutils = "*"
|
|
36
|
+
python-http_request = "*"
|
|
37
|
+
python-urlopen = "*"
|
|
38
|
+
requests_request = "*"
|
|
39
|
+
|
|
40
|
+
[tool.poetry.scripts]
|
|
41
|
+
python-download = "download.__main__:main"
|
|
42
|
+
download = "download.__main__:main"
|
|
43
|
+
|
|
44
|
+
[build-system]
|
|
45
|
+
requires = ["poetry-core"]
|
|
46
|
+
build-backend = "poetry.core.masonry.api"
|
|
47
|
+
|
|
48
|
+
[[tool.poetry.packages]]
|
|
49
|
+
include = "download"
|