megfile 3.0.4__py3-none-any.whl → 3.0.6__py3-none-any.whl
This diff compares two publicly available versions of the package as released to a supported registry. It is provided for informational purposes only and reflects the changes between the package versions as they appear in the public registry.
- megfile/cli.py +21 -10
- megfile/config.py +9 -0
- megfile/errors.py +37 -21
- megfile/fs.py +9 -5
- megfile/fs_path.py +72 -15
- megfile/hdfs.py +3 -2
- megfile/hdfs_path.py +16 -6
- megfile/http_path.py +151 -22
- megfile/lib/base_prefetch_reader.py +2 -2
- megfile/lib/hdfs_prefetch_reader.py +2 -2
- megfile/lib/http_prefetch_reader.py +28 -12
- megfile/lib/s3_prefetch_reader.py +2 -2
- megfile/lib/s3_share_cache_reader.py +2 -2
- megfile/pathlike.py +10 -3
- megfile/s3.py +14 -7
- megfile/s3_path.py +57 -27
- megfile/sftp.py +18 -9
- megfile/sftp_path.py +63 -31
- megfile/smart.py +60 -16
- megfile/version.py +1 -1
- {megfile-3.0.4.dist-info → megfile-3.0.6.dist-info}/METADATA +1 -1
- {megfile-3.0.4.dist-info → megfile-3.0.6.dist-info}/RECORD +27 -27
- {megfile-3.0.4.dist-info → megfile-3.0.6.dist-info}/LICENSE +0 -0
- {megfile-3.0.4.dist-info → megfile-3.0.6.dist-info}/LICENSE.pyre +0 -0
- {megfile-3.0.4.dist-info → megfile-3.0.6.dist-info}/WHEEL +0 -0
- {megfile-3.0.4.dist-info → megfile-3.0.6.dist-info}/entry_points.txt +0 -0
- {megfile-3.0.4.dist-info → megfile-3.0.6.dist-info}/top_level.txt +0 -0
megfile/http_path.py
CHANGED
@@ -1,17 +1,19 @@
-import io
 import time
+from copy import deepcopy
 from functools import partial
-from io import BufferedReader
+from io import BufferedReader, BytesIO
 from logging import getLogger as get_logger
+from threading import Lock
 from typing import Iterable, Iterator, Optional, Tuple, Union
 
 import requests
+from urllib3 import HTTPResponse
 
-from megfile.config import DEFAULT_BLOCK_SIZE
+from megfile.config import DEFAULT_BLOCK_SIZE, HTTP_MAX_RETRY_TIMES
 from megfile.errors import http_should_retry, patch_method, translate_http_error
-from megfile.interfaces import PathLike, StatResult, URIPath
+from megfile.interfaces import PathLike, Readable, StatResult, URIPath
 from megfile.lib.compat import fspath
-from megfile.lib.http_prefetch_reader import HttpPrefetchReader
+from megfile.lib.http_prefetch_reader import DEFAULT_TIMEOUT, HttpPrefetchReader
 from megfile.lib.s3_buffered_writer import DEFAULT_MAX_BUFFER_SIZE
 from megfile.lib.url import get_url_scheme
 from megfile.pathlike import PathLike
@@ -27,11 +29,11 @@ __all__ = [
 ]
 
 _logger = get_logger(__name__)
-max_retries =
+max_retries = HTTP_MAX_RETRY_TIMES
 
 
 def get_http_session(
-        timeout: Union[int, Tuple[int, int]] =
+        timeout: Optional[Union[int, Tuple[int, int]]] = DEFAULT_TIMEOUT,
         status_forcelist: Iterable[int] = (500, 502, 503, 504)
 ) -> requests.Session:
     session = requests.Session()
@@ -81,7 +83,7 @@ def get_http_session(
         return file_object
     elif hasattr(file_object, 'name'):
         with SmartPath(file_object.name).open('rb') as f:
-            return
+            return BytesIO(f.read())
     else:
         _logger.warning(
             f'Can not retry http request, because the file object is not seekable and unsupport "name"'
@@ -171,10 +173,12 @@ class HttpPath(URIPath):
     protocol = "http"
 
     def __init__(self, path: PathLike, *other_paths: PathLike):
-        if str(path).startswith('https://'):
-            self.protocol = 'https'
         super().__init__(path, *other_paths)
 
+        if fspath(path).startswith('https://'):
+            self.protocol = 'https'
+        self.request_kwargs = {}
+
     @binary_open
     def open(
             self,
@@ -203,9 +207,15 @@ class HttpPath(URIPath):
             raise ValueError('unacceptable mode: %r' % mode)
 
         response = None
+        request_kwargs = deepcopy(self.request_kwargs)
+        timeout = request_kwargs.pop('timeout', DEFAULT_TIMEOUT)
+        stream = request_kwargs.pop('stream', True)
         try:
-            response = get_http_session(
-
+            response = get_http_session(
+                timeout=timeout,
+                status_forcelist=(),
+            ).get(
+                self.path_with_protocol, stream=stream, **request_kwargs)
             response.raise_for_status()
         except Exception as error:
             if response:
@@ -213,8 +223,9 @@ class HttpPath(URIPath):
             raise translate_http_error(error, self.path_with_protocol)
 
         content_size = int(response.headers['Content-Length'])
-        if response.headers.get(
-
+        if (response.headers.get('Accept-Ranges') == 'bytes' and
+                content_size >= block_size * 2 and
+                not response.headers.get('Content-Encoding')):
             response.close()
 
             block_capacity = max_buffer_size // block_size
@@ -224,7 +235,7 @@ class HttpPath(URIPath):
             block_forward = max(int(block_capacity * forward_ratio), 1)
 
             reader = HttpPrefetchReader(
-                self
+                self,
                 content_size=content_size,
                 max_retries=max_retries,
                 max_workers=max_concurrency,
@@ -233,12 +244,15 @@ class HttpPath(URIPath):
                 block_size=block_size,
             )
             if _is_pickle(reader):  # pytype: disable=wrong-arg-types
-                reader =
+                reader = BufferedReader(reader)  # pytype: disable=wrong-arg-types
             return reader
 
-        response.raw.auto_close = False
         response.raw.name = self.path_with_protocol
-
+        # TODO: When python version must bigger than 3.10, use urllib3>=2.0.0 instead of 'Response'
+        # response.raw.auto_close = False
+        # response.raw.decode_content = True
+        # return BufferedReader(response.raw)
+        return BufferedReader(Response(response.raw))  # pytype: disable=wrong-arg-types
 
     def stat(self, follow_symlinks=True) -> StatResult:
         '''
@@ -249,9 +263,14 @@ class HttpPath(URIPath):
         :raises: HttpPermissionError, HttpFileNotFoundError
         '''
 
+        request_kwargs = deepcopy(self.request_kwargs)
+        timeout = request_kwargs.pop('timeout', DEFAULT_TIMEOUT)
+        stream = request_kwargs.pop('stream', True)
+
         try:
-            with get_http_session(status_forcelist=()).get(
-                    self.path_with_protocol, stream=
+            with get_http_session(timeout=timeout, status_forcelist=()).get(
+                    self.path_with_protocol, stream=stream,
+                    **request_kwargs) as response:
                 response.raise_for_status()
                 headers = response.headers
         except Exception as error:
@@ -302,9 +321,14 @@ class HttpPath(URIPath):
         :return: return True if exists
         :rtype: bool
         """
+        request_kwargs = deepcopy(self.request_kwargs)
+        timeout = request_kwargs.pop('timeout', DEFAULT_TIMEOUT)
+        stream = request_kwargs.pop('stream', True)
+
         try:
-            with get_http_session(status_forcelist=()).get(
-                    self.path_with_protocol, stream=
+            with get_http_session(timeout=timeout, status_forcelist=()).get(
+                    self.path_with_protocol, stream=stream,
+                    **request_kwargs) as response:
                 if response.status_code == 404:
                     return False
                 return True
@@ -316,3 +340,108 @@ class HttpPath(URIPath):
 class HttpsPath(HttpPath):
 
     protocol = "https"
+
+
+class Response(Readable):
+
+    def __init__(self, raw: HTTPResponse) -> None:
+        super().__init__()
+
+        raw.auto_close = False
+        self._block_size = 128 * 2**10  # 128KB
+        self._raw = raw
+        self._offset = 0
+        self._buffer = BytesIO()
+        self._lock = Lock()
+
+    @property
+    def name(self):
+        return self._raw.name
+
+    @property
+    def mode(self):
+        return 'rb'
+
+    def tell(self) -> int:
+        return self._offset
+
+    def _clear_buffer(self) -> None:
+        self._buffer.seek(0)
+        self._buffer.truncate()
+
+    def read(self, size: Optional[int] = None) -> bytes:
+        if size == 0:
+            return b''
+        if size is not None and size < 0:
+            size = None
+
+        with self._lock:
+            while not size or self._buffer.tell() < size:
+                data = self._raw.read(self._block_size, decode_content=True)
+                if not data:
+                    break
+                self._buffer.write(data)
+            self._buffer.seek(0)
+            content = self._buffer.read(size)
+            residue = self._buffer.read()
+            self._clear_buffer()
+            if residue:
+                self._buffer.write(residue)
+            self._offset += len(content)
+            return content
+
+    def readline(self, size: Optional[int] = None) -> bytes:
+        if size == 0:
+            return b''
+        if size is not None and size < 0:
+            size = None
+
+        with self._lock:
+            self._buffer.seek(0)
+            buffer = self._buffer.read()
+            self._clear_buffer()
+            if b'\n' in buffer:
+                content = buffer[:buffer.index(b'\n') + 1]
+                if size:
+                    content = content[:size]
+                self._buffer.write(buffer[len(content):])
+            elif size and len(buffer) >= size:
+                content = buffer[:size]
+                self._buffer.write(buffer[size:])
+            else:
+                content = None
+                self._buffer.write(buffer)
+                while True:
+                    if size and self._buffer.tell() >= size:
+                        break
+                    data = self._raw.read(self._block_size, decode_content=True)
+                    if not data:
+                        break
+                    elif b"\n" in data:
+                        last_content, residue = data.split(b"\n", 1)
+                        self._buffer.write(last_content)
+                        self._buffer.write(b"\n")
+                        self._buffer.seek(0)
+                        content = self._buffer.read()
+                        self._clear_buffer()
+                        if size and len(content) > size:
+                            self._buffer.write(content[size:])
+                            content = content[:size]
+                        if residue:
+                            self._buffer.write(residue)
+                        break
+                    else:
+                        self._buffer.write(data)
+
+                if content is None:
+                    self._buffer.seek(0)
+                    content = self._buffer.read(size)
+                    residue = self._buffer.read()
+                    self._clear_buffer()
+                    if residue:
+                        self._buffer.write(residue)
+            self._offset += len(content)
+            return content
+
+    def _close(self) -> None:
+        return self._raw.close()
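Note on the HttpPath changes above: requests now flow through a per-path `request_kwargs` dict ('timeout' and 'stream' are popped out, everything else is forwarded to `requests.get`), and responses that cannot use the prefetch reader are wrapped in the new thread-safe `Response` buffer. A minimal usage sketch; the URL and header values are placeholders, and assigning `request_kwargs` directly after construction is an assumption based on the `__init__` shown above (no setter appears in this diff):

from megfile.http_path import HttpPath

# Placeholder URL. 'timeout' and 'stream' are popped by open()/stat()/exists();
# any remaining keys (headers, params, verify, ...) go to requests.get unchanged.
path = HttpPath('https://example.com/big-file.bin')
path.request_kwargs = {
    'timeout': (10, 300),  # overrides DEFAULT_TIMEOUT for this path only
    'headers': {'Authorization': 'Bearer <token>'},
}

# Servers that advertise Accept-Ranges (and send no Content-Encoding) get the
# HttpPrefetchReader; everything else is read through the buffered Response wrapper.
with path.open('rb') as f:
    head = f.read(1024)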
megfile/lib/base_prefetch_reader.py
CHANGED
@@ -8,7 +8,7 @@ from math import ceil
 from statistics import mean
 from typing import Optional
 
-from megfile.config import BACKOFF_FACTOR, BACKOFF_INITIAL, DEFAULT_BLOCK_CAPACITY, DEFAULT_BLOCK_SIZE, GLOBAL_MAX_WORKERS, NEWLINE
+from megfile.config import BACKOFF_FACTOR, BACKOFF_INITIAL, DEFAULT_BLOCK_CAPACITY, DEFAULT_BLOCK_SIZE, DEFAULT_MAX_RETRY_TIMES, GLOBAL_MAX_WORKERS, NEWLINE
 from megfile.interfaces import Readable, Seekable
 from megfile.utils import get_human_size, process_local
 
@@ -39,7 +39,7 @@ class BasePrefetchReader(Readable, Seekable, ABC):
             block_size: int = DEFAULT_BLOCK_SIZE,
             block_capacity: int = DEFAULT_BLOCK_CAPACITY,
             block_forward: Optional[int] = None,
-            max_retries: int =
+            max_retries: int = DEFAULT_MAX_RETRY_TIMES,
             max_workers: Optional[int] = None,
             **kwargs):
 
megfile/lib/hdfs_prefetch_reader.py
CHANGED
@@ -1,7 +1,7 @@
 from io import BytesIO
 from typing import Optional
 
-from megfile.config import DEFAULT_BLOCK_CAPACITY, DEFAULT_BLOCK_SIZE
+from megfile.config import DEFAULT_BLOCK_CAPACITY, DEFAULT_BLOCK_SIZE, HDFS_MAX_RETRY_TIMES
 from megfile.errors import raise_hdfs_error
 from megfile.lib.base_prefetch_reader import BasePrefetchReader
 
@@ -20,7 +20,7 @@ class HdfsPrefetchReader(BasePrefetchReader):
             block_size: int = DEFAULT_BLOCK_SIZE,
             block_capacity: int = DEFAULT_BLOCK_CAPACITY,
             block_forward: Optional[int] = None,
-            max_retries: int =
+            max_retries: int = HDFS_MAX_RETRY_TIMES,
             max_workers: Optional[int] = None,
             profile_name: Optional[str] = None):
         self._path = hdfs_path
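The prefetch-reader diffs in this release all make the same change: `max_retries` defaults that were previously hardcoded now come from `megfile/config.py` (`DEFAULT_MAX_RETRY_TIMES` in the base reader, plus per-protocol `S3_MAX_RETRY_TIMES`, `HTTP_MAX_RETRY_TIMES` and `HDFS_MAX_RETRY_TIMES`). config.py itself (+9 -0) is not shown in this section, so the following is only a guess at the shape of the added lines: the constant names are confirmed by the imports, but the defaults and the MEGFILE_* environment variable names are assumptions.

import os


def _env_int(name: str, default: int) -> int:
    # hypothetical helper: integer override from the environment
    return int(os.environ.get(name, default))


DEFAULT_MAX_RETRY_TIMES = _env_int('MEGFILE_MAX_RETRY_TIMES', 10)
S3_MAX_RETRY_TIMES = _env_int('MEGFILE_S3_MAX_RETRY_TIMES', DEFAULT_MAX_RETRY_TIMES)
HTTP_MAX_RETRY_TIMES = _env_int('MEGFILE_HTTP_MAX_RETRY_TIMES', DEFAULT_MAX_RETRY_TIMES)
HDFS_MAX_RETRY_TIMES = _env_int('MEGFILE_HDFS_MAX_RETRY_TIMES', DEFAULT_MAX_RETRY_TIMES)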
megfile/lib/http_prefetch_reader.py
CHANGED
@@ -1,12 +1,15 @@
-import os
 from io import BytesIO
 from typing import Optional
 
 import requests
 
-from megfile.config import DEFAULT_BLOCK_CAPACITY, DEFAULT_BLOCK_SIZE
-from megfile.errors import UnsupportedError, http_should_retry, patch_method
+from megfile.config import DEFAULT_BLOCK_CAPACITY, DEFAULT_BLOCK_SIZE, HTTP_MAX_RETRY_TIMES
+from megfile.errors import HttpBodyIncompleteError, UnsupportedError, http_should_retry, patch_method
 from megfile.lib.base_prefetch_reader import BasePrefetchReader
+from megfile.lib.compat import fspath
+from megfile.pathlike import PathLike
+
+DEFAULT_TIMEOUT = (60, 60 * 60 * 24)
 
 
 class HttpPrefetchReader(BasePrefetchReader):
@@ -19,13 +22,13 @@ class HttpPrefetchReader(BasePrefetchReader):
 
     def __init__(
             self,
-            url:
+            url: PathLike,
             *,
             content_size: Optional[int] = None,
             block_size: int = DEFAULT_BLOCK_SIZE,
             block_capacity: int = DEFAULT_BLOCK_CAPACITY,
             block_forward: Optional[int] = None,
-            max_retries: int =
+            max_retries: int = HTTP_MAX_RETRY_TIMES,
             max_workers: Optional[int] = None):
 
         self._url = url
@@ -46,22 +49,28 @@ class HttpPrefetchReader(BasePrefetchReader):
         if first_index_response['Headers'].get('Accept-Ranges') != 'bytes':
             raise UnsupportedError(
                 f'Unsupported server, server must support Accept-Ranges: {self._url}',
-                path=self._url,
+                path=fspath(self._url),
             )
         return first_index_response['Headers']['Content-Length']
 
     @property
     def name(self) -> str:
-        return self._url
+        return fspath(self._url)
 
     def _fetch_response(
             self, start: Optional[int] = None,
             end: Optional[int] = None) -> dict:
 
         def fetch_response() -> dict:
+            request_kwargs = {}
+            if hasattr(self._url, 'request_kwargs'):
+                request_kwargs = self._url.request_kwargs
+            timeout = request_kwargs.pop('timeout', DEFAULT_TIMEOUT)
+            stream = request_kwargs.pop('stream', True)
+
             if start is None or end is None:
-                with requests.get(self._url, timeout=
-                    stream=
+                with requests.get(fspath(self._url), timeout=timeout,
+                                  stream=stream, **request_kwargs) as response:
                     return {
                         'Headers': response.headers,
                         'Cookies': response.cookies,
@@ -71,9 +80,16 @@ class HttpPrefetchReader(BasePrefetchReader):
             range_end = end
             if self._content_size is not None:
                 range_end = min(range_end, self._content_size - 1)
-            headers =
-
-
+            headers = request_kwargs.pop('headers', {})
+            headers["Range"] = f"bytes={start}-{range_end}"
+            with requests.get(fspath(self._url), timeout=timeout,
+                              headers=headers, stream=stream,
+                              **request_kwargs) as response:
+                if len(response.content) != int(
+                        response.headers['Content-Length']):
+                    raise HttpBodyIncompleteError(
+                        f"The downloaded content is incomplete, expected size: {response.headers['Content-Length']}, actual size: {len(response.content)}",
+                    )
                 return {
                     'Body': BytesIO(response.content),
                     'Headers': response.headers,
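`HttpPrefetchReader._fetch_response` now checks every ranged block against the server's Content-Length and raises the new `HttpBodyIncompleteError` on a short read, letting the base reader's retry loop re-fetch truncated blocks instead of silently returning bad data. A standalone sketch of the same check; the URL and block size are placeholders, and a plain RuntimeError stands in for megfile's exception:

import requests

url = 'https://example.com/big-file.bin'  # placeholder
start, end = 0, 8 * 2**20 - 1  # first 8 MiB block

# Ranged GET, then verify the body really is as long as the server claimed.
with requests.get(url, headers={'Range': f'bytes={start}-{end}'},
                  timeout=(60, 60 * 60 * 24), stream=True) as response:
    body = response.content
    expected = int(response.headers['Content-Length'])
    if len(body) != expected:
        raise RuntimeError(
            f'incomplete body: expected {expected} bytes, got {len(body)}')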
megfile/lib/s3_prefetch_reader.py
CHANGED
@@ -3,7 +3,7 @@ from concurrent.futures import Future
 from io import BytesIO
 from typing import Optional
 
-from megfile.config import BACKOFF_FACTOR, BACKOFF_INITIAL, DEFAULT_BLOCK_CAPACITY, DEFAULT_BLOCK_SIZE, GLOBAL_MAX_WORKERS, NEWLINE
+from megfile.config import BACKOFF_FACTOR, BACKOFF_INITIAL, DEFAULT_BLOCK_CAPACITY, DEFAULT_BLOCK_SIZE, GLOBAL_MAX_WORKERS, NEWLINE, S3_MAX_RETRY_TIMES
 from megfile.errors import S3FileChangedError, S3InvalidRangeError, patch_method, raise_s3_error, s3_should_retry
 from megfile.lib.base_prefetch_reader import BasePrefetchReader, LRUCacheFutureManager
 
@@ -34,7 +34,7 @@ class S3PrefetchReader(BasePrefetchReader):
             block_size: int = DEFAULT_BLOCK_SIZE,
             block_capacity: int = DEFAULT_BLOCK_CAPACITY,
             block_forward: Optional[int] = None,
-            max_retries: int =
+            max_retries: int = S3_MAX_RETRY_TIMES,
             max_workers: Optional[int] = None,
             profile_name: Optional[str] = None):
 
megfile/lib/s3_share_cache_reader.py
CHANGED
@@ -3,7 +3,7 @@ from concurrent.futures import Future
 from logging import getLogger as get_logger
 from typing import Optional
 
-from megfile.config import DEFAULT_BLOCK_CAPACITY, DEFAULT_BLOCK_SIZE
+from megfile.config import DEFAULT_BLOCK_CAPACITY, DEFAULT_BLOCK_SIZE, S3_MAX_RETRY_TIMES
 from megfile.lib.s3_prefetch_reader import LRUCacheFutureManager, S3PrefetchReader
 from megfile.utils import thread_local
 
@@ -25,7 +25,7 @@ class S3ShareCacheReader(S3PrefetchReader):
             block_size: int = DEFAULT_BLOCK_SIZE,
             block_capacity: int = DEFAULT_BLOCK_CAPACITY,
             block_forward: Optional[int] = None,
-            max_retries: int =
+            max_retries: int = S3_MAX_RETRY_TIMES,
             cache_key: str = 'lru',
             max_workers: Optional[int] = None,
             profile_name: Optional[str] = None):
megfile/pathlike.py
CHANGED
@@ -730,16 +730,23 @@ class URIPath(BaseURIPath):
         with self.open(mode='r') as f:
             return f.read()
 
-    def rename(self, dst_path: PathLike) -> 'URIPath':
+    def rename(self, dst_path: PathLike, overwrite: bool = True) -> 'URIPath':
+        '''
+        rename file
+
+        :param dst_path: Given destination path
+        :param overwrite: whether or not overwrite file when exists
+        '''
         raise NotImplementedError(f"'rename' is unsupported on '{type(self)}'")
 
-    def replace(self, dst_path: PathLike) -> 'URIPath':
+    def replace(self, dst_path: PathLike, overwrite: bool = True) -> 'URIPath':
         '''
         move file
 
         :param dst_path: Given destination path
+        :param overwrite: whether or not overwrite file when exists
         '''
-        return self.rename(dst_path=dst_path)
+        return self.rename(dst_path=dst_path, overwrite=overwrite)
 
     def rglob(self, pattern) -> List['URIPath']:
         '''
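`rename` and `replace` gain an `overwrite` flag, with `replace` forwarding it to `rename`; the default `True` preserves the old clobbering behavior. A usage sketch; the paths are placeholders, and treating `overwrite=False` as a guard against clobbering an existing destination is an assumption, since the base class above only defines the signature:

from megfile import SmartPath

SmartPath('s3://bucket/tmp/a.txt').rename('s3://bucket/data/a.txt')  # old behavior
SmartPath('s3://bucket/tmp/b.txt').replace(
    's3://bucket/data/b.txt', overwrite=False)  # presumably keeps an existing b.txt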
megfile/s3.py
CHANGED
@@ -169,14 +169,16 @@ def s3_hasbucket(path: PathLike) -> bool:
     return S3Path(path).hasbucket()
 
 
-def s3_move(
+def s3_move(
+        src_url: PathLike, dst_url: PathLike, overwrite: bool = True) -> None:
     '''
     Move file/directory path from src_url to dst_url
 
     :param src_url: Given path
     :param dst_url: Given destination path
+    :param overwrite: whether or not overwrite file when exists
     '''
-    return S3Path(src_url).move(dst_url)
+    return S3Path(src_url).move(dst_url, overwrite)
 
 
 def s3_remove(path: PathLike, missing_ok: bool = False) -> None:
@@ -305,8 +307,9 @@ def s3_getmd5(
 def s3_copy(
         src_url: PathLike,
         dst_url: PathLike,
+        callback: Optional[Callable[[int], None]] = None,
         followlinks: bool = False,
-
+        overwrite: bool = True) -> None:
     ''' File copy on S3
     Copy content of file on `src_path` to `dst_path`.
     It's caller's responsebility to ensure the s3_isfile(src_url) == True
@@ -314,24 +317,28 @@ def s3_copy(
     :param src_url: Given path
     :param dst_path: Target file path
     :param callback: Called periodically during copy, and the input parameter is the data size (in bytes) of copy since the last call
+    :param followlinks: False if regard symlink as file, else True
+    :param overwrite: whether or not overwrite file when exists, default is True
     '''
-    return S3Path(src_url).copy(dst_url, followlinks,
+    return S3Path(src_url).copy(dst_url, callback, followlinks, overwrite)
 
 
 def s3_sync(
         src_url: PathLike,
         dst_url: PathLike,
         followlinks: bool = False,
-        force: bool = False
+        force: bool = False,
+        overwrite: bool = True) -> None:
     '''
     Copy file/directory on src_url to dst_url
 
     :param src_url: Given path
     :param dst_url: Given destination path
     :param followlinks: False if regard symlink as file, else True
-    :param force: Sync file forcely, do not ignore same files
+    :param force: Sync file forcely, do not ignore same files, priority is higher than 'overwrite', default is False
+    :param overwrite: whether or not overwrite file when exists, default is True
     '''
-    return S3Path(src_url).sync(dst_url, followlinks, force)
+    return S3Path(src_url).sync(dst_url, followlinks, force, overwrite)
 
 
 def s3_symlink(src_path: PathLike, dst_path: PathLike) -> None: