megfile 3.1.1__py3-none-any.whl → 3.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- docs/conf.py +2 -4
- megfile/__init__.py +394 -203
- megfile/cli.py +258 -238
- megfile/config.py +25 -21
- megfile/errors.py +126 -114
- megfile/fs.py +174 -140
- megfile/fs_path.py +462 -354
- megfile/hdfs.py +133 -101
- megfile/hdfs_path.py +290 -236
- megfile/http.py +15 -14
- megfile/http_path.py +111 -107
- megfile/interfaces.py +70 -65
- megfile/lib/base_prefetch_reader.py +84 -65
- megfile/lib/combine_reader.py +12 -12
- megfile/lib/compare.py +17 -13
- megfile/lib/compat.py +1 -5
- megfile/lib/fnmatch.py +29 -30
- megfile/lib/glob.py +46 -54
- megfile/lib/hdfs_prefetch_reader.py +40 -25
- megfile/lib/hdfs_tools.py +1 -3
- megfile/lib/http_prefetch_reader.py +69 -46
- megfile/lib/joinpath.py +5 -5
- megfile/lib/lazy_handler.py +7 -3
- megfile/lib/s3_buffered_writer.py +58 -51
- megfile/lib/s3_cached_handler.py +13 -14
- megfile/lib/s3_limited_seekable_writer.py +37 -28
- megfile/lib/s3_memory_handler.py +34 -30
- megfile/lib/s3_pipe_handler.py +24 -25
- megfile/lib/s3_prefetch_reader.py +71 -52
- megfile/lib/s3_share_cache_reader.py +37 -24
- megfile/lib/shadow_handler.py +7 -3
- megfile/lib/stdio_handler.py +9 -8
- megfile/lib/url.py +3 -3
- megfile/pathlike.py +259 -228
- megfile/s3.py +220 -153
- megfile/s3_path.py +977 -802
- megfile/sftp.py +190 -156
- megfile/sftp_path.py +540 -450
- megfile/smart.py +397 -330
- megfile/smart_path.py +100 -105
- megfile/stdio.py +10 -9
- megfile/stdio_path.py +32 -35
- megfile/utils/__init__.py +73 -54
- megfile/utils/mutex.py +11 -14
- megfile/version.py +1 -1
- {megfile-3.1.1.dist-info → megfile-3.1.3.dist-info}/METADATA +5 -8
- megfile-3.1.3.dist-info/RECORD +55 -0
- {megfile-3.1.1.dist-info → megfile-3.1.3.dist-info}/WHEEL +1 -1
- scripts/convert_results_to_sarif.py +45 -78
- scripts/generate_file.py +140 -64
- megfile-3.1.1.dist-info/RECORD +0 -55
- {megfile-3.1.1.dist-info → megfile-3.1.3.dist-info}/LICENSE +0 -0
- {megfile-3.1.1.dist-info → megfile-3.1.3.dist-info}/LICENSE.pyre +0 -0
- {megfile-3.1.1.dist-info → megfile-3.1.3.dist-info}/entry_points.txt +0 -0
- {megfile-3.1.1.dist-info → megfile-3.1.3.dist-info}/top_level.txt +0 -0
megfile/config.py
CHANGED
|
@@ -3,32 +3,33 @@ from logging import getLogger
|
|
|
3
3
|
|
|
4
4
|
_logger = getLogger(__name__)
|
|
5
5
|
|
|
6
|
-
DEFAULT_BLOCK_SIZE = int(os.getenv(
|
|
6
|
+
DEFAULT_BLOCK_SIZE = int(os.getenv("MEGFILE_BLOCK_SIZE") or 8 * 2**20)
|
|
7
7
|
|
|
8
|
-
if os.getenv(
|
|
9
|
-
DEFAULT_MAX_BUFFER_SIZE = int(os.environ[
|
|
8
|
+
if os.getenv("MEGFILE_MAX_BUFFER_SIZE"):
|
|
9
|
+
DEFAULT_MAX_BUFFER_SIZE = int(os.environ["MEGFILE_MAX_BUFFER_SIZE"])
|
|
10
10
|
if DEFAULT_MAX_BUFFER_SIZE < DEFAULT_BLOCK_SIZE:
|
|
11
11
|
DEFAULT_MAX_BUFFER_SIZE = DEFAULT_BLOCK_SIZE
|
|
12
12
|
_logger.warning(
|
|
13
|
-
"Env 'MEGFILE_MAX_BUFFER_SIZE' is smaller than block size,
|
|
13
|
+
"Env 'MEGFILE_MAX_BUFFER_SIZE' is smaller than block size, "
|
|
14
|
+
"will not use buffer."
|
|
14
15
|
)
|
|
15
16
|
DEFAULT_BLOCK_CAPACITY = DEFAULT_MAX_BUFFER_SIZE // DEFAULT_BLOCK_SIZE
|
|
16
|
-
if os.getenv(
|
|
17
|
+
if os.getenv("MEGFILE_BLOCK_CAPACITY"):
|
|
17
18
|
_logger.warning(
|
|
18
|
-
"Env 'MEGFILE_MAX_BUFFER_SIZE' and 'MEGFILE_BLOCK_CAPACITY' are both set,
|
|
19
|
+
"Env 'MEGFILE_MAX_BUFFER_SIZE' and 'MEGFILE_BLOCK_CAPACITY' are both set, "
|
|
20
|
+
"'MEGFILE_BLOCK_CAPACITY' will be ignored."
|
|
19
21
|
)
|
|
20
|
-
elif os.getenv(
|
|
21
|
-
DEFAULT_BLOCK_CAPACITY = int(os.environ[
|
|
22
|
+
elif os.getenv("MEGFILE_BLOCK_CAPACITY"):
|
|
23
|
+
DEFAULT_BLOCK_CAPACITY = int(os.environ["MEGFILE_BLOCK_CAPACITY"])
|
|
22
24
|
DEFAULT_MAX_BUFFER_SIZE = DEFAULT_BLOCK_SIZE * DEFAULT_BLOCK_CAPACITY
|
|
23
25
|
else:
|
|
24
26
|
DEFAULT_MAX_BUFFER_SIZE = 128 * 2**20
|
|
25
27
|
DEFAULT_BLOCK_CAPACITY = 16
|
|
26
28
|
|
|
27
|
-
DEFAULT_MIN_BLOCK_SIZE = int(
|
|
28
|
-
os.getenv('MEGFILE_MIN_BLOCK_SIZE') or DEFAULT_BLOCK_SIZE)
|
|
29
|
+
DEFAULT_MIN_BLOCK_SIZE = int(os.getenv("MEGFILE_MIN_BLOCK_SIZE") or DEFAULT_BLOCK_SIZE)
|
|
29
30
|
|
|
30
|
-
if os.getenv(
|
|
31
|
-
DEFAULT_MAX_BLOCK_SIZE = int(os.environ[
|
|
31
|
+
if os.getenv("MEGFILE_MAX_BLOCK_SIZE"):
|
|
32
|
+
DEFAULT_MAX_BLOCK_SIZE = int(os.environ["MEGFILE_MAX_BLOCK_SIZE"])
|
|
32
33
|
if DEFAULT_MAX_BLOCK_SIZE < DEFAULT_BLOCK_SIZE:
|
|
33
34
|
DEFAULT_MAX_BLOCK_SIZE = DEFAULT_BLOCK_SIZE
|
|
34
35
|
_logger.warning(
|
|
@@ -37,22 +38,25 @@ if os.getenv('MEGFILE_MAX_BLOCK_SIZE'):
|
|
|
37
38
|
else:
|
|
38
39
|
DEFAULT_MAX_BLOCK_SIZE = max(128 * 2**20, DEFAULT_BLOCK_SIZE)
|
|
39
40
|
|
|
40
|
-
GLOBAL_MAX_WORKERS = int(os.getenv(
|
|
41
|
-
DEFAULT_MAX_RETRY_TIMES = int(os.getenv(
|
|
41
|
+
GLOBAL_MAX_WORKERS = int(os.getenv("MEGFILE_MAX_WORKERS") or 32)
|
|
42
|
+
DEFAULT_MAX_RETRY_TIMES = int(os.getenv("MEGFILE_MAX_RETRY_TIMES") or 10)
|
|
42
43
|
|
|
43
44
|
# for logging the size of file had read or wrote
|
|
44
45
|
BACKOFF_INITIAL = 64 * 2**20 # 64MB
|
|
45
46
|
BACKOFF_FACTOR = 4
|
|
46
47
|
|
|
47
|
-
NEWLINE = ord(
|
|
48
|
+
NEWLINE = ord("\n")
|
|
48
49
|
|
|
49
|
-
S3_CLIENT_CACHE_MODE = os.getenv(
|
|
50
|
-
'MEGFILE_S3_CLIENT_CACHE_MODE') or 'thread_local'
|
|
50
|
+
S3_CLIENT_CACHE_MODE = os.getenv("MEGFILE_S3_CLIENT_CACHE_MODE") or "thread_local"
|
|
51
51
|
S3_MAX_RETRY_TIMES = int(
|
|
52
|
-
os.getenv(
|
|
52
|
+
os.getenv("MEGFILE_S3_MAX_RETRY_TIMES") or DEFAULT_MAX_RETRY_TIMES
|
|
53
|
+
)
|
|
53
54
|
HTTP_MAX_RETRY_TIMES = int(
|
|
54
|
-
os.getenv(
|
|
55
|
+
os.getenv("MEGFILE_HTTP_MAX_RETRY_TIMES") or DEFAULT_MAX_RETRY_TIMES
|
|
56
|
+
)
|
|
55
57
|
HDFS_MAX_RETRY_TIMES = int(
|
|
56
|
-
os.getenv(
|
|
58
|
+
os.getenv("MEGFILE_HDFS_MAX_RETRY_TIMES") or DEFAULT_MAX_RETRY_TIMES
|
|
59
|
+
)
|
|
57
60
|
SFTP_MAX_RETRY_TIMES = int(
|
|
58
|
-
os.getenv(
|
|
61
|
+
os.getenv("MEGFILE_SFTP_MAX_RETRY_TIMES") or DEFAULT_MAX_RETRY_TIMES
|
|
62
|
+
)
|
megfile/errors.py
CHANGED
|
@@ -15,31 +15,31 @@ from requests.exceptions import HTTPError
|
|
|
15
15
|
from megfile.interfaces import PathLike
|
|
16
16
|
|
|
17
17
|
__all__ = [
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
18
|
+
"S3FileNotFoundError",
|
|
19
|
+
"S3BucketNotFoundError",
|
|
20
|
+
"S3FileExistsError",
|
|
21
|
+
"S3NotADirectoryError",
|
|
22
|
+
"S3IsADirectoryError",
|
|
23
|
+
"S3PermissionError",
|
|
24
|
+
"S3ConfigError",
|
|
25
|
+
"UnknownError",
|
|
26
|
+
"UnsupportedError",
|
|
27
|
+
"HttpPermissionError",
|
|
28
|
+
"HttpFileNotFoundError",
|
|
29
|
+
"HttpBodyIncompleteError",
|
|
30
|
+
"HttpUnknownError",
|
|
31
|
+
"HttpException",
|
|
32
|
+
"ProtocolExistsError",
|
|
33
|
+
"ProtocolNotFoundError",
|
|
34
|
+
"S3UnknownError",
|
|
35
|
+
"SameFileError",
|
|
36
|
+
"translate_http_error",
|
|
37
|
+
"translate_s3_error",
|
|
38
|
+
"patch_method",
|
|
39
|
+
"raise_s3_error",
|
|
40
|
+
"s3_should_retry",
|
|
41
|
+
"translate_fs_error",
|
|
42
|
+
"http_should_retry",
|
|
43
43
|
]
|
|
44
44
|
|
|
45
45
|
_logger = getLogger(__name__)
|
|
@@ -54,8 +54,7 @@ def s3_endpoint_url(path: Optional[PathLike] = None):
|
|
|
54
54
|
profile_name = S3Path(path)._profile_name
|
|
55
55
|
endpoint_url = get_endpoint_url(profile_name=profile_name)
|
|
56
56
|
if endpoint_url is None:
|
|
57
|
-
endpoint_url = get_s3_client(
|
|
58
|
-
profile_name=profile_name).meta.endpoint_url
|
|
57
|
+
endpoint_url = get_s3_client(profile_name=profile_name).meta.endpoint_url
|
|
59
58
|
return endpoint_url
|
|
60
59
|
|
|
61
60
|
|
|
@@ -71,24 +70,24 @@ def full_class_name(obj):
|
|
|
71
70
|
if module is None or module == str.__class__.__module__:
|
|
72
71
|
return obj.__class__.__name__ # Avoid reporting __builtin__
|
|
73
72
|
else:
|
|
74
|
-
return module +
|
|
73
|
+
return module + "." + obj.__class__.__name__
|
|
75
74
|
|
|
76
75
|
|
|
77
76
|
def full_error_message(error):
|
|
78
|
-
return
|
|
77
|
+
return "%s(%r)" % (full_class_name(error), str(error))
|
|
79
78
|
|
|
80
79
|
|
|
81
80
|
def client_error_code(error: ClientError) -> str:
|
|
82
|
-
error_data = error.response.get(
|
|
83
|
-
return error_data.get(
|
|
81
|
+
error_data = error.response.get("Error", {})
|
|
82
|
+
return error_data.get("Code") or error_data.get("code", "Unknown")
|
|
84
83
|
|
|
85
84
|
|
|
86
85
|
def client_error_message(error: ClientError) -> str:
|
|
87
|
-
return error.response.get(
|
|
86
|
+
return error.response.get("Error", {}).get("Message", "Unknown")
|
|
88
87
|
|
|
89
88
|
|
|
90
89
|
def param_validation_error_report(error: ParamValidationError) -> str:
|
|
91
|
-
return error.kwargs.get(
|
|
90
|
+
return error.kwargs.get("report", "Unknown")
|
|
92
91
|
|
|
93
92
|
|
|
94
93
|
s3_retry_exceptions = [
|
|
@@ -105,10 +104,10 @@ s3_retry_exceptions = [
|
|
|
105
104
|
urllib3.exceptions.ReadTimeoutError,
|
|
106
105
|
urllib3.exceptions.HeaderParsingError,
|
|
107
106
|
]
|
|
108
|
-
if hasattr(botocore.exceptions,
|
|
109
|
-
'ResponseStreamingError'): # backport botocore==1.23.24
|
|
107
|
+
if hasattr(botocore.exceptions, "ResponseStreamingError"): # backport botocore==1.23.24
|
|
110
108
|
s3_retry_exceptions.append(
|
|
111
|
-
botocore.exceptions.ResponseStreamingError
|
|
109
|
+
botocore.exceptions.ResponseStreamingError # pyre-ignore[6]
|
|
110
|
+
)
|
|
112
111
|
s3_retry_exceptions = tuple(s3_retry_exceptions) # pyre-ignore[9]
|
|
113
112
|
|
|
114
113
|
|
|
@@ -117,26 +116,34 @@ def s3_should_retry(error: Exception) -> bool:
|
|
|
117
116
|
return True
|
|
118
117
|
if isinstance(error, botocore.exceptions.ClientError):
|
|
119
118
|
return client_error_code(error) in (
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
119
|
+
"429", # noqa: E501 # TOS ExceedAccountQPSLimit
|
|
120
|
+
"499", # noqa: E501 # Some cloud providers may send response with http code 499 if the connection not send data in 1 min.
|
|
121
|
+
"500",
|
|
122
|
+
"501",
|
|
123
|
+
"502",
|
|
124
|
+
"503",
|
|
125
|
+
"InternalError",
|
|
126
|
+
"ServiceUnavailable",
|
|
127
|
+
"SlowDown",
|
|
128
|
+
"ContextCanceled",
|
|
129
|
+
"Timeout", # noqa: E501 # TOS Timeout
|
|
130
|
+
"RequestTimeout",
|
|
131
|
+
"ExceedAccountQPSLimit",
|
|
132
|
+
"ExceedAccountRateLimit",
|
|
133
|
+
"ExceedBucketQPSLimit",
|
|
134
|
+
"ExceedBucketRateLimit",
|
|
135
|
+
)
|
|
129
136
|
return False
|
|
130
137
|
|
|
131
138
|
|
|
132
139
|
def patch_method(
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
+
func: Callable,
|
|
141
|
+
max_retries: int,
|
|
142
|
+
should_retry: Callable[[Exception], bool],
|
|
143
|
+
before_callback: Optional[Callable] = None,
|
|
144
|
+
after_callback: Optional[Callable] = None,
|
|
145
|
+
retry_callback: Optional[Callable] = None,
|
|
146
|
+
):
|
|
140
147
|
@wraps(func)
|
|
141
148
|
def wrapper(*args, **kwargs):
|
|
142
149
|
if before_callback is not None:
|
|
@@ -148,8 +155,7 @@ def patch_method(
|
|
|
148
155
|
if after_callback is not None:
|
|
149
156
|
result = after_callback(result, *args, **kwargs)
|
|
150
157
|
if retries > 1:
|
|
151
|
-
_logger.info(
|
|
152
|
-
f'Error already fixed by retry {retries - 1} times')
|
|
158
|
+
_logger.info(f"Error already fixed by retry {retries - 1} times")
|
|
153
159
|
return result
|
|
154
160
|
except Exception as error:
|
|
155
161
|
if not should_retry(error):
|
|
@@ -160,8 +166,10 @@ def patch_method(
|
|
|
160
166
|
raise
|
|
161
167
|
retry_interval = min(0.1 * 2**retries, 30)
|
|
162
168
|
_logger.info(
|
|
163
|
-
|
|
164
|
-
%
|
|
169
|
+
"unknown error encountered: %s, retry in %0.1f seconds "
|
|
170
|
+
"after %d tries"
|
|
171
|
+
% (full_error_message(error), retry_interval, retries)
|
|
172
|
+
)
|
|
165
173
|
time.sleep(retry_interval)
|
|
166
174
|
|
|
167
175
|
return wrapper
|
|
@@ -182,16 +190,13 @@ def _create_missing_ok_generator(generator, missing_ok: bool, error: Exception):
|
|
|
182
190
|
|
|
183
191
|
|
|
184
192
|
class UnknownError(Exception):
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
error
|
|
189
|
-
|
|
190
|
-
extra: Optional[str] = None):
|
|
191
|
-
message = 'Unknown error encountered: %r, error: %s' % (
|
|
192
|
-
path, full_error_message(error))
|
|
193
|
+
def __init__(self, error: Exception, path: PathLike, extra: Optional[str] = None):
|
|
194
|
+
message = "Unknown error encountered: %r, error: %s" % (
|
|
195
|
+
path,
|
|
196
|
+
full_error_message(error),
|
|
197
|
+
)
|
|
193
198
|
if extra is not None:
|
|
194
|
-
message +=
|
|
199
|
+
message += ", " + extra
|
|
195
200
|
super().__init__(message)
|
|
196
201
|
self.path = path
|
|
197
202
|
self.extra = extra
|
|
@@ -202,10 +207,8 @@ class UnknownError(Exception):
|
|
|
202
207
|
|
|
203
208
|
|
|
204
209
|
class UnsupportedError(Exception):
|
|
205
|
-
|
|
206
210
|
def __init__(self, operation: str, path: PathLike):
|
|
207
|
-
super().__init__(
|
|
208
|
-
'Unsupported operation: %r, operation: %r' % (path, operation))
|
|
211
|
+
super().__init__("Unsupported operation: %r, operation: %r" % (path, operation))
|
|
209
212
|
self.path = path
|
|
210
213
|
self.operation = operation
|
|
211
214
|
|
|
@@ -214,10 +217,10 @@ class UnsupportedError(Exception):
|
|
|
214
217
|
|
|
215
218
|
|
|
216
219
|
class S3Exception(Exception):
|
|
217
|
-
|
|
220
|
+
"""
|
|
218
221
|
Base type for all s3 errors, should NOT be constructed directly.
|
|
219
222
|
When you try to do so, consider adding a new type of error.
|
|
220
|
-
|
|
223
|
+
"""
|
|
221
224
|
|
|
222
225
|
|
|
223
226
|
class S3FileNotFoundError(S3Exception, FileNotFoundError):
|
|
@@ -249,8 +252,10 @@ class S3PermissionError(S3Exception, PermissionError):
|
|
|
249
252
|
|
|
250
253
|
|
|
251
254
|
class S3ConfigError(S3Exception, EnvironmentError):
|
|
252
|
-
|
|
253
|
-
|
|
255
|
+
"""
|
|
256
|
+
Error raised by wrong S3 config, including wrong config file format,
|
|
257
|
+
wrong aws_secret_access_key / aws_access_key_id, and etc.
|
|
258
|
+
"""
|
|
254
259
|
|
|
255
260
|
|
|
256
261
|
class S3NotALinkError(S3FileNotFoundError, PermissionError):
|
|
@@ -266,16 +271,15 @@ class S3InvalidRangeError(S3Exception):
|
|
|
266
271
|
|
|
267
272
|
|
|
268
273
|
class S3UnknownError(S3Exception, UnknownError):
|
|
269
|
-
|
|
270
274
|
def __init__(self, error: Exception, path: PathLike):
|
|
271
|
-
super().__init__(error, path,
|
|
275
|
+
super().__init__(error, path, "endpoint: %r" % s3_endpoint_url(path))
|
|
272
276
|
|
|
273
277
|
|
|
274
278
|
class HttpException(Exception):
|
|
275
|
-
|
|
279
|
+
"""
|
|
276
280
|
Base type for all http errors, should NOT be constructed directly.
|
|
277
281
|
When you try to do so, consider adding a new type of error.
|
|
278
|
-
|
|
282
|
+
"""
|
|
279
283
|
|
|
280
284
|
|
|
281
285
|
class HttpPermissionError(HttpException, PermissionError):
|
|
@@ -330,43 +334,52 @@ def translate_fs_error(fs_error: Exception, fs_path: PathLike) -> Exception:
|
|
|
330
334
|
|
|
331
335
|
|
|
332
336
|
def translate_s3_error(s3_error: Exception, s3_url: PathLike) -> Exception:
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
337
|
+
""":param s3_error: error raised by boto3
|
|
338
|
+
:param s3_url: s3_url
|
|
339
|
+
"""
|
|
336
340
|
if isinstance(s3_error, S3Exception):
|
|
337
341
|
return s3_error
|
|
338
342
|
elif isinstance(s3_error, ClientError):
|
|
339
343
|
code = client_error_code(s3_error)
|
|
340
|
-
if code in (
|
|
344
|
+
if code in ("NoSuchBucket"):
|
|
341
345
|
return S3BucketNotFoundError(
|
|
342
|
-
|
|
343
|
-
s3_error.response.get(
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
346
|
+
"No such bucket: %r"
|
|
347
|
+
% s3_error.response.get( # pytype: disable=attribute-error
|
|
348
|
+
"Error", {}
|
|
349
|
+
).get("BucketName")
|
|
350
|
+
or s3_url
|
|
351
|
+
)
|
|
352
|
+
if code in ("404", "NoSuchKey"):
|
|
353
|
+
return S3FileNotFoundError("No such file: %r" % s3_url)
|
|
354
|
+
if code in ("401", "403", "AccessDenied"):
|
|
347
355
|
message = client_error_message(s3_error)
|
|
348
356
|
return S3PermissionError(
|
|
349
|
-
|
|
350
|
-
(s3_url, code, message, s3_endpoint_url(s3_url))
|
|
351
|
-
|
|
357
|
+
"Permission denied: %r, code: %r, message: %r, endpoint: %r"
|
|
358
|
+
% (s3_url, code, message, s3_endpoint_url(s3_url))
|
|
359
|
+
)
|
|
360
|
+
if code in ("InvalidAccessKeyId", "SignatureDoesNotMatch"):
|
|
352
361
|
message = client_error_message(s3_error)
|
|
353
362
|
return S3ConfigError(
|
|
354
|
-
|
|
355
|
-
% (s3_url, code, message, s3_endpoint_url(s3_url))
|
|
356
|
-
|
|
363
|
+
"Invalid configuration: %r, code: %r, message: %r, endpoint: %r"
|
|
364
|
+
% (s3_url, code, message, s3_endpoint_url(s3_url))
|
|
365
|
+
)
|
|
366
|
+
if code in ("InvalidRange"):
|
|
357
367
|
return S3InvalidRangeError(
|
|
358
|
-
|
|
359
|
-
(
|
|
360
|
-
s3_url,
|
|
361
|
-
|
|
368
|
+
"Index out of range: %r, code: %r, message: %r, endpoint: %r"
|
|
369
|
+
% (
|
|
370
|
+
s3_url,
|
|
371
|
+
code,
|
|
372
|
+
client_error_message(s3_error),
|
|
373
|
+
s3_endpoint_url(s3_url),
|
|
374
|
+
)
|
|
375
|
+
)
|
|
362
376
|
return S3UnknownError(s3_error, s3_url)
|
|
363
377
|
elif isinstance(s3_error, ParamValidationError):
|
|
364
378
|
report = param_validation_error_report(s3_error)
|
|
365
|
-
if
|
|
366
|
-
return S3BucketNotFoundError(
|
|
367
|
-
if
|
|
368
|
-
return S3FileNotFoundError(
|
|
369
|
-
'Invalid length for parameter Key: %r' % s3_url)
|
|
379
|
+
if "Invalid bucket name" in report:
|
|
380
|
+
return S3BucketNotFoundError("Invalid bucket name: %r" % s3_url)
|
|
381
|
+
if "Invalid length for parameter Key" in report:
|
|
382
|
+
return S3FileNotFoundError("Invalid length for parameter Key: %r" % s3_url)
|
|
370
383
|
return S3UnknownError(s3_error, s3_url)
|
|
371
384
|
elif isinstance(s3_error, NoCredentialsError):
|
|
372
385
|
return S3ConfigError(str(s3_error))
|
|
@@ -374,7 +387,7 @@ def translate_s3_error(s3_error: Exception, s3_url: PathLike) -> Exception:
|
|
|
374
387
|
|
|
375
388
|
|
|
376
389
|
def translate_http_error(http_error: Exception, http_url: str) -> Exception:
|
|
377
|
-
|
|
390
|
+
"""Generate exception according to http_error and status_code
|
|
378
391
|
|
|
379
392
|
.. note ::
|
|
380
393
|
|
|
@@ -382,15 +395,15 @@ def translate_http_error(http_error: Exception, http_url: str) -> Exception:
|
|
|
382
395
|
|
|
383
396
|
:param http_error: error raised by requests
|
|
384
397
|
:param http_url: http url
|
|
385
|
-
|
|
398
|
+
"""
|
|
386
399
|
if isinstance(http_error, HttpException):
|
|
387
400
|
return http_error
|
|
388
401
|
if isinstance(http_error, HTTPError):
|
|
389
402
|
status_code = http_error.response.status_code
|
|
390
403
|
if status_code == 401 or status_code == 403:
|
|
391
|
-
return HttpPermissionError(
|
|
404
|
+
return HttpPermissionError("Permission denied: %r" % http_url)
|
|
392
405
|
elif status_code == 404:
|
|
393
|
-
return HttpFileNotFoundError(
|
|
406
|
+
return HttpFileNotFoundError("No such file: %r" % http_url)
|
|
394
407
|
return HttpUnknownError(http_error, http_url)
|
|
395
408
|
|
|
396
409
|
|
|
@@ -403,27 +416,26 @@ def raise_s3_error(s3_url: PathLike):
|
|
|
403
416
|
|
|
404
417
|
|
|
405
418
|
def s3_error_code_should_retry(error: str) -> bool:
|
|
406
|
-
if error in [
|
|
419
|
+
if error in ["InternalError", "ServiceUnavailable", "SlowDown"]:
|
|
407
420
|
return True
|
|
408
421
|
return False
|
|
409
422
|
|
|
410
423
|
|
|
411
|
-
def translate_hdfs_error(
|
|
412
|
-
hdfs_error: Exception, hdfs_path: PathLike) -> Exception:
|
|
424
|
+
def translate_hdfs_error(hdfs_error: Exception, hdfs_path: PathLike) -> Exception:
|
|
413
425
|
from megfile.lib.hdfs_tools import hdfs_api
|
|
414
426
|
|
|
415
427
|
# pytype: disable=attribute-error
|
|
416
428
|
if hdfs_api and isinstance(hdfs_error, hdfs_api.HdfsError):
|
|
417
|
-
if hdfs_error.message and
|
|
418
|
-
return IsADirectoryError(
|
|
419
|
-
elif hdfs_error.message and
|
|
420
|
-
return NotADirectoryError(
|
|
429
|
+
if hdfs_error.message and "Path is not a file" in hdfs_error.message:
|
|
430
|
+
return IsADirectoryError("Is a directory: %r" % hdfs_path)
|
|
431
|
+
elif hdfs_error.message and "Path is not a directory" in hdfs_error.message:
|
|
432
|
+
return NotADirectoryError("Not a directory: %r" % hdfs_path)
|
|
421
433
|
elif hdfs_error.status_code in (401, 403):
|
|
422
|
-
return PermissionError(
|
|
434
|
+
return PermissionError("Permission denied: %r" % hdfs_path)
|
|
423
435
|
elif hdfs_error.status_code == 400:
|
|
424
|
-
return ValueError(f
|
|
436
|
+
return ValueError(f"{hdfs_error.message}, path: {hdfs_path}")
|
|
425
437
|
elif hdfs_error.status_code == 404:
|
|
426
|
-
return FileNotFoundError(f
|
|
438
|
+
return FileNotFoundError(f"No match file: {hdfs_path}")
|
|
427
439
|
# pytype: enable=attribute-error
|
|
428
440
|
return hdfs_error
|
|
429
441
|
|