megfile 3.1.1__py3-none-any.whl → 3.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- docs/conf.py +2 -4
- megfile/__init__.py +394 -203
- megfile/cli.py +258 -238
- megfile/config.py +25 -21
- megfile/errors.py +124 -114
- megfile/fs.py +174 -140
- megfile/fs_path.py +462 -354
- megfile/hdfs.py +133 -101
- megfile/hdfs_path.py +290 -236
- megfile/http.py +15 -14
- megfile/http_path.py +111 -107
- megfile/interfaces.py +70 -65
- megfile/lib/base_prefetch_reader.py +84 -65
- megfile/lib/combine_reader.py +12 -12
- megfile/lib/compare.py +17 -13
- megfile/lib/compat.py +1 -5
- megfile/lib/fnmatch.py +29 -30
- megfile/lib/glob.py +46 -54
- megfile/lib/hdfs_prefetch_reader.py +40 -25
- megfile/lib/hdfs_tools.py +1 -3
- megfile/lib/http_prefetch_reader.py +69 -46
- megfile/lib/joinpath.py +5 -5
- megfile/lib/lazy_handler.py +7 -3
- megfile/lib/s3_buffered_writer.py +58 -51
- megfile/lib/s3_cached_handler.py +13 -14
- megfile/lib/s3_limited_seekable_writer.py +37 -28
- megfile/lib/s3_memory_handler.py +34 -30
- megfile/lib/s3_pipe_handler.py +24 -25
- megfile/lib/s3_prefetch_reader.py +71 -52
- megfile/lib/s3_share_cache_reader.py +37 -24
- megfile/lib/shadow_handler.py +7 -3
- megfile/lib/stdio_handler.py +9 -8
- megfile/lib/url.py +3 -3
- megfile/pathlike.py +259 -228
- megfile/s3.py +220 -153
- megfile/s3_path.py +977 -802
- megfile/sftp.py +190 -156
- megfile/sftp_path.py +540 -450
- megfile/smart.py +397 -330
- megfile/smart_path.py +100 -105
- megfile/stdio.py +10 -9
- megfile/stdio_path.py +32 -35
- megfile/utils/__init__.py +73 -54
- megfile/utils/mutex.py +11 -14
- megfile/version.py +1 -1
- {megfile-3.1.1.dist-info → megfile-3.1.2.dist-info}/METADATA +5 -8
- megfile-3.1.2.dist-info/RECORD +55 -0
- {megfile-3.1.1.dist-info → megfile-3.1.2.dist-info}/WHEEL +1 -1
- scripts/convert_results_to_sarif.py +45 -78
- scripts/generate_file.py +140 -64
- megfile-3.1.1.dist-info/RECORD +0 -55
- {megfile-3.1.1.dist-info → megfile-3.1.2.dist-info}/LICENSE +0 -0
- {megfile-3.1.1.dist-info → megfile-3.1.2.dist-info}/LICENSE.pyre +0 -0
- {megfile-3.1.1.dist-info → megfile-3.1.2.dist-info}/entry_points.txt +0 -0
- {megfile-3.1.1.dist-info → megfile-3.1.2.dist-info}/top_level.txt +0 -0
megfile/config.py
CHANGED
|
@@ -3,32 +3,33 @@ from logging import getLogger
|
|
|
3
3
|
|
|
4
4
|
_logger = getLogger(__name__)
|
|
5
5
|
|
|
6
|
-
DEFAULT_BLOCK_SIZE = int(os.getenv(
|
|
6
|
+
DEFAULT_BLOCK_SIZE = int(os.getenv("MEGFILE_BLOCK_SIZE") or 8 * 2**20)
|
|
7
7
|
|
|
8
|
-
if os.getenv(
|
|
9
|
-
DEFAULT_MAX_BUFFER_SIZE = int(os.environ[
|
|
8
|
+
if os.getenv("MEGFILE_MAX_BUFFER_SIZE"):
|
|
9
|
+
DEFAULT_MAX_BUFFER_SIZE = int(os.environ["MEGFILE_MAX_BUFFER_SIZE"])
|
|
10
10
|
if DEFAULT_MAX_BUFFER_SIZE < DEFAULT_BLOCK_SIZE:
|
|
11
11
|
DEFAULT_MAX_BUFFER_SIZE = DEFAULT_BLOCK_SIZE
|
|
12
12
|
_logger.warning(
|
|
13
|
-
"Env 'MEGFILE_MAX_BUFFER_SIZE' is smaller than block size,
|
|
13
|
+
"Env 'MEGFILE_MAX_BUFFER_SIZE' is smaller than block size, "
|
|
14
|
+
"will not use buffer."
|
|
14
15
|
)
|
|
15
16
|
DEFAULT_BLOCK_CAPACITY = DEFAULT_MAX_BUFFER_SIZE // DEFAULT_BLOCK_SIZE
|
|
16
|
-
if os.getenv(
|
|
17
|
+
if os.getenv("MEGFILE_BLOCK_CAPACITY"):
|
|
17
18
|
_logger.warning(
|
|
18
|
-
"Env 'MEGFILE_MAX_BUFFER_SIZE' and 'MEGFILE_BLOCK_CAPACITY' are both set,
|
|
19
|
+
"Env 'MEGFILE_MAX_BUFFER_SIZE' and 'MEGFILE_BLOCK_CAPACITY' are both set, "
|
|
20
|
+
"'MEGFILE_BLOCK_CAPACITY' will be ignored."
|
|
19
21
|
)
|
|
20
|
-
elif os.getenv(
|
|
21
|
-
DEFAULT_BLOCK_CAPACITY = int(os.environ[
|
|
22
|
+
elif os.getenv("MEGFILE_BLOCK_CAPACITY"):
|
|
23
|
+
DEFAULT_BLOCK_CAPACITY = int(os.environ["MEGFILE_BLOCK_CAPACITY"])
|
|
22
24
|
DEFAULT_MAX_BUFFER_SIZE = DEFAULT_BLOCK_SIZE * DEFAULT_BLOCK_CAPACITY
|
|
23
25
|
else:
|
|
24
26
|
DEFAULT_MAX_BUFFER_SIZE = 128 * 2**20
|
|
25
27
|
DEFAULT_BLOCK_CAPACITY = 16
|
|
26
28
|
|
|
27
|
-
DEFAULT_MIN_BLOCK_SIZE = int(
|
|
28
|
-
os.getenv('MEGFILE_MIN_BLOCK_SIZE') or DEFAULT_BLOCK_SIZE)
|
|
29
|
+
DEFAULT_MIN_BLOCK_SIZE = int(os.getenv("MEGFILE_MIN_BLOCK_SIZE") or DEFAULT_BLOCK_SIZE)
|
|
29
30
|
|
|
30
|
-
if os.getenv(
|
|
31
|
-
DEFAULT_MAX_BLOCK_SIZE = int(os.environ[
|
|
31
|
+
if os.getenv("MEGFILE_MAX_BLOCK_SIZE"):
|
|
32
|
+
DEFAULT_MAX_BLOCK_SIZE = int(os.environ["MEGFILE_MAX_BLOCK_SIZE"])
|
|
32
33
|
if DEFAULT_MAX_BLOCK_SIZE < DEFAULT_BLOCK_SIZE:
|
|
33
34
|
DEFAULT_MAX_BLOCK_SIZE = DEFAULT_BLOCK_SIZE
|
|
34
35
|
_logger.warning(
|
|
@@ -37,22 +38,25 @@ if os.getenv('MEGFILE_MAX_BLOCK_SIZE'):
|
|
|
37
38
|
else:
|
|
38
39
|
DEFAULT_MAX_BLOCK_SIZE = max(128 * 2**20, DEFAULT_BLOCK_SIZE)
|
|
39
40
|
|
|
40
|
-
GLOBAL_MAX_WORKERS = int(os.getenv(
|
|
41
|
-
DEFAULT_MAX_RETRY_TIMES = int(os.getenv(
|
|
41
|
+
GLOBAL_MAX_WORKERS = int(os.getenv("MEGFILE_MAX_WORKERS") or 32)
|
|
42
|
+
DEFAULT_MAX_RETRY_TIMES = int(os.getenv("MEGFILE_MAX_RETRY_TIMES") or 10)
|
|
42
43
|
|
|
43
44
|
# for logging the size of file had read or wrote
|
|
44
45
|
BACKOFF_INITIAL = 64 * 2**20 # 64MB
|
|
45
46
|
BACKOFF_FACTOR = 4
|
|
46
47
|
|
|
47
|
-
NEWLINE = ord(
|
|
48
|
+
NEWLINE = ord("\n")
|
|
48
49
|
|
|
49
|
-
S3_CLIENT_CACHE_MODE = os.getenv(
|
|
50
|
-
'MEGFILE_S3_CLIENT_CACHE_MODE') or 'thread_local'
|
|
50
|
+
S3_CLIENT_CACHE_MODE = os.getenv("MEGFILE_S3_CLIENT_CACHE_MODE") or "thread_local"
|
|
51
51
|
S3_MAX_RETRY_TIMES = int(
|
|
52
|
-
os.getenv(
|
|
52
|
+
os.getenv("MEGFILE_S3_MAX_RETRY_TIMES") or DEFAULT_MAX_RETRY_TIMES
|
|
53
|
+
)
|
|
53
54
|
HTTP_MAX_RETRY_TIMES = int(
|
|
54
|
-
os.getenv(
|
|
55
|
+
os.getenv("MEGFILE_HTTP_MAX_RETRY_TIMES") or DEFAULT_MAX_RETRY_TIMES
|
|
56
|
+
)
|
|
55
57
|
HDFS_MAX_RETRY_TIMES = int(
|
|
56
|
-
os.getenv(
|
|
58
|
+
os.getenv("MEGFILE_HDFS_MAX_RETRY_TIMES") or DEFAULT_MAX_RETRY_TIMES
|
|
59
|
+
)
|
|
57
60
|
SFTP_MAX_RETRY_TIMES = int(
|
|
58
|
-
os.getenv(
|
|
61
|
+
os.getenv("MEGFILE_SFTP_MAX_RETRY_TIMES") or DEFAULT_MAX_RETRY_TIMES
|
|
62
|
+
)
|
megfile/errors.py
CHANGED
|
@@ -15,31 +15,31 @@ from requests.exceptions import HTTPError
|
|
|
15
15
|
from megfile.interfaces import PathLike
|
|
16
16
|
|
|
17
17
|
__all__ = [
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
18
|
+
"S3FileNotFoundError",
|
|
19
|
+
"S3BucketNotFoundError",
|
|
20
|
+
"S3FileExistsError",
|
|
21
|
+
"S3NotADirectoryError",
|
|
22
|
+
"S3IsADirectoryError",
|
|
23
|
+
"S3PermissionError",
|
|
24
|
+
"S3ConfigError",
|
|
25
|
+
"UnknownError",
|
|
26
|
+
"UnsupportedError",
|
|
27
|
+
"HttpPermissionError",
|
|
28
|
+
"HttpFileNotFoundError",
|
|
29
|
+
"HttpBodyIncompleteError",
|
|
30
|
+
"HttpUnknownError",
|
|
31
|
+
"HttpException",
|
|
32
|
+
"ProtocolExistsError",
|
|
33
|
+
"ProtocolNotFoundError",
|
|
34
|
+
"S3UnknownError",
|
|
35
|
+
"SameFileError",
|
|
36
|
+
"translate_http_error",
|
|
37
|
+
"translate_s3_error",
|
|
38
|
+
"patch_method",
|
|
39
|
+
"raise_s3_error",
|
|
40
|
+
"s3_should_retry",
|
|
41
|
+
"translate_fs_error",
|
|
42
|
+
"http_should_retry",
|
|
43
43
|
]
|
|
44
44
|
|
|
45
45
|
_logger = getLogger(__name__)
|
|
@@ -54,8 +54,7 @@ def s3_endpoint_url(path: Optional[PathLike] = None):
|
|
|
54
54
|
profile_name = S3Path(path)._profile_name
|
|
55
55
|
endpoint_url = get_endpoint_url(profile_name=profile_name)
|
|
56
56
|
if endpoint_url is None:
|
|
57
|
-
endpoint_url = get_s3_client(
|
|
58
|
-
profile_name=profile_name).meta.endpoint_url
|
|
57
|
+
endpoint_url = get_s3_client(profile_name=profile_name).meta.endpoint_url
|
|
59
58
|
return endpoint_url
|
|
60
59
|
|
|
61
60
|
|
|
@@ -71,24 +70,24 @@ def full_class_name(obj):
|
|
|
71
70
|
if module is None or module == str.__class__.__module__:
|
|
72
71
|
return obj.__class__.__name__ # Avoid reporting __builtin__
|
|
73
72
|
else:
|
|
74
|
-
return module +
|
|
73
|
+
return module + "." + obj.__class__.__name__
|
|
75
74
|
|
|
76
75
|
|
|
77
76
|
def full_error_message(error):
|
|
78
|
-
return
|
|
77
|
+
return "%s(%r)" % (full_class_name(error), str(error))
|
|
79
78
|
|
|
80
79
|
|
|
81
80
|
def client_error_code(error: ClientError) -> str:
|
|
82
|
-
error_data = error.response.get(
|
|
83
|
-
return error_data.get(
|
|
81
|
+
error_data = error.response.get("Error", {})
|
|
82
|
+
return error_data.get("Code") or error_data.get("code", "Unknown")
|
|
84
83
|
|
|
85
84
|
|
|
86
85
|
def client_error_message(error: ClientError) -> str:
|
|
87
|
-
return error.response.get(
|
|
86
|
+
return error.response.get("Error", {}).get("Message", "Unknown")
|
|
88
87
|
|
|
89
88
|
|
|
90
89
|
def param_validation_error_report(error: ParamValidationError) -> str:
|
|
91
|
-
return error.kwargs.get(
|
|
90
|
+
return error.kwargs.get("report", "Unknown")
|
|
92
91
|
|
|
93
92
|
|
|
94
93
|
s3_retry_exceptions = [
|
|
@@ -105,10 +104,10 @@ s3_retry_exceptions = [
|
|
|
105
104
|
urllib3.exceptions.ReadTimeoutError,
|
|
106
105
|
urllib3.exceptions.HeaderParsingError,
|
|
107
106
|
]
|
|
108
|
-
if hasattr(botocore.exceptions,
|
|
109
|
-
'ResponseStreamingError'): # backport botocore==1.23.24
|
|
107
|
+
if hasattr(botocore.exceptions, "ResponseStreamingError"): # backport botocore==1.23.24
|
|
110
108
|
s3_retry_exceptions.append(
|
|
111
|
-
botocore.exceptions.ResponseStreamingError
|
|
109
|
+
botocore.exceptions.ResponseStreamingError # pyre-ignore[6]
|
|
110
|
+
)
|
|
112
111
|
s3_retry_exceptions = tuple(s3_retry_exceptions) # pyre-ignore[9]
|
|
113
112
|
|
|
114
113
|
|
|
@@ -117,26 +116,32 @@ def s3_should_retry(error: Exception) -> bool:
|
|
|
117
116
|
return True
|
|
118
117
|
if isinstance(error, botocore.exceptions.ClientError):
|
|
119
118
|
return client_error_code(error) in (
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
119
|
+
"429", # noqa: E501 # TOS ExceedAccountQPSLimit
|
|
120
|
+
"499", # noqa: E501 # Some cloud providers may send response with http code 499 if the connection not send data in 1 min.
|
|
121
|
+
"500",
|
|
122
|
+
"501",
|
|
123
|
+
"502",
|
|
124
|
+
"503",
|
|
125
|
+
"InternalError",
|
|
126
|
+
"ServiceUnavailable",
|
|
127
|
+
"SlowDown",
|
|
128
|
+
"ContextCanceled",
|
|
129
|
+
"ExceedAccountQPSLimit",
|
|
130
|
+
"ExceedAccountRateLimit",
|
|
131
|
+
"ExceedBucketQPSLimit",
|
|
132
|
+
"ExceedBucketRateLimit",
|
|
133
|
+
)
|
|
129
134
|
return False
|
|
130
135
|
|
|
131
136
|
|
|
132
137
|
def patch_method(
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
138
|
+
func: Callable,
|
|
139
|
+
max_retries: int,
|
|
140
|
+
should_retry: Callable[[Exception], bool],
|
|
141
|
+
before_callback: Optional[Callable] = None,
|
|
142
|
+
after_callback: Optional[Callable] = None,
|
|
143
|
+
retry_callback: Optional[Callable] = None,
|
|
144
|
+
):
|
|
140
145
|
@wraps(func)
|
|
141
146
|
def wrapper(*args, **kwargs):
|
|
142
147
|
if before_callback is not None:
|
|
@@ -148,8 +153,7 @@ def patch_method(
|
|
|
148
153
|
if after_callback is not None:
|
|
149
154
|
result = after_callback(result, *args, **kwargs)
|
|
150
155
|
if retries > 1:
|
|
151
|
-
_logger.info(
|
|
152
|
-
f'Error already fixed by retry {retries - 1} times')
|
|
156
|
+
_logger.info(f"Error already fixed by retry {retries - 1} times")
|
|
153
157
|
return result
|
|
154
158
|
except Exception as error:
|
|
155
159
|
if not should_retry(error):
|
|
@@ -160,8 +164,10 @@ def patch_method(
|
|
|
160
164
|
raise
|
|
161
165
|
retry_interval = min(0.1 * 2**retries, 30)
|
|
162
166
|
_logger.info(
|
|
163
|
-
|
|
164
|
-
%
|
|
167
|
+
"unknown error encountered: %s, retry in %0.1f seconds "
|
|
168
|
+
"after %d tries"
|
|
169
|
+
% (full_error_message(error), retry_interval, retries)
|
|
170
|
+
)
|
|
165
171
|
time.sleep(retry_interval)
|
|
166
172
|
|
|
167
173
|
return wrapper
|
|
@@ -182,16 +188,13 @@ def _create_missing_ok_generator(generator, missing_ok: bool, error: Exception):
|
|
|
182
188
|
|
|
183
189
|
|
|
184
190
|
class UnknownError(Exception):
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
error
|
|
189
|
-
|
|
190
|
-
extra: Optional[str] = None):
|
|
191
|
-
message = 'Unknown error encountered: %r, error: %s' % (
|
|
192
|
-
path, full_error_message(error))
|
|
191
|
+
def __init__(self, error: Exception, path: PathLike, extra: Optional[str] = None):
|
|
192
|
+
message = "Unknown error encountered: %r, error: %s" % (
|
|
193
|
+
path,
|
|
194
|
+
full_error_message(error),
|
|
195
|
+
)
|
|
193
196
|
if extra is not None:
|
|
194
|
-
message +=
|
|
197
|
+
message += ", " + extra
|
|
195
198
|
super().__init__(message)
|
|
196
199
|
self.path = path
|
|
197
200
|
self.extra = extra
|
|
@@ -202,10 +205,8 @@ class UnknownError(Exception):
|
|
|
202
205
|
|
|
203
206
|
|
|
204
207
|
class UnsupportedError(Exception):
|
|
205
|
-
|
|
206
208
|
def __init__(self, operation: str, path: PathLike):
|
|
207
|
-
super().__init__(
|
|
208
|
-
'Unsupported operation: %r, operation: %r' % (path, operation))
|
|
209
|
+
super().__init__("Unsupported operation: %r, operation: %r" % (path, operation))
|
|
209
210
|
self.path = path
|
|
210
211
|
self.operation = operation
|
|
211
212
|
|
|
@@ -214,10 +215,10 @@ class UnsupportedError(Exception):
|
|
|
214
215
|
|
|
215
216
|
|
|
216
217
|
class S3Exception(Exception):
|
|
217
|
-
|
|
218
|
+
"""
|
|
218
219
|
Base type for all s3 errors, should NOT be constructed directly.
|
|
219
220
|
When you try to do so, consider adding a new type of error.
|
|
220
|
-
|
|
221
|
+
"""
|
|
221
222
|
|
|
222
223
|
|
|
223
224
|
class S3FileNotFoundError(S3Exception, FileNotFoundError):
|
|
@@ -249,8 +250,10 @@ class S3PermissionError(S3Exception, PermissionError):
|
|
|
249
250
|
|
|
250
251
|
|
|
251
252
|
class S3ConfigError(S3Exception, EnvironmentError):
|
|
252
|
-
|
|
253
|
-
|
|
253
|
+
"""
|
|
254
|
+
Error raised by wrong S3 config, including wrong config file format,
|
|
255
|
+
wrong aws_secret_access_key / aws_access_key_id, and etc.
|
|
256
|
+
"""
|
|
254
257
|
|
|
255
258
|
|
|
256
259
|
class S3NotALinkError(S3FileNotFoundError, PermissionError):
|
|
@@ -266,16 +269,15 @@ class S3InvalidRangeError(S3Exception):
|
|
|
266
269
|
|
|
267
270
|
|
|
268
271
|
class S3UnknownError(S3Exception, UnknownError):
|
|
269
|
-
|
|
270
272
|
def __init__(self, error: Exception, path: PathLike):
|
|
271
|
-
super().__init__(error, path,
|
|
273
|
+
super().__init__(error, path, "endpoint: %r" % s3_endpoint_url(path))
|
|
272
274
|
|
|
273
275
|
|
|
274
276
|
class HttpException(Exception):
|
|
275
|
-
|
|
277
|
+
"""
|
|
276
278
|
Base type for all http errors, should NOT be constructed directly.
|
|
277
279
|
When you try to do so, consider adding a new type of error.
|
|
278
|
-
|
|
280
|
+
"""
|
|
279
281
|
|
|
280
282
|
|
|
281
283
|
class HttpPermissionError(HttpException, PermissionError):
|
|
@@ -330,43 +332,52 @@ def translate_fs_error(fs_error: Exception, fs_path: PathLike) -> Exception:
|
|
|
330
332
|
|
|
331
333
|
|
|
332
334
|
def translate_s3_error(s3_error: Exception, s3_url: PathLike) -> Exception:
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
335
|
+
""":param s3_error: error raised by boto3
|
|
336
|
+
:param s3_url: s3_url
|
|
337
|
+
"""
|
|
336
338
|
if isinstance(s3_error, S3Exception):
|
|
337
339
|
return s3_error
|
|
338
340
|
elif isinstance(s3_error, ClientError):
|
|
339
341
|
code = client_error_code(s3_error)
|
|
340
|
-
if code in (
|
|
342
|
+
if code in ("NoSuchBucket"):
|
|
341
343
|
return S3BucketNotFoundError(
|
|
342
|
-
|
|
343
|
-
s3_error.response.get(
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
344
|
+
"No such bucket: %r"
|
|
345
|
+
% s3_error.response.get( # pytype: disable=attribute-error
|
|
346
|
+
"Error", {}
|
|
347
|
+
).get("BucketName")
|
|
348
|
+
or s3_url
|
|
349
|
+
)
|
|
350
|
+
if code in ("404", "NoSuchKey"):
|
|
351
|
+
return S3FileNotFoundError("No such file: %r" % s3_url)
|
|
352
|
+
if code in ("401", "403", "AccessDenied"):
|
|
347
353
|
message = client_error_message(s3_error)
|
|
348
354
|
return S3PermissionError(
|
|
349
|
-
|
|
350
|
-
(s3_url, code, message, s3_endpoint_url(s3_url))
|
|
351
|
-
|
|
355
|
+
"Permission denied: %r, code: %r, message: %r, endpoint: %r"
|
|
356
|
+
% (s3_url, code, message, s3_endpoint_url(s3_url))
|
|
357
|
+
)
|
|
358
|
+
if code in ("InvalidAccessKeyId", "SignatureDoesNotMatch"):
|
|
352
359
|
message = client_error_message(s3_error)
|
|
353
360
|
return S3ConfigError(
|
|
354
|
-
|
|
355
|
-
% (s3_url, code, message, s3_endpoint_url(s3_url))
|
|
356
|
-
|
|
361
|
+
"Invalid configuration: %r, code: %r, message: %r, endpoint: %r"
|
|
362
|
+
% (s3_url, code, message, s3_endpoint_url(s3_url))
|
|
363
|
+
)
|
|
364
|
+
if code in ("InvalidRange"):
|
|
357
365
|
return S3InvalidRangeError(
|
|
358
|
-
|
|
359
|
-
(
|
|
360
|
-
s3_url,
|
|
361
|
-
|
|
366
|
+
"Index out of range: %r, code: %r, message: %r, endpoint: %r"
|
|
367
|
+
% (
|
|
368
|
+
s3_url,
|
|
369
|
+
code,
|
|
370
|
+
client_error_message(s3_error),
|
|
371
|
+
s3_endpoint_url(s3_url),
|
|
372
|
+
)
|
|
373
|
+
)
|
|
362
374
|
return S3UnknownError(s3_error, s3_url)
|
|
363
375
|
elif isinstance(s3_error, ParamValidationError):
|
|
364
376
|
report = param_validation_error_report(s3_error)
|
|
365
|
-
if
|
|
366
|
-
return S3BucketNotFoundError(
|
|
367
|
-
if
|
|
368
|
-
return S3FileNotFoundError(
|
|
369
|
-
'Invalid length for parameter Key: %r' % s3_url)
|
|
377
|
+
if "Invalid bucket name" in report:
|
|
378
|
+
return S3BucketNotFoundError("Invalid bucket name: %r" % s3_url)
|
|
379
|
+
if "Invalid length for parameter Key" in report:
|
|
380
|
+
return S3FileNotFoundError("Invalid length for parameter Key: %r" % s3_url)
|
|
370
381
|
return S3UnknownError(s3_error, s3_url)
|
|
371
382
|
elif isinstance(s3_error, NoCredentialsError):
|
|
372
383
|
return S3ConfigError(str(s3_error))
|
|
@@ -374,7 +385,7 @@ def translate_s3_error(s3_error: Exception, s3_url: PathLike) -> Exception:
|
|
|
374
385
|
|
|
375
386
|
|
|
376
387
|
def translate_http_error(http_error: Exception, http_url: str) -> Exception:
|
|
377
|
-
|
|
388
|
+
"""Generate exception according to http_error and status_code
|
|
378
389
|
|
|
379
390
|
.. note ::
|
|
380
391
|
|
|
@@ -382,15 +393,15 @@ def translate_http_error(http_error: Exception, http_url: str) -> Exception:
|
|
|
382
393
|
|
|
383
394
|
:param http_error: error raised by requests
|
|
384
395
|
:param http_url: http url
|
|
385
|
-
|
|
396
|
+
"""
|
|
386
397
|
if isinstance(http_error, HttpException):
|
|
387
398
|
return http_error
|
|
388
399
|
if isinstance(http_error, HTTPError):
|
|
389
400
|
status_code = http_error.response.status_code
|
|
390
401
|
if status_code == 401 or status_code == 403:
|
|
391
|
-
return HttpPermissionError(
|
|
402
|
+
return HttpPermissionError("Permission denied: %r" % http_url)
|
|
392
403
|
elif status_code == 404:
|
|
393
|
-
return HttpFileNotFoundError(
|
|
404
|
+
return HttpFileNotFoundError("No such file: %r" % http_url)
|
|
394
405
|
return HttpUnknownError(http_error, http_url)
|
|
395
406
|
|
|
396
407
|
|
|
@@ -403,27 +414,26 @@ def raise_s3_error(s3_url: PathLike):
|
|
|
403
414
|
|
|
404
415
|
|
|
405
416
|
def s3_error_code_should_retry(error: str) -> bool:
|
|
406
|
-
if error in [
|
|
417
|
+
if error in ["InternalError", "ServiceUnavailable", "SlowDown"]:
|
|
407
418
|
return True
|
|
408
419
|
return False
|
|
409
420
|
|
|
410
421
|
|
|
411
|
-
def translate_hdfs_error(
|
|
412
|
-
hdfs_error: Exception, hdfs_path: PathLike) -> Exception:
|
|
422
|
+
def translate_hdfs_error(hdfs_error: Exception, hdfs_path: PathLike) -> Exception:
|
|
413
423
|
from megfile.lib.hdfs_tools import hdfs_api
|
|
414
424
|
|
|
415
425
|
# pytype: disable=attribute-error
|
|
416
426
|
if hdfs_api and isinstance(hdfs_error, hdfs_api.HdfsError):
|
|
417
|
-
if hdfs_error.message and
|
|
418
|
-
return IsADirectoryError(
|
|
419
|
-
elif hdfs_error.message and
|
|
420
|
-
return NotADirectoryError(
|
|
427
|
+
if hdfs_error.message and "Path is not a file" in hdfs_error.message:
|
|
428
|
+
return IsADirectoryError("Is a directory: %r" % hdfs_path)
|
|
429
|
+
elif hdfs_error.message and "Path is not a directory" in hdfs_error.message:
|
|
430
|
+
return NotADirectoryError("Not a directory: %r" % hdfs_path)
|
|
421
431
|
elif hdfs_error.status_code in (401, 403):
|
|
422
|
-
return PermissionError(
|
|
432
|
+
return PermissionError("Permission denied: %r" % hdfs_path)
|
|
423
433
|
elif hdfs_error.status_code == 400:
|
|
424
|
-
return ValueError(f
|
|
434
|
+
return ValueError(f"{hdfs_error.message}, path: {hdfs_path}")
|
|
425
435
|
elif hdfs_error.status_code == 404:
|
|
426
|
-
return FileNotFoundError(f
|
|
436
|
+
return FileNotFoundError(f"No match file: {hdfs_path}")
|
|
427
437
|
# pytype: enable=attribute-error
|
|
428
438
|
return hdfs_error
|
|
429
439
|
|