megfile 5.0.2__py3-none-any.whl → 5.0.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- megfile/cli.py +14 -1
- megfile/errors.py +39 -9
- megfile/fs_path.py +25 -35
- megfile/hdfs_path.py +10 -0
- megfile/lib/base_memory_handler.py +9 -0
- megfile/lib/s3_cached_handler.py +4 -0
- megfile/lib/s3_memory_handler.py +2 -1
- megfile/lib/webdav_memory_handler.py +2 -1
- megfile/pathlike.py +11 -3
- megfile/s3_path.py +17 -7
- megfile/sftp_path.py +22 -0
- megfile/utils/__init__.py +2 -2
- megfile/utils/atomic.py +143 -0
- megfile/version.py +1 -1
- megfile/webdav_path.py +2 -0
- {megfile-5.0.2.dist-info → megfile-5.0.4.dist-info}/METADATA +2 -2
- {megfile-5.0.2.dist-info → megfile-5.0.4.dist-info}/RECORD +22 -21
- {megfile-5.0.2.dist-info → megfile-5.0.4.dist-info}/licenses/LICENSE +2 -2
- {megfile-5.0.2.dist-info → megfile-5.0.4.dist-info}/WHEEL +0 -0
- {megfile-5.0.2.dist-info → megfile-5.0.4.dist-info}/entry_points.txt +0 -0
- {megfile-5.0.2.dist-info → megfile-5.0.4.dist-info}/licenses/LICENSE.pyre +0 -0
- {megfile-5.0.2.dist-info → megfile-5.0.4.dist-info}/top_level.txt +0 -0
megfile/cli.py
CHANGED
|
@@ -92,6 +92,8 @@ def safe_cli(): # pragma: no cover
|
|
|
92
92
|
|
|
93
93
|
|
|
94
94
|
def get_echo_path(file_stat, base_path: str = "", full: bool = False):
|
|
95
|
+
if base_path.startswith("file://"):
|
|
96
|
+
base_path = base_path[7:]
|
|
95
97
|
if base_path == file_stat.path:
|
|
96
98
|
path = file_stat.name
|
|
97
99
|
elif full:
|
|
@@ -150,6 +152,8 @@ def _sftp_prompt_host_key(path):
|
|
|
150
152
|
|
|
151
153
|
def _ls(path: str, long: bool, full: bool, recursive: bool, human_readable: bool):
|
|
152
154
|
base_path = path
|
|
155
|
+
if path == "file://":
|
|
156
|
+
path = "./"
|
|
153
157
|
if has_magic(path):
|
|
154
158
|
scan_func = smart_glob_stat
|
|
155
159
|
base_path = get_non_glob_dir(path)
|
|
@@ -186,7 +190,7 @@ class PathType(ParamType):
|
|
|
186
190
|
name = "path"
|
|
187
191
|
|
|
188
192
|
def shell_complete(self, ctx, param, incomplete):
|
|
189
|
-
if
|
|
193
|
+
if not incomplete:
|
|
190
194
|
completions = [
|
|
191
195
|
CompletionItem(f"{protocol}://")
|
|
192
196
|
for protocol in SmartPath._registered_protocols
|
|
@@ -196,6 +200,15 @@ class PathType(ParamType):
|
|
|
196
200
|
continue
|
|
197
201
|
completions.append(CompletionItem(f"s3+{name}://"))
|
|
198
202
|
return completions
|
|
203
|
+
if incomplete.startswith("file://"):
|
|
204
|
+
return [
|
|
205
|
+
CompletionItem(
|
|
206
|
+
f"file://{entry.path}/"
|
|
207
|
+
if entry.is_dir()
|
|
208
|
+
else f"file://{entry.path}"
|
|
209
|
+
)
|
|
210
|
+
for entry in islice(smart_glob_stat(incomplete[7:] + "*"), 128)
|
|
211
|
+
]
|
|
199
212
|
try:
|
|
200
213
|
return [
|
|
201
214
|
CompletionItem(f"{entry.path}/" if entry.is_dir() else entry.path)
|
megfile/errors.py
CHANGED
|
@@ -182,7 +182,7 @@ def patch_method(
|
|
|
182
182
|
f"Cannot handle error {full_error_message(error)} "
|
|
183
183
|
f"after {retries} tries"
|
|
184
184
|
)
|
|
185
|
-
raise
|
|
185
|
+
raise MaxRetriesExceededError(error, retries=retries)
|
|
186
186
|
retry_interval = min(0.1 * 2**retries, 30)
|
|
187
187
|
_logger.info(
|
|
188
188
|
f"unknown error encountered: {full_error_message(error)}, "
|
|
@@ -209,14 +209,34 @@ def _create_missing_ok_generator(generator, missing_ok: bool, error: Exception):
|
|
|
209
209
|
return create_generator()
|
|
210
210
|
|
|
211
211
|
|
|
212
|
-
class
|
|
213
|
-
def __init__(self, error: Exception,
|
|
214
|
-
|
|
215
|
-
|
|
212
|
+
class MaxRetriesExceededError(Exception):
    """Raised when an operation still fails after its last allowed retry.

    The error of the final attempt is stored as ``__cause__`` and the number
    of tries as ``retries``; both also appear in the message.

    Args:
        error: The error raised by the last attempt. If it is itself a
            ``MaxRetriesExceededError`` (a retry loop nested inside another
            retry loop), it is unwrapped so that ``__cause__`` is always the
            innermost, non-retry error.
        retries: Number of tries made at this level. While ``error`` is being
            unwrapped, the ``retries`` of each nested level is multiplied in.
    """

    def __init__(self, error: Exception, retries: int = 1):
        # Collapse nested retry errors into a single one: keep the innermost
        # cause and multiply the per-level try counts together.
        while isinstance(error, MaxRetriesExceededError):
            retries *= error.retries
            error = error.__cause__
        # Typo fix: the message previously read "Max retires exceeded".
        super().__init__(
            f"Max retries exceeded: {full_error_message(error)}, "
            f"after {retries} tries"
        )
        self.retries = retries
        self.__cause__ = error

    def __reduce__(self):
        """Pickle as ``(class, (cause, retries))``.

        Needed because ``__init__`` takes an error and a count, not the
        formatted message that the default exception pickling would pass back.
        """
        return (self.__class__, (self.__cause__, self.retries))
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
class UnknownError(Exception):
|
|
230
|
+
def __init__(self, error: Exception, path: PathLike, extra: Optional[str] = None):
|
|
231
|
+
parts = [f"Unknown error encountered: {path!r}"]
|
|
232
|
+
if isinstance(error, MaxRetriesExceededError):
|
|
233
|
+
parts.append(f"error: {full_error_message(error.__cause__)}")
|
|
234
|
+
parts.append(f"after {error.retries} tries")
|
|
235
|
+
else:
|
|
236
|
+
parts.append(f"error: {full_error_message(error)}")
|
|
218
237
|
if extra is not None:
|
|
219
|
-
|
|
238
|
+
parts.append(extra)
|
|
239
|
+
message = ", ".join(parts)
|
|
220
240
|
super().__init__(message)
|
|
221
241
|
self.path = path
|
|
222
242
|
self.extra = extra
|
|
@@ -350,6 +370,8 @@ def translate_fs_error(fs_error: Exception, fs_path: PathLike) -> Exception:
|
|
|
350
370
|
if fs_error.filename is None:
|
|
351
371
|
fs_error.filename = fs_path
|
|
352
372
|
return fs_error
|
|
373
|
+
if isinstance(fs_error, MaxRetriesExceededError):
|
|
374
|
+
return fs_error.__cause__
|
|
353
375
|
return fs_error
|
|
354
376
|
|
|
355
377
|
|
|
@@ -359,7 +381,10 @@ def translate_s3_error(s3_error: Exception, s3_url: PathLike) -> Exception:
|
|
|
359
381
|
"""
|
|
360
382
|
if isinstance(s3_error, S3Exception):
|
|
361
383
|
return s3_error
|
|
362
|
-
|
|
384
|
+
ori_error = s3_error
|
|
385
|
+
if isinstance(s3_error, MaxRetriesExceededError):
|
|
386
|
+
s3_error = s3_error.__cause__
|
|
387
|
+
if isinstance(s3_error, ClientError):
|
|
363
388
|
code = client_error_code(s3_error)
|
|
364
389
|
if code in ("NoSuchBucket"):
|
|
365
390
|
bucket_or_url = (
|
|
@@ -419,7 +444,7 @@ def translate_s3_error(s3_error: Exception, s3_url: PathLike) -> Exception:
|
|
|
419
444
|
return S3InvalidRangeError("Invalid range: %r" % s3_url)
|
|
420
445
|
elif "AccessDenied" in str(s3_error):
|
|
421
446
|
return S3PermissionError("Access denied: %r" % s3_url)
|
|
422
|
-
return S3UnknownError(
|
|
447
|
+
return S3UnknownError(ori_error, s3_url)
|
|
423
448
|
|
|
424
449
|
|
|
425
450
|
def translate_http_error(http_error: Exception, http_url: str) -> Exception:
|
|
@@ -434,13 +459,16 @@ def translate_http_error(http_error: Exception, http_url: str) -> Exception:
|
|
|
434
459
|
"""
|
|
435
460
|
if isinstance(http_error, HttpException):
|
|
436
461
|
return http_error
|
|
462
|
+
ori_error = http_error
|
|
463
|
+
if isinstance(http_error, MaxRetriesExceededError):
|
|
464
|
+
http_error = http_error.__cause__
|
|
437
465
|
if isinstance(http_error, HTTPError):
|
|
438
466
|
status_code = http_error.response.status_code
|
|
439
467
|
if status_code == 401 or status_code == 403:
|
|
440
468
|
return HttpPermissionError("Permission denied: %r" % http_url)
|
|
441
469
|
elif status_code == 404:
|
|
442
470
|
return HttpFileNotFoundError("No such file: %r" % http_url)
|
|
443
|
-
return HttpUnknownError(
|
|
471
|
+
return HttpUnknownError(ori_error, http_url)
|
|
444
472
|
|
|
445
473
|
|
|
446
474
|
@contextmanager
|
|
@@ -476,6 +504,8 @@ def translate_hdfs_error(hdfs_error: Exception, hdfs_path: PathLike) -> Exceptio
|
|
|
476
504
|
elif hdfs_error.status_code == 404:
|
|
477
505
|
return FileNotFoundError(f"No match file: {hdfs_path}")
|
|
478
506
|
# pytype: enable=attribute-error
|
|
507
|
+
if isinstance(hdfs_error, MaxRetriesExceededError):
|
|
508
|
+
return hdfs_error.__cause__
|
|
479
509
|
return hdfs_error
|
|
480
510
|
|
|
481
511
|
|
megfile/fs_path.py
CHANGED
|
@@ -17,7 +17,6 @@ from megfile.interfaces import (
|
|
|
17
17
|
Access,
|
|
18
18
|
ContextIterator,
|
|
19
19
|
FileEntry,
|
|
20
|
-
FileLike,
|
|
21
20
|
PathLike,
|
|
22
21
|
StatResult,
|
|
23
22
|
URIPath,
|
|
@@ -29,6 +28,7 @@ from megfile.lib.joinpath import path_join
|
|
|
29
28
|
from megfile.lib.url import get_url_scheme
|
|
30
29
|
from megfile.smart_path import SmartPath
|
|
31
30
|
from megfile.utils import calculate_md5, copyfd
|
|
31
|
+
from megfile.utils.atomic import FSFuncForAtomic, WrapAtomic
|
|
32
32
|
|
|
33
33
|
__all__ = [
|
|
34
34
|
"FSPath",
|
|
@@ -115,36 +115,6 @@ def _fs_rename_file(
|
|
|
115
115
|
shutil.move(src_path, dst_path)
|
|
116
116
|
|
|
117
117
|
|
|
118
|
-
class WrapAtomic(FileLike):
|
|
119
|
-
__atomic__ = True
|
|
120
|
-
|
|
121
|
-
def __init__(self, fileobj):
|
|
122
|
-
self.fileobj = fileobj
|
|
123
|
-
self.temp_name = f"{self.name}.temp"
|
|
124
|
-
os.rename(self.name, self.temp_name)
|
|
125
|
-
|
|
126
|
-
@property
|
|
127
|
-
def name(self):
|
|
128
|
-
return self.fileobj.name
|
|
129
|
-
|
|
130
|
-
@property
|
|
131
|
-
def mode(self):
|
|
132
|
-
return self.fileobj.mode
|
|
133
|
-
|
|
134
|
-
def _close(self):
|
|
135
|
-
self.fileobj.close()
|
|
136
|
-
os.rename(self.temp_name, self.name)
|
|
137
|
-
|
|
138
|
-
def _abort(self):
|
|
139
|
-
try:
|
|
140
|
-
os.unlink(self.temp_name)
|
|
141
|
-
except FileNotFoundError:
|
|
142
|
-
pass
|
|
143
|
-
|
|
144
|
-
def __getattr__(self, name: str):
|
|
145
|
-
return getattr(self.fileobj, name)
|
|
146
|
-
|
|
147
|
-
|
|
148
118
|
@SmartPath.register
|
|
149
119
|
class FSPath(URIPath):
|
|
150
120
|
"""file protocol
|
|
@@ -995,7 +965,30 @@ class FSPath(URIPath):
|
|
|
995
965
|
self.path_without_protocol # pyre-ignore[6]
|
|
996
966
|
)
|
|
997
967
|
).mkdir(parents=True, exist_ok=True)
|
|
998
|
-
|
|
968
|
+
|
|
969
|
+
if atomic and mode not in ("r", "rb"):
|
|
970
|
+
if isinstance(self.path_without_protocol, int):
|
|
971
|
+
raise TypeError("atomic is not supported for file descriptor path")
|
|
972
|
+
|
|
973
|
+
fs_func = FSFuncForAtomic(
|
|
974
|
+
exists=os.path.exists,
|
|
975
|
+
copy=shutil.copyfile,
|
|
976
|
+
replace=os.replace,
|
|
977
|
+
open=io.open,
|
|
978
|
+
unlink=os.unlink,
|
|
979
|
+
)
|
|
980
|
+
return WrapAtomic(
|
|
981
|
+
self.path_without_protocol,
|
|
982
|
+
mode,
|
|
983
|
+
fs_func,
|
|
984
|
+
buffering=buffering,
|
|
985
|
+
encoding=encoding,
|
|
986
|
+
errors=errors,
|
|
987
|
+
newline=newline,
|
|
988
|
+
closefd=closefd,
|
|
989
|
+
)
|
|
990
|
+
|
|
991
|
+
return io.open(
|
|
999
992
|
self.path_without_protocol,
|
|
1000
993
|
mode,
|
|
1001
994
|
buffering=buffering,
|
|
@@ -1004,9 +997,6 @@ class FSPath(URIPath):
|
|
|
1004
997
|
newline=newline,
|
|
1005
998
|
closefd=closefd,
|
|
1006
999
|
)
|
|
1007
|
-
if atomic and ("w" in mode or "x" in mode or "a" in mode):
|
|
1008
|
-
return WrapAtomic(fp)
|
|
1009
|
-
return fp
|
|
1010
1000
|
|
|
1011
1001
|
@cached_property
|
|
1012
1002
|
def parts(self) -> Tuple[str, ...]:
|
megfile/hdfs_path.py
CHANGED
|
@@ -4,6 +4,7 @@ import io
|
|
|
4
4
|
import os
|
|
5
5
|
import sys
|
|
6
6
|
from functools import cached_property, lru_cache
|
|
7
|
+
from logging import getLogger
|
|
7
8
|
from typing import IO, BinaryIO, Iterator, List, Optional, Tuple
|
|
8
9
|
|
|
9
10
|
from megfile.config import (
|
|
@@ -26,6 +27,8 @@ __all__ = [
|
|
|
26
27
|
"is_hdfs",
|
|
27
28
|
]
|
|
28
29
|
|
|
30
|
+
_logger = getLogger(__name__)
|
|
31
|
+
|
|
29
32
|
HDFS_USER = "HDFS_USER"
|
|
30
33
|
HDFS_URL = "HDFS_URL"
|
|
31
34
|
HDFS_ROOT = "HDFS_ROOT"
|
|
@@ -590,6 +593,7 @@ class HdfsPath(URIPath):
|
|
|
590
593
|
max_buffer_size: int = READER_MAX_BUFFER_SIZE,
|
|
591
594
|
block_forward: Optional[int] = None,
|
|
592
595
|
block_size: int = READER_BLOCK_SIZE,
|
|
596
|
+
atomic: bool = False,
|
|
593
597
|
**kwargs,
|
|
594
598
|
) -> IO:
|
|
595
599
|
"""
|
|
@@ -619,6 +623,12 @@ class HdfsPath(URIPath):
|
|
|
619
623
|
elif not encoding:
|
|
620
624
|
encoding = sys.getdefaultencoding()
|
|
621
625
|
|
|
626
|
+
if atomic:
|
|
627
|
+
_logger.warning(
|
|
628
|
+
"`atomic` parameter in HdfsPath.open is not supported yet. "
|
|
629
|
+
"The parameter will be ignored."
|
|
630
|
+
)
|
|
631
|
+
|
|
622
632
|
with raise_hdfs_error(self.path_with_protocol):
|
|
623
633
|
if mode in ("r", "rb"):
|
|
624
634
|
file_obj = HdfsPrefetchReader(
|
|
@@ -10,6 +10,8 @@ class BaseMemoryHandler(Readable[bytes], Seekable, Writable[bytes], ABC):
|
|
|
10
10
|
def __init__(
|
|
11
11
|
self,
|
|
12
12
|
mode: str,
|
|
13
|
+
*,
|
|
14
|
+
atomic: bool = False,
|
|
13
15
|
):
|
|
14
16
|
self._mode = mode
|
|
15
17
|
|
|
@@ -19,6 +21,9 @@ class BaseMemoryHandler(Readable[bytes], Seekable, Writable[bytes], ABC):
|
|
|
19
21
|
self._fileobj = BytesIO()
|
|
20
22
|
self._download_fileobj()
|
|
21
23
|
|
|
24
|
+
if atomic:
|
|
25
|
+
self.__atomic__ = True
|
|
26
|
+
|
|
22
27
|
@property
|
|
23
28
|
@abstractmethod
|
|
24
29
|
def name(self) -> str:
|
|
@@ -90,3 +95,7 @@ class BaseMemoryHandler(Readable[bytes], Seekable, Writable[bytes], ABC):
|
|
|
90
95
|
if need_upload:
|
|
91
96
|
self._upload_fileobj()
|
|
92
97
|
self._fileobj.close()
|
|
98
|
+
|
|
99
|
+
def _abort(self):
    """Release the in-memory buffer without uploading it.

    Unlike ``_close``, which may call ``_upload_fileobj`` first, this only
    closes ``_fileobj``.
    """
    if not hasattr(self, "_fileobj"):
        # The buffer was never created (e.g. ``__init__`` did not get that
        # far), so there is nothing to release.
        return
    self._fileobj.close()
|
megfile/lib/s3_cached_handler.py
CHANGED
|
@@ -17,6 +17,7 @@ class S3CachedHandler(S3MemoryHandler):
|
|
|
17
17
|
cache_path: Optional[str] = None,
|
|
18
18
|
remove_cache_when_open: bool = True,
|
|
19
19
|
profile_name: Optional[str] = None,
|
|
20
|
+
atomic: bool = False,
|
|
20
21
|
):
|
|
21
22
|
self._bucket = bucket
|
|
22
23
|
self._key = key
|
|
@@ -37,6 +38,9 @@ class S3CachedHandler(S3MemoryHandler):
|
|
|
37
38
|
if remove_cache_when_open:
|
|
38
39
|
os.unlink(self._cache_path) # pyre-ignore[6]
|
|
39
40
|
|
|
41
|
+
if atomic:
|
|
42
|
+
self.__atomic__ = True
|
|
43
|
+
|
|
40
44
|
def fileno(self) -> int:
|
|
41
45
|
# allow numpy.array to create a memmaped ndarray
|
|
42
46
|
return self._fileobj.fileno()
|
megfile/lib/s3_memory_handler.py
CHANGED
|
@@ -19,12 +19,13 @@ class S3MemoryHandler(BaseMemoryHandler):
|
|
|
19
19
|
*,
|
|
20
20
|
s3_client,
|
|
21
21
|
profile_name: Optional[str] = None,
|
|
22
|
+
atomic: bool = False,
|
|
22
23
|
):
|
|
23
24
|
self._bucket = bucket
|
|
24
25
|
self._key = key
|
|
25
26
|
self._client = s3_client
|
|
26
27
|
self._profile_name = profile_name
|
|
27
|
-
super().__init__(mode=mode)
|
|
28
|
+
super().__init__(mode=mode, atomic=atomic)
|
|
28
29
|
|
|
29
30
|
@property
|
|
30
31
|
def name(self) -> str:
|
|
@@ -48,11 +48,12 @@ class WebdavMemoryHandler(BaseMemoryHandler):
|
|
|
48
48
|
*,
|
|
49
49
|
webdav_client: WebdavClient,
|
|
50
50
|
name: str,
|
|
51
|
+
atomic: bool = False,
|
|
51
52
|
):
|
|
52
53
|
self._remote_path = remote_path
|
|
53
54
|
self._client = webdav_client
|
|
54
55
|
self._name = name
|
|
55
|
-
super().__init__(mode=mode)
|
|
56
|
+
super().__init__(mode=mode, atomic=atomic)
|
|
56
57
|
|
|
57
58
|
@property
|
|
58
59
|
def name(self) -> str:
|
megfile/pathlike.py
CHANGED
|
@@ -565,7 +565,7 @@ class BasePath:
|
|
|
565
565
|
|
|
566
566
|
@cached_property
|
|
567
567
|
def anchor(self) -> str:
    """Return the anchor of this path; here it is the same value as ``root``."""
    return self.root
|
|
569
569
|
|
|
570
570
|
def joinpath(self: Self, *other_paths: "PathLike") -> Self:
|
|
571
571
|
"""
|
|
@@ -863,7 +863,7 @@ class URIPath(BasePath):
|
|
|
863
863
|
path = path.lstrip("/")
|
|
864
864
|
if path != "":
|
|
865
865
|
parts.extend(path.split("/"))
|
|
866
|
-
return tuple(parts)
|
|
866
|
+
return tuple(parts)
|
|
867
867
|
|
|
868
868
|
@cached_property
|
|
869
869
|
def parents(self) -> "URIPathParents":
|
|
@@ -917,10 +917,18 @@ class URIPathParents(Sequence):
|
|
|
917
917
|
self.parts = parts
|
|
918
918
|
|
|
919
919
|
def __len__(self):
    """Return the number of entries in this parents sequence.

    Every element of ``parts`` counts when the prefix is empty or contains
    ``"://"`` and the first part is not ``"/"``; otherwise the first part is
    left out of the count. The result is never negative.

    NOTE(review): presumably the excluded first part is the root/anchor
    component of the path -- confirm against ``__getitem__``.
    """
    part_count = len(self.parts)
    prefix = self.prefix
    first_part_counts = (
        (prefix == "" or "://" in prefix)
        and part_count > 0
        and self.parts[0] != "/"
    )
    return part_count if first_part_counts else max(part_count - 1, 0)
|
|
921
927
|
|
|
922
928
|
def __getitem__(self, idx):
|
|
923
|
-
if idx < 0
|
|
929
|
+
if idx < 0:
|
|
930
|
+
idx += len(self)
|
|
931
|
+
if idx < 0 or idx >= len(self):
|
|
924
932
|
raise IndexError(idx)
|
|
925
933
|
|
|
926
934
|
if len(self.parts[: -idx - 1]) > 1:
|
megfile/s3_path.py
CHANGED
|
@@ -76,9 +76,9 @@ from megfile.lib.url import get_url_scheme
|
|
|
76
76
|
from megfile.smart_path import SmartPath
|
|
77
77
|
from megfile.utils import (
|
|
78
78
|
_is_pickle,
|
|
79
|
+
binary_open,
|
|
79
80
|
calculate_md5,
|
|
80
81
|
generate_cache_path,
|
|
81
|
-
get_binary_mode,
|
|
82
82
|
get_content_offset,
|
|
83
83
|
is_domain_or_subdomain,
|
|
84
84
|
is_readable,
|
|
@@ -683,11 +683,15 @@ def _s3_binary_mode(s3_open_func):
|
|
|
683
683
|
raise S3FileExistsError("File exists: %r" % s3_url)
|
|
684
684
|
mode = mode.replace("x", "w")
|
|
685
685
|
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
|
|
686
|
+
return binary_open(
|
|
687
|
+
s3_open_func,
|
|
688
|
+
)(
|
|
689
|
+
s3_url,
|
|
690
|
+
mode,
|
|
691
|
+
encoding=encoding,
|
|
692
|
+
errors=errors,
|
|
693
|
+
**kwargs,
|
|
694
|
+
)
|
|
691
695
|
|
|
692
696
|
return wrapper
|
|
693
697
|
|
|
@@ -991,7 +995,12 @@ def s3_buffered_open(
|
|
|
991
995
|
if "a" in mode or "+" in mode:
|
|
992
996
|
if cache_path is None:
|
|
993
997
|
return S3MemoryHandler(
|
|
994
|
-
bucket,
|
|
998
|
+
bucket,
|
|
999
|
+
key,
|
|
1000
|
+
mode,
|
|
1001
|
+
s3_client=client,
|
|
1002
|
+
profile_name=s3_url._profile_name,
|
|
1003
|
+
atomic=atomic,
|
|
995
1004
|
)
|
|
996
1005
|
return S3CachedHandler(
|
|
997
1006
|
bucket,
|
|
@@ -1000,6 +1009,7 @@ def s3_buffered_open(
|
|
|
1000
1009
|
s3_client=client,
|
|
1001
1010
|
cache_path=cache_path,
|
|
1002
1011
|
profile_name=s3_url._profile_name,
|
|
1012
|
+
atomic=atomic,
|
|
1003
1013
|
)
|
|
1004
1014
|
|
|
1005
1015
|
if mode == "rb":
|
megfile/sftp_path.py
CHANGED
|
@@ -24,6 +24,7 @@ from megfile.lib.glob import FSFunc, iglob
|
|
|
24
24
|
from megfile.pathlike import URIPath
|
|
25
25
|
from megfile.smart_path import SmartPath
|
|
26
26
|
from megfile.utils import calculate_md5, copyfileobj, thread_local
|
|
27
|
+
from megfile.utils.atomic import FSFuncForAtomic, WrapAtomic
|
|
27
28
|
|
|
28
29
|
_logger = get_logger(__name__)
|
|
29
30
|
|
|
@@ -1234,6 +1235,7 @@ class SftpPath(URIPath):
|
|
|
1234
1235
|
buffering=-1,
|
|
1235
1236
|
encoding: Optional[str] = None,
|
|
1236
1237
|
errors: Optional[str] = None,
|
|
1238
|
+
atomic: bool = False,
|
|
1237
1239
|
**kwargs,
|
|
1238
1240
|
) -> IO:
|
|
1239
1241
|
"""Open a file on the path.
|
|
@@ -1253,6 +1255,26 @@ class SftpPath(URIPath):
|
|
|
1253
1255
|
self.parent.mkdir(parents=True, exist_ok=True)
|
|
1254
1256
|
elif not self.exists():
|
|
1255
1257
|
raise FileNotFoundError("No such file: %r" % self.path_with_protocol)
|
|
1258
|
+
|
|
1259
|
+
if atomic and mode not in ("r", "rb"):
|
|
1260
|
+
fs_func = FSFuncForAtomic(
|
|
1261
|
+
exists=lambda path: self.from_path(path).exists(),
|
|
1262
|
+
copy=lambda src, dst: self.from_path(src).copy(dst),
|
|
1263
|
+
replace=lambda src, dst: self.from_path(src).replace(dst),
|
|
1264
|
+
open=lambda path, *args, **kwargs: self.from_path(path).open(
|
|
1265
|
+
*args, **kwargs
|
|
1266
|
+
),
|
|
1267
|
+
unlink=lambda path: self.from_path(path).unlink(),
|
|
1268
|
+
)
|
|
1269
|
+
return WrapAtomic(
|
|
1270
|
+
self.path_with_protocol,
|
|
1271
|
+
mode,
|
|
1272
|
+
fs_func,
|
|
1273
|
+
buffering=buffering,
|
|
1274
|
+
encoding=encoding,
|
|
1275
|
+
errors=errors,
|
|
1276
|
+
)
|
|
1277
|
+
|
|
1256
1278
|
fileobj = self._client.open(self._real_path, mode, bufsize=buffering)
|
|
1257
1279
|
fileobj.name = self.path
|
|
1258
1280
|
if "r" in mode and "b" not in mode:
|
megfile/utils/__init__.py
CHANGED
|
@@ -13,7 +13,6 @@ from io import (
|
|
|
13
13
|
BytesIO,
|
|
14
14
|
StringIO,
|
|
15
15
|
TextIOBase,
|
|
16
|
-
TextIOWrapper,
|
|
17
16
|
)
|
|
18
17
|
from threading import RLock
|
|
19
18
|
from typing import IO, Callable, List, Optional
|
|
@@ -23,6 +22,7 @@ from megfile.config import (
|
|
|
23
22
|
DEFAULT_HASH_BUFFER_SIZE,
|
|
24
23
|
READER_LAZY_PREFETCH,
|
|
25
24
|
)
|
|
25
|
+
from megfile.utils.atomic import AtomicTextIOWrapper
|
|
26
26
|
from megfile.utils.mutex import ProcessLocal, ThreadLocal
|
|
27
27
|
|
|
28
28
|
|
|
@@ -210,7 +210,7 @@ def binary_open(open_func):
|
|
|
210
210
|
):
|
|
211
211
|
fileobj = open_func(path, get_binary_mode(mode), **kwargs)
|
|
212
212
|
if "b" not in mode:
|
|
213
|
-
fileobj =
|
|
213
|
+
fileobj = AtomicTextIOWrapper(fileobj, encoding=encoding, errors=errors)
|
|
214
214
|
fileobj.mode = mode # pyre-ignore[41]
|
|
215
215
|
return fileobj
|
|
216
216
|
|
megfile/utils/atomic.py
ADDED
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
import typing as T
|
|
2
|
+
from io import TextIOWrapper
|
|
3
|
+
from logging import getLogger
|
|
4
|
+
|
|
5
|
+
from megfile.interfaces import FileLike
|
|
6
|
+
|
|
7
|
+
_logger = getLogger(__name__)
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class FSFuncForAtomic(T.NamedTuple):
    """Bundle of filesystem primitives that ``WrapAtomic`` runs against.

    Each backend supplies its own callables so the same temp-file logic can be
    reused; ``WrapAtomic`` only ever calls these five functions and passes
    them plain path strings.
    """

    # exists(path): called with the target path, to reject "x" mode on an
    # existing file and to decide whether to pre-fill the temp file.
    exists: T.Callable[[str], bool]
    # copy(src, dst): called as copy(path, temp_path) before the temp file is
    # opened, when the target already exists.
    copy: T.Callable[[str, str], T.Any]
    # replace(src, dst): called as replace(temp_path, path) after the temp
    # file has been closed, to publish the result.
    replace: T.Callable[[str, str], T.Any]
    # open(path, mode, buffering=..., encoding=..., errors=..., newline=...,
    # closefd=...): opens the temp file and returns the object that all reads
    # and writes are forwarded to.
    open: T.Callable[..., T.IO]
    # unlink(path): called with the temp path on abort; WrapAtomic ignores a
    # FileNotFoundError from it and lets any other error propagate.
    unlink: T.Callable[[str], T.Any]
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class WrapAtomic(FileLike):
    """Write to ``path`` atomically by working on ``path + ".temp"``.

    The constructor opens the temporary file (pre-filled with a copy of
    ``path`` when that already exists) and forwards the usual file methods to
    it. ``_close`` closes the temporary file and moves it over ``path`` with
    ``fs_func.replace``; ``_abort`` closes and deletes it, leaving ``path`` as
    it was.

    NOTE(review): the temporary name is fixed, so two writers that target the
    same ``path`` at the same time share one temporary file.

    Args:
        path: Target path; also reported as ``name``.
        mode: Mode requested by the caller; reported as ``mode``. An ``"x"``
            is checked here and then opened as ``"w"`` on the temporary file.
        fs_func: Backend primitives used for every filesystem operation.
        buffering, encoding, errors, newline, closefd: Passed through
            unchanged to ``fs_func.open``.

    Raises:
        FileExistsError: If ``mode`` contains ``"x"`` and ``path`` exists.
    """

    # Marker attribute. NOTE(review): presumably read by ``FileLike`` to
    # report the object as atomic -- confirm in megfile.interfaces.
    __atomic__ = True

    # Methods of the temporary file object exposed directly on the wrapper.
    _FORWARDED = (
        "read",
        "readline",
        "readlines",
        "write",
        "writelines",
        "truncate",
        "seek",
        "tell",
        "flush",
        "readable",
        "writable",
        "seekable",
    )

    def __init__(
        self,
        path: str,
        mode: str,
        fs_func: FSFuncForAtomic,
        *,
        buffering: int = -1,
        encoding: T.Optional[str] = None,
        errors: T.Optional[str] = None,
        newline: T.Optional[str] = None,
        closefd: bool = True,
    ):
        self.fs_func = fs_func
        if "x" in mode and self.fs_func.exists(path):
            raise FileExistsError(f"File exists: {path}")

        self._path = path
        self._mode = mode
        self._temp_path = self._path + ".temp"

        # Exclusivity was checked against the real target above, so the
        # temporary file itself is opened with a plain "w".
        open_mode = mode.replace("x", "w", 1)

        if self._should_copy():
            # NOTE(review): for truncating modes the copied content is thrown
            # away by the open below. The copy is kept because fs_func.copy
            # may carry over more than content on some backends -- confirm
            # before optimising it away.
            self.fs_func.copy(self._path, self._temp_path)
        elif "w" not in open_mode:
            # No target to start from and the open below will not truncate:
            # a temp file left behind by an earlier crashed writer would be
            # opened as-is and its stale content committed on close.
            self._remove_temp()

        try:
            self.fileobj = self.fs_func.open(
                self._temp_path,
                open_mode,
                buffering=buffering,
                encoding=encoding,
                errors=errors,
                newline=newline,
                closefd=closefd,
            )
        except Exception:
            # Do not leave the copied/half-created temp file behind. Cleanup
            # is best effort so that the original open error is what the
            # caller sees.
            try:
                self._remove_temp()
            except Exception:
                _logger.debug(
                    "failed to remove temp file %r", self._temp_path, exc_info=True
                )
            raise

        for method_name in self._FORWARDED:
            setattr(self, method_name, getattr(self.fileobj, method_name))

    @property
    def name(self):
        """The target path, not the temporary one."""
        return self._path

    @property
    def mode(self):
        """The mode the caller asked for (before the "x" -> "w" rewrite)."""
        return self._mode

    def _should_copy(self) -> bool:
        """Return True when the target exists and must seed the temp file."""
        return bool(self.fs_func.exists(self._path))

    def _remove_temp(self) -> None:
        """Delete the temporary file; a missing file is not an error."""
        try:
            self.fs_func.unlink(self._temp_path)
        except FileNotFoundError:
            pass

    def _close(self):
        """Commit: close the temporary file, then move it over the target."""
        self.fileobj.close()
        self.fs_func.replace(self._temp_path, self._path)

    def _abort(self):
        """Roll back: close the temporary file and delete it."""
        try:
            self.fileobj.close()
        except Exception:
            # Deliberately best effort: the data is being discarded anyway
            # and the temp file must still be removed below.
            _logger.debug(
                "ignored error while closing %r during abort",
                self._temp_path,
                exc_info=True,
            )
        self._remove_temp()
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
class AtomicTextIOWrapper(TextIOWrapper):
    """TextIOWrapper that keeps atomic semantics of the underlying raw object.

    When the wrapped object reports itself as atomic, leaving a ``with`` block
    through an exception, or dropping the wrapper without closing it, aborts
    the raw object instead of closing it.
    """

    def __init__(self, buffer, *args, **kwargs):
        # Keep a reference to the raw object so we can call abort later.
        # Both attributes are set before super().__init__ so that __del__ can
        # rely on them even when the base initialiser raises.
        self._raw = buffer
        self._aborted = False
        super().__init__(buffer, *args, **kwargs)

    @property
    def atomic(self) -> bool:
        """Whether the raw object exposes a truthy ``atomic`` attribute."""
        return getattr(self._raw, "atomic", False)

    def abort(self) -> bool:
        """Abort the atomic operation.

        Delegates to the raw object's public ``abort()``. The previous code
        tested for ``abort`` but then called the private ``_abort()`` and
        returned its result, which is ``None`` for the ``_abort``
        implementations in this package, so an abort was never reported.

        NOTE(review): assumes the raw ``abort()`` returns a truthy value when
        it actually aborted -- confirm against megfile.interfaces.

        Returns:
            bool: True if the abort was performed, False otherwise.
        """
        raw_abort = getattr(self._raw, "abort", None)
        if raw_abort is None:
            return False
        if raw_abort():
            self._aborted = True
            return True
        return False

    def __exit__(self, exc_type, exc_val, exc_tb):
        if self._aborted:
            # Already rolled back explicitly; closing now would flush text
            # into a discarded raw object.
            return
        if self.atomic and exc_val is not None and self.abort():
            # Imported here, as in the original code, rather than at module
            # top. NOTE(review): presumably to avoid an import cycle.
            from megfile.errors import full_error_message

            _logger.warning(
                f"skip closing atomic file-like object: {self}, "
                f"since error encountered: {full_error_message(exc_val)}"
            )
            return

        super().__exit__(exc_type, exc_val, exc_tb)

    def __del__(self):
        if getattr(self, "_aborted", False):
            return
        try:
            if self.closed:
                # flush() on a closed wrapper raises ValueError, which would
                # surface as an "Exception ignored in __del__" message for
                # every wrapper that was closed normally.
                return
        except ValueError:
            # Never attached to a buffer (failed __init__) or detached:
            # there is nothing to flush or close.
            return
        if self.atomic and self.abort():
            _logger.warning(
                f"skip closing atomic file-like object before deletion: {self}"
            )
            return
        self.flush()
        self.close()
|
megfile/version.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
VERSION = "5.0.2"
|
|
1
|
+
VERSION = "5.0.4"
|
megfile/webdav_path.py
CHANGED
|
@@ -800,6 +800,7 @@ class WebdavPath(URIPath):
|
|
|
800
800
|
max_buffer_size: int = READER_MAX_BUFFER_SIZE,
|
|
801
801
|
block_forward: Optional[int] = None,
|
|
802
802
|
block_size: int = READER_BLOCK_SIZE,
|
|
803
|
+
atomic: bool = False,
|
|
803
804
|
**kwargs,
|
|
804
805
|
) -> IO:
|
|
805
806
|
"""Open a file on the path.
|
|
@@ -840,6 +841,7 @@ class WebdavPath(URIPath):
|
|
|
840
841
|
mode,
|
|
841
842
|
webdav_client=self._client,
|
|
842
843
|
name=self.path_with_protocol,
|
|
844
|
+
atomic=atomic,
|
|
843
845
|
)
|
|
844
846
|
|
|
845
847
|
def chmod(self, mode: int, *, follow_symlinks: bool = True):
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: megfile
|
|
3
|
-
Version: 5.0.2
|
|
3
|
+
Version: 5.0.4
|
|
4
4
|
Summary: Megvii file operation library
|
|
5
5
|
Author-email: megvii-reng <megvii-reng@googlegroups.com>
|
|
6
6
|
Project-URL: Homepage, https://github.com/megvii-research/megfile
|
|
@@ -59,7 +59,7 @@ megfile - Megvii FILE library
|
|
|
59
59
|
* Faster file read and write operations
|
|
60
60
|
* Excellent error retry mechanism to help you handle network issues
|
|
61
61
|
* Supports popular protocols, even making it easy to use the same protocol with different endpoints
|
|
62
|
-
* Stable and secure, with CI coverage
|
|
62
|
+
* Stable and secure, with high CI test coverage, used by multiple industry giants
|
|
63
63
|
* Perfect type hints and built-in documentation. You can enjoy the IDE's auto-completion and static checking
|
|
64
64
|
|
|
65
65
|
## Support Protocols
|
|
@@ -1,22 +1,22 @@
|
|
|
1
1
|
megfile/__init__.py,sha256=4XbMsR-lM7MxbnPGBI16m2sq6ghuA2-zZj2XF4bbX2Y,3291
|
|
2
|
-
megfile/cli.py,sha256=
|
|
2
|
+
megfile/cli.py,sha256=TjUhfcOB_weEBa_e55nmiajq_XObAhQa7sx5oivrQCw,30620
|
|
3
3
|
megfile/config.py,sha256=K3B_o2dnI7qGsGnK8Jg18-S5YYLYuzskfNJowlSMkQM,5065
|
|
4
|
-
megfile/errors.py,sha256=
|
|
5
|
-
megfile/fs_path.py,sha256=
|
|
6
|
-
megfile/hdfs_path.py,sha256=
|
|
4
|
+
megfile/errors.py,sha256=zKwM5r5j89mlbWZNeax26Hq63NmQhl9iGMfTtgyvYNA,16830
|
|
5
|
+
megfile/fs_path.py,sha256=RxdhMDoc1HRmQtyaCehEGk_UJtHGLrrwiUIHrS4LJiY,41027
|
|
6
|
+
megfile/hdfs_path.py,sha256=PWqws54Ou136VxaYp9K_UFRr5BoiZWsO330n9ig5IG0,26338
|
|
7
7
|
megfile/http_path.py,sha256=08OmzmRMyLSyq1Yr1K2HbzexesURJrIoA6AibwYzUiA,13844
|
|
8
8
|
megfile/interfaces.py,sha256=XU46U5pl4k1Gse63i4z5SvxcjWeKLj0xyB0Y6fYiWWo,9887
|
|
9
|
-
megfile/pathlike.py,sha256=
|
|
10
|
-
megfile/s3_path.py,sha256=
|
|
9
|
+
megfile/pathlike.py,sha256=4RuYHqUc5_6rZDCcVo_18il0Hy7BlOYt-rtYwCtp9Gg,31446
|
|
10
|
+
megfile/s3_path.py,sha256=C6z6pqZb0LMlNqUbZiaC8_deKX3feGxfPfSYjusqJq0,94507
|
|
11
11
|
megfile/sftp2_path.py,sha256=K90bnMVAx0MQPGXP6LogGuDRzaD4MPR6lMOfdY9C9-0,37942
|
|
12
|
-
megfile/sftp_path.py,sha256=
|
|
12
|
+
megfile/sftp_path.py,sha256=zxuT1hk7sgoOUwq6KBXS__caX8Hk_LgPjINQheTZWAU,52063
|
|
13
13
|
megfile/smart.py,sha256=Lab2jxprj-zvPw5GqUWlWiEY8bcpRlviks_qp9r-km8,38224
|
|
14
14
|
megfile/smart_path.py,sha256=kGidkM5S58ChE3LVZMcUACs3IQgsqh9m04sp6-wxuhk,12615
|
|
15
15
|
megfile/stdio_path.py,sha256=cxaDr8rtisTPnN-rjtaEpqQnshwiqwXFUJBM9xWY7Cg,2711
|
|
16
|
-
megfile/version.py,sha256=
|
|
17
|
-
megfile/webdav_path.py,sha256=
|
|
16
|
+
megfile/version.py,sha256=nxcOThQpxgVcGzsD8ROZ-d6RNNhY7dxyBkl0oU0Aahs,19
|
|
17
|
+
megfile/webdav_path.py,sha256=QrRYKBGWXkUZXEeHxAfVJkxnCfnczocBSRkVgDC_qC4,31421
|
|
18
18
|
megfile/lib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
19
|
-
megfile/lib/base_memory_handler.py,sha256=
|
|
19
|
+
megfile/lib/base_memory_handler.py,sha256=K4mq28vXVQD-05I64AMH2_7_3y5n7splcMNQZ7jYdxw,2871
|
|
20
20
|
megfile/lib/base_prefetch_reader.py,sha256=MYaWOkXc3geZMYNPHlPZmmpOR6uSz-AMuCZwYdoz7t0,13296
|
|
21
21
|
megfile/lib/combine_reader.py,sha256=Kp2wEloOUpTlIU7dve87MBpSzmIM-F9OtpTawAjFkiU,4828
|
|
22
22
|
megfile/lib/compare.py,sha256=CPSbyqsQ396oSfxa7h0NdUUqBw5A3WOn6fHrNKkuinw,2188
|
|
@@ -29,23 +29,24 @@ megfile/lib/http_prefetch_reader.py,sha256=x0m5tKN8QLl5QkZyNEDW662UJaZoMf61GDHVv
|
|
|
29
29
|
megfile/lib/joinpath.py,sha256=R3sz3pvcgbv1e793vZUnwGH-NmDlEghEPNwq5IEMm4I,1251
|
|
30
30
|
megfile/lib/lazy_handler.py,sha256=bE7RGt1x_xYWMgGAvHr7dwEt52qy-D3z90X3oyCvE6g,1875
|
|
31
31
|
megfile/lib/s3_buffered_writer.py,sha256=a42tr48QXToTQtZD1XfK1Veju6qLCLF0RJC0V_P3pW8,8252
|
|
32
|
-
megfile/lib/s3_cached_handler.py,sha256=
|
|
32
|
+
megfile/lib/s3_cached_handler.py,sha256=_57t5wO3N1B2HhxGy2sRKStUm-XFrpGZJO9DkgLMHKk,1511
|
|
33
33
|
megfile/lib/s3_limited_seekable_writer.py,sha256=rBlGCsrIJdVRKdsJ1uIAE_R6EN96Kl2JMskk-5czYmE,6289
|
|
34
|
-
megfile/lib/s3_memory_handler.py,sha256=
|
|
34
|
+
megfile/lib/s3_memory_handler.py,sha256=6gWFlVgwJSHssofP5HJ5AuGHwkL6r1Fh8UR-H7g-GJ0,2170
|
|
35
35
|
megfile/lib/s3_pipe_handler.py,sha256=g3iAN1P9pCdvSNsGeJBGcBa10S62oqIg_9W3b3wc7os,3809
|
|
36
36
|
megfile/lib/s3_prefetch_reader.py,sha256=AqfADmbbZYA6nw4vxBOiFWX5q5CSYOd0hq1LWcf1PY0,4524
|
|
37
37
|
megfile/lib/s3_share_cache_reader.py,sha256=8uip5IdVjPXCquXrskjocsZx2-TiXqWZPY0gX8JC144,4020
|
|
38
38
|
megfile/lib/shadow_handler.py,sha256=TntewlvIW9ZxCfmqASDQREHoiZ8v42faOe9sovQYQz0,2779
|
|
39
39
|
megfile/lib/stdio_handler.py,sha256=IDdgENLQlhigEwkLL4zStueVSzdWg7xVcTF_koof_Ek,1987
|
|
40
40
|
megfile/lib/url.py,sha256=ER32pWy9Q2MAk3TraAaNEBWIqUeBmLuM57ol2cs7-Ks,103
|
|
41
|
-
megfile/lib/webdav_memory_handler.py,sha256=
|
|
41
|
+
megfile/lib/webdav_memory_handler.py,sha256=7nq4o69ck_7dFh6xlYTBG8-rj49Q7gxwa3V2bHXEQz4,2551
|
|
42
42
|
megfile/lib/webdav_prefetch_reader.py,sha256=M0X6E6t-DS5q9KiLvjVZx_AZuiW9SaIkBnIPLc774GQ,3941
|
|
43
|
-
megfile/utils/__init__.py,sha256=
|
|
43
|
+
megfile/utils/__init__.py,sha256=lfJze58nO18ug8EUfSJgTTxOwj1p7FQdsnO1keBeMSo,12740
|
|
44
|
+
megfile/utils/atomic.py,sha256=W3NInmDxytBBecktxY_D3S4rA0SX2v2M13ab8jXa4Yk,4061
|
|
44
45
|
megfile/utils/mutex.py,sha256=asb8opGLgK22RiuBJUnfsvB8LnMmodP8KzCVHKmQBWA,2561
|
|
45
|
-
megfile-5.0.
|
|
46
|
-
megfile-5.0.
|
|
47
|
-
megfile-5.0.
|
|
48
|
-
megfile-5.0.
|
|
49
|
-
megfile-5.0.
|
|
50
|
-
megfile-5.0.
|
|
51
|
-
megfile-5.0.
|
|
46
|
+
megfile-5.0.4.dist-info/licenses/LICENSE,sha256=xuY_rHyygMLmf0LgkKj_-wb-BxveHp9rTN0VDE73PrE,11365
|
|
47
|
+
megfile-5.0.4.dist-info/licenses/LICENSE.pyre,sha256=9lf5nT-5ZH25JijpYAequ0bl8E8z5JmZB1qrjiUMp84,1080
|
|
48
|
+
megfile-5.0.4.dist-info/METADATA,sha256=1SCUBMhm0XIjq06T-utC_zgtakXc3qjJF29a4-Yljvo,9226
|
|
49
|
+
megfile-5.0.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
50
|
+
megfile-5.0.4.dist-info/entry_points.txt,sha256=M6ZWSSv5_5_QtIpZafy3vq7WuOJ_5dSGQQnEZbByt2Q,49
|
|
51
|
+
megfile-5.0.4.dist-info/top_level.txt,sha256=i3rMgdU1ZAJekAceojhA-bkm3749PzshtRmLTbeLUPQ,8
|
|
52
|
+
megfile-5.0.4.dist-info/RECORD,,
|
|
@@ -187,7 +187,7 @@
|
|
|
187
187
|
same "printed page" as the copyright notice for easier
|
|
188
188
|
identification within third-party archives.
|
|
189
189
|
|
|
190
|
-
Copyright [
|
|
190
|
+
Copyright [megvii-reng] [name of copyright owner]
|
|
191
191
|
|
|
192
192
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
193
193
|
you may not use this file except in compliance with the License.
|
|
@@ -199,4 +199,4 @@
|
|
|
199
199
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
200
200
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
201
201
|
See the License for the specific language governing permissions and
|
|
202
|
-
limitations under the License.
|
|
202
|
+
limitations under the License.
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|