megfile 4.2.3__py3-none-any.whl → 4.2.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- megfile/__init__.py +10 -0
- megfile/cli.py +5 -9
- megfile/config.py +5 -0
- megfile/fs.py +14 -1
- megfile/fs_path.py +48 -19
- megfile/interfaces.py +33 -0
- megfile/lib/base_prefetch_reader.py +18 -7
- megfile/lib/joinpath.py +13 -0
- megfile/lib/s3_buffered_writer.py +13 -0
- megfile/lib/s3_limited_seekable_writer.py +2 -0
- megfile/lib/s3_prefetch_reader.py +2 -1
- megfile/lib/s3_share_cache_reader.py +15 -10
- megfile/s3_path.py +12 -5
- megfile/sftp2.py +827 -0
- megfile/sftp2_path.py +1090 -0
- megfile/sftp_path.py +4 -16
- megfile/smart.py +5 -17
- megfile/utils/__init__.py +92 -9
- megfile/version.py +1 -1
- megfile/webdav.py +552 -0
- megfile/webdav_path.py +958 -0
- {megfile-4.2.3.dist-info → megfile-4.2.5.dist-info}/METADATA +6 -1
- {megfile-4.2.3.dist-info → megfile-4.2.5.dist-info}/RECORD +28 -24
- {megfile-4.2.3.dist-info → megfile-4.2.5.dist-info}/WHEEL +0 -0
- {megfile-4.2.3.dist-info → megfile-4.2.5.dist-info}/entry_points.txt +0 -0
- {megfile-4.2.3.dist-info → megfile-4.2.5.dist-info}/licenses/LICENSE +0 -0
- {megfile-4.2.3.dist-info → megfile-4.2.5.dist-info}/licenses/LICENSE.pyre +0 -0
- {megfile-4.2.3.dist-info → megfile-4.2.5.dist-info}/top_level.txt +0 -0
megfile/__init__.py
CHANGED
@@ -206,6 +206,16 @@ from megfile.stdio import is_stdio, stdio_open
 from megfile.stdio_path import StdioPath
 from megfile.version import VERSION as __version__  # noqa: F401
 
+try:
+    from megfile.sftp2_path import Sftp2Path
+except ImportError:
+    Sftp2Path = None
+
+try:
+    from megfile.webdav_path import WebdavPath
+except ImportError:
+    WebdavPath = None
+
 __all__ = [
     "smart_access",
     "smart_cache",
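The new SFTP2 and WebDAV path classes are imported defensively, so `import megfile` keeps working when their optional dependencies are missing; the names are simply bound to `None` in that case. A minimal sketch of how a caller might guard on this, assuming `Sftp2Path` accepts a URI string like megfile's other path classes:

```python
import megfile

# Sftp2Path is None when its optional dependency failed to import.
if megfile.Sftp2Path is None:
    raise RuntimeError("sftp2 support unavailable; install the optional dependency")
path = megfile.Sftp2Path("sftp2://user@host/tmp/data.txt")
```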
megfile/cli.py
CHANGED
@@ -47,7 +47,7 @@ from megfile.smart import (
     smart_unlink,
 )
 from megfile.smart_path import SmartPath
-from megfile.utils import get_human_size
+from megfile.utils import copyfileobj_multi, get_human_size
 from megfile.version import VERSION
 
 options = {}
@@ -646,14 +646,10 @@ def to(path: str, append: bool, stdout: bool):
         smart_open(path, mode) as f,
         smart_open("stdio://1", "wb") as stdout_fd,
     ):
-
-        while True:
-            buf = stdin.read(READER_BLOCK_SIZE)
-            if not buf:
-                break
-            f.write(buf)
-            if stdout:
-                stdout_fd.write(buf)
+        destinations = [f]
+        if stdout:
+            destinations.append(stdout_fd)
+        copyfileobj_multi(stdin, destinations)
 
 
 @cli.command(short_help="Produce an md5sum file for all the objects in the path.")
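The `to` command's hand-rolled read loop is replaced by `copyfileobj_multi` from `megfile.utils` (the utils diff, +92 -9, is not expanded in this view). A plausible minimal sketch of the helper, assuming it mirrors `shutil.copyfileobj` but fans each chunk out to several destinations:

```python
from typing import IO, List

def copyfileobj_multi(fsrc: IO[bytes], fdsts: List[IO[bytes]],
                      length: int = 16 * 1024) -> None:
    """Copy fsrc to every writer in fdsts in a single read pass."""
    while True:
        buf = fsrc.read(length)
        if not buf:
            break
        for fdst in fdsts:
            fdst.write(buf)
```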
megfile/config.py
CHANGED
@@ -83,6 +83,7 @@ if READER_BLOCK_SIZE <= 0:
 READER_MAX_BUFFER_SIZE = parse_quantity(
     os.getenv("MEGFILE_READER_MAX_BUFFER_SIZE") or 128 * 2**20
 )
+READER_LAZY_PREFETCH = parse_boolean(os.getenv("MEGFILE_READER_LAZY_PREFETCH"), False)
 
 # Multi-upload in aws s3 has a maximum of 10,000 parts,
 # so the maximum supported file size is MEGFILE_WRITE_BLOCK_SIZE * 10,000,
@@ -105,6 +106,10 @@ GLOBAL_MAX_WORKERS = int(os.getenv("MEGFILE_MAX_WORKERS") or 8)
 
 NEWLINE = ord("\n")
 
+# Default buffer sizes for various operations
+DEFAULT_COPY_BUFFER_SIZE = 16 * 1024  # 16KB, same as shutil.copyfileobj
+DEFAULT_HASH_BUFFER_SIZE = 4 * 1024  # 4KB for hash calculations
+
 S3_CLIENT_CACHE_MODE = os.getenv("MEGFILE_S3_CLIENT_CACHE_MODE") or "thread_local"
 
 DEFAULT_MAX_RETRY_TIMES = int(os.getenv("MEGFILE_MAX_RETRY_TIMES") or 10)
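`READER_LAZY_PREFETCH` is read once from the environment when the config module loads, so the switch has to be set before megfile is imported. A quick sketch, assuming `parse_boolean` accepts the usual truthy strings such as "true":

```python
import os

# Must be set before megfile is imported: the flag is evaluated
# once, when megfile.config is first loaded.
os.environ["MEGFILE_READER_LAZY_PREFETCH"] = "true"

from megfile import smart_open  # noqa: E402

with smart_open("s3://bucket/big-file.bin", "rb") as f:
    header = f.read(1024)
```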
megfile/fs.py
CHANGED
@@ -1,7 +1,7 @@
 import os
 from stat import S_ISDIR as stat_isdir
 from stat import S_ISLNK as stat_islnk
-from typing import BinaryIO, Callable, Iterator, List, Optional, Tuple
+from typing import IO, BinaryIO, Callable, Iterator, List, Optional, Tuple
 
 from megfile.fs_path import (
     FSPath,
@@ -52,6 +52,7 @@ __all__ = [
     "fs_islink",
     "fs_ismount",
     "fs_save_as",
+    "fs_open",
 ]
 
 
@@ -612,3 +613,15 @@ def fs_move(src_path: PathLike, dst_path: PathLike, overwrite: bool = True) -> N
     :param overwrite: whether or not overwrite file when exists
     """
     return fs_rename(src_path, dst_path, overwrite)
+
+
+def fs_open(path: PathLike, mode: str = "r", **kwargs) -> IO:
+    """
+    Open file on fs
+
+    :param path: Given path
+    :param mode: File open mode, like built-in open function
+    :param buffering: Buffering policy, like built-in open function
+    :returns: A file-like object
+    """
+    return FSPath(path).open(mode, **kwargs)
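`fs_open` forwards extra keyword arguments straight to `FSPath.open`, so options such as `encoding` or the new `atomic` flag (see the fs_path.py diff below) pass through. A small usage sketch:

```python
from megfile.fs import fs_open

# encoding is forwarded to FSPath.open, which forwards it to io.open
with fs_open("/tmp/notes.txt", "w", encoding="utf-8") as f:
    f.write("hello\n")
```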
megfile/fs_path.py
CHANGED
@@ -17,6 +17,7 @@ from megfile.interfaces import (
     Access,
     ContextIterator,
     FileEntry,
+    FileLike,
     PathLike,
     StatResult,
     URIPath,
@@ -27,7 +28,7 @@ from megfile.lib.glob import iglob
 from megfile.lib.joinpath import path_join
 from megfile.lib.url import get_url_scheme
 from megfile.smart_path import SmartPath
-from megfile.utils import calculate_md5
+from megfile.utils import calculate_md5, copyfd
 
 __all__ = [
     "FSPath",
@@ -85,6 +86,36 @@ def _fs_rename_file(
     shutil.move(src_path, dst_path)
 
 
+class WrapAtomic(FileLike):
+    __atomic__ = True
+
+    def __init__(self, fileobj):
+        self.fileobj = fileobj
+        self.temp_name = f"{self.name}.temp"
+        os.rename(self.name, self.temp_name)
+
+    @property
+    def name(self):
+        return self.fileobj.name
+
+    @property
+    def mode(self):
+        return self.fileobj.mode
+
+    def _close(self):
+        self.fileobj.close()
+        os.rename(self.temp_name, self.name)
+
+    def _abort(self):
+        try:
+            os.unlink(self.temp_name)
+        except FileNotFoundError:
+            pass
+
+    def __getattr__(self, name: str):
+        return getattr(self.fileobj, name)
+
+
 @SmartPath.register
 class FSPath(URIPath):
     """file protocol
@@ -627,9 +658,11 @@ class FSPath(URIPath):
         """
         self._check_int_path()
 
-        if missing_ok and not self.exists():
-            return
-        os.unlink(self.path_without_protocol)  # pyre-ignore[6]
+        try:
+            os.unlink(self.path_without_protocol)  # pyre-ignore[6]
+        except FileNotFoundError:
+            if not missing_ok:
+                raise
 
     def walk(
         self, followlinks: bool = False
@@ -737,15 +770,7 @@ class FSPath(URIPath):
     ):
         if isinstance(self.path_without_protocol, int):
             with open(fspath(dst_path), "wb") as fdst:
-                # 16KB, same as shutil.copyfileobj
-                length = 16 * 1024
-                while True:
-                    buf = os.read(self.path_without_protocol, length)  # pyre-ignore[6]
-                    if not buf:
-                        break
-                    fdst.write(buf)
-                    if callback:
-                        callback(len(buf))
+                copyfd(self.path_without_protocol, fdst, callback)
         else:
             shutil.copy2(
                 self.path_without_protocol,  # pyre-ignore[6]
@@ -925,11 +950,12 @@ class FSPath(URIPath):
     def open(
         self,
         mode: str = "r",
-        buffering=-1,
-        encoding=None,
-        errors=None,
-        newline=None,
-        closefd=True,
+        buffering: int = -1,
+        encoding: Optional[str] = None,
+        errors: Optional[str] = None,
+        newline: Optional[str] = None,
+        closefd: bool = True,
+        atomic: bool = False,
         **kwargs,
     ) -> IO:
         if not isinstance(self.path_without_protocol, int) and (
@@ -940,7 +966,7 @@ class FSPath(URIPath):
                     self.path_without_protocol  # pyre-ignore[6]
                 )
             ).mkdir(parents=True, exist_ok=True)
-        return io.open(
+        fp = io.open(
             self.path_without_protocol,
             mode,
             buffering=buffering,
@@ -949,6 +975,9 @@ class FSPath(URIPath):
             newline=newline,
             closefd=closefd,
         )
+        if atomic and ("w" in mode or "x" in mode or "a" in mode):
+            return WrapAtomic(fp)
+        return fp
 
     @cached_property
     def parts(self) -> Tuple[str, ...]:
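`WrapAtomic` moves the freshly opened file aside to `<name>.temp` while it is being written; a clean close renames it back into place, and `_abort` (triggered by an exception in a with-block, per the interfaces.py changes below) unlinks the temporary instead. The intended usage, as a sketch:

```python
from megfile.fs_path import FSPath

path = FSPath("/tmp/output.json")
with path.open("w", atomic=True) as f:
    f.write('{"status": "ok"}')
# Clean exit: /tmp/output.json.temp is renamed over the target.
# If the block raises, the partial .temp file is deleted instead.
```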
megfile/interfaces.py
CHANGED
@@ -1,6 +1,7 @@
 import os
 from abc import ABC, abstractmethod
 from io import IOBase, UnsupportedOperation
+from logging import getLogger as get_logger
 from typing import IO, AnyStr, Iterable, List, Optional
 
 from megfile.pathlike import (
@@ -31,6 +32,8 @@ __all__ = [
     "URIPath",
 ]
 
+_logger = get_logger(__name__)
+
 
 def fullname(o):
     klass = o.__class__
@@ -43,16 +46,28 @@ def fullname(o):
 # 1. Default value of closed is False
 # 2. closed is set to True when close() are called
 # 3. close() will only be called once
+# 4. atomic means the file-like object should not be closed automatically
+#    when an exception is raised in the context manager or when the object is
+#    garbage collected.
+# 5. atomic is False by default
 class Closable(ABC):
     @property
     def closed(self) -> bool:
         """Return True if the file-like object is closed."""
         return getattr(self, "__closed__", False)
 
+    @property
+    def atomic(self) -> bool:
+        """Return True if the file-like object is atomic."""
+        return getattr(self, "__atomic__", False)
+
     @abstractmethod
     def _close(self) -> None:
         pass  # pragma: no cover
 
+    def _abort(self) -> None:
+        pass
+
     def close(self) -> None:
         """Flush and close the file-like object.
@@ -66,6 +81,24 @@ class Closable(ABC):
         return self
 
     def __exit__(self, type, value, traceback) -> None:
+        if self.atomic and value is not None:
+            from megfile.errors import full_error_message
+
+            _logger.warning(
+                f"skip closing atomic file-like object: {self}, "
+                f"since error encountered: {full_error_message(value)}"
+            )
+            self._abort()
+            return
+        self.close()
+
+    def __del__(self):
+        if self.atomic:
+            _logger.warning(
+                f"skip closing atomic file-like object before deletion: {self}"
+            )
+            self._abort()
+            return
         self.close()
 
 
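Any `Closable` subclass can now opt into abort-on-failure semantics by setting `__atomic__` and overriding `_abort`. A hypothetical subclass, only to illustrate the contract (not part of the diff):

```python
from megfile.interfaces import Closable

class StagedWriter(Closable):
    __atomic__ = True  # an exception inside a with-block calls _abort(), not close()

    def __init__(self):
        self._chunks = []

    def write(self, data: bytes) -> None:
        self._chunks.append(data)

    def _close(self) -> None:
        # Commit: only reached on a clean close.
        print(b"".join(self._chunks))

    def _abort(self) -> None:
        # Discard staged data instead of committing it.
        self._chunks.clear()
```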
megfile/lib/base_prefetch_reader.py
CHANGED
@@ -82,9 +82,9 @@ class BasePrefetchReader(Readable[bytes], Seekable, ABC):
 
         self._offset = 0
         self._cached_buffer = None
-        self._block_index = None
+        self._block_index = 0  # Current block index
+        self._cached_offset = 0  # Current offset in the current block
         self._seek_history = []
-
         self._seek_buffer(0)
 
         _logger.debug("open file: %r, mode: %s" % (self.name, self.mode))
@@ -98,7 +98,9 @@ class BasePrefetchReader(Readable[bytes], Seekable, ABC):
         return self._process_local("futures", self._get_futures)
 
     def _get_futures(self):
-        return LRUCacheFutureManager()
+        futures = LRUCacheFutureManager()
+        futures.register(self.name)
+        return futures
 
     @property
     @abstractmethod
@@ -207,9 +209,8 @@ class BasePrefetchReader(Readable[bytes], Seekable, ABC):
         if size == 0 or self._offset >= self._content_size:
             return b""
 
-        data = self._fetch_response(
-            start=self._offset, end=self._offset + size - 1
-        )["Body"].read()
+        resp = self._fetch_response(start=self._offset, end=self._offset + size - 1)
+        data = resp["Body"].read()
         self.seek(size, os.SEEK_CUR)
         return data
 
@@ -369,12 +370,17 @@
 class LRUCacheFutureManager(OrderedDict):
     def __init__(self):
         super().__init__()
+        self._name = None
+
+    def register(self, name):
+        self._name = name
 
     def submit(self, executor, key, *args, **kwargs):
         if key in self:
             self.move_to_end(key, last=True)
             return
         self[key] = executor.submit(*args, **kwargs)
+        _logger.debug("submit future: %r, key: %r" % (self._name, key))
 
     @property
     def finished(self):
@@ -385,7 +391,12 @@ class LRUCacheFutureManager(OrderedDict):
         return self[key].result()
 
     def cleanup(self, block_capacity: int):
+        keys = []
         while len(self) > block_capacity:
-            _, future = self.popitem(last=False)
+            key, future = self.popitem(last=False)
+            keys.append(key)
             if not future.done():
                 future.cancel()
+        if keys:
+            _logger.debug("cleanup futures: %r, keys: %s" % (self._name, keys))
+        return keys
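`cleanup` now reports the evicted keys, and readers register a name used in the new debug logs. A toy illustration of the LRU eviction order, driving the manager standalone (hypothetical usage; inside megfile the keys are block indices of a prefetched file):

```python
from concurrent.futures import ThreadPoolExecutor

from megfile.lib.base_prefetch_reader import LRUCacheFutureManager

futures = LRUCacheFutureManager()
futures.register("demo://file")  # name is only used for debug logging

with ThreadPoolExecutor(max_workers=2) as executor:
    for block_index in range(4):
        futures.submit(executor, block_index, lambda i=block_index: i * i)
    evicted = futures.cleanup(block_capacity=2)

print(evicted)  # oldest entries are evicted first: [0, 1]
```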
megfile/lib/joinpath.py
CHANGED
@@ -33,3 +33,16 @@ def uri_join(path: str, *other_paths: str) -> str:
 
     # Imp. 3
     # return '/'.join((path, *other_paths))
+
+
+def uri_norm(path: str) -> str:
+    parts = path.split("/")
+    new_parts = []
+    for part in parts:
+        if part == ".":
+            continue
+        if part == ".." and new_parts and new_parts[-1] != "..":
+            new_parts.pop()
+        else:
+            new_parts.append(part)
+    return "/".join(new_parts)
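`uri_norm` drops `.` segments and resolves each `..` against the preceding segment, while leading `..` segments (with nothing left to pop) are kept. A quick check of the behavior:

```python
from megfile.lib.joinpath import uri_norm

print(uri_norm("a/./b/../c"))  # -> "a/c"
print(uri_norm("../a/b"))      # -> "../a/b" (leading .. is preserved)
```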
megfile/lib/s3_buffered_writer.py
CHANGED
@@ -53,11 +53,13 @@ class S3BufferedWriter(Writable[bytes]):
         max_buffer_size: int = WRITER_MAX_BUFFER_SIZE,
         max_workers: Optional[int] = None,
         profile_name: Optional[str] = None,
+        atomic: bool = False,
     ):
         self._bucket = bucket
         self._key = key
         self._client = s3_client
         self._profile_name = profile_name
+        self.__atomic__ = atomic
 
         # user maybe put block_size with 'numpy.uint64' type
         self._base_block_size = int(block_size)
@@ -213,6 +215,17 @@ class S3BufferedWriter(Writable[bytes]):
         if not self._is_global_executor:
             self._executor.shutdown()
 
+    def _abort(self):
+        _logger.debug("abort file: %r" % self.name)
+
+        if self._is_multipart:
+            with raise_s3_error(self.name):
+                self._client.abort_multipart_upload(
+                    Bucket=self._bucket, Key=self._key, UploadId=self._upload_id
+                )
+
+        self._shutdown()
+
     def _close(self):
         _logger.debug("close file: %r" % self.name)
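With `atomic=True` the writer marks itself `__atomic__`, so an exception inside a with-block triggers `_abort`, which cancels any in-flight multipart upload instead of completing it, and no partial object becomes visible in the bucket. For reference, `_abort` uses the standard S3 multipart API; the equivalent raw boto3 call looks like this (bucket, key, and upload id are placeholders):

```python
import boto3

client = boto3.client("s3")
client.abort_multipart_upload(
    Bucket="my-bucket", Key="key.bin", UploadId="upload-id-from-create"
)
# Any parts uploaded so far are discarded and no object is created.
```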
megfile/lib/s3_limited_seekable_writer.py
CHANGED
@@ -33,6 +33,7 @@ class S3LimitedSeekableWriter(S3BufferedWriter, Seekable):
         max_buffer_size: int = WRITER_MAX_BUFFER_SIZE,
         max_workers: Optional[int] = None,
         profile_name: Optional[str] = None,
+        atomic: bool = False,
     ):
         super().__init__(
             bucket,
@@ -42,6 +43,7 @@ class S3LimitedSeekableWriter(S3BufferedWriter, Seekable):
             max_buffer_size=max_buffer_size,
             max_workers=max_workers,
             profile_name=profile_name,
+            atomic=atomic,
         )
 
         self._head_block_size = head_block_size or block_size
megfile/lib/s3_prefetch_reader.py
CHANGED
@@ -4,6 +4,7 @@ from typing import Optional
 
 from megfile.config import (
     READER_BLOCK_SIZE,
+    READER_LAZY_PREFETCH,
     READER_MAX_BUFFER_SIZE,
     S3_MAX_RETRY_TIMES,
 )
@@ -62,7 +63,7 @@ class S3PrefetchReader(BasePrefetchReader):
         )
 
     def _get_content_size(self):
-        if self._block_capacity <= 0:
+        if self._block_capacity <= 0 or READER_LAZY_PREFETCH:
             response = self._client.head_object(Bucket=self._bucket, Key=self._key)
             self._content_etag = response.get("ETag")
             return int(response["ContentLength"])
megfile/lib/s3_share_cache_reader.py
CHANGED
@@ -101,16 +101,21 @@ class ShareCacheFutureManager(LRUCacheFutureManager):
         super().__init__()
         self._references = Counter()
 
-    def register(self, name):
-        self._references[name] += 1
-
-    def unregister(self, name):
-        self._references[name] -= 1
-        if self._references[name] == 0:
-            self._references.pop(name)
-            for key in list(self):
-                if key[0] != name:
+    def register(self, name):
+        self._references[name] += 1
+        _logger.debug("register reader: %r, count: %d" % (name, self._references[name]))
+
+    def unregister(self, name):
+        self._references[name] -= 1
+        _logger.debug(
+            "unregister reader: %r, count: %d" % (name, self._references[name])
+        )
+        if self._references[name] == 0:
+            self._references.pop(name)
+            for key in list(self):
+                if key[0] != name:
                     continue
-                future = self.pop(key)
+                future = self.pop(key)
                 if not future.done():
                     future.cancel()  # pragma: no cover
+            _logger.debug("cleanup all futures of reader: %r" % name)
megfile/s3_path.py
CHANGED
@@ -230,7 +230,7 @@ def get_endpoint_url(profile_name: Optional[str] = None) -> str:
     config_endpoint_url = config.get("s3", {}).get("endpoint_url")
     config_endpoint_url = config_endpoint_url or config.get("endpoint_url")
     if config_endpoint_url:
-        warning_endpoint_url("~/.aws/config", config_endpoint_url)
+        warning_endpoint_url("~/.aws/config or ~/.aws/credentials", config_endpoint_url)
         return config_endpoint_url
     return endpoint_url
 
@@ -937,6 +937,7 @@ def s3_buffered_open(
     buffered: bool = False,
     share_cache_key: Optional[str] = None,
     cache_path: Optional[str] = None,
+    atomic: bool = False,
 ) -> IO:
     """Open an asynchronous prefetch reader, to support fast sequential read
 
@@ -969,6 +970,8 @@ def s3_buffered_open(
         (both file head part and tail part can seek block_size).
         Notes: This parameter are valid only for write-handle.
         Read-handle support arbitrary seek
+    :param buffered: If you are operating pickle file without .pkl or .pickle extension,
+        please set this to True to avoid the performance issue.
     :returns: An opened File object
     :raises: S3FileNotFoundError
     """
@@ -1000,6 +1003,7 @@ def s3_buffered_open(
     )
 
     if mode == "rb":
+        block_size = block_size or READER_BLOCK_SIZE
        if share_cache_key is not None:
             reader = S3ShareCacheReader(
                 bucket,
@@ -1008,7 +1012,7 @@ def s3_buffered_open(
                 s3_client=client,
                 max_retries=max_retries,
                 max_workers=max_workers,
-                block_size=block_size or READER_BLOCK_SIZE,
+                block_size=block_size,
                 block_forward=block_forward,
                 profile_name=s3_url._profile_name,
             )
@@ -1023,13 +1027,14 @@ def s3_buffered_open(
                 max_workers=max_workers,
                 max_buffer_size=max_buffer_size,
                 block_forward=block_forward,
-                block_size=block_size or READER_BLOCK_SIZE,
+                block_size=block_size,
                 profile_name=s3_url._profile_name,
             )
         if buffered or _is_pickle(reader):
             reader = io.BufferedReader(reader)  # type: ignore
         return reader
 
+    block_size = block_size or WRITER_BLOCK_SIZE
     if limited_seekable:
         if max_buffer_size is None:
             max_buffer_size = WRITER_MAX_BUFFER_SIZE
@@ -1038,9 +1043,10 @@ def s3_buffered_open(
             key,
             s3_client=client,
             max_workers=max_workers,
-            block_size=block_size or WRITER_BLOCK_SIZE,
+            block_size=block_size,
             max_buffer_size=max_buffer_size,
             profile_name=s3_url._profile_name,
+            atomic=atomic,
         )
     else:
         if max_buffer_size is None:
@@ -1050,9 +1056,10 @@ def s3_buffered_open(
             key,
             s3_client=client,
             max_workers=max_workers,
-            block_size=block_size or WRITER_BLOCK_SIZE,
+            block_size=block_size,
             max_buffer_size=max_buffer_size,
             profile_name=s3_url._profile_name,
+            atomic=atomic,
        )
     if buffered or _is_pickle(writer):
         writer = io.BufferedWriter(writer)  # type: ignore
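End to end, the new flag flows from `s3_buffered_open` into both writer classes. A usage sketch of an atomic S3 write through this function:

```python
from megfile.s3_path import s3_buffered_open

with s3_buffered_open("s3://bucket/model.ckpt", "wb", atomic=True) as f:
    f.write(b"...weights...")
    # An exception raised here aborts the (possibly multipart) upload,
    # so no partial object appears at s3://bucket/model.ckpt.
```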