megfile 4.2.4__py3-none-any.whl → 5.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- megfile/__init__.py +16 -291
- megfile/cli.py +37 -20
- megfile/config.py +10 -1
- megfile/errors.py +2 -2
- megfile/fs_path.py +78 -12
- megfile/interfaces.py +44 -0
- megfile/lib/base_memory_handler.py +92 -0
- megfile/lib/glob.py +3 -3
- megfile/lib/http_prefetch_reader.py +22 -22
- megfile/lib/joinpath.py +13 -0
- megfile/lib/s3_buffered_writer.py +13 -0
- megfile/lib/s3_limited_seekable_writer.py +2 -0
- megfile/lib/s3_memory_handler.py +14 -81
- megfile/lib/webdav_memory_handler.py +83 -0
- megfile/lib/webdav_prefetch_reader.py +115 -0
- megfile/pathlike.py +3 -4
- megfile/s3_path.py +44 -33
- megfile/sftp2_path.py +44 -62
- megfile/sftp_path.py +239 -2
- megfile/smart.py +70 -29
- megfile/smart_path.py +181 -85
- megfile/version.py +1 -1
- megfile/webdav_path.py +952 -0
- {megfile-4.2.4.dist-info → megfile-5.0.0.dist-info}/METADATA +30 -39
- megfile-5.0.0.dist-info/RECORD +51 -0
- megfile/fs.py +0 -614
- megfile/hdfs.py +0 -408
- megfile/http.py +0 -114
- megfile/s3.py +0 -540
- megfile/sftp.py +0 -821
- megfile/sftp2.py +0 -827
- megfile/stdio.py +0 -30
- megfile-4.2.4.dist-info/RECORD +0 -54
- {megfile-4.2.4.dist-info → megfile-5.0.0.dist-info}/WHEEL +0 -0
- {megfile-4.2.4.dist-info → megfile-5.0.0.dist-info}/entry_points.txt +0 -0
- {megfile-4.2.4.dist-info → megfile-5.0.0.dist-info}/licenses/LICENSE +0 -0
- {megfile-4.2.4.dist-info → megfile-5.0.0.dist-info}/licenses/LICENSE.pyre +0 -0
- {megfile-4.2.4.dist-info → megfile-5.0.0.dist-info}/top_level.txt +0 -0
megfile/interfaces.py
CHANGED
@@ -1,6 +1,7 @@
 import os
 from abc import ABC, abstractmethod
 from io import IOBase, UnsupportedOperation
+from logging import getLogger as get_logger
 from typing import IO, AnyStr, Iterable, List, Optional
 
 from megfile.pathlike import (
@@ -31,6 +32,8 @@ __all__ = [
     "URIPath",
 ]
 
+_logger = get_logger(__name__)
+
 
 def fullname(o):
     klass = o.__class__
@@ -43,16 +46,39 @@ def fullname(o):
 # 1. Default value of closed is False
 # 2. closed is set to True when close() are called
 # 3. close() will only be called once
+# 4. atomic means the file-like object should not be closed automatically
+# when an exception is raised in the context manager or when the object is
+# garbage collected.
+# 5. atomic is False by default
 class Closable(ABC):
     @property
     def closed(self) -> bool:
         """Return True if the file-like object is closed."""
         return getattr(self, "__closed__", False)
 
+    @property
+    def atomic(self) -> bool:
+        """Return True if the file-like object is atomic."""
+        return getattr(self, "__atomic__", False)
+
     @abstractmethod
     def _close(self) -> None:
         pass  # pragma: no cover
 
+    def _abort(self) -> None:
+        pass
+
+    def abort(self) -> bool:
+        """Abort the file-like object without saving.
+
+        This method has no effect if the file is already closed.
+        """
+        if not getattr(self, "__closed__", False):
+            self._abort()
+            setattr(self, "__closed__", True)
+            return True
+        return False
+
     def close(self) -> None:
         """Flush and close the file-like object.
 
@@ -66,6 +92,24 @@ class Closable(ABC):
         return self
 
     def __exit__(self, type, value, traceback) -> None:
+        if self.atomic and value is not None:
+            if self.abort():
+                from megfile.errors import full_error_message
+
+                _logger.warning(
+                    f"skip closing atomic file-like object: {self}, "
+                    f"since error encountered: {full_error_message(value)}"
+                )
+            return
+        self.close()
+
+    def __del__(self):
+        if self.atomic:
+            if self.abort():
+                _logger.warning(
+                    f"skip closing atomic file-like object before deletion: {self}"
+                )
+            return
         self.close()
 
 
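Note on the new `atomic` semantics: when a file-like object is atomic and the `with` block exits with an exception (or the object is garbage-collected before `close()`), `abort()` runs instead of `close()`, so partial writes are discarded rather than committed. A minimal sketch of that behavior against the `Closable` shown above; `DummyWriter` and its in-memory "saved" flag are hypothetical stand-ins for a real writer:

    from megfile.interfaces import Closable

    class DummyWriter(Closable):
        def __init__(self, atomic: bool = False):
            self.__atomic__ = atomic  # read by the Closable.atomic property
            self.saved = False

        def _close(self) -> None:
            self.saved = True  # "commit" on a normal close

        def _abort(self) -> None:
            self.saved = False  # discard on abort

    try:
        with DummyWriter(atomic=True) as writer:
            raise RuntimeError("boom")
    except RuntimeError:
        pass
    assert writer.closed and not writer.saved  # aborted, never committed
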
megfile/lib/base_memory_handler.py
ADDED

@@ -0,0 +1,92 @@
+import os
+from abc import ABC, abstractmethod
+from io import BytesIO, UnsupportedOperation
+from typing import Iterable, List, Optional
+
+from megfile.interfaces import Readable, Seekable, Writable
+
+
+class BaseMemoryHandler(Readable[bytes], Seekable, Writable[bytes], ABC):
+    def __init__(
+        self,
+        mode: str,
+    ):
+        self._mode = mode
+
+        if mode not in ("rb", "wb", "ab", "rb+", "wb+", "ab+"):
+            raise ValueError("unacceptable mode: %r" % mode)
+
+        self._fileobj = BytesIO()
+        self._download_fileobj()
+
+    @property
+    @abstractmethod
+    def name(self) -> str:
+        pass
+
+    @property
+    def mode(self) -> str:
+        return self._mode
+
+    def tell(self) -> int:
+        return self._fileobj.tell()
+
+    def seek(self, offset: int, whence: int = os.SEEK_SET) -> int:
+        return self._fileobj.seek(offset, whence)
+
+    def readable(self) -> bool:
+        return self._mode[0] == "r" or self._mode[-1] == "+"
+
+    def read(self, size: Optional[int] = None) -> bytes:
+        if not self.readable():
+            raise UnsupportedOperation("not readable")
+        return self._fileobj.read(size)
+
+    def readline(self, size: Optional[int] = None) -> bytes:
+        if not self.readable():
+            raise UnsupportedOperation("not readable")
+        if size is None:
+            size = -1
+        return self._fileobj.readline(size)
+
+    def readlines(self, hint: Optional[int] = None) -> List[bytes]:
+        if not self.readable():
+            raise UnsupportedOperation("not readable")
+        if hint is None:
+            hint = -1
+        return self._fileobj.readlines(hint)
+
+    def writable(self) -> bool:
+        return self._mode[0] == "w" or self._mode[0] == "a" or self._mode[-1] == "+"
+
+    def flush(self):
+        self._fileobj.flush()
+
+    def write(self, data: bytes) -> int:
+        if not self.writable():
+            raise UnsupportedOperation("not writable")
+        if self._mode[0] == "a":
+            self.seek(0, os.SEEK_END)
+        return self._fileobj.write(data)
+
+    def writelines(self, lines: Iterable[bytes]):
+        if not self.writable():
+            raise UnsupportedOperation("not writable")
+        if self._mode[0] == "a":
+            self.seek(0, os.SEEK_END)
+        self._fileobj.writelines(lines)
+
+    @abstractmethod
+    def _download_fileobj(self):
+        pass
+
+    @abstractmethod
+    def _upload_fileobj(self):
+        pass
+
+    def _close(self, need_upload: bool = True):
+        if hasattr(self, "_fileobj"):
+            need_upload = need_upload and self.writable()
+            if need_upload:
+                self._upload_fileobj()
+            self._fileobj.close()
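The new `BaseMemoryHandler` factors the whole-object-in-memory pattern out of `S3MemoryHandler` (diffed below): `__init__` validates the mode, allocates a `BytesIO`, and calls `_download_fileobj()`; `_close()` calls `_upload_fileobj()` when the handle is writable. Subclasses only supply `name`, `_download_fileobj`, and `_upload_fileobj`. A hedged sketch of the contract, using a hypothetical dict-backed store:

    from megfile.lib.base_memory_handler import BaseMemoryHandler

    _store = {}  # hypothetical backend: path -> bytes

    class DictMemoryHandler(BaseMemoryHandler):
        def __init__(self, path: str, mode: str):
            self._path = path
            super().__init__(mode=mode)  # allocates BytesIO, then downloads

        @property
        def name(self) -> str:
            return self._path

        def _download_fileobj(self):
            # mirror the "r"/"a" download rule used by the real handlers
            if self._mode[0] in ("r", "a") and self._path in _store:
                self._fileobj.write(_store[self._path])
                if self._mode[0] == "r":
                    self._fileobj.seek(0)

        def _upload_fileobj(self):
            _store[self._path] = self._fileobj.getvalue()

    with DictMemoryHandler("a.txt", "wb") as f:
        f.write(b"hello")
    assert _store["a.txt"] == b"hello"  # uploaded by _close()
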
megfile/lib/glob.py
CHANGED
@@ -289,9 +289,9 @@ def get_non_glob_dir(glob: str):
     root_dir = []
     if glob.startswith("/"):
         root_dir.append("/")
-    elif "://" in glob:
-        protocol, glob = glob.split("://", 1)
-        root_dir.append(f"{protocol}://")
+    elif "//" in glob:
+        protocol_or_domain, glob = glob.rsplit("//", 1)
+        root_dir.append(f"{protocol_or_domain}//")
     for name in glob.split("/"):
        if has_magic(name):
            break
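The root extraction in `get_non_glob_dir` now splits on the last `//` instead of (per the removed lines, reconstructed above) the first `://`, so everything up to and including a domain-style double slash is kept as the root. The prefix handling in isolation, with an illustrative URL:

    path = "http://example.com//share/*.csv"
    protocol_or_domain, rest = path.rsplit("//", 1)
    prefix = f"{protocol_or_domain}//"
    assert prefix == "http://example.com//" and rest == "share/*.csv"
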
megfile/lib/http_prefetch_reader.py
CHANGED

@@ -85,29 +85,29 @@ class HttpPrefetchReader(BasePrefetchReader):
                         "Cookies": response.cookies,
                         "StatusCode": response.status_code,
                     }
-[…15 removed lines not preserved in this view…]
-                )
+
+            range_end = end
+            if self._content_size is not None:
+                range_end = min(range_end, self._content_size - 1)
+            headers = {"Range": f"bytes={start}-{range_end}"}
+            with self._session.get(
+                fspath(self._url), headers=headers, stream=True
+            ) as response:
+                if len(response.content) != int(response.headers["Content-Length"]):
+                    raise HttpBodyIncompleteError(
+                        "The downloaded content is incomplete, "
+                        "expected size: %s, actual size: %d"
+                        % (
+                            response.headers["Content-Length"],
+                            len(response.content),
                         )
-[…6 removed lines not preserved in this view…]
+                    )
+                return {
+                    "Body": BytesIO(response.content),
+                    "Headers": response.headers,
+                    "Cookies": response.cookies,
+                    "StatusCode": response.status_code,
+                }
 
         fetch_response = patch_method(
             fetch_response,
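The rewritten fetch now clamps the requested byte range to the known content size before issuing the request, then verifies the body length against `Content-Length`. The clamping step in isolation (sizes are illustrative):

    start, end, content_size = 0, 8 * 1024 * 1024, 100
    range_end = min(end, content_size - 1)
    headers = {"Range": f"bytes={start}-{range_end}"}
    assert headers == {"Range": "bytes=0-99"}
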
megfile/lib/joinpath.py
CHANGED
@@ -33,3 +33,16 @@ def uri_join(path: str, *other_paths: str) -> str:
 
     # Imp. 3
     # return '/'.join((path, *other_paths))
+
+
+def uri_norm(path: str) -> str:
+    parts = path.split("/")
+    new_parts = []
+    for part in parts:
+        if part == ".":
+            continue
+        if part == ".." and new_parts and new_parts[-1] != "..":
+            new_parts.pop()
+        else:
+            new_parts.append(part)
+    return "/".join(new_parts)
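`uri_norm` collapses `.` segments and resolves `..` segments purely textually; the `//` of a protocol prefix survives because the empty segment between the slashes is kept. For example:

    from megfile.lib.joinpath import uri_norm

    assert uri_norm("s3://bucket/a/./b/../c") == "s3://bucket/a/c"
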
megfile/lib/s3_buffered_writer.py
CHANGED

@@ -53,11 +53,13 @@ class S3BufferedWriter(Writable[bytes]):
         max_buffer_size: int = WRITER_MAX_BUFFER_SIZE,
         max_workers: Optional[int] = None,
         profile_name: Optional[str] = None,
+        atomic: bool = False,
     ):
         self._bucket = bucket
         self._key = key
         self._client = s3_client
         self._profile_name = profile_name
+        self.__atomic__ = atomic
 
         # user maybe put block_size with 'numpy.uint64' type
         self._base_block_size = int(block_size)
@@ -213,6 +215,17 @@ class S3BufferedWriter(Writable[bytes]):
         if not self._is_global_executor:
             self._executor.shutdown()
 
+    def _abort(self):
+        _logger.debug("abort file: %r" % self.name)
+
+        if self._is_multipart:
+            with raise_s3_error(self.name):
+                self._client.abort_multipart_upload(
+                    Bucket=self._bucket, Key=self._key, UploadId=self._upload_id
+                )
+
+        self._shutdown()
+
     def _close(self):
         _logger.debug("close file: %r" % self.name)
 
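`S3BufferedWriter` (and the seekable variant below) now accepts `atomic=True`, wiring the flag into `Closable.atomic`: on an exception inside a `with` block, `_abort()` runs and any started multipart upload is aborted instead of completed. A hedged usage sketch; the boto3 client, bucket, and key are placeholders, and the call mirrors the constructor parameters shown above:

    import boto3
    from megfile.lib.s3_buffered_writer import S3BufferedWriter

    client = boto3.client("s3")
    try:
        with S3BufferedWriter("bucket", "key", s3_client=client, atomic=True) as writer:
            writer.write(b"partial data")
            raise RuntimeError("interrupted")
    except RuntimeError:
        pass
    # __exit__ saw atomic=True plus an exception, so abort() ran: any in-flight
    # multipart upload was abort_multipart_upload()-ed rather than completed.
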
megfile/lib/s3_limited_seekable_writer.py
CHANGED

@@ -33,6 +33,7 @@ class S3LimitedSeekableWriter(S3BufferedWriter, Seekable):
         max_buffer_size: int = WRITER_MAX_BUFFER_SIZE,
         max_workers: Optional[int] = None,
         profile_name: Optional[str] = None,
+        atomic: bool = False,
     ):
         super().__init__(
             bucket,
@@ -42,6 +43,7 @@ class S3LimitedSeekableWriter(S3BufferedWriter, Seekable):
             max_buffer_size=max_buffer_size,
             max_workers=max_workers,
             profile_name=profile_name,
+            atomic=atomic,
         )
 
         self._head_block_size = head_block_size or block_size
megfile/lib/s3_memory_handler.py
CHANGED
@@ -1,17 +1,16 @@
 import os
-from io import BytesIO, UnsupportedOperation
-from typing import Iterable, List, Optional
+from typing import Optional
 
 from megfile.errors import (
     S3ConfigError,
-[…2 removed lines not preserved in this view…]
+    S3PermissionError,
+    S3UnknownError,
     translate_s3_error,
 )
-from megfile.interfaces import Readable, Seekable, Writable
+from megfile.lib.base_memory_handler import BaseMemoryHandler
 
 
-class S3MemoryHandler(Readable[bytes], Seekable, Writable[bytes]):
+class S3MemoryHandler(BaseMemoryHandler):
     def __init__(
         self,
         bucket: str,
@@ -23,75 +22,14 @@ class S3MemoryHandler(Readable[bytes], Seekable, Writable[bytes]):
     ):
         self._bucket = bucket
         self._key = key
-        self._mode = mode
         self._client = s3_client
         self._profile_name = profile_name
-
-        if mode not in ("rb", "wb", "ab", "rb+", "wb+", "ab+"):
-            raise ValueError("unacceptable mode: %r" % mode)
-
-        self._fileobj = BytesIO()
-        self._download_fileobj()
+        super().__init__(mode=mode)
 
     @property
     def name(self) -> str:
-[…2 removed lines not preserved in this view…]
-            self._bucket,
-            self._key,
-        )
-
-    @property
-    def mode(self) -> str:
-        return self._mode
-
-    def tell(self) -> int:
-        return self._fileobj.tell()
-
-    def seek(self, offset: int, whence: int = os.SEEK_SET) -> int:
-        return self._fileobj.seek(offset, whence)
-
-    def readable(self) -> bool:
-        return self._mode[0] == "r" or self._mode[-1] == "+"
-
-    def read(self, size: Optional[int] = None) -> bytes:
-        if not self.readable():
-            raise UnsupportedOperation("not readable")
-        return self._fileobj.read(size)
-
-    def readline(self, size: Optional[int] = None) -> bytes:
-        if not self.readable():
-            raise UnsupportedOperation("not readable")
-        if size is None:
-            size = -1
-        return self._fileobj.readline(size)
-
-    def readlines(self, hint: Optional[int] = None) -> List[bytes]:
-        if not self.readable():
-            raise UnsupportedOperation("not readable")
-        if hint is None:
-            hint = -1
-        return self._fileobj.readlines(hint)
-
-    def writable(self) -> bool:
-        return self._mode[0] == "w" or self._mode[0] == "a" or self._mode[-1] == "+"
-
-    def flush(self):
-        self._fileobj.flush()
-
-    def write(self, data: bytes) -> int:
-        if not self.writable():
-            raise UnsupportedOperation("not writable")
-        if self._mode[0] == "a":
-            self.seek(0, os.SEEK_END)
-        return self._fileobj.write(data)
-
-    def writelines(self, lines: Iterable[bytes]):
-        if not self.writable():
-            raise UnsupportedOperation("not writable")
-        if self._mode[0] == "a":
-            self.seek(0, os.SEEK_END)
-        self._fileobj.writelines(lines)
+        protocol = f"s3+{self._profile_name}" if self._profile_name else "s3"
+        return f"{protocol}://{self._bucket}/{self._key}"
 
     def _translate_error(self, error: Exception):
         return translate_s3_error(error, self.name)
@@ -101,15 +39,14 @@ class S3MemoryHandler(Readable[bytes], Seekable, Writable[bytes]):
             self._client.head_object(Bucket=self._bucket, Key=self._key)
         except Exception as error:
             error = self._translate_error(error)
-            if isinstance(error, (S3UnknownError, S3ConfigError)):
+            if isinstance(error, (S3UnknownError, S3ConfigError, S3PermissionError)):
                 raise error
             return False
         return True
 
     def _download_fileobj(self):
-        need_download = self._mode[0] == "r" or (
-            self._mode[0] == "a" and self._file_exists()
-        )
+        need_download = self._mode[0] == "r"
+        need_download = need_download or (self._mode[0] == "a" and self._file_exists())
         if not need_download:
             return
         # directly download to the file handle
@@ -126,11 +63,7 @@
             return
         # directly upload from file handle
         self.seek(0, os.SEEK_SET)
-        with raise_s3_error(self.name):
+        try:
             self._client.upload_fileobj(self._fileobj, self._bucket, self._key)
-
-    def _close(self, need_upload: bool = True):
-        if hasattr(self, "_fileobj"):
-            if need_upload:
-                self._upload_fileobj()
-            self._fileobj.close()
+        except Exception as error:
+            raise self._translate_error(error)
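`S3MemoryHandler.name` is now profile-aware, matching megfile's `s3+profile://` URL scheme. The computation in isolation, with illustrative values:

    profile_name, bucket, key = "research", "mybucket", "data/model.bin"
    protocol = f"s3+{profile_name}" if profile_name else "s3"
    assert f"{protocol}://{bucket}/{key}" == "s3+research://mybucket/data/model.bin"
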
megfile/lib/webdav_memory_handler.py
ADDED

@@ -0,0 +1,83 @@
+import os
+
+from webdav3.client import Client as WebdavClient
+from webdav3.client import Urn, WebDavXmlUtils, wrap_connection_error
+from webdav3.exceptions import (
+    OptionNotValid,
+    RemoteResourceNotFound,
+)
+
+from megfile.lib.base_memory_handler import BaseMemoryHandler
+
+
+def _webdav_stat(client: WebdavClient, remote_path: str):
+    urn = Urn(remote_path)
+    response = client.execute_request(
+        action="info", path=urn.quote(), headers_ext=["Depth: 0"]
+    )
+    path = client.get_full_path(urn)
+    info = WebDavXmlUtils.parse_info_response(
+        response.content, path, client.webdav.hostname
+    )
+    info["is_dir"] = WebDavXmlUtils.parse_is_dir_response(
+        response.content, path, client.webdav.hostname
+    )
+    return info
+
+
+@wrap_connection_error
+def _webdav_download_from(client: WebdavClient, buff, remote_path):
+    urn = Urn(remote_path)
+    if client.is_dir(urn.path()):
+        raise OptionNotValid(name="remote_path", value=remote_path)
+
+    if not client.check(urn.path()):
+        raise RemoteResourceNotFound(urn.path())
+
+    response = client.execute_request(action="download", path=urn.quote())
+
+    for chunk in response.iter_content(chunk_size=client.chunk_size):
+        buff.write(chunk)
+
+
+class WebdavMemoryHandler(BaseMemoryHandler):
+    def __init__(
+        self,
+        remote_path: str,
+        mode: str,
+        *,
+        webdav_client: WebdavClient,
+        name: str,
+    ):
+        self._remote_path = remote_path
+        self._client = webdav_client
+        self._name = name
+        super().__init__(mode=mode)
+
+    @property
+    def name(self) -> str:
+        return self._name
+
+    def _file_exists(self) -> bool:
+        try:
+            return not _webdav_stat(self._client, self._remote_path)["is_dir"]
+        except RemoteResourceNotFound:
+            return False
+
+    def _download_fileobj(self):
+        need_download = self._mode[0] == "r"
+        need_download = need_download or (self._mode[0] == "a" and self._file_exists())
+        if not need_download:
+            return
+        # directly download to the file handle
+        _webdav_download_from(self._client, self._fileobj, self._remote_path)
+        if self._mode[0] == "r":
+            self.seek(0, os.SEEK_SET)
+
+    def _upload_fileobj(self):
+        need_upload = self.writable()
+        if not need_upload:
+            return
+        # directly upload from file handle
+        self.seek(0, os.SEEK_SET)
+        self._client.upload_to(self._fileobj, self._remote_path)
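A hedged usage sketch for the new handler; the options dict is webdavclient3's documented form, while the host, credentials, and paths are placeholders:

    from webdav3.client import Client as WebdavClient
    from megfile.lib.webdav_memory_handler import WebdavMemoryHandler

    client = WebdavClient({
        "webdav_hostname": "https://dav.example.com",
        "webdav_login": "user",
        "webdav_password": "secret",
    })
    with WebdavMemoryHandler(
        "/share/notes.txt",
        "ab",
        webdav_client=client,
        name="webdav://dav.example.com/share/notes.txt",
    ) as f:
        f.write(b"one more line\n")
    # "ab" first downloads the existing file (if any) into the BytesIO buffer,
    # the write lands at the end, and _upload_fileobj() pushes the whole
    # buffer back to the server on close.
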
megfile/lib/webdav_prefetch_reader.py
ADDED

@@ -0,0 +1,115 @@
+from io import BytesIO
+from typing import Optional
+
+from webdav3.client import Client as WebdavClient
+from webdav3.client import Urn
+
+from megfile.config import (
+    READER_BLOCK_SIZE,
+    READER_MAX_BUFFER_SIZE,
+    WEBDAV_MAX_RETRY_TIMES,
+)
+from megfile.errors import (
+    HttpBodyIncompleteError,
+    http_should_retry,
+    patch_method,
+)
+from megfile.lib.base_prefetch_reader import BasePrefetchReader
+from megfile.lib.webdav_memory_handler import _webdav_stat
+
+DEFAULT_TIMEOUT = (60, 60 * 60 * 24)
+
+
+class WebdavPrefetchReader(BasePrefetchReader):
+    """
+    Reader to fast read the http content, service must support Accept-Ranges.
+
+    This will divide the file content into equal parts of block_size size, and will use
+    LRU to cache at most blocks in max_buffer_size memory.
+
+    open(), seek() and read() will trigger prefetch read.
+
+    The prefetch will cached block_forward blocks of data from offset position
+    (the position after reading if the called function is read).
+    """
+
+    def __init__(
+        self,
+        remote_path: str,
+        *,
+        client: Optional[WebdavClient] = None,
+        block_size: int = READER_BLOCK_SIZE,
+        max_buffer_size: int = READER_MAX_BUFFER_SIZE,
+        block_forward: Optional[int] = None,
+        max_retries: int = WEBDAV_MAX_RETRY_TIMES,
+        max_workers: Optional[int] = None,
+    ):
+        self._urn = Urn(remote_path)
+        self._remote_path = remote_path
+        self._client = client or WebdavClient({})
+
+        super().__init__(
+            block_size=block_size,
+            max_buffer_size=max_buffer_size,
+            block_forward=block_forward,
+            max_retries=max_retries,
+            max_workers=max_workers,
+        )
+
+    def _get_content_size(self) -> int:
+        info = _webdav_stat(self._client, self._remote_path)
+        return int(info.get("size") or 0)
+
+    @property
+    def name(self) -> str:
+        return self._remote_path
+
+    def _fetch_response(
+        self, start: Optional[int] = None, end: Optional[int] = None
+    ) -> dict:
+        def fetch_response() -> dict:
+            if start is None or end is None:
+                with self._client.execute_request(
+                    action="download", path=self._urn.quote()
+                ) as response:
+                    return {
+                        "Headers": response.headers,
+                        "Cookies": response.cookies,
+                        "StatusCode": response.status_code,
+                    }
+
+            range_end = end
+            if self._content_size is not None:
+                range_end = min(range_end, self._content_size - 1)
+            headers_ext = [f"Range: bytes={start}-{range_end}"]
+            with self._client.execute_request(
+                action="download", path=self._urn.quote(), headers_ext=headers_ext
+            ) as response:
+                headers = response.headers
+                if (
+                    "Content-Length" in headers
+                    and len(response.content) != int(headers["Content-Length"])
+                    and not headers.get("Content-Encoding")
+                ):
+                    raise HttpBodyIncompleteError(
+                        "The downloaded content is incomplete, "
+                        "expected size: %s, actual size: %d"
+                        % (
+                            headers["Content-Length"],
+                            len(response.content),
+                        )
+                    )
+                return {
+                    "Body": BytesIO(response.content),
+                    "Headers": response.headers,
+                    "Cookies": response.cookies,
+                    "StatusCode": response.status_code,
+                }
+
+        fetch_response = patch_method(
+            fetch_response,
+            max_retries=self._max_retries,
+            should_retry=http_should_retry,
+        )
+
+        return fetch_response()
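Usage sketch (host and path are placeholders): like the existing HTTP and S3 prefetch readers, the new reader behaves as a seekable file-like object backed by ranged downloads:

    from webdav3.client import Client as WebdavClient
    from megfile.lib.webdav_prefetch_reader import WebdavPrefetchReader

    client = WebdavClient({"webdav_hostname": "https://dav.example.com"})
    with WebdavPrefetchReader("/share/large.bin", client=client) as reader:
        reader.seek(1024)
        chunk = reader.read(4096)  # served from prefetched, LRU-cached blocks
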
megfile/pathlike.py
CHANGED
@@ -22,7 +22,6 @@ from megfile.lib.compat import PathLike as _PathLike
 from megfile.lib.compat import fspath
 from megfile.lib.fnmatch import _compile_pattern
 from megfile.lib.joinpath import uri_join
-from megfile.utils import classproperty
 
 Self = TypeVar("Self")
 
@@ -556,15 +555,15 @@ class BasePath:
         ) as f:
             return f.write(data)
 
-    @classproperty
+    @cached_property
     def drive(self) -> str:
         return ""
 
-    @classproperty
+    @cached_property
     def root(self) -> str:
         return self.protocol + "://"
 
-    @classproperty
+    @cached_property
     def anchor(self) -> str:
         return self.root  # pyre-ignore[7]
 
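`drive`, `root`, and `anchor` switch from `classproperty` to `cached_property`, so each is computed once per instance on first access rather than on the class. The difference illustrated with `functools.cached_property` (megfile presumably ships its own equivalent; `FakePath` is hypothetical):

    from functools import cached_property

    class FakePath:
        protocol = "fake"

        @cached_property
        def root(self) -> str:
            return self.protocol + "://"

    p = FakePath()
    assert p.root == "fake://"
    assert "root" in p.__dict__  # cached on the instance after first access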