megfile 4.2.5__py3-none-any.whl → 5.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- megfile/__init__.py +14 -291
- megfile/cli.py +83 -40
- megfile/config.py +35 -1
- megfile/errors.py +2 -2
- megfile/fs_path.py +32 -3
- megfile/interfaces.py +21 -10
- megfile/lib/base_memory_handler.py +92 -0
- megfile/lib/glob.py +3 -3
- megfile/lib/http_prefetch_reader.py +22 -22
- megfile/lib/s3_memory_handler.py +14 -81
- megfile/lib/webdav_memory_handler.py +83 -0
- megfile/lib/webdav_prefetch_reader.py +115 -0
- megfile/pathlike.py +3 -4
- megfile/s3_path.py +40 -32
- megfile/sftp2_path.py +38 -62
- megfile/sftp_path.py +238 -1
- megfile/smart.py +70 -29
- megfile/smart_path.py +198 -96
- megfile/version.py +1 -1
- megfile/webdav_path.py +161 -166
- {megfile-4.2.5.dist-info → megfile-5.0.1.dist-info}/METADATA +27 -39
- megfile-5.0.1.dist-info/RECORD +51 -0
- megfile/fs.py +0 -627
- megfile/hdfs.py +0 -408
- megfile/http.py +0 -114
- megfile/s3.py +0 -540
- megfile/sftp.py +0 -821
- megfile/sftp2.py +0 -827
- megfile/stdio.py +0 -30
- megfile/webdav.py +0 -552
- megfile-4.2.5.dist-info/RECORD +0 -56
- {megfile-4.2.5.dist-info → megfile-5.0.1.dist-info}/WHEEL +0 -0
- {megfile-4.2.5.dist-info → megfile-5.0.1.dist-info}/entry_points.txt +0 -0
- {megfile-4.2.5.dist-info → megfile-5.0.1.dist-info}/licenses/LICENSE +0 -0
- {megfile-4.2.5.dist-info → megfile-5.0.1.dist-info}/licenses/LICENSE.pyre +0 -0
- {megfile-4.2.5.dist-info → megfile-5.0.1.dist-info}/top_level.txt +0 -0
megfile/webdav_path.py
CHANGED
@@ -2,6 +2,9 @@ import hashlib
 import io
 import os
 import re
+import shlex
+import subprocess
+import time
 from functools import cached_property
 from logging import getLogger as get_logger
 from typing import IO, BinaryIO, Callable, Iterable, Iterator, List, Optional, Tuple
@@ -10,27 +13,46 @@ from urllib.parse import quote, unquote, urlsplit, urlunsplit
 import dateutil.parser
 from webdav3.client import Client as WebdavClient
 from webdav3.client import WebDavXmlUtils
-from webdav3.exceptions import …
+from webdav3.exceptions import (
+    RemoteResourceNotFound,
+    ResponseErrorCode,
+    WebDavException,
+)
 from webdav3.urn import Urn

-from megfile.…
+from megfile.config import (
+    READER_BLOCK_SIZE,
+    READER_MAX_BUFFER_SIZE,
+    WEBDAV_MAX_RETRY_TIMES,
+)
+from megfile.errors import (
+    SameFileError,
+    _create_missing_ok_generator,
+    http_should_retry,
+    patch_method,
+)
 from megfile.interfaces import (
     ContextIterator,
     FileEntry,
     PathLike,
-    Readable,
-    Seekable,
     StatResult,
-    Writable,
 )
 from megfile.lib.compare import is_same_file
 from megfile.lib.compat import fspath
 from megfile.lib.fnmatch import translate
 from megfile.lib.glob import has_magic
 from megfile.lib.joinpath import uri_join, uri_norm
+from megfile.lib.webdav_memory_handler import WebdavMemoryHandler, _webdav_stat
+from megfile.lib.webdav_prefetch_reader import WebdavPrefetchReader
 from megfile.pathlike import URIPath
 from megfile.smart_path import SmartPath
-from megfile.utils import …
+from megfile.utils import (
+    _is_pickle,
+    binary_open,
+    calculate_md5,
+    copyfileobj,
+    thread_local,
+)

 _logger = get_logger(__name__)

@@ -42,6 +64,7 @@ __all__ = [
 WEBDAV_USERNAME = "WEBDAV_USERNAME"
 WEBDAV_PASSWORD = "WEBDAV_PASSWORD"
 WEBDAV_TOKEN = "WEBDAV_TOKEN"
+WEBDAV_TOKEN_COMMAND = "WEBDAV_TOKEN_COMMAND"
 WEBDAV_TIMEOUT = "WEBDAV_TIMEOUT"


@@ -55,7 +78,7 @@ def _make_stat(info: dict) -> StatResult:
     except Exception:
         mtime = 0.0

-    isdir = info.get("…
+    isdir = info.get("isdir", False)

     return StatResult(
         size=size,
@@ -81,6 +104,7 @@ def provide_connect_info(
     username: Optional[str] = None,
     password: Optional[str] = None,
     token: Optional[str] = None,
+    token_command: Optional[str] = None,
 ) -> dict:
     """Provide connection info for WebDAV client"""
     if not username:
@@ -89,6 +113,8 @@ def provide_connect_info(
         password = os.getenv(WEBDAV_PASSWORD)
     if not token:
         token = os.getenv(WEBDAV_TOKEN)
+    if not token_command:
+        token_command = os.getenv(WEBDAV_TOKEN_COMMAND)

     timeout = int(os.getenv(WEBDAV_TIMEOUT, "30"))

@@ -98,7 +124,9 @@ def provide_connect_info(
         "webdav_disable_check": True,
     }

-    if …
+    if token_command:
+        options["webdav_token_command"] = token_command
+    elif token:
         options["webdav_token"] = token
     elif username and password:
         options["webdav_login"] = username
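Note: the new `WEBDAV_TOKEN_COMMAND` setting lets the bearer token come from an external command instead of the static `WEBDAV_TOKEN`, and the command takes priority when both are set. A minimal sketch of configuring it through environment variables; the hostname and command below are placeholders, not values taken from this diff:

```python
import os

# Placeholder values for illustration only.
os.environ["WEBDAV_TOKEN_COMMAND"] = "my-sso-cli print-webdav-token"

from megfile.webdav_path import provide_connect_info

options = provide_connect_info("https://dav.example.com")
# Per the change above, the command is recorded in the options and is
# preferred over WEBDAV_TOKEN / username+password.
print(options.get("webdav_token_command"))
```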
@@ -107,15 +135,83 @@ def provide_connect_info(
     return options


+def _patch_execute_request(
+    client: WebdavClient,
+    status_forcelist: Iterable[int] = (500, 502, 503, 504),
+    max_retries: int = WEBDAV_MAX_RETRY_TIMES,
+) -> WebdavClient:
+    def webdav_update_token_by_command():
+        cmds = shlex.split(client.webdav.token_command)
+        client.webdav.token_command_last_call = time.time()
+        client.webdav.token = subprocess.check_output(cmds).decode().strip()
+        _logger.debug("update webdav token by command: %s", client.webdav.token_command)
+
+    def webdav_should_retry(error: Exception) -> bool:
+        if http_should_retry(error):
+            return True
+        if (
+            isinstance(error, ResponseErrorCode)
+            and error.code == 401  # pytype: disable=attribute-error
+        ):
+            token_command = client.webdav.token_command  # pyre-ignore[16]
+            last_call = client.webdav.token_command_last_call  # pyre-ignore[16]
+            if token_command is not None and time.time() - last_call > 5:
+                webdav_update_token_by_command()
+                return True
+        return False
+
+    def after_callback(response, *args, **kwargs):
+        if response.status_code in status_forcelist:
+            response.raise_for_status()
+        return response
+
+    def before_callback(action, path, data=None, headers_ext=None):
+        # refresh token if needed
+        if client.webdav.token_command is not None and not client.webdav.token:
+            webdav_update_token_by_command()
+        _logger.debug(
+            "send http request: %s %r, with parameters: %s, headers: %s",
+            action,
+            path,
+            data,
+            headers_ext,
+        )
+
+    def retry_callback(error, action, path, data=None, headers_ext=None):
+        if data and hasattr(data, "seek"):
+            data.seek(0)
+        elif isinstance(data, Iterator):
+            _logger.warning("Can not retry http request with iterator data")
+            raise
+
+    client.execute_request = patch_method(
+        client.execute_request,
+        max_retries=max_retries,
+        should_retry=webdav_should_retry,
+        before_callback=before_callback,
+        after_callback=after_callback,
+        retry_callback=retry_callback,
+    )
+
+    return client
+
+
 def _get_webdav_client(
     hostname: str,
     username: Optional[str] = None,
     password: Optional[str] = None,
     token: Optional[str] = None,
+    token_command: Optional[str] = None,
 ) -> WebdavClient:
     """Get WebDAV client"""
-    options = provide_connect_info(hostname, username, password, token)
-
+    options = provide_connect_info(hostname, username, password, token, token_command)
+    client = WebdavClient(options)
+    client.webdav.token_command = options.pop(  # pyre-ignore[16]
+        "webdav_token_command", None
+    )
+    client.webdav.token_command_last_call = 0  # pyre-ignore[16]
+    client = _patch_execute_request(client)
+    return client


 def get_webdav_client(
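Note: the patched `execute_request` refreshes the token by shelling out: the configured command is split with `shlex`, run with `subprocess.check_output`, and its stripped stdout becomes the new token; a 401 response triggers a re-run at most once every 5 seconds. A standalone sketch of that refresh logic under the same assumptions (the script name is a placeholder):

```python
import shlex
import subprocess
import time

TOKEN_COMMAND = "./print-token.sh --audience webdav"  # placeholder command

_token = ""
_last_call = 0.0

def refresh_token() -> str:
    """Run the token command and cache its stripped stdout."""
    global _token, _last_call
    _last_call = time.time()
    _token = subprocess.check_output(shlex.split(TOKEN_COMMAND)).decode().strip()
    return _token

def token_for_request(got_401: bool = False) -> str:
    # Fetch lazily on first use; after a 401, refresh at most every 5 seconds.
    if not _token or (got_401 and time.time() - _last_call > 5):
        return refresh_token()
    return _token
```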
@@ -123,10 +219,11 @@ def get_webdav_client(
|
|
|
123
219
|
username: Optional[str] = None,
|
|
124
220
|
password: Optional[str] = None,
|
|
125
221
|
token: Optional[str] = None,
|
|
222
|
+
token_command: Optional[str] = None,
|
|
126
223
|
) -> WebdavClient:
|
|
127
224
|
"""Get cached WebDAV client"""
|
|
128
225
|
return thread_local(
|
|
129
|
-
f"webdav_client:{hostname},{username},{password},{token}",
|
|
226
|
+
f"webdav_client:{hostname},{username},{password},{token},{token_command}",
|
|
130
227
|
_get_webdav_client,
|
|
131
228
|
hostname,
|
|
132
229
|
username,
|
|
@@ -160,23 +257,6 @@ def _webdav_scan_pairs(
         yield src_file_path, dst_file_path


-def _webdav_stat(client: WebdavClient, remote_path: str):
-    urn = Urn(remote_path)
-    client._check_remote_resource(remote_path, urn)
-
-    response = client.execute_request(
-        action="info", path=urn.quote(), headers_ext=["Depth: 0"]
-    )
-    path = client.get_full_path(urn)
-    info = WebDavXmlUtils.parse_info_response(
-        response.content, path, client.webdav.hostname
-    )
-    info["is_dir"] = WebDavXmlUtils.parse_is_dir_response(
-        response.content, path, client.webdav.hostname
-    )
-    return info
-
-
 def _webdav_scan(client: WebdavClient, remote_path: str) -> List[dict]:
     directory_urn = Urn(remote_path, directory=True)
     if directory_urn.path() != WebdavClient.root and not client.check(
@@ -208,112 +288,12 @@ def _webdav_split_magic(path: str) -> Tuple[str, str]:
     return path, ""


-
-
-
-
-
-
-        webdav_client: WebdavClient,
-        name: str,
-    ):
-        self._real_path = real_path
-        self._mode = mode
-        self._client = webdav_client
-        self._name = name
-
-        if mode not in ("rb", "wb", "ab", "rb+", "wb+", "ab+"):
-            raise ValueError("unacceptable mode: %r" % mode)
-
-        self._fileobj = io.BytesIO()
-        self._download_fileobj()
-
-    @property
-    def name(self) -> str:
-        return self._name
-
-    @property
-    def mode(self) -> str:
-        return self._mode
-
-    def tell(self) -> int:
-        return self._fileobj.tell()
-
-    def seek(self, offset: int, whence: int = os.SEEK_SET) -> int:
-        return self._fileobj.seek(offset, whence)
-
-    def readable(self) -> bool:
-        return self._mode[0] == "r" or self._mode[-1] == "+"
-
-    def read(self, size: Optional[int] = None) -> bytes:
-        if not self.readable():
-            raise io.UnsupportedOperation("not readable")
-        return self._fileobj.read(size)
-
-    def readline(self, size: Optional[int] = None) -> bytes:
-        if not self.readable():
-            raise io.UnsupportedOperation("not readable")
-        if size is None:
-            size = -1
-        return self._fileobj.readline(size)
-
-    def readlines(self, hint: Optional[int] = None) -> List[bytes]:
-        if not self.readable():
-            raise io.UnsupportedOperation("not readable")
-        if hint is None:
-            hint = -1
-        return self._fileobj.readlines(hint)
-
-    def writable(self) -> bool:
-        return self._mode[0] == "w" or self._mode[0] == "a" or self._mode[-1] == "+"
-
-    def flush(self):
-        self._fileobj.flush()
-
-    def write(self, data: bytes) -> int:
-        if not self.writable():
-            raise io.UnsupportedOperation("not writable")
-        if self._mode[0] == "a":
-            self.seek(0, os.SEEK_END)
-        return self._fileobj.write(data)
-
-    def writelines(self, lines: Iterable[bytes]):
-        if not self.writable():
-            raise io.UnsupportedOperation("not writable")
-        if self._mode[0] == "a":
-            self.seek(0, os.SEEK_END)
-        self._fileobj.writelines(lines)
-
-    def _file_exists(self) -> bool:
-        try:
-            return not self._client.is_dir(self._real_path)
-        except RemoteResourceNotFound:
-            return False
-
-    def _download_fileobj(self):
-        need_download = self._mode[0] == "r" or (
-            self._mode[0] == "a" and self._file_exists()
-        )
-        if not need_download:
-            return
-        # directly download to the file handle
-        self._client.download_from(self._fileobj, self._real_path)
-        if self._mode[0] == "r":
-            self.seek(0, os.SEEK_SET)
-
-    def _upload_fileobj(self):
-        need_upload = self.writable()
-        if not need_upload:
-            return
-        # directly upload from file handle
-        self.seek(0, os.SEEK_SET)
-        self._client.upload_to(self._fileobj, self._real_path)
-
-    def _close(self, need_upload: bool = True):
-        if hasattr(self, "_fileobj"):
-            if need_upload:
-                self._upload_fileobj()
-            self._fileobj.close()
+def _webdav_check_accept_ranges(client: WebdavClient, remote_path: str):
+    urn = Urn(remote_path)
+    response = client.execute_request(action="download", path=urn.quote())
+    response.close()
+    headers = response.headers
+    return headers.get("Accept-Ranges") == "bytes"


 @SmartPath.register
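Note: `_webdav_check_accept_ranges` probes whether the server can serve byte ranges, which is what the new prefetch reader depends on. A plain-`requests` illustration of the same idea; the URL is a placeholder and the real code goes through the WebDAV client instead:

```python
import requests

url = "https://dav.example.com/files/user/data.bin"  # placeholder

probe = requests.get(url, stream=True)
probe.close()
if probe.headers.get("Accept-Ranges") == "bytes":
    # A range-capable server can return one block at a time, so the file
    # can be read in prefetched chunks instead of downloaded whole.
    block = requests.get(url, headers={"Range": "bytes=0-8191"})
    print(block.status_code)  # 206 Partial Content on range-capable servers
```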
@@ -346,16 +326,15 @@ class WebdavPath(URIPath):
         if parts.port:
             self._hostname += f":{parts.port}"

-        self.…
+        self._remote_path = unquote(parts.path) if parts.path else "/"

     @cached_property
     def parts(self) -> Tuple[str, ...]:
         """A tuple giving access to the path's various components"""
-
-        parts: List[str] = [urlunsplit(new_parts)]  # pyre-ignore[9]
+        parts = [urlunsplit(self._urlsplit_parts._replace(path=""))]
         path = self._urlsplit_parts.path.lstrip("/")
         if path != "":
-            parts.extend(…
+            parts.extend(path.split("/"))
         return tuple(parts)

     @property
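Note: the reworked `parts` builds its first element from the URL with the path stripped, then appends the path components. The same mechanics shown with `urllib.parse` directly, on a placeholder URL:

```python
from urllib.parse import urlsplit, urlunsplit

split = urlsplit("webdav://dav.example.com/photos/2024/cat.jpg")  # placeholder
parts = [urlunsplit(split._replace(path=""))]   # scheme + host, path removed
path = split.path.lstrip("/")
if path != "":
    parts.extend(path.split("/"))               # remaining path components
print(tuple(parts))
# ('webdav://dav.example.com', 'photos', '2024', 'cat.jpg')
```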
@@ -385,7 +364,7 @@ class WebdavPath(URIPath):
         :returns: True if the path exists, else False
         """
         try:
-            _webdav_stat(self._client, self.…
+            _webdav_stat(self._client, self._remote_path)
             return True
         except RemoteResourceNotFound:
             return False
@@ -436,11 +415,11 @@ class WebdavPath(URIPath):
             raise FileNotFoundError
         :returns: An iterator contains tuples of path and file stat
         """
-        remote_path = self.…
+        remote_path = self._remote_path
         if pattern:
             remote_path = os.path.join(remote_path, pattern)
         remote_path, pattern = _webdav_split_magic(remote_path)
-        root = os.path.relpath(remote_path, self.…
+        root = os.path.relpath(remote_path, self._remote_path)
         root = uri_join(self.path_with_protocol, root)
         root = uri_norm(root)
         pattern = re.compile(translate(pattern))
@@ -479,7 +458,7 @@ class WebdavPath(URIPath):
         :returns: True if the path is a directory, else False
         """
         try:
-            return _webdav_stat(self._client, self.…
+            return _webdav_stat(self._client, self._remote_path)["isdir"]
         except RemoteResourceNotFound:
             return False

@@ -491,7 +470,7 @@ class WebdavPath(URIPath):
         :returns: True if the path is a file, else False
         """
         try:
-            return not _webdav_stat(self._client, self.…
+            return not _webdav_stat(self._client, self._remote_path)["isdir"]
         except RemoteResourceNotFound:
             return False

@@ -550,7 +529,7 @@ class WebdavPath(URIPath):
         parent_path_object.mkdir(mode=mode, parents=False, exist_ok=True)

         try:
-            self._client.mkdir(self.…
+            self._client.mkdir(self._remote_path)
         except WebDavException:
             # Catch exception when mkdir concurrently
             if not self.exists():
@@ -590,7 +569,9 @@ class WebdavPath(URIPath):
         if self._is_same_backend(dst_path):
             if overwrite:
                 dst_path.remove(missing_ok=True)
-            self._client.move(…
+            self._client.move(
+                self._remote_path, dst_path._remote_path, overwrite=overwrite
+            )
         else:
             if self.is_dir():
                 for file_entry in self.scandir():
@@ -626,7 +607,7 @@ class WebdavPath(URIPath):
         if missing_ok and not self.exists():
             return
         try:
-            self._client.clean(self.…
+            self._client.clean(self._remote_path)
         except RemoteResourceNotFound:
             if not missing_ok:
                 raise FileNotFoundError(f"No such file: '{self.path_with_protocol}'")
@@ -665,8 +646,8 @@ class WebdavPath(URIPath):
             )
             return

-        for info in _webdav_scan(self._client, self.…
-            entry = _make_entry(info, self.…
+        for info in _webdav_scan(self._client, self._remote_path):
+            entry = _make_entry(info, self._remote_path, self.path_with_protocol)
             if entry.is_dir():
                 continue
             yield entry
@@ -691,8 +672,8 @@ class WebdavPath(URIPath):
             raise NotADirectoryError(f"Not a directory: '{self.path_with_protocol}'")

         def create_generator():
-            for info in _webdav_scandir(self._client, self.…
-                yield _make_entry(info, self.…
+            for info in _webdav_scandir(self._client, self._remote_path):
+                yield _make_entry(info, self._remote_path, self.path_with_protocol)

         return ContextIterator(create_generator())

@@ -703,7 +684,7 @@ class WebdavPath(URIPath):
         :returns: StatResult
         """
         try:
-            info = _webdav_stat(self._client, self.…
+            info = _webdav_stat(self._client, self._remote_path)
             return _make_stat(info)
         except RemoteResourceNotFound:
             raise FileNotFoundError(f"No such file: '{self.path_with_protocol}'")
@@ -717,7 +698,7 @@ class WebdavPath(URIPath):
         if missing_ok and not self.exists():
             return
         try:
-            self._client.clean(self.…
+            self._client.clean(self._remote_path)
         except RemoteResourceNotFound:
             if not missing_ok:
                 raise FileNotFoundError(f"No such file: '{self.path_with_protocol}'")
@@ -736,7 +717,7 @@ class WebdavPath(URIPath):
         if self.is_file():
             return

-        stack = [self.…
+        stack = [self._remote_path]
         while stack:
             root = stack.pop()
             dirs, files = [], []
@@ -810,13 +791,15 @@ class WebdavPath(URIPath):
         with self.open(mode="wb") as output:
             output.write(file_object.read())

+    @binary_open
     def open(
         self,
-        mode: str = "…
+        mode: str = "rb",
         *,
-
-
-
+        max_workers: Optional[int] = None,
+        max_buffer_size: int = READER_MAX_BUFFER_SIZE,
+        block_forward: Optional[int] = None,
+        block_size: int = READER_BLOCK_SIZE,
         **kwargs,
     ) -> IO:
         """Open a file on the path.
@@ -837,15 +820,27 @@ class WebdavPath(URIPath):
         elif not self.exists():
             raise FileNotFoundError("No such file: %r" % self.path_with_protocol)

-
-        self.…
-
+        if mode == "rb":
+            if _webdav_check_accept_ranges(self._client, self._remote_path):
+                reader = WebdavPrefetchReader(
+                    self._remote_path,
+                    client=self._client,
+                    block_size=block_size,
+                    max_buffer_size=max_buffer_size,
+                    block_forward=block_forward,
+                    max_retries=WEBDAV_MAX_RETRY_TIMES,
+                    max_workers=max_workers,
+                )
+                if _is_pickle(reader):
+                    reader = io.BufferedReader(reader)  # type: ignore
+                return reader
+
+        return WebdavMemoryHandler(
+            self._remote_path,
+            mode,
             webdav_client=self._client,
             name=self.path_with_protocol,
         )
-        if "b" not in mode:
-            return io.TextIOWrapper(buffer, encoding=encoding, errors=errors)
-        return buffer

     def chmod(self, mode: int, *, follow_symlinks: bool = True):
         """
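Note: with these changes, `open("rb")` uses `WebdavPrefetchReader` when the server advertises `Accept-Ranges: bytes` and falls back to the in-memory handler (`WebdavMemoryHandler`) otherwise. A usage sketch; the path is a placeholder and assumes credentials are provided via the `WEBDAV_*` environment variables:

```python
from megfile.webdav_path import WebdavPath

path = WebdavPath("webdav://dav.example.com/data/big.bin")  # placeholder path

# The keyword-only arguments below are the new tuning knobs on open();
# they only take effect on the prefetch-reader path (range-capable servers).
with path.open("rb", block_size=8 * 2**20, max_workers=4) as f:
    header = f.read(1024)
```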
@@ -870,7 +865,7 @@ class WebdavPath(URIPath):
         """
         if len(self.listdir()) > 0:
             raise OSError(f"Directory not empty: '{self.path_with_protocol}'")
-        self._client.clean(self.…
+        self._client.clean(self._remote_path)

     def copy(
         self,
@@ -902,11 +897,11 @@ class WebdavPath(URIPath):
         dst_path = self.from_path(dst_path)

         if self._is_same_backend(dst_path):
-            if self.…
+            if self._remote_path == dst_path._remote_path:
                 raise SameFileError(
                     f"'{self.path}' and '{dst_path.path}' are the same file"
                 )
-            self._client.copy(self.…
+            self._client.copy(self._remote_path, dst_path._remote_path)
             if callback:
                 callback(self.stat().size)
         else:
{megfile-4.2.5.dist-info → megfile-5.0.1.dist-info}/METADATA
CHANGED
@@ -1,8 +1,8 @@
 Metadata-Version: 2.4
 Name: megfile
-Version: …
+Version: 5.0.1
 Summary: Megvii file operation library
-Author-email: megvii <…
+Author-email: megvii-reng <megvii-reng@googlegroups.com>
 Project-URL: Homepage, https://github.com/megvii-research/megfile
 Classifier: Development Status :: 5 - Production/Stable
 Classifier: Environment :: Console
@@ -12,12 +12,12 @@ Classifier: Operating System :: POSIX :: Linux
 Classifier: Programming Language :: Python
 Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3 :: Only
-Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3.13
-
+Classifier: Programming Language :: Python :: 3.14
+Requires-Python: >=3.10
 Description-Content-Type: text/markdown
 License-File: LICENSE
 License-File: LICENSE.pyre
@@ -51,21 +51,16 @@ megfile - Megvii FILE library

 * Docs: http://megvii-research.github.io/megfile

-`megfile` provides a silky operation experience with different backends…
+`megfile` provides a silky operation experience with different backends, which enable you to focus more on the logic of your own project instead of the question of "Which backend is used for this file?"

-
+## Why megfile

-* …
-* …
-* …
-* …
-
-
-
-* `smart_open` can open resources that use various protocols. Especially, reader / writer of s3 in `megfile` is implemented with multi-thread, which is faster than known competitors.
-* `smart_glob` is available on majority protocols. And it supports zsh extended pattern syntax of `[]`, e.g. `s3://bucket/video.{mp4,avi}`.
-* All-inclusive functions like `smart_exists` / `smart_stat` / `smart_sync`. If you don't find the functions you want, [submit an issue](https://github.com/megvii-research/megfile/issues).
-* Compatible with `pathlib.Path` interface, referring to `SmartPath` and other protocol classes like `S3Path`.
+* Same interfaces as the python standard library, low learning curve
+* Faster file read and write operations
+* Excellent error retry mechanism to help you handle network issues
+* Supports popular protocols, even making it easy to use the same protocol with different endpoints
+* Stable and secure, with CI coverage over 95%, used by multiple industry giants
+* Perfect type hints and built-in documentation. You can enjoy the IDE's auto-completion and static checking

 ## Support Protocols
 - fs(local filesystem)
@@ -73,11 +68,15 @@ megfile - Megvii FILE library
 - sftp
 - http
 - stdio
-- hdfs: `…
+- hdfs: `pip3 install 'megfile[hdfs]'`
+- webdav: `pip3 install 'megfile[webdav]'`

 ## Quick Start

-
+The interfaces of `megfile` correspond to those in the Python standard library. For example, `open` -> `smart_open` and `pathlib.Path` -> `SmartPath`. You only need to [configure the protocol settings](https://megvii-research.github.io/megfile/configuration.html) and provide the path in the corresponding format to use them conveniently.
+
+Path string in `megfile` almost is `protocol://path/to/file`, for example `s3://bucketA/key`. More details see [path format document](https://megvii-research.github.io/megfile/path_format.html).
+
 Here's an example of writing a file to s3 / fs, syncing to local, reading and finally deleting it.

 ### Functional Interface
@@ -106,7 +105,7 @@ smart_glob('s3://playground/megfile-?.{mp4,avi}')

 ### SmartPath Interface

-`SmartPath` has a similar interface with pathlib.Path
+`SmartPath` has a similar interface with `pathlib.Path`.

 ```python
 from megfile.smart_path import SmartPath
@@ -138,28 +137,15 @@ $ megfile cp s3://playground/megfile-test /tmp/playground/megfile-test

 ```bash
 pip3 install megfile
-```

-
-
-pip3 install "megfile~=0.0"
-```
+# for cli support
+pip3 install 'megfile[cli]'

-
+# for hdfs support
+pip3 install 'megfile[hdfs]'

-
-
-git clone git@github.com:megvii-research/megfile.git
-cd megfile
-pip3 install -U .
-```
-
-### Development Environment
-
-```bash
-git clone git@github.com:megvii-research/megfile.git
-cd megfile
-pip3 install -r requirements.txt -r requirements-dev.txt
+# for webdav support
+pip3 install 'megfile[webdav]'
 ```

 ## Configuration
@@ -218,6 +204,8 @@ You can get the configuration from `~/.config/megfile/aliases.conf`, like:
 protocol = s3+tos
 ```

+You can use alias in path, like `tos://bucket/key`, the same as `s3+tos://bucket/key`.
+
 ## Benchmark
 [](https://megvii-research.github.io/megfile/benchmark.html)
 [](https://megvii-research.github.io/megfile/benchmark.html)