megfile 4.2.5__py3-none-any.whl → 5.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- megfile/__init__.py +13 -293
- megfile/cli.py +37 -20
- megfile/config.py +10 -1
- megfile/errors.py +2 -2
- megfile/fs_path.py +32 -3
- megfile/interfaces.py +21 -10
- megfile/lib/base_memory_handler.py +92 -0
- megfile/lib/glob.py +3 -3
- megfile/lib/http_prefetch_reader.py +22 -22
- megfile/lib/s3_memory_handler.py +14 -81
- megfile/lib/webdav_memory_handler.py +83 -0
- megfile/lib/webdav_prefetch_reader.py +115 -0
- megfile/pathlike.py +3 -4
- megfile/s3_path.py +40 -32
- megfile/sftp2_path.py +38 -62
- megfile/sftp_path.py +238 -1
- megfile/smart.py +70 -29
- megfile/smart_path.py +181 -85
- megfile/version.py +1 -1
- megfile/webdav_path.py +159 -165
- {megfile-4.2.5.dist-info → megfile-5.0.0.dist-info}/METADATA +27 -39
- megfile-5.0.0.dist-info/RECORD +51 -0
- megfile/fs.py +0 -627
- megfile/hdfs.py +0 -408
- megfile/http.py +0 -114
- megfile/s3.py +0 -540
- megfile/sftp.py +0 -821
- megfile/sftp2.py +0 -827
- megfile/stdio.py +0 -30
- megfile/webdav.py +0 -552
- megfile-4.2.5.dist-info/RECORD +0 -56
- {megfile-4.2.5.dist-info → megfile-5.0.0.dist-info}/WHEEL +0 -0
- {megfile-4.2.5.dist-info → megfile-5.0.0.dist-info}/entry_points.txt +0 -0
- {megfile-4.2.5.dist-info → megfile-5.0.0.dist-info}/licenses/LICENSE +0 -0
- {megfile-4.2.5.dist-info → megfile-5.0.0.dist-info}/licenses/LICENSE.pyre +0 -0
- {megfile-4.2.5.dist-info → megfile-5.0.0.dist-info}/top_level.txt +0 -0
megfile/webdav_path.py
CHANGED
|
@@ -2,6 +2,9 @@ import hashlib
|
|
|
2
2
|
import io
|
|
3
3
|
import os
|
|
4
4
|
import re
|
|
5
|
+
import shlex
|
|
6
|
+
import subprocess
|
|
7
|
+
import time
|
|
5
8
|
from functools import cached_property
|
|
6
9
|
from logging import getLogger as get_logger
|
|
7
10
|
from typing import IO, BinaryIO, Callable, Iterable, Iterator, List, Optional, Tuple
|
|
@@ -10,27 +13,46 @@ from urllib.parse import quote, unquote, urlsplit, urlunsplit
|
|
|
10
13
|
import dateutil.parser
|
|
11
14
|
from webdav3.client import Client as WebdavClient
|
|
12
15
|
from webdav3.client import WebDavXmlUtils
|
|
13
|
-
from webdav3.exceptions import
|
|
16
|
+
from webdav3.exceptions import (
|
|
17
|
+
RemoteResourceNotFound,
|
|
18
|
+
ResponseErrorCode,
|
|
19
|
+
WebDavException,
|
|
20
|
+
)
|
|
14
21
|
from webdav3.urn import Urn
|
|
15
22
|
|
|
16
|
-
from megfile.
|
|
23
|
+
from megfile.config import (
|
|
24
|
+
READER_BLOCK_SIZE,
|
|
25
|
+
READER_MAX_BUFFER_SIZE,
|
|
26
|
+
WEBDAV_MAX_RETRY_TIMES,
|
|
27
|
+
)
|
|
28
|
+
from megfile.errors import (
|
|
29
|
+
SameFileError,
|
|
30
|
+
_create_missing_ok_generator,
|
|
31
|
+
http_should_retry,
|
|
32
|
+
patch_method,
|
|
33
|
+
)
|
|
17
34
|
from megfile.interfaces import (
|
|
18
35
|
ContextIterator,
|
|
19
36
|
FileEntry,
|
|
20
37
|
PathLike,
|
|
21
|
-
Readable,
|
|
22
|
-
Seekable,
|
|
23
38
|
StatResult,
|
|
24
|
-
Writable,
|
|
25
39
|
)
|
|
26
40
|
from megfile.lib.compare import is_same_file
|
|
27
41
|
from megfile.lib.compat import fspath
|
|
28
42
|
from megfile.lib.fnmatch import translate
|
|
29
43
|
from megfile.lib.glob import has_magic
|
|
30
44
|
from megfile.lib.joinpath import uri_join, uri_norm
|
|
45
|
+
from megfile.lib.webdav_memory_handler import WebdavMemoryHandler, _webdav_stat
|
|
46
|
+
from megfile.lib.webdav_prefetch_reader import WebdavPrefetchReader
|
|
31
47
|
from megfile.pathlike import URIPath
|
|
32
48
|
from megfile.smart_path import SmartPath
|
|
33
|
-
from megfile.utils import
|
|
49
|
+
from megfile.utils import (
|
|
50
|
+
_is_pickle,
|
|
51
|
+
binary_open,
|
|
52
|
+
calculate_md5,
|
|
53
|
+
copyfileobj,
|
|
54
|
+
thread_local,
|
|
55
|
+
)
|
|
34
56
|
|
|
35
57
|
_logger = get_logger(__name__)
|
|
36
58
|
|
|
@@ -42,6 +64,7 @@ __all__ = [
|
|
|
42
64
|
WEBDAV_USERNAME = "WEBDAV_USERNAME"
|
|
43
65
|
WEBDAV_PASSWORD = "WEBDAV_PASSWORD"
|
|
44
66
|
WEBDAV_TOKEN = "WEBDAV_TOKEN"
|
|
67
|
+
WEBDAV_TOKEN_COMMAND = "WEBDAV_TOKEN_COMMAND"
|
|
45
68
|
WEBDAV_TIMEOUT = "WEBDAV_TIMEOUT"
|
|
46
69
|
|
|
47
70
|
|
|
@@ -81,6 +104,7 @@ def provide_connect_info(
|
|
|
81
104
|
username: Optional[str] = None,
|
|
82
105
|
password: Optional[str] = None,
|
|
83
106
|
token: Optional[str] = None,
|
|
107
|
+
token_command: Optional[str] = None,
|
|
84
108
|
) -> dict:
|
|
85
109
|
"""Provide connection info for WebDAV client"""
|
|
86
110
|
if not username:
|
|
@@ -89,6 +113,8 @@ def provide_connect_info(
|
|
|
89
113
|
password = os.getenv(WEBDAV_PASSWORD)
|
|
90
114
|
if not token:
|
|
91
115
|
token = os.getenv(WEBDAV_TOKEN)
|
|
116
|
+
if not token_command:
|
|
117
|
+
token_command = os.getenv(WEBDAV_TOKEN_COMMAND)
|
|
92
118
|
|
|
93
119
|
timeout = int(os.getenv(WEBDAV_TIMEOUT, "30"))
|
|
94
120
|
|
|
@@ -98,7 +124,9 @@ def provide_connect_info(
|
|
|
98
124
|
"webdav_disable_check": True,
|
|
99
125
|
}
|
|
100
126
|
|
|
101
|
-
if
|
|
127
|
+
if token_command:
|
|
128
|
+
options["webdav_token_command"] = token_command
|
|
129
|
+
elif token:
|
|
102
130
|
options["webdav_token"] = token
|
|
103
131
|
elif username and password:
|
|
104
132
|
options["webdav_login"] = username
|
|
@@ -107,15 +135,82 @@ def provide_connect_info(
|
|
|
107
135
|
return options
|
|
108
136
|
|
|
109
137
|
|
|
138
|
+
def _patch_execute_request(
    client: WebdavClient,
    status_forcelist: Iterable[int] = (500, 502, 503, 504),
    max_retries: int = WEBDAV_MAX_RETRY_TIMES,
) -> WebdavClient:
    """Replace ``client.execute_request`` with a retrying, token-refreshing wrapper.

    The wrapper is built with ``patch_method`` and assigned back onto the
    client, so the client is modified in place.

    :param client: WebDAV client to patch. ``client.webdav`` is expected to
        carry ``token_command`` and ``token_command_last_call`` attributes.
    :param status_forcelist: HTTP status codes for which
        ``response.raise_for_status()`` is called on the response.
    :param max_retries: Forwarded to ``patch_method`` as ``max_retries``.
    :returns: The same ``client`` object that was passed in.
    """

    def webdav_update_token_by_command():
        # Run the configured command and use its stripped stdout as the token.
        cmds = shlex.split(client.webdav.token_command)
        # The timestamp is recorded before the command is executed.
        client.webdav.token_command_last_call = time.time()
        client.webdav.token = subprocess.check_output(cmds).decode().strip()

    def webdav_should_retry(error: Exception) -> bool:
        if http_should_retry(error):
            return True
        if (
            isinstance(error, ResponseErrorCode)
            and error.code == 401  # pytype: disable=attribute-error
        ):
            token_command = client.webdav.token_command  # pyre-ignore[16]
            last_call = client.webdav.token_command_last_call  # pyre-ignore[16]
            # On a 401, refresh the token only when a command is configured and
            # the previous refresh was started more than 5 seconds ago.
            if token_command is not None and time.time() - last_call > 5:
                webdav_update_token_by_command()
                return True
        return False

    def after_callback(response, *args, **kwargs):
        # Turn the listed status codes into exceptions.
        if response.status_code in status_forcelist:
            response.raise_for_status()
        return response

    def before_callback(action, path, data=None, headers_ext=None):
        # refresh token if needed
        if client.webdav.token_command is not None and not client.webdav.token:
            webdav_update_token_by_command()
        _logger.debug(
            "send http request: %s %r, with parameters: %s, headers: %s",
            action,
            path,
            data,
            headers_ext,
        )

    def retry_callback(error, action, path, data=None, headers_ext=None):
        if data and hasattr(data, "seek"):
            # Rewind seekable payloads to the start before the next attempt.
            data.seek(0)
        elif isinstance(data, Iterator):
            _logger.warning("Can not retry http request with iterator data")
            # Raise the received error explicitly: a bare ``raise`` only works
            # while an exception is being handled, which would make this
            # callback depend on how the caller invokes it.
            raise error

    client.execute_request = patch_method(
        client.execute_request,
        max_retries=max_retries,
        should_retry=webdav_should_retry,
        before_callback=before_callback,
        after_callback=after_callback,
        retry_callback=retry_callback,
    )

    return client
|
|
196
|
+
|
|
197
|
+
|
|
110
198
|
def _get_webdav_client(
    hostname: str,
    username: Optional[str] = None,
    password: Optional[str] = None,
    token: Optional[str] = None,
    token_command: Optional[str] = None,
) -> WebdavClient:
    """Create a new WebDAV client and patch its request method.

    Connection options come from ``provide_connect_info``. The
    ``webdav_token_command`` option, if present, is removed from the options
    dict after the client is constructed and stored on ``client.webdav``
    together with a zeroed ``token_command_last_call``.

    :returns: The client after ``_patch_execute_request`` has been applied.
    """
    connect_options = provide_connect_info(
        hostname, username, password, token, token_command
    )
    webdav_client = WebdavClient(connect_options)
    webdav_client.webdav.token_command = connect_options.pop(  # pyre-ignore[16]
        "webdav_token_command", None
    )
    webdav_client.webdav.token_command_last_call = 0  # pyre-ignore[16]
    return _patch_execute_request(webdav_client)
|
|
119
214
|
|
|
120
215
|
|
|
121
216
|
def get_webdav_client(
|
|
@@ -123,10 +218,11 @@ def get_webdav_client(
|
|
|
123
218
|
username: Optional[str] = None,
|
|
124
219
|
password: Optional[str] = None,
|
|
125
220
|
token: Optional[str] = None,
|
|
221
|
+
token_command: Optional[str] = None,
|
|
126
222
|
) -> WebdavClient:
|
|
127
223
|
"""Get cached WebDAV client"""
|
|
128
224
|
return thread_local(
|
|
129
|
-
f"webdav_client:{hostname},{username},{password},{token}",
|
|
225
|
+
f"webdav_client:{hostname},{username},{password},{token},{token_command}",
|
|
130
226
|
_get_webdav_client,
|
|
131
227
|
hostname,
|
|
132
228
|
username,
|
|
@@ -160,23 +256,6 @@ def _webdav_scan_pairs(
|
|
|
160
256
|
yield src_file_path, dst_file_path
|
|
161
257
|
|
|
162
258
|
|
|
163
|
-
def _webdav_stat(client: WebdavClient, remote_path: str):
|
|
164
|
-
urn = Urn(remote_path)
|
|
165
|
-
client._check_remote_resource(remote_path, urn)
|
|
166
|
-
|
|
167
|
-
response = client.execute_request(
|
|
168
|
-
action="info", path=urn.quote(), headers_ext=["Depth: 0"]
|
|
169
|
-
)
|
|
170
|
-
path = client.get_full_path(urn)
|
|
171
|
-
info = WebDavXmlUtils.parse_info_response(
|
|
172
|
-
response.content, path, client.webdav.hostname
|
|
173
|
-
)
|
|
174
|
-
info["is_dir"] = WebDavXmlUtils.parse_is_dir_response(
|
|
175
|
-
response.content, path, client.webdav.hostname
|
|
176
|
-
)
|
|
177
|
-
return info
|
|
178
|
-
|
|
179
|
-
|
|
180
259
|
def _webdav_scan(client: WebdavClient, remote_path: str) -> List[dict]:
|
|
181
260
|
directory_urn = Urn(remote_path, directory=True)
|
|
182
261
|
if directory_urn.path() != WebdavClient.root and not client.check(
|
|
@@ -208,112 +287,12 @@ def _webdav_split_magic(path: str) -> Tuple[str, str]:
|
|
|
208
287
|
return path, ""
|
|
209
288
|
|
|
210
289
|
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
webdav_client: WebdavClient,
|
|
218
|
-
name: str,
|
|
219
|
-
):
|
|
220
|
-
self._real_path = real_path
|
|
221
|
-
self._mode = mode
|
|
222
|
-
self._client = webdav_client
|
|
223
|
-
self._name = name
|
|
224
|
-
|
|
225
|
-
if mode not in ("rb", "wb", "ab", "rb+", "wb+", "ab+"):
|
|
226
|
-
raise ValueError("unacceptable mode: %r" % mode)
|
|
227
|
-
|
|
228
|
-
self._fileobj = io.BytesIO()
|
|
229
|
-
self._download_fileobj()
|
|
230
|
-
|
|
231
|
-
@property
|
|
232
|
-
def name(self) -> str:
|
|
233
|
-
return self._name
|
|
234
|
-
|
|
235
|
-
@property
|
|
236
|
-
def mode(self) -> str:
|
|
237
|
-
return self._mode
|
|
238
|
-
|
|
239
|
-
def tell(self) -> int:
|
|
240
|
-
return self._fileobj.tell()
|
|
241
|
-
|
|
242
|
-
def seek(self, offset: int, whence: int = os.SEEK_SET) -> int:
|
|
243
|
-
return self._fileobj.seek(offset, whence)
|
|
244
|
-
|
|
245
|
-
def readable(self) -> bool:
|
|
246
|
-
return self._mode[0] == "r" or self._mode[-1] == "+"
|
|
247
|
-
|
|
248
|
-
def read(self, size: Optional[int] = None) -> bytes:
|
|
249
|
-
if not self.readable():
|
|
250
|
-
raise io.UnsupportedOperation("not readable")
|
|
251
|
-
return self._fileobj.read(size)
|
|
252
|
-
|
|
253
|
-
def readline(self, size: Optional[int] = None) -> bytes:
|
|
254
|
-
if not self.readable():
|
|
255
|
-
raise io.UnsupportedOperation("not readable")
|
|
256
|
-
if size is None:
|
|
257
|
-
size = -1
|
|
258
|
-
return self._fileobj.readline(size)
|
|
259
|
-
|
|
260
|
-
def readlines(self, hint: Optional[int] = None) -> List[bytes]:
|
|
261
|
-
if not self.readable():
|
|
262
|
-
raise io.UnsupportedOperation("not readable")
|
|
263
|
-
if hint is None:
|
|
264
|
-
hint = -1
|
|
265
|
-
return self._fileobj.readlines(hint)
|
|
266
|
-
|
|
267
|
-
def writable(self) -> bool:
|
|
268
|
-
return self._mode[0] == "w" or self._mode[0] == "a" or self._mode[-1] == "+"
|
|
269
|
-
|
|
270
|
-
def flush(self):
|
|
271
|
-
self._fileobj.flush()
|
|
272
|
-
|
|
273
|
-
def write(self, data: bytes) -> int:
|
|
274
|
-
if not self.writable():
|
|
275
|
-
raise io.UnsupportedOperation("not writable")
|
|
276
|
-
if self._mode[0] == "a":
|
|
277
|
-
self.seek(0, os.SEEK_END)
|
|
278
|
-
return self._fileobj.write(data)
|
|
279
|
-
|
|
280
|
-
def writelines(self, lines: Iterable[bytes]):
|
|
281
|
-
if not self.writable():
|
|
282
|
-
raise io.UnsupportedOperation("not writable")
|
|
283
|
-
if self._mode[0] == "a":
|
|
284
|
-
self.seek(0, os.SEEK_END)
|
|
285
|
-
self._fileobj.writelines(lines)
|
|
286
|
-
|
|
287
|
-
def _file_exists(self) -> bool:
|
|
288
|
-
try:
|
|
289
|
-
return not self._client.is_dir(self._real_path)
|
|
290
|
-
except RemoteResourceNotFound:
|
|
291
|
-
return False
|
|
292
|
-
|
|
293
|
-
def _download_fileobj(self):
|
|
294
|
-
need_download = self._mode[0] == "r" or (
|
|
295
|
-
self._mode[0] == "a" and self._file_exists()
|
|
296
|
-
)
|
|
297
|
-
if not need_download:
|
|
298
|
-
return
|
|
299
|
-
# directly download to the file handle
|
|
300
|
-
self._client.download_from(self._fileobj, self._real_path)
|
|
301
|
-
if self._mode[0] == "r":
|
|
302
|
-
self.seek(0, os.SEEK_SET)
|
|
303
|
-
|
|
304
|
-
def _upload_fileobj(self):
|
|
305
|
-
need_upload = self.writable()
|
|
306
|
-
if not need_upload:
|
|
307
|
-
return
|
|
308
|
-
# directly upload from file handle
|
|
309
|
-
self.seek(0, os.SEEK_SET)
|
|
310
|
-
self._client.upload_to(self._fileobj, self._real_path)
|
|
311
|
-
|
|
312
|
-
def _close(self, need_upload: bool = True):
|
|
313
|
-
if hasattr(self, "_fileobj"):
|
|
314
|
-
if need_upload:
|
|
315
|
-
self._upload_fileobj()
|
|
316
|
-
self._fileobj.close()
|
|
290
|
+
def _webdav_check_accept_ranges(client: WebdavClient, remote_path: str):
    """Report whether the server answers with ``Accept-Ranges: bytes``.

    Sends a ``download`` action for ``remote_path``, closes the response, and
    then inspects its headers.

    :param client: WebDAV client used to send the request.
    :param remote_path: Remote path to probe; quoted via ``Urn`` before use.
    :returns: True if the ``Accept-Ranges`` response header equals ``"bytes"``.
    """
    quoted_path = Urn(remote_path).quote()
    response = client.execute_request(action="download", path=quoted_path)
    # Only the headers are inspected, so the response is closed right away.
    response.close()
    accept_ranges = response.headers.get("Accept-Ranges")
    return accept_ranges == "bytes"
|
|
317
296
|
|
|
318
297
|
|
|
319
298
|
@SmartPath.register
|
|
@@ -346,16 +325,15 @@ class WebdavPath(URIPath):
|
|
|
346
325
|
if parts.port:
|
|
347
326
|
self._hostname += f":{parts.port}"
|
|
348
327
|
|
|
349
|
-
self.
|
|
328
|
+
self._remote_path = unquote(parts.path) if parts.path else "/"
|
|
350
329
|
|
|
351
330
|
@cached_property
|
|
352
331
|
def parts(self) -> Tuple[str, ...]:
|
|
353
332
|
"""A tuple giving access to the path's various components"""
|
|
354
|
-
|
|
355
|
-
parts: List[str] = [urlunsplit(new_parts)] # pyre-ignore[9]
|
|
333
|
+
parts = [urlunsplit(self._urlsplit_parts._replace(path=""))]
|
|
356
334
|
path = self._urlsplit_parts.path.lstrip("/")
|
|
357
335
|
if path != "":
|
|
358
|
-
parts.extend(
|
|
336
|
+
parts.extend(path.split("/"))
|
|
359
337
|
return tuple(parts)
|
|
360
338
|
|
|
361
339
|
@property
|
|
@@ -385,7 +363,7 @@ class WebdavPath(URIPath):
|
|
|
385
363
|
:returns: True if the path exists, else False
|
|
386
364
|
"""
|
|
387
365
|
try:
|
|
388
|
-
_webdav_stat(self._client, self.
|
|
366
|
+
_webdav_stat(self._client, self._remote_path)
|
|
389
367
|
return True
|
|
390
368
|
except RemoteResourceNotFound:
|
|
391
369
|
return False
|
|
@@ -436,11 +414,11 @@ class WebdavPath(URIPath):
|
|
|
436
414
|
raise FileNotFoundError
|
|
437
415
|
:returns: An iterator contains tuples of path and file stat
|
|
438
416
|
"""
|
|
439
|
-
remote_path = self.
|
|
417
|
+
remote_path = self._remote_path
|
|
440
418
|
if pattern:
|
|
441
419
|
remote_path = os.path.join(remote_path, pattern)
|
|
442
420
|
remote_path, pattern = _webdav_split_magic(remote_path)
|
|
443
|
-
root = os.path.relpath(remote_path, self.
|
|
421
|
+
root = os.path.relpath(remote_path, self._remote_path)
|
|
444
422
|
root = uri_join(self.path_with_protocol, root)
|
|
445
423
|
root = uri_norm(root)
|
|
446
424
|
pattern = re.compile(translate(pattern))
|
|
@@ -479,7 +457,7 @@ class WebdavPath(URIPath):
|
|
|
479
457
|
:returns: True if the path is a directory, else False
|
|
480
458
|
"""
|
|
481
459
|
try:
|
|
482
|
-
return _webdav_stat(self._client, self.
|
|
460
|
+
return _webdav_stat(self._client, self._remote_path)["is_dir"]
|
|
483
461
|
except RemoteResourceNotFound:
|
|
484
462
|
return False
|
|
485
463
|
|
|
@@ -491,7 +469,7 @@ class WebdavPath(URIPath):
|
|
|
491
469
|
:returns: True if the path is a file, else False
|
|
492
470
|
"""
|
|
493
471
|
try:
|
|
494
|
-
return not _webdav_stat(self._client, self.
|
|
472
|
+
return not _webdav_stat(self._client, self._remote_path)["is_dir"]
|
|
495
473
|
except RemoteResourceNotFound:
|
|
496
474
|
return False
|
|
497
475
|
|
|
@@ -550,7 +528,7 @@ class WebdavPath(URIPath):
|
|
|
550
528
|
parent_path_object.mkdir(mode=mode, parents=False, exist_ok=True)
|
|
551
529
|
|
|
552
530
|
try:
|
|
553
|
-
self._client.mkdir(self.
|
|
531
|
+
self._client.mkdir(self._remote_path)
|
|
554
532
|
except WebDavException:
|
|
555
533
|
# Catch exception when mkdir concurrently
|
|
556
534
|
if not self.exists():
|
|
@@ -590,7 +568,9 @@ class WebdavPath(URIPath):
|
|
|
590
568
|
if self._is_same_backend(dst_path):
|
|
591
569
|
if overwrite:
|
|
592
570
|
dst_path.remove(missing_ok=True)
|
|
593
|
-
self._client.move(
|
|
571
|
+
self._client.move(
|
|
572
|
+
self._remote_path, dst_path._remote_path, overwrite=overwrite
|
|
573
|
+
)
|
|
594
574
|
else:
|
|
595
575
|
if self.is_dir():
|
|
596
576
|
for file_entry in self.scandir():
|
|
@@ -626,7 +606,7 @@ class WebdavPath(URIPath):
|
|
|
626
606
|
if missing_ok and not self.exists():
|
|
627
607
|
return
|
|
628
608
|
try:
|
|
629
|
-
self._client.clean(self.
|
|
609
|
+
self._client.clean(self._remote_path)
|
|
630
610
|
except RemoteResourceNotFound:
|
|
631
611
|
if not missing_ok:
|
|
632
612
|
raise FileNotFoundError(f"No such file: '{self.path_with_protocol}'")
|
|
@@ -665,8 +645,8 @@ class WebdavPath(URIPath):
|
|
|
665
645
|
)
|
|
666
646
|
return
|
|
667
647
|
|
|
668
|
-
for info in _webdav_scan(self._client, self.
|
|
669
|
-
entry = _make_entry(info, self.
|
|
648
|
+
for info in _webdav_scan(self._client, self._remote_path):
|
|
649
|
+
entry = _make_entry(info, self._remote_path, self.path_with_protocol)
|
|
670
650
|
if entry.is_dir():
|
|
671
651
|
continue
|
|
672
652
|
yield entry
|
|
@@ -691,8 +671,8 @@ class WebdavPath(URIPath):
|
|
|
691
671
|
raise NotADirectoryError(f"Not a directory: '{self.path_with_protocol}'")
|
|
692
672
|
|
|
693
673
|
def create_generator():
|
|
694
|
-
for info in _webdav_scandir(self._client, self.
|
|
695
|
-
yield _make_entry(info, self.
|
|
674
|
+
for info in _webdav_scandir(self._client, self._remote_path):
|
|
675
|
+
yield _make_entry(info, self._remote_path, self.path_with_protocol)
|
|
696
676
|
|
|
697
677
|
return ContextIterator(create_generator())
|
|
698
678
|
|
|
@@ -703,7 +683,7 @@ class WebdavPath(URIPath):
|
|
|
703
683
|
:returns: StatResult
|
|
704
684
|
"""
|
|
705
685
|
try:
|
|
706
|
-
info = _webdav_stat(self._client, self.
|
|
686
|
+
info = _webdav_stat(self._client, self._remote_path)
|
|
707
687
|
return _make_stat(info)
|
|
708
688
|
except RemoteResourceNotFound:
|
|
709
689
|
raise FileNotFoundError(f"No such file: '{self.path_with_protocol}'")
|
|
@@ -717,7 +697,7 @@ class WebdavPath(URIPath):
|
|
|
717
697
|
if missing_ok and not self.exists():
|
|
718
698
|
return
|
|
719
699
|
try:
|
|
720
|
-
self._client.clean(self.
|
|
700
|
+
self._client.clean(self._remote_path)
|
|
721
701
|
except RemoteResourceNotFound:
|
|
722
702
|
if not missing_ok:
|
|
723
703
|
raise FileNotFoundError(f"No such file: '{self.path_with_protocol}'")
|
|
@@ -736,7 +716,7 @@ class WebdavPath(URIPath):
|
|
|
736
716
|
if self.is_file():
|
|
737
717
|
return
|
|
738
718
|
|
|
739
|
-
stack = [self.
|
|
719
|
+
stack = [self._remote_path]
|
|
740
720
|
while stack:
|
|
741
721
|
root = stack.pop()
|
|
742
722
|
dirs, files = [], []
|
|
@@ -810,13 +790,15 @@ class WebdavPath(URIPath):
|
|
|
810
790
|
with self.open(mode="wb") as output:
|
|
811
791
|
output.write(file_object.read())
|
|
812
792
|
|
|
793
|
+
@binary_open
|
|
813
794
|
def open(
|
|
814
795
|
self,
|
|
815
|
-
mode: str = "
|
|
796
|
+
mode: str = "rb",
|
|
816
797
|
*,
|
|
817
|
-
|
|
818
|
-
|
|
819
|
-
|
|
798
|
+
max_workers: Optional[int] = None,
|
|
799
|
+
max_buffer_size: int = READER_MAX_BUFFER_SIZE,
|
|
800
|
+
block_forward: Optional[int] = None,
|
|
801
|
+
block_size: int = READER_BLOCK_SIZE,
|
|
820
802
|
**kwargs,
|
|
821
803
|
) -> IO:
|
|
822
804
|
"""Open a file on the path.
|
|
@@ -837,15 +819,27 @@ class WebdavPath(URIPath):
|
|
|
837
819
|
elif not self.exists():
|
|
838
820
|
raise FileNotFoundError("No such file: %r" % self.path_with_protocol)
|
|
839
821
|
|
|
840
|
-
|
|
841
|
-
self.
|
|
842
|
-
|
|
822
|
+
if mode == "rb":
|
|
823
|
+
if _webdav_check_accept_ranges(self._client, self._remote_path):
|
|
824
|
+
reader = WebdavPrefetchReader(
|
|
825
|
+
self._remote_path,
|
|
826
|
+
client=self._client,
|
|
827
|
+
block_size=block_size,
|
|
828
|
+
max_buffer_size=max_buffer_size,
|
|
829
|
+
block_forward=block_forward,
|
|
830
|
+
max_retries=WEBDAV_MAX_RETRY_TIMES,
|
|
831
|
+
max_workers=max_workers,
|
|
832
|
+
)
|
|
833
|
+
if _is_pickle(reader):
|
|
834
|
+
reader = io.BufferedReader(reader) # type: ignore
|
|
835
|
+
return reader
|
|
836
|
+
|
|
837
|
+
return WebdavMemoryHandler(
|
|
838
|
+
self._remote_path,
|
|
839
|
+
mode,
|
|
843
840
|
webdav_client=self._client,
|
|
844
841
|
name=self.path_with_protocol,
|
|
845
842
|
)
|
|
846
|
-
if "b" not in mode:
|
|
847
|
-
return io.TextIOWrapper(buffer, encoding=encoding, errors=errors)
|
|
848
|
-
return buffer
|
|
849
843
|
|
|
850
844
|
def chmod(self, mode: int, *, follow_symlinks: bool = True):
|
|
851
845
|
"""
|
|
@@ -870,7 +864,7 @@ class WebdavPath(URIPath):
|
|
|
870
864
|
"""
|
|
871
865
|
if len(self.listdir()) > 0:
|
|
872
866
|
raise OSError(f"Directory not empty: '{self.path_with_protocol}'")
|
|
873
|
-
self._client.clean(self.
|
|
867
|
+
self._client.clean(self._remote_path)
|
|
874
868
|
|
|
875
869
|
def copy(
|
|
876
870
|
self,
|
|
@@ -902,11 +896,11 @@ class WebdavPath(URIPath):
|
|
|
902
896
|
dst_path = self.from_path(dst_path)
|
|
903
897
|
|
|
904
898
|
if self._is_same_backend(dst_path):
|
|
905
|
-
if self.
|
|
899
|
+
if self._remote_path == dst_path._remote_path:
|
|
906
900
|
raise SameFileError(
|
|
907
901
|
f"'{self.path}' and '{dst_path.path}' are the same file"
|
|
908
902
|
)
|
|
909
|
-
self._client.copy(self.
|
|
903
|
+
self._client.copy(self._remote_path, dst_path._remote_path)
|
|
910
904
|
if callback:
|
|
911
905
|
callback(self.stat().size)
|
|
912
906
|
else:
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: megfile
|
|
3
|
-
Version:
|
|
3
|
+
Version: 5.0.0
|
|
4
4
|
Summary: Megvii file operation library
|
|
5
|
-
Author-email: megvii <
|
|
5
|
+
Author-email: megvii-reng <megvii-reng@googlegroups.com>
|
|
6
6
|
Project-URL: Homepage, https://github.com/megvii-research/megfile
|
|
7
7
|
Classifier: Development Status :: 5 - Production/Stable
|
|
8
8
|
Classifier: Environment :: Console
|
|
@@ -12,12 +12,12 @@ Classifier: Operating System :: POSIX :: Linux
|
|
|
12
12
|
Classifier: Programming Language :: Python
|
|
13
13
|
Classifier: Programming Language :: Python :: 3
|
|
14
14
|
Classifier: Programming Language :: Python :: 3 :: Only
|
|
15
|
-
Classifier: Programming Language :: Python :: 3.9
|
|
16
15
|
Classifier: Programming Language :: Python :: 3.10
|
|
17
16
|
Classifier: Programming Language :: Python :: 3.11
|
|
18
17
|
Classifier: Programming Language :: Python :: 3.12
|
|
19
18
|
Classifier: Programming Language :: Python :: 3.13
|
|
20
|
-
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
20
|
+
Requires-Python: >=3.10
|
|
21
21
|
Description-Content-Type: text/markdown
|
|
22
22
|
License-File: LICENSE
|
|
23
23
|
License-File: LICENSE.pyre
|
|
@@ -51,21 +51,16 @@ megfile - Megvii FILE library
|
|
|
51
51
|
|
|
52
52
|
* Docs: http://megvii-research.github.io/megfile
|
|
53
53
|
|
|
54
|
-
`megfile` provides a silky operation experience with different backends
|
|
54
|
+
`megfile` provides a silky operation experience with different backends, which enables you to focus more on the logic of your own project instead of the question of "Which backend is used for this file?"
|
|
55
55
|
|
|
56
|
-
|
|
56
|
+
## Why megfile
|
|
57
57
|
|
|
58
|
-
*
|
|
59
|
-
*
|
|
60
|
-
*
|
|
61
|
-
*
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
* `smart_open` can open resources that use various protocols. Especially, reader / writer of s3 in `megfile` is implemented with multi-thread, which is faster than known competitors.
|
|
66
|
-
* `smart_glob` is available on majority protocols. And it supports zsh extended pattern syntax of `[]`, e.g. `s3://bucket/video.{mp4,avi}`.
|
|
67
|
-
* All-inclusive functions like `smart_exists` / `smart_stat` / `smart_sync`. If you don't find the functions you want, [submit an issue](https://github.com/megvii-research/megfile/issues).
|
|
68
|
-
* Compatible with `pathlib.Path` interface, referring to `SmartPath` and other protocol classes like `S3Path`.
|
|
58
|
+
* Same interfaces as the python standard library, low learning curve
|
|
59
|
+
* Faster file read and write operations
|
|
60
|
+
* Excellent error retry mechanism to help you handle network issues
|
|
61
|
+
* Supports popular protocols, even making it easy to use the same protocol with different endpoints
|
|
62
|
+
* Stable and secure, with CI coverage over 95%, used by multiple industry giants
|
|
63
|
+
* Perfect type hints and built-in documentation. You can enjoy the IDE's auto-completion and static checking
|
|
69
64
|
|
|
70
65
|
## Support Protocols
|
|
71
66
|
- fs(local filesystem)
|
|
@@ -73,11 +68,15 @@ megfile - Megvii FILE library
|
|
|
73
68
|
- sftp
|
|
74
69
|
- http
|
|
75
70
|
- stdio
|
|
76
|
-
- hdfs: `
|
|
71
|
+
- hdfs: `pip3 install 'megfile[hdfs]'`
|
|
72
|
+
- webdav: `pip3 install 'megfile[webdav]'`
|
|
77
73
|
|
|
78
74
|
## Quick Start
|
|
79
75
|
|
|
80
|
-
|
|
76
|
+
The interfaces of `megfile` correspond to those in the Python standard library. For example, `open` -> `smart_open` and `pathlib.Path` -> `SmartPath`. You only need to [configure the protocol settings](https://megvii-research.github.io/megfile/configuration.html) and provide the path in the corresponding format to use them conveniently.
|
|
77
|
+
|
|
78
|
+
Path strings in `megfile` almost always take the form `protocol://path/to/file`, for example `s3://bucketA/key`. For more details, see the [path format document](https://megvii-research.github.io/megfile/path_format.html).
|
|
79
|
+
|
|
81
80
|
Here's an example of writing a file to s3 / fs, syncing to local, reading and finally deleting it.
|
|
82
81
|
|
|
83
82
|
### Functional Interface
|
|
@@ -106,7 +105,7 @@ smart_glob('s3://playground/megfile-?.{mp4,avi}')
|
|
|
106
105
|
|
|
107
106
|
### SmartPath Interface
|
|
108
107
|
|
|
109
|
-
`SmartPath` has a similar interface with pathlib.Path
|
|
108
|
+
`SmartPath` has an interface similar to `pathlib.Path`.
|
|
110
109
|
|
|
111
110
|
```python
|
|
112
111
|
from megfile.smart_path import SmartPath
|
|
@@ -138,28 +137,15 @@ $ megfile cp s3://playground/megfile-test /tmp/playground/megfile-test
|
|
|
138
137
|
|
|
139
138
|
```bash
|
|
140
139
|
pip3 install megfile
|
|
141
|
-
```
|
|
142
140
|
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
pip3 install "megfile~=0.0"
|
|
146
|
-
```
|
|
141
|
+
# for cli support
|
|
142
|
+
pip3 install 'megfile[cli]'
|
|
147
143
|
|
|
148
|
-
|
|
144
|
+
# for hdfs support
|
|
145
|
+
pip3 install 'megfile[hdfs]'
|
|
149
146
|
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
git clone git@github.com:megvii-research/megfile.git
|
|
153
|
-
cd megfile
|
|
154
|
-
pip3 install -U .
|
|
155
|
-
```
|
|
156
|
-
|
|
157
|
-
### Development Environment
|
|
158
|
-
|
|
159
|
-
```bash
|
|
160
|
-
git clone git@github.com:megvii-research/megfile.git
|
|
161
|
-
cd megfile
|
|
162
|
-
pip3 install -r requirements.txt -r requirements-dev.txt
|
|
147
|
+
# for webdav support
|
|
148
|
+
pip3 install 'megfile[webdav]'
|
|
163
149
|
```
|
|
164
150
|
|
|
165
151
|
## Configuration
|
|
@@ -218,6 +204,8 @@ You can get the configuration from `~/.config/megfile/aliases.conf`, like:
|
|
|
218
204
|
protocol = s3+tos
|
|
219
205
|
```
|
|
220
206
|
|
|
207
|
+
You can use an alias in a path: for example, `tos://bucket/key` is the same as `s3+tos://bucket/key`.
|
|
208
|
+
|
|
221
209
|
## Benchmark
|
|
222
210
|
[](https://megvii-research.github.io/megfile/benchmark.html)
|
|
223
211
|
[](https://megvii-research.github.io/megfile/benchmark.html)
|