megfile 4.2.5__py3-none-any.whl → 5.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
megfile/webdav_path.py CHANGED
@@ -2,6 +2,9 @@ import hashlib
2
2
  import io
3
3
  import os
4
4
  import re
5
+ import shlex
6
+ import subprocess
7
+ import time
5
8
  from functools import cached_property
6
9
  from logging import getLogger as get_logger
7
10
  from typing import IO, BinaryIO, Callable, Iterable, Iterator, List, Optional, Tuple
@@ -10,27 +13,46 @@ from urllib.parse import quote, unquote, urlsplit, urlunsplit
10
13
  import dateutil.parser
11
14
  from webdav3.client import Client as WebdavClient
12
15
  from webdav3.client import WebDavXmlUtils
13
- from webdav3.exceptions import RemoteResourceNotFound, WebDavException
16
+ from webdav3.exceptions import (
17
+ RemoteResourceNotFound,
18
+ ResponseErrorCode,
19
+ WebDavException,
20
+ )
14
21
  from webdav3.urn import Urn
15
22
 
16
- from megfile.errors import SameFileError, _create_missing_ok_generator
23
+ from megfile.config import (
24
+ READER_BLOCK_SIZE,
25
+ READER_MAX_BUFFER_SIZE,
26
+ WEBDAV_MAX_RETRY_TIMES,
27
+ )
28
+ from megfile.errors import (
29
+ SameFileError,
30
+ _create_missing_ok_generator,
31
+ http_should_retry,
32
+ patch_method,
33
+ )
17
34
  from megfile.interfaces import (
18
35
  ContextIterator,
19
36
  FileEntry,
20
37
  PathLike,
21
- Readable,
22
- Seekable,
23
38
  StatResult,
24
- Writable,
25
39
  )
26
40
  from megfile.lib.compare import is_same_file
27
41
  from megfile.lib.compat import fspath
28
42
  from megfile.lib.fnmatch import translate
29
43
  from megfile.lib.glob import has_magic
30
44
  from megfile.lib.joinpath import uri_join, uri_norm
45
+ from megfile.lib.webdav_memory_handler import WebdavMemoryHandler, _webdav_stat
46
+ from megfile.lib.webdav_prefetch_reader import WebdavPrefetchReader
31
47
  from megfile.pathlike import URIPath
32
48
  from megfile.smart_path import SmartPath
33
- from megfile.utils import calculate_md5, copyfileobj, get_binary_mode, thread_local
49
+ from megfile.utils import (
50
+ _is_pickle,
51
+ binary_open,
52
+ calculate_md5,
53
+ copyfileobj,
54
+ thread_local,
55
+ )
34
56
 
35
57
  _logger = get_logger(__name__)
36
58
 
@@ -42,6 +64,7 @@ __all__ = [
42
64
  WEBDAV_USERNAME = "WEBDAV_USERNAME"
43
65
  WEBDAV_PASSWORD = "WEBDAV_PASSWORD"
44
66
  WEBDAV_TOKEN = "WEBDAV_TOKEN"
67
+ WEBDAV_TOKEN_COMMAND = "WEBDAV_TOKEN_COMMAND"
45
68
  WEBDAV_TIMEOUT = "WEBDAV_TIMEOUT"
46
69
 
47
70
 
@@ -55,7 +78,7 @@ def _make_stat(info: dict) -> StatResult:
55
78
  except Exception:
56
79
  mtime = 0.0
57
80
 
58
- isdir = info.get("is_dir", False)
81
+ isdir = info.get("isdir", False)
59
82
 
60
83
  return StatResult(
61
84
  size=size,
@@ -81,6 +104,7 @@ def provide_connect_info(
81
104
  username: Optional[str] = None,
82
105
  password: Optional[str] = None,
83
106
  token: Optional[str] = None,
107
+ token_command: Optional[str] = None,
84
108
  ) -> dict:
85
109
  """Provide connection info for WebDAV client"""
86
110
  if not username:
@@ -89,6 +113,8 @@ def provide_connect_info(
89
113
  password = os.getenv(WEBDAV_PASSWORD)
90
114
  if not token:
91
115
  token = os.getenv(WEBDAV_TOKEN)
116
+ if not token_command:
117
+ token_command = os.getenv(WEBDAV_TOKEN_COMMAND)
92
118
 
93
119
  timeout = int(os.getenv(WEBDAV_TIMEOUT, "30"))
94
120
 
@@ -98,7 +124,9 @@ def provide_connect_info(
98
124
  "webdav_disable_check": True,
99
125
  }
100
126
 
101
- if token:
127
+ if token_command:
128
+ options["webdav_token_command"] = token_command
129
+ elif token:
102
130
  options["webdav_token"] = token
103
131
  elif username and password:
104
132
  options["webdav_login"] = username
@@ -107,15 +135,83 @@ def provide_connect_info(
107
135
  return options
108
136
 
109
137
 
138
+ def _patch_execute_request(
139
+ client: WebdavClient,
140
+ status_forcelist: Iterable[int] = (500, 502, 503, 504),
141
+ max_retries: int = WEBDAV_MAX_RETRY_TIMES,
142
+ ) -> WebdavClient:
143
+ def webdav_update_token_by_command():
144
+ cmds = shlex.split(client.webdav.token_command)
145
+ client.webdav.token_command_last_call = time.time()
146
+ client.webdav.token = subprocess.check_output(cmds).decode().strip()
147
+ _logger.debug("update webdav token by command: %s", client.webdav.token_command)
148
+
149
+ def webdav_should_retry(error: Exception) -> bool:
150
+ if http_should_retry(error):
151
+ return True
152
+ if (
153
+ isinstance(error, ResponseErrorCode)
154
+ and error.code == 401 # pytype: disable=attribute-error
155
+ ):
156
+ token_command = client.webdav.token_command # pyre-ignore[16]
157
+ last_call = client.webdav.token_command_last_call # pyre-ignore[16]
158
+ if token_command is not None and time.time() - last_call > 5:
159
+ webdav_update_token_by_command()
160
+ return True
161
+ return False
162
+
163
+ def after_callback(response, *args, **kwargs):
164
+ if response.status_code in status_forcelist:
165
+ response.raise_for_status()
166
+ return response
167
+
168
+ def before_callback(action, path, data=None, headers_ext=None):
169
+ # refresh token if needed
170
+ if client.webdav.token_command is not None and not client.webdav.token:
171
+ webdav_update_token_by_command()
172
+ _logger.debug(
173
+ "send http request: %s %r, with parameters: %s, headers: %s",
174
+ action,
175
+ path,
176
+ data,
177
+ headers_ext,
178
+ )
179
+
180
+ def retry_callback(error, action, path, data=None, headers_ext=None):
181
+ if data and hasattr(data, "seek"):
182
+ data.seek(0)
183
+ elif isinstance(data, Iterator):
184
+ _logger.warning("Can not retry http request with iterator data")
185
+ raise
186
+
187
+ client.execute_request = patch_method(
188
+ client.execute_request,
189
+ max_retries=max_retries,
190
+ should_retry=webdav_should_retry,
191
+ before_callback=before_callback,
192
+ after_callback=after_callback,
193
+ retry_callback=retry_callback,
194
+ )
195
+
196
+ return client
197
+
198
+
110
199
  def _get_webdav_client(
111
200
  hostname: str,
112
201
  username: Optional[str] = None,
113
202
  password: Optional[str] = None,
114
203
  token: Optional[str] = None,
204
+ token_command: Optional[str] = None,
115
205
  ) -> WebdavClient:
116
206
  """Get WebDAV client"""
117
- options = provide_connect_info(hostname, username, password, token)
118
- return WebdavClient(options)
207
+ options = provide_connect_info(hostname, username, password, token, token_command)
208
+ client = WebdavClient(options)
209
+ client.webdav.token_command = options.pop( # pyre-ignore[16]
210
+ "webdav_token_command", None
211
+ )
212
+ client.webdav.token_command_last_call = 0 # pyre-ignore[16]
213
+ client = _patch_execute_request(client)
214
+ return client
119
215
 
120
216
 
121
217
  def get_webdav_client(
@@ -123,10 +219,11 @@ def get_webdav_client(
123
219
  username: Optional[str] = None,
124
220
  password: Optional[str] = None,
125
221
  token: Optional[str] = None,
222
+ token_command: Optional[str] = None,
126
223
  ) -> WebdavClient:
127
224
  """Get cached WebDAV client"""
128
225
  return thread_local(
129
- f"webdav_client:{hostname},{username},{password},{token}",
226
+ f"webdav_client:{hostname},{username},{password},{token},{token_command}",
130
227
  _get_webdav_client,
131
228
  hostname,
132
229
  username,
@@ -160,23 +257,6 @@ def _webdav_scan_pairs(
160
257
  yield src_file_path, dst_file_path
161
258
 
162
259
 
163
- def _webdav_stat(client: WebdavClient, remote_path: str):
164
- urn = Urn(remote_path)
165
- client._check_remote_resource(remote_path, urn)
166
-
167
- response = client.execute_request(
168
- action="info", path=urn.quote(), headers_ext=["Depth: 0"]
169
- )
170
- path = client.get_full_path(urn)
171
- info = WebDavXmlUtils.parse_info_response(
172
- response.content, path, client.webdav.hostname
173
- )
174
- info["is_dir"] = WebDavXmlUtils.parse_is_dir_response(
175
- response.content, path, client.webdav.hostname
176
- )
177
- return info
178
-
179
-
180
260
  def _webdav_scan(client: WebdavClient, remote_path: str) -> List[dict]:
181
261
  directory_urn = Urn(remote_path, directory=True)
182
262
  if directory_urn.path() != WebdavClient.root and not client.check(
@@ -208,112 +288,12 @@ def _webdav_split_magic(path: str) -> Tuple[str, str]:
208
288
  return path, ""
209
289
 
210
290
 
211
- class WebdavMemoryHandler(Readable[bytes], Seekable, Writable[bytes]): # noqa: F821
212
- def __init__(
213
- self,
214
- real_path: str,
215
- mode: str,
216
- *,
217
- webdav_client: WebdavClient,
218
- name: str,
219
- ):
220
- self._real_path = real_path
221
- self._mode = mode
222
- self._client = webdav_client
223
- self._name = name
224
-
225
- if mode not in ("rb", "wb", "ab", "rb+", "wb+", "ab+"):
226
- raise ValueError("unacceptable mode: %r" % mode)
227
-
228
- self._fileobj = io.BytesIO()
229
- self._download_fileobj()
230
-
231
- @property
232
- def name(self) -> str:
233
- return self._name
234
-
235
- @property
236
- def mode(self) -> str:
237
- return self._mode
238
-
239
- def tell(self) -> int:
240
- return self._fileobj.tell()
241
-
242
- def seek(self, offset: int, whence: int = os.SEEK_SET) -> int:
243
- return self._fileobj.seek(offset, whence)
244
-
245
- def readable(self) -> bool:
246
- return self._mode[0] == "r" or self._mode[-1] == "+"
247
-
248
- def read(self, size: Optional[int] = None) -> bytes:
249
- if not self.readable():
250
- raise io.UnsupportedOperation("not readable")
251
- return self._fileobj.read(size)
252
-
253
- def readline(self, size: Optional[int] = None) -> bytes:
254
- if not self.readable():
255
- raise io.UnsupportedOperation("not readable")
256
- if size is None:
257
- size = -1
258
- return self._fileobj.readline(size)
259
-
260
- def readlines(self, hint: Optional[int] = None) -> List[bytes]:
261
- if not self.readable():
262
- raise io.UnsupportedOperation("not readable")
263
- if hint is None:
264
- hint = -1
265
- return self._fileobj.readlines(hint)
266
-
267
- def writable(self) -> bool:
268
- return self._mode[0] == "w" or self._mode[0] == "a" or self._mode[-1] == "+"
269
-
270
- def flush(self):
271
- self._fileobj.flush()
272
-
273
- def write(self, data: bytes) -> int:
274
- if not self.writable():
275
- raise io.UnsupportedOperation("not writable")
276
- if self._mode[0] == "a":
277
- self.seek(0, os.SEEK_END)
278
- return self._fileobj.write(data)
279
-
280
- def writelines(self, lines: Iterable[bytes]):
281
- if not self.writable():
282
- raise io.UnsupportedOperation("not writable")
283
- if self._mode[0] == "a":
284
- self.seek(0, os.SEEK_END)
285
- self._fileobj.writelines(lines)
286
-
287
- def _file_exists(self) -> bool:
288
- try:
289
- return not self._client.is_dir(self._real_path)
290
- except RemoteResourceNotFound:
291
- return False
292
-
293
- def _download_fileobj(self):
294
- need_download = self._mode[0] == "r" or (
295
- self._mode[0] == "a" and self._file_exists()
296
- )
297
- if not need_download:
298
- return
299
- # directly download to the file handle
300
- self._client.download_from(self._fileobj, self._real_path)
301
- if self._mode[0] == "r":
302
- self.seek(0, os.SEEK_SET)
303
-
304
- def _upload_fileobj(self):
305
- need_upload = self.writable()
306
- if not need_upload:
307
- return
308
- # directly upload from file handle
309
- self.seek(0, os.SEEK_SET)
310
- self._client.upload_to(self._fileobj, self._real_path)
311
-
312
- def _close(self, need_upload: bool = True):
313
- if hasattr(self, "_fileobj"):
314
- if need_upload:
315
- self._upload_fileobj()
316
- self._fileobj.close()
291
+ def _webdav_check_accept_ranges(client: WebdavClient, remote_path: str):
292
+ urn = Urn(remote_path)
293
+ response = client.execute_request(action="download", path=urn.quote())
294
+ response.close()
295
+ headers = response.headers
296
+ return headers.get("Accept-Ranges") == "bytes"
317
297
 
318
298
 
319
299
  @SmartPath.register
@@ -346,16 +326,15 @@ class WebdavPath(URIPath):
346
326
  if parts.port:
347
327
  self._hostname += f":{parts.port}"
348
328
 
349
- self._real_path = unquote(parts.path) if parts.path else "/"
329
+ self._remote_path = unquote(parts.path) if parts.path else "/"
350
330
 
351
331
  @cached_property
352
332
  def parts(self) -> Tuple[str, ...]:
353
333
  """A tuple giving access to the path's various components"""
354
- new_parts = self._urlsplit_parts._replace(path="/")
355
- parts: List[str] = [urlunsplit(new_parts)] # pyre-ignore[9]
334
+ parts = [urlunsplit(self._urlsplit_parts._replace(path=""))]
356
335
  path = self._urlsplit_parts.path.lstrip("/")
357
336
  if path != "":
358
- parts.extend(unquote(path).split("/"))
337
+ parts.extend(path.split("/"))
359
338
  return tuple(parts)
360
339
 
361
340
  @property
@@ -385,7 +364,7 @@ class WebdavPath(URIPath):
385
364
  :returns: True if the path exists, else False
386
365
  """
387
366
  try:
388
- _webdav_stat(self._client, self._real_path)
367
+ _webdav_stat(self._client, self._remote_path)
389
368
  return True
390
369
  except RemoteResourceNotFound:
391
370
  return False
@@ -436,11 +415,11 @@ class WebdavPath(URIPath):
436
415
  raise FileNotFoundError
437
416
  :returns: An iterator contains tuples of path and file stat
438
417
  """
439
- remote_path = self._real_path
418
+ remote_path = self._remote_path
440
419
  if pattern:
441
420
  remote_path = os.path.join(remote_path, pattern)
442
421
  remote_path, pattern = _webdav_split_magic(remote_path)
443
- root = os.path.relpath(remote_path, self._real_path)
422
+ root = os.path.relpath(remote_path, self._remote_path)
444
423
  root = uri_join(self.path_with_protocol, root)
445
424
  root = uri_norm(root)
446
425
  pattern = re.compile(translate(pattern))
@@ -479,7 +458,7 @@ class WebdavPath(URIPath):
479
458
  :returns: True if the path is a directory, else False
480
459
  """
481
460
  try:
482
- return _webdav_stat(self._client, self._real_path)["is_dir"]
461
+ return _webdav_stat(self._client, self._remote_path)["isdir"]
483
462
  except RemoteResourceNotFound:
484
463
  return False
485
464
 
@@ -491,7 +470,7 @@ class WebdavPath(URIPath):
491
470
  :returns: True if the path is a file, else False
492
471
  """
493
472
  try:
494
- return not _webdav_stat(self._client, self._real_path)["is_dir"]
473
+ return not _webdav_stat(self._client, self._remote_path)["isdir"]
495
474
  except RemoteResourceNotFound:
496
475
  return False
497
476
 
@@ -550,7 +529,7 @@ class WebdavPath(URIPath):
550
529
  parent_path_object.mkdir(mode=mode, parents=False, exist_ok=True)
551
530
 
552
531
  try:
553
- self._client.mkdir(self._real_path)
532
+ self._client.mkdir(self._remote_path)
554
533
  except WebDavException:
555
534
  # Catch exception when mkdir concurrently
556
535
  if not self.exists():
@@ -590,7 +569,9 @@ class WebdavPath(URIPath):
590
569
  if self._is_same_backend(dst_path):
591
570
  if overwrite:
592
571
  dst_path.remove(missing_ok=True)
593
- self._client.move(self._real_path, dst_path._real_path, overwrite=overwrite)
572
+ self._client.move(
573
+ self._remote_path, dst_path._remote_path, overwrite=overwrite
574
+ )
594
575
  else:
595
576
  if self.is_dir():
596
577
  for file_entry in self.scandir():
@@ -626,7 +607,7 @@ class WebdavPath(URIPath):
626
607
  if missing_ok and not self.exists():
627
608
  return
628
609
  try:
629
- self._client.clean(self._real_path)
610
+ self._client.clean(self._remote_path)
630
611
  except RemoteResourceNotFound:
631
612
  if not missing_ok:
632
613
  raise FileNotFoundError(f"No such file: '{self.path_with_protocol}'")
@@ -665,8 +646,8 @@ class WebdavPath(URIPath):
665
646
  )
666
647
  return
667
648
 
668
- for info in _webdav_scan(self._client, self._real_path):
669
- entry = _make_entry(info, self._real_path, self.path_with_protocol)
649
+ for info in _webdav_scan(self._client, self._remote_path):
650
+ entry = _make_entry(info, self._remote_path, self.path_with_protocol)
670
651
  if entry.is_dir():
671
652
  continue
672
653
  yield entry
@@ -691,8 +672,8 @@ class WebdavPath(URIPath):
691
672
  raise NotADirectoryError(f"Not a directory: '{self.path_with_protocol}'")
692
673
 
693
674
  def create_generator():
694
- for info in _webdav_scandir(self._client, self._real_path):
695
- yield _make_entry(info, self._real_path, self.path_with_protocol)
675
+ for info in _webdav_scandir(self._client, self._remote_path):
676
+ yield _make_entry(info, self._remote_path, self.path_with_protocol)
696
677
 
697
678
  return ContextIterator(create_generator())
698
679
 
@@ -703,7 +684,7 @@ class WebdavPath(URIPath):
703
684
  :returns: StatResult
704
685
  """
705
686
  try:
706
- info = _webdav_stat(self._client, self._real_path)
687
+ info = _webdav_stat(self._client, self._remote_path)
707
688
  return _make_stat(info)
708
689
  except RemoteResourceNotFound:
709
690
  raise FileNotFoundError(f"No such file: '{self.path_with_protocol}'")
@@ -717,7 +698,7 @@ class WebdavPath(URIPath):
717
698
  if missing_ok and not self.exists():
718
699
  return
719
700
  try:
720
- self._client.clean(self._real_path)
701
+ self._client.clean(self._remote_path)
721
702
  except RemoteResourceNotFound:
722
703
  if not missing_ok:
723
704
  raise FileNotFoundError(f"No such file: '{self.path_with_protocol}'")
@@ -736,7 +717,7 @@ class WebdavPath(URIPath):
736
717
  if self.is_file():
737
718
  return
738
719
 
739
- stack = [self._real_path]
720
+ stack = [self._remote_path]
740
721
  while stack:
741
722
  root = stack.pop()
742
723
  dirs, files = [], []
@@ -810,13 +791,15 @@ class WebdavPath(URIPath):
810
791
  with self.open(mode="wb") as output:
811
792
  output.write(file_object.read())
812
793
 
794
+ @binary_open
813
795
  def open(
814
796
  self,
815
- mode: str = "r",
797
+ mode: str = "rb",
816
798
  *,
817
- buffering=-1,
818
- encoding: Optional[str] = None,
819
- errors: Optional[str] = None,
799
+ max_workers: Optional[int] = None,
800
+ max_buffer_size: int = READER_MAX_BUFFER_SIZE,
801
+ block_forward: Optional[int] = None,
802
+ block_size: int = READER_BLOCK_SIZE,
820
803
  **kwargs,
821
804
  ) -> IO:
822
805
  """Open a file on the path.
@@ -837,15 +820,27 @@ class WebdavPath(URIPath):
837
820
  elif not self.exists():
838
821
  raise FileNotFoundError("No such file: %r" % self.path_with_protocol)
839
822
 
840
- buffer = WebdavMemoryHandler(
841
- self._real_path,
842
- get_binary_mode(mode),
823
+ if mode == "rb":
824
+ if _webdav_check_accept_ranges(self._client, self._remote_path):
825
+ reader = WebdavPrefetchReader(
826
+ self._remote_path,
827
+ client=self._client,
828
+ block_size=block_size,
829
+ max_buffer_size=max_buffer_size,
830
+ block_forward=block_forward,
831
+ max_retries=WEBDAV_MAX_RETRY_TIMES,
832
+ max_workers=max_workers,
833
+ )
834
+ if _is_pickle(reader):
835
+ reader = io.BufferedReader(reader) # type: ignore
836
+ return reader
837
+
838
+ return WebdavMemoryHandler(
839
+ self._remote_path,
840
+ mode,
843
841
  webdav_client=self._client,
844
842
  name=self.path_with_protocol,
845
843
  )
846
- if "b" not in mode:
847
- return io.TextIOWrapper(buffer, encoding=encoding, errors=errors)
848
- return buffer
849
844
 
850
845
  def chmod(self, mode: int, *, follow_symlinks: bool = True):
851
846
  """
@@ -870,7 +865,7 @@ class WebdavPath(URIPath):
870
865
  """
871
866
  if len(self.listdir()) > 0:
872
867
  raise OSError(f"Directory not empty: '{self.path_with_protocol}'")
873
- self._client.clean(self._real_path)
868
+ self._client.clean(self._remote_path)
874
869
 
875
870
  def copy(
876
871
  self,
@@ -902,11 +897,11 @@ class WebdavPath(URIPath):
902
897
  dst_path = self.from_path(dst_path)
903
898
 
904
899
  if self._is_same_backend(dst_path):
905
- if self._real_path == dst_path._real_path:
900
+ if self._remote_path == dst_path._remote_path:
906
901
  raise SameFileError(
907
902
  f"'{self.path}' and '{dst_path.path}' are the same file"
908
903
  )
909
- self._client.copy(self._real_path, dst_path._real_path)
904
+ self._client.copy(self._remote_path, dst_path._remote_path)
910
905
  if callback:
911
906
  callback(self.stat().size)
912
907
  else:
@@ -1,8 +1,8 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: megfile
3
- Version: 4.2.5
3
+ Version: 5.0.1
4
4
  Summary: Megvii file operation library
5
- Author-email: megvii <megfile@megvii.com>
5
+ Author-email: megvii-reng <megvii-reng@googlegroups.com>
6
6
  Project-URL: Homepage, https://github.com/megvii-research/megfile
7
7
  Classifier: Development Status :: 5 - Production/Stable
8
8
  Classifier: Environment :: Console
@@ -12,12 +12,12 @@ Classifier: Operating System :: POSIX :: Linux
12
12
  Classifier: Programming Language :: Python
13
13
  Classifier: Programming Language :: Python :: 3
14
14
  Classifier: Programming Language :: Python :: 3 :: Only
15
- Classifier: Programming Language :: Python :: 3.9
16
15
  Classifier: Programming Language :: Python :: 3.10
17
16
  Classifier: Programming Language :: Python :: 3.11
18
17
  Classifier: Programming Language :: Python :: 3.12
19
18
  Classifier: Programming Language :: Python :: 3.13
20
- Requires-Python: >=3.9
19
+ Classifier: Programming Language :: Python :: 3.14
20
+ Requires-Python: >=3.10
21
21
  Description-Content-Type: text/markdown
22
22
  License-File: LICENSE
23
23
  License-File: LICENSE.pyre
@@ -51,21 +51,16 @@ megfile - Megvii FILE library
51
51
 
52
52
  * Docs: http://megvii-research.github.io/megfile
53
53
 
54
- `megfile` provides a silky operation experience with different backends (currently including local file system and s3), which enable you to focus more on the logic of your own project instead of the question of "Which backend is used for this file?"
54
+ `megfile` provides a smooth, consistent operation experience across different backends, which enables you to focus more on the logic of your own project instead of the question of "Which backend is used for this file?"
55
55
 
56
- `megfile` provides:
56
+ ## Why megfile
57
57
 
58
- * Almost unified file system operation experience. Target path can be easily moved from local file system to s3.
59
- * Complete boundary case handling. Even the most difficult (or even you can't even think of) boundary conditions, `megfile` can help you easily handle it.
60
- * Perfect type hints and built-in documentation. You can enjoy the IDE's auto-completion and static checking.
61
- * Semantic version and upgrade guide, which allows you enjoy the latest features easily.
62
-
63
- `megfile`'s advantages are:
64
-
65
- * `smart_open` can open resources that use various protocols. Especially, reader / writer of s3 in `megfile` is implemented with multi-thread, which is faster than known competitors.
66
- * `smart_glob` is available on majority protocols. And it supports zsh extended pattern syntax of `[]`, e.g. `s3://bucket/video.{mp4,avi}`.
67
- * All-inclusive functions like `smart_exists` / `smart_stat` / `smart_sync`. If you don't find the functions you want, [submit an issue](https://github.com/megvii-research/megfile/issues).
68
- * Compatible with `pathlib.Path` interface, referring to `SmartPath` and other protocol classes like `S3Path`.
58
+ * Same interfaces as the Python standard library, so the learning curve is low
59
+ * Faster file read and write operations
60
+ * Excellent error retry mechanism to help you handle network issues
61
+ * Supports popular protocols, even making it easy to use the same protocol with different endpoints
62
+ * Stable and secure, with CI coverage over 95%, used by multiple industry giants
63
+ * Perfect type hints and built-in documentation. You can enjoy the IDE's auto-completion and static checking
69
64
 
70
65
  ## Support Protocols
71
66
  - fs(local filesystem)
@@ -73,11 +68,15 @@ megfile - Megvii FILE library
73
68
  - sftp
74
69
  - http
75
70
  - stdio
76
- - hdfs: `pip install 'megfile[hdfs]'`
71
+ - hdfs: `pip3 install 'megfile[hdfs]'`
72
+ - webdav: `pip3 install 'megfile[webdav]'`
77
73
 
78
74
  ## Quick Start
79
75
 
80
- Path string in `megfile` almost is `protocol://path/to/file`, for example `s3://bucketA/key`. But sftp path is a little different, format is `sftp://[username[:password]@]hostname[:port]//absolute_file_path`. More details see [path format document](https://megvii-research.github.io/megfile/path_format.html).
76
+ The interfaces of `megfile` correspond to those in the Python standard library. For example, `open` -> `smart_open` and `pathlib.Path` -> `SmartPath`. You only need to [configure the protocol settings](https://megvii-research.github.io/megfile/configuration.html) and provide the path in the corresponding format to use them conveniently.
77
+
78
+ Path strings in `megfile` almost always take the form `protocol://path/to/file`, for example `s3://bucketA/key`. For more details, see the [path format document](https://megvii-research.github.io/megfile/path_format.html).
79
+
81
80
  Here's an example of writing a file to s3 / fs, syncing to local, reading and finally deleting it.
82
81
 
83
82
  ### Functional Interface
@@ -106,7 +105,7 @@ smart_glob('s3://playground/megfile-?.{mp4,avi}')
106
105
 
107
106
  ### SmartPath Interface
108
107
 
109
- `SmartPath` has a similar interface with pathlib.Path.
108
+ `SmartPath` has an interface similar to `pathlib.Path`.
110
109
 
111
110
  ```python
112
111
  from megfile.smart_path import SmartPath
@@ -138,28 +137,15 @@ $ megfile cp s3://playground/megfile-test /tmp/playground/megfile-test
138
137
 
139
138
  ```bash
140
139
  pip3 install megfile
141
- ```
142
140
 
143
- You can specify megfile version as well
144
- ```bash
145
- pip3 install "megfile~=0.0"
146
- ```
141
+ # for cli support
142
+ pip3 install 'megfile[cli]'
147
143
 
148
- ### Build from Source
144
+ # for hdfs support
145
+ pip3 install 'megfile[hdfs]'
149
146
 
150
- megfile can be installed from source
151
- ```bash
152
- git clone git@github.com:megvii-research/megfile.git
153
- cd megfile
154
- pip3 install -U .
155
- ```
156
-
157
- ### Development Environment
158
-
159
- ```bash
160
- git clone git@github.com:megvii-research/megfile.git
161
- cd megfile
162
- pip3 install -r requirements.txt -r requirements-dev.txt
147
+ # for webdav support
148
+ pip3 install 'megfile[webdav]'
163
149
  ```
164
150
 
165
151
  ## Configuration
@@ -218,6 +204,8 @@ You can get the configuration from `~/.config/megfile/aliases.conf`, like:
218
204
  protocol = s3+tos
219
205
  ```
220
206
 
207
+ You can use an alias in a path: for example, `tos://bucket/key` is the same as `s3+tos://bucket/key`.
208
+
221
209
  ## Benchmark
222
210
  [![10GiB](https://github.com/megvii-research/megfile/blob/main/scripts/benchmark/10GiB.png?raw=true)](https://megvii-research.github.io/megfile/benchmark.html)
223
211
  [![10MiB](https://github.com/megvii-research/megfile/blob/main/scripts/benchmark/10MiB.png?raw=true)](https://megvii-research.github.io/megfile/benchmark.html)