megfile 4.2.5__py3-none-any.whl → 5.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
megfile/webdav_path.py CHANGED
@@ -2,6 +2,9 @@ import hashlib
2
2
  import io
3
3
  import os
4
4
  import re
5
+ import shlex
6
+ import subprocess
7
+ import time
5
8
  from functools import cached_property
6
9
  from logging import getLogger as get_logger
7
10
  from typing import IO, BinaryIO, Callable, Iterable, Iterator, List, Optional, Tuple
@@ -10,27 +13,46 @@ from urllib.parse import quote, unquote, urlsplit, urlunsplit
10
13
  import dateutil.parser
11
14
  from webdav3.client import Client as WebdavClient
12
15
  from webdav3.client import WebDavXmlUtils
13
- from webdav3.exceptions import RemoteResourceNotFound, WebDavException
16
+ from webdav3.exceptions import (
17
+ RemoteResourceNotFound,
18
+ ResponseErrorCode,
19
+ WebDavException,
20
+ )
14
21
  from webdav3.urn import Urn
15
22
 
16
- from megfile.errors import SameFileError, _create_missing_ok_generator
23
+ from megfile.config import (
24
+ READER_BLOCK_SIZE,
25
+ READER_MAX_BUFFER_SIZE,
26
+ WEBDAV_MAX_RETRY_TIMES,
27
+ )
28
+ from megfile.errors import (
29
+ SameFileError,
30
+ _create_missing_ok_generator,
31
+ http_should_retry,
32
+ patch_method,
33
+ )
17
34
  from megfile.interfaces import (
18
35
  ContextIterator,
19
36
  FileEntry,
20
37
  PathLike,
21
- Readable,
22
- Seekable,
23
38
  StatResult,
24
- Writable,
25
39
  )
26
40
  from megfile.lib.compare import is_same_file
27
41
  from megfile.lib.compat import fspath
28
42
  from megfile.lib.fnmatch import translate
29
43
  from megfile.lib.glob import has_magic
30
44
  from megfile.lib.joinpath import uri_join, uri_norm
45
+ from megfile.lib.webdav_memory_handler import WebdavMemoryHandler, _webdav_stat
46
+ from megfile.lib.webdav_prefetch_reader import WebdavPrefetchReader
31
47
  from megfile.pathlike import URIPath
32
48
  from megfile.smart_path import SmartPath
33
- from megfile.utils import calculate_md5, copyfileobj, get_binary_mode, thread_local
49
+ from megfile.utils import (
50
+ _is_pickle,
51
+ binary_open,
52
+ calculate_md5,
53
+ copyfileobj,
54
+ thread_local,
55
+ )
34
56
 
35
57
  _logger = get_logger(__name__)
36
58
 
@@ -42,6 +64,7 @@ __all__ = [
42
64
  WEBDAV_USERNAME = "WEBDAV_USERNAME"
43
65
  WEBDAV_PASSWORD = "WEBDAV_PASSWORD"
44
66
  WEBDAV_TOKEN = "WEBDAV_TOKEN"
67
+ WEBDAV_TOKEN_COMMAND = "WEBDAV_TOKEN_COMMAND"
45
68
  WEBDAV_TIMEOUT = "WEBDAV_TIMEOUT"
46
69
 
47
70
 
@@ -81,6 +104,7 @@ def provide_connect_info(
81
104
  username: Optional[str] = None,
82
105
  password: Optional[str] = None,
83
106
  token: Optional[str] = None,
107
+ token_command: Optional[str] = None,
84
108
  ) -> dict:
85
109
  """Provide connection info for WebDAV client"""
86
110
  if not username:
@@ -89,6 +113,8 @@ def provide_connect_info(
89
113
  password = os.getenv(WEBDAV_PASSWORD)
90
114
  if not token:
91
115
  token = os.getenv(WEBDAV_TOKEN)
116
+ if not token_command:
117
+ token_command = os.getenv(WEBDAV_TOKEN_COMMAND)
92
118
 
93
119
  timeout = int(os.getenv(WEBDAV_TIMEOUT, "30"))
94
120
 
@@ -98,7 +124,9 @@ def provide_connect_info(
98
124
  "webdav_disable_check": True,
99
125
  }
100
126
 
101
- if token:
127
+ if token_command:
128
+ options["webdav_token_command"] = token_command
129
+ elif token:
102
130
  options["webdav_token"] = token
103
131
  elif username and password:
104
132
  options["webdav_login"] = username
@@ -107,15 +135,82 @@ def provide_connect_info(
107
135
  return options
108
136
 
109
137
 
138
+ def _patch_execute_request(
139
+ client: WebdavClient,
140
+ status_forcelist: Iterable[int] = (500, 502, 503, 504),
141
+ max_retries: int = WEBDAV_MAX_RETRY_TIMES,
142
+ ) -> WebdavClient:
143
+ def webdav_update_token_by_command():
144
+ cmds = shlex.split(client.webdav.token_command)
145
+ client.webdav.token_command_last_call = time.time()
146
+ client.webdav.token = subprocess.check_output(cmds).decode().strip()
147
+
148
+ def webdav_should_retry(error: Exception) -> bool:
149
+ if http_should_retry(error):
150
+ return True
151
+ if (
152
+ isinstance(error, ResponseErrorCode)
153
+ and error.code == 401 # pytype: disable=attribute-error
154
+ ):
155
+ token_command = client.webdav.token_command # pyre-ignore[16]
156
+ last_call = client.webdav.token_command_last_call # pyre-ignore[16]
157
+ if token_command is not None and time.time() - last_call > 5:
158
+ webdav_update_token_by_command()
159
+ return True
160
+ return False
161
+
162
+ def after_callback(response, *args, **kwargs):
163
+ if response.status_code in status_forcelist:
164
+ response.raise_for_status()
165
+ return response
166
+
167
+ def before_callback(action, path, data=None, headers_ext=None):
168
+ # refresh token if needed
169
+ if client.webdav.token_command is not None and not client.webdav.token:
170
+ webdav_update_token_by_command()
171
+ _logger.debug(
172
+ "send http request: %s %r, with parameters: %s, headers: %s",
173
+ action,
174
+ path,
175
+ data,
176
+ headers_ext,
177
+ )
178
+
179
+ def retry_callback(error, action, path, data=None, headers_ext=None):
180
+ if data and hasattr(data, "seek"):
181
+ data.seek(0)
182
+ elif isinstance(data, Iterator):
183
+ _logger.warning("Can not retry http request with iterator data")
184
+ raise
185
+
186
+ client.execute_request = patch_method(
187
+ client.execute_request,
188
+ max_retries=max_retries,
189
+ should_retry=webdav_should_retry,
190
+ before_callback=before_callback,
191
+ after_callback=after_callback,
192
+ retry_callback=retry_callback,
193
+ )
194
+
195
+ return client
196
+
197
+
110
198
  def _get_webdav_client(
111
199
  hostname: str,
112
200
  username: Optional[str] = None,
113
201
  password: Optional[str] = None,
114
202
  token: Optional[str] = None,
203
+ token_command: Optional[str] = None,
115
204
  ) -> WebdavClient:
116
205
  """Get WebDAV client"""
117
- options = provide_connect_info(hostname, username, password, token)
118
- return WebdavClient(options)
206
+ options = provide_connect_info(hostname, username, password, token, token_command)
207
+ client = WebdavClient(options)
208
+ client.webdav.token_command = options.pop( # pyre-ignore[16]
209
+ "webdav_token_command", None
210
+ )
211
+ client.webdav.token_command_last_call = 0 # pyre-ignore[16]
212
+ client = _patch_execute_request(client)
213
+ return client
119
214
 
120
215
 
121
216
  def get_webdav_client(
@@ -123,10 +218,11 @@ def get_webdav_client(
123
218
  username: Optional[str] = None,
124
219
  password: Optional[str] = None,
125
220
  token: Optional[str] = None,
221
+ token_command: Optional[str] = None,
126
222
  ) -> WebdavClient:
127
223
  """Get cached WebDAV client"""
128
224
  return thread_local(
129
- f"webdav_client:{hostname},{username},{password},{token}",
225
+ f"webdav_client:{hostname},{username},{password},{token},{token_command}",
130
226
  _get_webdav_client,
131
227
  hostname,
132
228
  username,
@@ -160,23 +256,6 @@ def _webdav_scan_pairs(
160
256
  yield src_file_path, dst_file_path
161
257
 
162
258
 
163
- def _webdav_stat(client: WebdavClient, remote_path: str):
164
- urn = Urn(remote_path)
165
- client._check_remote_resource(remote_path, urn)
166
-
167
- response = client.execute_request(
168
- action="info", path=urn.quote(), headers_ext=["Depth: 0"]
169
- )
170
- path = client.get_full_path(urn)
171
- info = WebDavXmlUtils.parse_info_response(
172
- response.content, path, client.webdav.hostname
173
- )
174
- info["is_dir"] = WebDavXmlUtils.parse_is_dir_response(
175
- response.content, path, client.webdav.hostname
176
- )
177
- return info
178
-
179
-
180
259
  def _webdav_scan(client: WebdavClient, remote_path: str) -> List[dict]:
181
260
  directory_urn = Urn(remote_path, directory=True)
182
261
  if directory_urn.path() != WebdavClient.root and not client.check(
@@ -208,112 +287,12 @@ def _webdav_split_magic(path: str) -> Tuple[str, str]:
208
287
  return path, ""
209
288
 
210
289
 
211
- class WebdavMemoryHandler(Readable[bytes], Seekable, Writable[bytes]): # noqa: F821
212
- def __init__(
213
- self,
214
- real_path: str,
215
- mode: str,
216
- *,
217
- webdav_client: WebdavClient,
218
- name: str,
219
- ):
220
- self._real_path = real_path
221
- self._mode = mode
222
- self._client = webdav_client
223
- self._name = name
224
-
225
- if mode not in ("rb", "wb", "ab", "rb+", "wb+", "ab+"):
226
- raise ValueError("unacceptable mode: %r" % mode)
227
-
228
- self._fileobj = io.BytesIO()
229
- self._download_fileobj()
230
-
231
- @property
232
- def name(self) -> str:
233
- return self._name
234
-
235
- @property
236
- def mode(self) -> str:
237
- return self._mode
238
-
239
- def tell(self) -> int:
240
- return self._fileobj.tell()
241
-
242
- def seek(self, offset: int, whence: int = os.SEEK_SET) -> int:
243
- return self._fileobj.seek(offset, whence)
244
-
245
- def readable(self) -> bool:
246
- return self._mode[0] == "r" or self._mode[-1] == "+"
247
-
248
- def read(self, size: Optional[int] = None) -> bytes:
249
- if not self.readable():
250
- raise io.UnsupportedOperation("not readable")
251
- return self._fileobj.read(size)
252
-
253
- def readline(self, size: Optional[int] = None) -> bytes:
254
- if not self.readable():
255
- raise io.UnsupportedOperation("not readable")
256
- if size is None:
257
- size = -1
258
- return self._fileobj.readline(size)
259
-
260
- def readlines(self, hint: Optional[int] = None) -> List[bytes]:
261
- if not self.readable():
262
- raise io.UnsupportedOperation("not readable")
263
- if hint is None:
264
- hint = -1
265
- return self._fileobj.readlines(hint)
266
-
267
- def writable(self) -> bool:
268
- return self._mode[0] == "w" or self._mode[0] == "a" or self._mode[-1] == "+"
269
-
270
- def flush(self):
271
- self._fileobj.flush()
272
-
273
- def write(self, data: bytes) -> int:
274
- if not self.writable():
275
- raise io.UnsupportedOperation("not writable")
276
- if self._mode[0] == "a":
277
- self.seek(0, os.SEEK_END)
278
- return self._fileobj.write(data)
279
-
280
- def writelines(self, lines: Iterable[bytes]):
281
- if not self.writable():
282
- raise io.UnsupportedOperation("not writable")
283
- if self._mode[0] == "a":
284
- self.seek(0, os.SEEK_END)
285
- self._fileobj.writelines(lines)
286
-
287
- def _file_exists(self) -> bool:
288
- try:
289
- return not self._client.is_dir(self._real_path)
290
- except RemoteResourceNotFound:
291
- return False
292
-
293
- def _download_fileobj(self):
294
- need_download = self._mode[0] == "r" or (
295
- self._mode[0] == "a" and self._file_exists()
296
- )
297
- if not need_download:
298
- return
299
- # directly download to the file handle
300
- self._client.download_from(self._fileobj, self._real_path)
301
- if self._mode[0] == "r":
302
- self.seek(0, os.SEEK_SET)
303
-
304
- def _upload_fileobj(self):
305
- need_upload = self.writable()
306
- if not need_upload:
307
- return
308
- # directly upload from file handle
309
- self.seek(0, os.SEEK_SET)
310
- self._client.upload_to(self._fileobj, self._real_path)
311
-
312
- def _close(self, need_upload: bool = True):
313
- if hasattr(self, "_fileobj"):
314
- if need_upload:
315
- self._upload_fileobj()
316
- self._fileobj.close()
290
+ def _webdav_check_accept_ranges(client: WebdavClient, remote_path: str):
291
+ urn = Urn(remote_path)
292
+ response = client.execute_request(action="download", path=urn.quote())
293
+ response.close()
294
+ headers = response.headers
295
+ return headers.get("Accept-Ranges") == "bytes"
317
296
 
318
297
 
319
298
  @SmartPath.register
@@ -346,16 +325,15 @@ class WebdavPath(URIPath):
346
325
  if parts.port:
347
326
  self._hostname += f":{parts.port}"
348
327
 
349
- self._real_path = unquote(parts.path) if parts.path else "/"
328
+ self._remote_path = unquote(parts.path) if parts.path else "/"
350
329
 
351
330
  @cached_property
352
331
  def parts(self) -> Tuple[str, ...]:
353
332
  """A tuple giving access to the path's various components"""
354
- new_parts = self._urlsplit_parts._replace(path="/")
355
- parts: List[str] = [urlunsplit(new_parts)] # pyre-ignore[9]
333
+ parts = [urlunsplit(self._urlsplit_parts._replace(path=""))]
356
334
  path = self._urlsplit_parts.path.lstrip("/")
357
335
  if path != "":
358
- parts.extend(unquote(path).split("/"))
336
+ parts.extend(path.split("/"))
359
337
  return tuple(parts)
360
338
 
361
339
  @property
@@ -385,7 +363,7 @@ class WebdavPath(URIPath):
385
363
  :returns: True if the path exists, else False
386
364
  """
387
365
  try:
388
- _webdav_stat(self._client, self._real_path)
366
+ _webdav_stat(self._client, self._remote_path)
389
367
  return True
390
368
  except RemoteResourceNotFound:
391
369
  return False
@@ -436,11 +414,11 @@ class WebdavPath(URIPath):
436
414
  raise FileNotFoundError
437
415
  :returns: An iterator contains tuples of path and file stat
438
416
  """
439
- remote_path = self._real_path
417
+ remote_path = self._remote_path
440
418
  if pattern:
441
419
  remote_path = os.path.join(remote_path, pattern)
442
420
  remote_path, pattern = _webdav_split_magic(remote_path)
443
- root = os.path.relpath(remote_path, self._real_path)
421
+ root = os.path.relpath(remote_path, self._remote_path)
444
422
  root = uri_join(self.path_with_protocol, root)
445
423
  root = uri_norm(root)
446
424
  pattern = re.compile(translate(pattern))
@@ -479,7 +457,7 @@ class WebdavPath(URIPath):
479
457
  :returns: True if the path is a directory, else False
480
458
  """
481
459
  try:
482
- return _webdav_stat(self._client, self._real_path)["is_dir"]
460
+ return _webdav_stat(self._client, self._remote_path)["is_dir"]
483
461
  except RemoteResourceNotFound:
484
462
  return False
485
463
 
@@ -491,7 +469,7 @@ class WebdavPath(URIPath):
491
469
  :returns: True if the path is a file, else False
492
470
  """
493
471
  try:
494
- return not _webdav_stat(self._client, self._real_path)["is_dir"]
472
+ return not _webdav_stat(self._client, self._remote_path)["is_dir"]
495
473
  except RemoteResourceNotFound:
496
474
  return False
497
475
 
@@ -550,7 +528,7 @@ class WebdavPath(URIPath):
550
528
  parent_path_object.mkdir(mode=mode, parents=False, exist_ok=True)
551
529
 
552
530
  try:
553
- self._client.mkdir(self._real_path)
531
+ self._client.mkdir(self._remote_path)
554
532
  except WebDavException:
555
533
  # Catch exception when mkdir concurrently
556
534
  if not self.exists():
@@ -590,7 +568,9 @@ class WebdavPath(URIPath):
590
568
  if self._is_same_backend(dst_path):
591
569
  if overwrite:
592
570
  dst_path.remove(missing_ok=True)
593
- self._client.move(self._real_path, dst_path._real_path, overwrite=overwrite)
571
+ self._client.move(
572
+ self._remote_path, dst_path._remote_path, overwrite=overwrite
573
+ )
594
574
  else:
595
575
  if self.is_dir():
596
576
  for file_entry in self.scandir():
@@ -626,7 +606,7 @@ class WebdavPath(URIPath):
626
606
  if missing_ok and not self.exists():
627
607
  return
628
608
  try:
629
- self._client.clean(self._real_path)
609
+ self._client.clean(self._remote_path)
630
610
  except RemoteResourceNotFound:
631
611
  if not missing_ok:
632
612
  raise FileNotFoundError(f"No such file: '{self.path_with_protocol}'")
@@ -665,8 +645,8 @@ class WebdavPath(URIPath):
665
645
  )
666
646
  return
667
647
 
668
- for info in _webdav_scan(self._client, self._real_path):
669
- entry = _make_entry(info, self._real_path, self.path_with_protocol)
648
+ for info in _webdav_scan(self._client, self._remote_path):
649
+ entry = _make_entry(info, self._remote_path, self.path_with_protocol)
670
650
  if entry.is_dir():
671
651
  continue
672
652
  yield entry
@@ -691,8 +671,8 @@ class WebdavPath(URIPath):
691
671
  raise NotADirectoryError(f"Not a directory: '{self.path_with_protocol}'")
692
672
 
693
673
  def create_generator():
694
- for info in _webdav_scandir(self._client, self._real_path):
695
- yield _make_entry(info, self._real_path, self.path_with_protocol)
674
+ for info in _webdav_scandir(self._client, self._remote_path):
675
+ yield _make_entry(info, self._remote_path, self.path_with_protocol)
696
676
 
697
677
  return ContextIterator(create_generator())
698
678
 
@@ -703,7 +683,7 @@ class WebdavPath(URIPath):
703
683
  :returns: StatResult
704
684
  """
705
685
  try:
706
- info = _webdav_stat(self._client, self._real_path)
686
+ info = _webdav_stat(self._client, self._remote_path)
707
687
  return _make_stat(info)
708
688
  except RemoteResourceNotFound:
709
689
  raise FileNotFoundError(f"No such file: '{self.path_with_protocol}'")
@@ -717,7 +697,7 @@ class WebdavPath(URIPath):
717
697
  if missing_ok and not self.exists():
718
698
  return
719
699
  try:
720
- self._client.clean(self._real_path)
700
+ self._client.clean(self._remote_path)
721
701
  except RemoteResourceNotFound:
722
702
  if not missing_ok:
723
703
  raise FileNotFoundError(f"No such file: '{self.path_with_protocol}'")
@@ -736,7 +716,7 @@ class WebdavPath(URIPath):
736
716
  if self.is_file():
737
717
  return
738
718
 
739
- stack = [self._real_path]
719
+ stack = [self._remote_path]
740
720
  while stack:
741
721
  root = stack.pop()
742
722
  dirs, files = [], []
@@ -810,13 +790,15 @@ class WebdavPath(URIPath):
810
790
  with self.open(mode="wb") as output:
811
791
  output.write(file_object.read())
812
792
 
793
+ @binary_open
813
794
  def open(
814
795
  self,
815
- mode: str = "r",
796
+ mode: str = "rb",
816
797
  *,
817
- buffering=-1,
818
- encoding: Optional[str] = None,
819
- errors: Optional[str] = None,
798
+ max_workers: Optional[int] = None,
799
+ max_buffer_size: int = READER_MAX_BUFFER_SIZE,
800
+ block_forward: Optional[int] = None,
801
+ block_size: int = READER_BLOCK_SIZE,
820
802
  **kwargs,
821
803
  ) -> IO:
822
804
  """Open a file on the path.
@@ -837,15 +819,27 @@ class WebdavPath(URIPath):
837
819
  elif not self.exists():
838
820
  raise FileNotFoundError("No such file: %r" % self.path_with_protocol)
839
821
 
840
- buffer = WebdavMemoryHandler(
841
- self._real_path,
842
- get_binary_mode(mode),
822
+ if mode == "rb":
823
+ if _webdav_check_accept_ranges(self._client, self._remote_path):
824
+ reader = WebdavPrefetchReader(
825
+ self._remote_path,
826
+ client=self._client,
827
+ block_size=block_size,
828
+ max_buffer_size=max_buffer_size,
829
+ block_forward=block_forward,
830
+ max_retries=WEBDAV_MAX_RETRY_TIMES,
831
+ max_workers=max_workers,
832
+ )
833
+ if _is_pickle(reader):
834
+ reader = io.BufferedReader(reader) # type: ignore
835
+ return reader
836
+
837
+ return WebdavMemoryHandler(
838
+ self._remote_path,
839
+ mode,
843
840
  webdav_client=self._client,
844
841
  name=self.path_with_protocol,
845
842
  )
846
- if "b" not in mode:
847
- return io.TextIOWrapper(buffer, encoding=encoding, errors=errors)
848
- return buffer
849
843
 
850
844
  def chmod(self, mode: int, *, follow_symlinks: bool = True):
851
845
  """
@@ -870,7 +864,7 @@ class WebdavPath(URIPath):
870
864
  """
871
865
  if len(self.listdir()) > 0:
872
866
  raise OSError(f"Directory not empty: '{self.path_with_protocol}'")
873
- self._client.clean(self._real_path)
867
+ self._client.clean(self._remote_path)
874
868
 
875
869
  def copy(
876
870
  self,
@@ -902,11 +896,11 @@ class WebdavPath(URIPath):
902
896
  dst_path = self.from_path(dst_path)
903
897
 
904
898
  if self._is_same_backend(dst_path):
905
- if self._real_path == dst_path._real_path:
899
+ if self._remote_path == dst_path._remote_path:
906
900
  raise SameFileError(
907
901
  f"'{self.path}' and '{dst_path.path}' are the same file"
908
902
  )
909
- self._client.copy(self._real_path, dst_path._real_path)
903
+ self._client.copy(self._remote_path, dst_path._remote_path)
910
904
  if callback:
911
905
  callback(self.stat().size)
912
906
  else:
@@ -1,8 +1,8 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: megfile
3
- Version: 4.2.5
3
+ Version: 5.0.0
4
4
  Summary: Megvii file operation library
5
- Author-email: megvii <megfile@megvii.com>
5
+ Author-email: megvii-reng <megvii-reng@googlegroups.com>
6
6
  Project-URL: Homepage, https://github.com/megvii-research/megfile
7
7
  Classifier: Development Status :: 5 - Production/Stable
8
8
  Classifier: Environment :: Console
@@ -12,12 +12,12 @@ Classifier: Operating System :: POSIX :: Linux
12
12
  Classifier: Programming Language :: Python
13
13
  Classifier: Programming Language :: Python :: 3
14
14
  Classifier: Programming Language :: Python :: 3 :: Only
15
- Classifier: Programming Language :: Python :: 3.9
16
15
  Classifier: Programming Language :: Python :: 3.10
17
16
  Classifier: Programming Language :: Python :: 3.11
18
17
  Classifier: Programming Language :: Python :: 3.12
19
18
  Classifier: Programming Language :: Python :: 3.13
20
- Requires-Python: >=3.9
19
+ Classifier: Programming Language :: Python :: 3.14
20
+ Requires-Python: >=3.10
21
21
  Description-Content-Type: text/markdown
22
22
  License-File: LICENSE
23
23
  License-File: LICENSE.pyre
@@ -51,21 +51,16 @@ megfile - Megvii FILE library
51
51
 
52
52
  * Docs: http://megvii-research.github.io/megfile
53
53
 
54
- `megfile` provides a silky operation experience with different backends (currently including local file system and s3), which enable you to focus more on the logic of your own project instead of the question of "Which backend is used for this file?"
54
+ `megfile` provides a smooth operation experience across different backends, which enables you to focus more on the logic of your own project instead of on the question "Which backend is used for this file?"
55
55
 
56
- `megfile` provides:
56
+ ## Why megfile
57
57
 
58
- * Almost unified file system operation experience. Target path can be easily moved from local file system to s3.
59
- * Complete boundary case handling. Even the most difficult (or even you can't even think of) boundary conditions, `megfile` can help you easily handle it.
60
- * Perfect type hints and built-in documentation. You can enjoy the IDE's auto-completion and static checking.
61
- * Semantic version and upgrade guide, which allows you enjoy the latest features easily.
62
-
63
- `megfile`'s advantages are:
64
-
65
- * `smart_open` can open resources that use various protocols. Especially, reader / writer of s3 in `megfile` is implemented with multi-thread, which is faster than known competitors.
66
- * `smart_glob` is available on majority protocols. And it supports zsh extended pattern syntax of `[]`, e.g. `s3://bucket/video.{mp4,avi}`.
67
- * All-inclusive functions like `smart_exists` / `smart_stat` / `smart_sync`. If you don't find the functions you want, [submit an issue](https://github.com/megvii-research/megfile/issues).
68
- * Compatible with `pathlib.Path` interface, referring to `SmartPath` and other protocol classes like `S3Path`.
58
+ * Same interfaces as the python standard library, low learning curve
59
+ * Faster file read and write operations
60
+ * Excellent error retry mechanism to help you handle network issues
61
+ * Supports popular protocols, even making it easy to use the same protocol with different endpoints
62
+ * Stable and secure, with CI coverage over 95%, used by multiple industry giants
63
+ * Perfect type hints and built-in documentation. You can enjoy the IDE's auto-completion and static checking
69
64
 
70
65
  ## Support Protocols
71
66
  - fs(local filesystem)
@@ -73,11 +68,15 @@ megfile - Megvii FILE library
73
68
  - sftp
74
69
  - http
75
70
  - stdio
76
- - hdfs: `pip install 'megfile[hdfs]'`
71
+ - hdfs: `pip3 install 'megfile[hdfs]'`
72
+ - webdav: `pip3 install 'megfile[webdav]'`
77
73
 
78
74
  ## Quick Start
79
75
 
80
- Path string in `megfile` almost is `protocol://path/to/file`, for example `s3://bucketA/key`. But sftp path is a little different, format is `sftp://[username[:password]@]hostname[:port]//absolute_file_path`. More details see [path format document](https://megvii-research.github.io/megfile/path_format.html).
76
+ The interfaces of `megfile` correspond to those in the Python standard library. For example, `open` -> `smart_open` and `pathlib.Path` -> `SmartPath`. You only need to [configure the protocol settings](https://megvii-research.github.io/megfile/configuration.html) and provide the path in the corresponding format to use them conveniently.
77
+
78
+ A path string in `megfile` almost always has the form `protocol://path/to/file`, for example `s3://bucketA/key`. For more details, see the [path format document](https://megvii-research.github.io/megfile/path_format.html).
79
+
81
80
  Here's an example of writing a file to s3 / fs, syncing to local, reading and finally deleting it.
82
81
 
83
82
  ### Functional Interface
@@ -106,7 +105,7 @@ smart_glob('s3://playground/megfile-?.{mp4,avi}')
106
105
 
107
106
  ### SmartPath Interface
108
107
 
109
- `SmartPath` has a similar interface with pathlib.Path.
108
+ `SmartPath` has an interface similar to `pathlib.Path`.
110
109
 
111
110
  ```python
112
111
  from megfile.smart_path import SmartPath
@@ -138,28 +137,15 @@ $ megfile cp s3://playground/megfile-test /tmp/playground/megfile-test
138
137
 
139
138
  ```bash
140
139
  pip3 install megfile
141
- ```
142
140
 
143
- You can specify megfile version as well
144
- ```bash
145
- pip3 install "megfile~=0.0"
146
- ```
141
+ # for cli support
142
+ pip3 install 'megfile[cli]'
147
143
 
148
- ### Build from Source
144
+ # for hdfs support
145
+ pip3 install 'megfile[hdfs]'
149
146
 
150
- megfile can be installed from source
151
- ```bash
152
- git clone git@github.com:megvii-research/megfile.git
153
- cd megfile
154
- pip3 install -U .
155
- ```
156
-
157
- ### Development Environment
158
-
159
- ```bash
160
- git clone git@github.com:megvii-research/megfile.git
161
- cd megfile
162
- pip3 install -r requirements.txt -r requirements-dev.txt
147
+ # for webdav support
148
+ pip3 install 'megfile[webdav]'
163
149
  ```
164
150
 
165
151
  ## Configuration
@@ -218,6 +204,8 @@ You can get the configuration from `~/.config/megfile/aliases.conf`, like:
218
204
  protocol = s3+tos
219
205
  ```
220
206
 
207
+ You can use an alias in a path: for example, `tos://bucket/key` is equivalent to `s3+tos://bucket/key`.
208
+
221
209
  ## Benchmark
222
210
  [![10GiB](https://github.com/megvii-research/megfile/blob/main/scripts/benchmark/10GiB.png?raw=true)](https://megvii-research.github.io/megfile/benchmark.html)
223
211
  [![10MiB](https://github.com/megvii-research/megfile/blob/main/scripts/benchmark/10MiB.png?raw=true)](https://megvii-research.github.io/megfile/benchmark.html)