megfile 4.2.4__py3-none-any.whl → 5.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
megfile/interfaces.py CHANGED
@@ -1,6 +1,7 @@
1
1
  import os
2
2
  from abc import ABC, abstractmethod
3
3
  from io import IOBase, UnsupportedOperation
4
+ from logging import getLogger as get_logger
4
5
  from typing import IO, AnyStr, Iterable, List, Optional
5
6
 
6
7
  from megfile.pathlike import (
@@ -31,6 +32,8 @@ __all__ = [
31
32
  "URIPath",
32
33
  ]
33
34
 
35
+ _logger = get_logger(__name__)
36
+
34
37
 
35
38
  def fullname(o):
36
39
  klass = o.__class__
@@ -43,16 +46,39 @@ def fullname(o):
43
46
  # 1. Default value of closed is False
44
47
  # 2. closed is set to True when close() are called
45
48
  # 3. close() will only be called once
49
+ # 4. atomic means the file-like object should not be closed automatically
50
+ # when an exception is raised in the context manager or when the object is
51
+ # garbage collected.
52
+ # 5. atomic is False by default
46
53
  class Closable(ABC):
47
54
  @property
48
55
  def closed(self) -> bool:
49
56
  """Return True if the file-like object is closed."""
50
57
  return getattr(self, "__closed__", False)
51
58
 
59
+ @property
60
+ def atomic(self) -> bool:
61
+ """Return True if the file-like object is atomic."""
62
+ return getattr(self, "__atomic__", False)
63
+
52
64
  @abstractmethod
53
65
  def _close(self) -> None:
54
66
  pass # pragma: no cover
55
67
 
68
+ def _abort(self) -> None:
69
+ pass
70
+
71
+ def abort(self) -> bool:
72
+ """Abort the file-like object without saving.
73
+
74
+ This method has no effect if the file is already closed.
75
+ """
76
+ if not getattr(self, "__closed__", False):
77
+ self._abort()
78
+ setattr(self, "__closed__", True)
79
+ return True
80
+ return False
81
+
56
82
  def close(self) -> None:
57
83
  """Flush and close the file-like object.
58
84
 
@@ -66,6 +92,24 @@ class Closable(ABC):
66
92
  return self
67
93
 
68
94
  def __exit__(self, type, value, traceback) -> None:
95
+ if self.atomic and value is not None:
96
+ if self.abort():
97
+ from megfile.errors import full_error_message
98
+
99
+ _logger.warning(
100
+ f"skip closing atomic file-like object: {self}, "
101
+ f"since error encountered: {full_error_message(value)}"
102
+ )
103
+ return
104
+ self.close()
105
+
106
+ def __del__(self):
107
+ if self.atomic:
108
+ if self.abort():
109
+ _logger.warning(
110
+ f"skip closing atomic file-like object before deletion: {self}"
111
+ )
112
+ return
69
113
  self.close()
70
114
 
71
115
 
@@ -0,0 +1,92 @@
1
+ import os
2
+ from abc import ABC, abstractmethod
3
+ from io import BytesIO, UnsupportedOperation
4
+ from typing import Iterable, List, Optional
5
+
6
+ from megfile.interfaces import Readable, Seekable, Writable
7
+
8
+
9
+ class BaseMemoryHandler(Readable[bytes], Seekable, Writable[bytes], ABC):
10
+ def __init__(
11
+ self,
12
+ mode: str,
13
+ ):
14
+ self._mode = mode
15
+
16
+ if mode not in ("rb", "wb", "ab", "rb+", "wb+", "ab+"):
17
+ raise ValueError("unacceptable mode: %r" % mode)
18
+
19
+ self._fileobj = BytesIO()
20
+ self._download_fileobj()
21
+
22
+ @property
23
+ @abstractmethod
24
+ def name(self) -> str:
25
+ pass
26
+
27
+ @property
28
+ def mode(self) -> str:
29
+ return self._mode
30
+
31
+ def tell(self) -> int:
32
+ return self._fileobj.tell()
33
+
34
+ def seek(self, offset: int, whence: int = os.SEEK_SET) -> int:
35
+ return self._fileobj.seek(offset, whence)
36
+
37
+ def readable(self) -> bool:
38
+ return self._mode[0] == "r" or self._mode[-1] == "+"
39
+
40
+ def read(self, size: Optional[int] = None) -> bytes:
41
+ if not self.readable():
42
+ raise UnsupportedOperation("not readable")
43
+ return self._fileobj.read(size)
44
+
45
+ def readline(self, size: Optional[int] = None) -> bytes:
46
+ if not self.readable():
47
+ raise UnsupportedOperation("not readable")
48
+ if size is None:
49
+ size = -1
50
+ return self._fileobj.readline(size)
51
+
52
+ def readlines(self, hint: Optional[int] = None) -> List[bytes]:
53
+ if not self.readable():
54
+ raise UnsupportedOperation("not readable")
55
+ if hint is None:
56
+ hint = -1
57
+ return self._fileobj.readlines(hint)
58
+
59
+ def writable(self) -> bool:
60
+ return self._mode[0] == "w" or self._mode[0] == "a" or self._mode[-1] == "+"
61
+
62
+ def flush(self):
63
+ self._fileobj.flush()
64
+
65
+ def write(self, data: bytes) -> int:
66
+ if not self.writable():
67
+ raise UnsupportedOperation("not writable")
68
+ if self._mode[0] == "a":
69
+ self.seek(0, os.SEEK_END)
70
+ return self._fileobj.write(data)
71
+
72
+ def writelines(self, lines: Iterable[bytes]):
73
+ if not self.writable():
74
+ raise UnsupportedOperation("not writable")
75
+ if self._mode[0] == "a":
76
+ self.seek(0, os.SEEK_END)
77
+ self._fileobj.writelines(lines)
78
+
79
+ @abstractmethod
80
+ def _download_fileobj(self):
81
+ pass
82
+
83
+ @abstractmethod
84
+ def _upload_fileobj(self):
85
+ pass
86
+
87
+ def _close(self, need_upload: bool = True):
88
+ if hasattr(self, "_fileobj"):
89
+ need_upload = need_upload and self.writable()
90
+ if need_upload:
91
+ self._upload_fileobj()
92
+ self._fileobj.close()
megfile/lib/glob.py CHANGED
@@ -289,9 +289,9 @@ def get_non_glob_dir(glob: str):
289
289
  root_dir = []
290
290
  if glob.startswith("/"):
291
291
  root_dir.append("/")
292
- elif "://" in glob:
293
- protocol, glob = glob.split("://", 1)
294
- root_dir.append(f"{protocol}://")
292
+ elif "//" in glob:
293
+ protocol_or_domain, glob = glob.rsplit("//", 1)
294
+ root_dir.append(f"{protocol_or_domain}//")
295
295
  for name in glob.split("/"):
296
296
  if has_magic(name):
297
297
  break
@@ -85,29 +85,29 @@ class HttpPrefetchReader(BasePrefetchReader):
85
85
  "Cookies": response.cookies,
86
86
  "StatusCode": response.status_code,
87
87
  }
88
- else:
89
- range_end = end
90
- if self._content_size is not None:
91
- range_end = min(range_end, self._content_size - 1)
92
- headers = {"Range": f"bytes={start}-{range_end}"}
93
- with self._session.get(
94
- fspath(self._url), headers=headers, stream=True
95
- ) as response:
96
- if len(response.content) != int(response.headers["Content-Length"]):
97
- raise HttpBodyIncompleteError(
98
- "The downloaded content is incomplete, "
99
- "expected size: %s, actual size: %d"
100
- % (
101
- response.headers["Content-Length"],
102
- len(response.content),
103
- )
88
+
89
+ range_end = end
90
+ if self._content_size is not None:
91
+ range_end = min(range_end, self._content_size - 1)
92
+ headers = {"Range": f"bytes={start}-{range_end}"}
93
+ with self._session.get(
94
+ fspath(self._url), headers=headers, stream=True
95
+ ) as response:
96
+ if len(response.content) != int(response.headers["Content-Length"]):
97
+ raise HttpBodyIncompleteError(
98
+ "The downloaded content is incomplete, "
99
+ "expected size: %s, actual size: %d"
100
+ % (
101
+ response.headers["Content-Length"],
102
+ len(response.content),
104
103
  )
105
- return {
106
- "Body": BytesIO(response.content),
107
- "Headers": response.headers,
108
- "Cookies": response.cookies,
109
- "StatusCode": response.status_code,
110
- }
104
+ )
105
+ return {
106
+ "Body": BytesIO(response.content),
107
+ "Headers": response.headers,
108
+ "Cookies": response.cookies,
109
+ "StatusCode": response.status_code,
110
+ }
111
111
 
112
112
  fetch_response = patch_method(
113
113
  fetch_response,
megfile/lib/joinpath.py CHANGED
@@ -33,3 +33,16 @@ def uri_join(path: str, *other_paths: str) -> str:
33
33
 
34
34
  # Imp. 3
35
35
  # return '/'.join((path, *other_paths))
36
+
37
+
38
+ def uri_norm(path: str) -> str:
39
+ parts = path.split("/")
40
+ new_parts = []
41
+ for part in parts:
42
+ if part == ".":
43
+ continue
44
+ if part == ".." and new_parts and new_parts[-1] != "..":
45
+ new_parts.pop()
46
+ else:
47
+ new_parts.append(part)
48
+ return "/".join(new_parts)
@@ -53,11 +53,13 @@ class S3BufferedWriter(Writable[bytes]):
53
53
  max_buffer_size: int = WRITER_MAX_BUFFER_SIZE,
54
54
  max_workers: Optional[int] = None,
55
55
  profile_name: Optional[str] = None,
56
+ atomic: bool = False,
56
57
  ):
57
58
  self._bucket = bucket
58
59
  self._key = key
59
60
  self._client = s3_client
60
61
  self._profile_name = profile_name
62
+ self.__atomic__ = atomic
61
63
 
62
64
  # user maybe put block_size with 'numpy.uint64' type
63
65
  self._base_block_size = int(block_size)
@@ -213,6 +215,17 @@ class S3BufferedWriter(Writable[bytes]):
213
215
  if not self._is_global_executor:
214
216
  self._executor.shutdown()
215
217
 
218
+ def _abort(self):
219
+ _logger.debug("abort file: %r" % self.name)
220
+
221
+ if self._is_multipart:
222
+ with raise_s3_error(self.name):
223
+ self._client.abort_multipart_upload(
224
+ Bucket=self._bucket, Key=self._key, UploadId=self._upload_id
225
+ )
226
+
227
+ self._shutdown()
228
+
216
229
  def _close(self):
217
230
  _logger.debug("close file: %r" % self.name)
218
231
 
@@ -33,6 +33,7 @@ class S3LimitedSeekableWriter(S3BufferedWriter, Seekable):
33
33
  max_buffer_size: int = WRITER_MAX_BUFFER_SIZE,
34
34
  max_workers: Optional[int] = None,
35
35
  profile_name: Optional[str] = None,
36
+ atomic: bool = False,
36
37
  ):
37
38
  super().__init__(
38
39
  bucket,
@@ -42,6 +43,7 @@ class S3LimitedSeekableWriter(S3BufferedWriter, Seekable):
42
43
  max_buffer_size=max_buffer_size,
43
44
  max_workers=max_workers,
44
45
  profile_name=profile_name,
46
+ atomic=atomic,
45
47
  )
46
48
 
47
49
  self._head_block_size = head_block_size or block_size
@@ -1,17 +1,16 @@
1
1
  import os
2
- from io import BytesIO, UnsupportedOperation
3
- from typing import Iterable, List, Optional
2
+ from typing import Optional
4
3
 
5
4
  from megfile.errors import (
6
5
  S3ConfigError,
7
- UnknownError,
8
- raise_s3_error,
6
+ S3PermissionError,
7
+ S3UnknownError,
9
8
  translate_s3_error,
10
9
  )
11
- from megfile.interfaces import Readable, Seekable, Writable
10
+ from megfile.lib.base_memory_handler import BaseMemoryHandler
12
11
 
13
12
 
14
- class S3MemoryHandler(Readable[bytes], Seekable, Writable[bytes]):
13
+ class S3MemoryHandler(BaseMemoryHandler):
15
14
  def __init__(
16
15
  self,
17
16
  bucket: str,
@@ -23,75 +22,14 @@ class S3MemoryHandler(Readable[bytes], Seekable, Writable[bytes]):
23
22
  ):
24
23
  self._bucket = bucket
25
24
  self._key = key
26
- self._mode = mode
27
25
  self._client = s3_client
28
26
  self._profile_name = profile_name
29
-
30
- if mode not in ("rb", "wb", "ab", "rb+", "wb+", "ab+"):
31
- raise ValueError("unacceptable mode: %r" % mode)
32
-
33
- self._fileobj = BytesIO()
34
- self._download_fileobj()
27
+ super().__init__(mode=mode)
35
28
 
36
29
  @property
37
30
  def name(self) -> str:
38
- return "s3%s://%s/%s" % (
39
- f"+{self._profile_name}" if self._profile_name else "",
40
- self._bucket,
41
- self._key,
42
- )
43
-
44
- @property
45
- def mode(self) -> str:
46
- return self._mode
47
-
48
- def tell(self) -> int:
49
- return self._fileobj.tell()
50
-
51
- def seek(self, offset: int, whence: int = os.SEEK_SET) -> int:
52
- return self._fileobj.seek(offset, whence)
53
-
54
- def readable(self) -> bool:
55
- return self._mode[0] == "r" or self._mode[-1] == "+"
56
-
57
- def read(self, size: Optional[int] = None) -> bytes:
58
- if not self.readable():
59
- raise UnsupportedOperation("not readable")
60
- return self._fileobj.read(size)
61
-
62
- def readline(self, size: Optional[int] = None) -> bytes:
63
- if not self.readable():
64
- raise UnsupportedOperation("not readable")
65
- if size is None:
66
- size = -1
67
- return self._fileobj.readline(size)
68
-
69
- def readlines(self, hint: Optional[int] = None) -> List[bytes]:
70
- if not self.readable():
71
- raise UnsupportedOperation("not readable")
72
- if hint is None:
73
- hint = -1
74
- return self._fileobj.readlines(hint)
75
-
76
- def writable(self) -> bool:
77
- return self._mode[0] == "w" or self._mode[0] == "a" or self._mode[-1] == "+"
78
-
79
- def flush(self):
80
- self._fileobj.flush()
81
-
82
- def write(self, data: bytes) -> int:
83
- if not self.writable():
84
- raise UnsupportedOperation("not writable")
85
- if self._mode[0] == "a":
86
- self.seek(0, os.SEEK_END)
87
- return self._fileobj.write(data)
88
-
89
- def writelines(self, lines: Iterable[bytes]):
90
- if not self.writable():
91
- raise UnsupportedOperation("not writable")
92
- if self._mode[0] == "a":
93
- self.seek(0, os.SEEK_END)
94
- self._fileobj.writelines(lines)
31
+ protocol = f"s3+{self._profile_name}" if self._profile_name else "s3"
32
+ return f"{protocol}://{self._bucket}/{self._key}"
95
33
 
96
34
  def _translate_error(self, error: Exception):
97
35
  return translate_s3_error(error, self.name)
@@ -101,15 +39,14 @@ class S3MemoryHandler(Readable[bytes], Seekable, Writable[bytes]):
101
39
  self._client.head_object(Bucket=self._bucket, Key=self._key)
102
40
  except Exception as error:
103
41
  error = self._translate_error(error)
104
- if isinstance(error, (UnknownError, S3ConfigError)):
42
+ if isinstance(error, (S3UnknownError, S3ConfigError, S3PermissionError)):
105
43
  raise error
106
44
  return False
107
45
  return True
108
46
 
109
47
  def _download_fileobj(self):
110
- need_download = self._mode[0] == "r" or (
111
- self._mode[0] == "a" and self._file_exists()
112
- )
48
+ need_download = self._mode[0] == "r"
49
+ need_download = need_download or (self._mode[0] == "a" and self._file_exists())
113
50
  if not need_download:
114
51
  return
115
52
  # directly download to the file handle
@@ -126,11 +63,7 @@ class S3MemoryHandler(Readable[bytes], Seekable, Writable[bytes]):
126
63
  return
127
64
  # directly upload from file handle
128
65
  self.seek(0, os.SEEK_SET)
129
- with raise_s3_error(self.name):
66
+ try:
130
67
  self._client.upload_fileobj(self._fileobj, self._bucket, self._key)
131
-
132
- def _close(self, need_upload: bool = True):
133
- if hasattr(self, "_fileobj"):
134
- if need_upload:
135
- self._upload_fileobj()
136
- self._fileobj.close()
68
+ except Exception as error:
69
+ raise self._translate_error(error)
@@ -0,0 +1,83 @@
1
+ import os
2
+
3
+ from webdav3.client import Client as WebdavClient
4
+ from webdav3.client import Urn, WebDavXmlUtils, wrap_connection_error
5
+ from webdav3.exceptions import (
6
+ OptionNotValid,
7
+ RemoteResourceNotFound,
8
+ )
9
+
10
+ from megfile.lib.base_memory_handler import BaseMemoryHandler
11
+
12
+
13
+ def _webdav_stat(client: WebdavClient, remote_path: str):
14
+ urn = Urn(remote_path)
15
+ response = client.execute_request(
16
+ action="info", path=urn.quote(), headers_ext=["Depth: 0"]
17
+ )
18
+ path = client.get_full_path(urn)
19
+ info = WebDavXmlUtils.parse_info_response(
20
+ response.content, path, client.webdav.hostname
21
+ )
22
+ info["is_dir"] = WebDavXmlUtils.parse_is_dir_response(
23
+ response.content, path, client.webdav.hostname
24
+ )
25
+ return info
26
+
27
+
28
+ @wrap_connection_error
29
+ def _webdav_download_from(client: WebdavClient, buff, remote_path):
30
+ urn = Urn(remote_path)
31
+ if client.is_dir(urn.path()):
32
+ raise OptionNotValid(name="remote_path", value=remote_path)
33
+
34
+ if not client.check(urn.path()):
35
+ raise RemoteResourceNotFound(urn.path())
36
+
37
+ response = client.execute_request(action="download", path=urn.quote())
38
+
39
+ for chunk in response.iter_content(chunk_size=client.chunk_size):
40
+ buff.write(chunk)
41
+
42
+
43
+ class WebdavMemoryHandler(BaseMemoryHandler):
44
+ def __init__(
45
+ self,
46
+ remote_path: str,
47
+ mode: str,
48
+ *,
49
+ webdav_client: WebdavClient,
50
+ name: str,
51
+ ):
52
+ self._remote_path = remote_path
53
+ self._client = webdav_client
54
+ self._name = name
55
+ super().__init__(mode=mode)
56
+
57
+ @property
58
+ def name(self) -> str:
59
+ return self._name
60
+
61
+ def _file_exists(self) -> bool:
62
+ try:
63
+ return not _webdav_stat(self._client, self._remote_path)["is_dir"]
64
+ except RemoteResourceNotFound:
65
+ return False
66
+
67
+ def _download_fileobj(self):
68
+ need_download = self._mode[0] == "r"
69
+ need_download = need_download or (self._mode[0] == "a" and self._file_exists())
70
+ if not need_download:
71
+ return
72
+ # directly download to the file handle
73
+ _webdav_download_from(self._client, self._fileobj, self._remote_path)
74
+ if self._mode[0] == "r":
75
+ self.seek(0, os.SEEK_SET)
76
+
77
+ def _upload_fileobj(self):
78
+ need_upload = self.writable()
79
+ if not need_upload:
80
+ return
81
+ # directly upload from file handle
82
+ self.seek(0, os.SEEK_SET)
83
+ self._client.upload_to(self._fileobj, self._remote_path)
@@ -0,0 +1,115 @@
1
+ from io import BytesIO
2
+ from typing import Optional
3
+
4
+ from webdav3.client import Client as WebdavClient
5
+ from webdav3.client import Urn
6
+
7
+ from megfile.config import (
8
+ READER_BLOCK_SIZE,
9
+ READER_MAX_BUFFER_SIZE,
10
+ WEBDAV_MAX_RETRY_TIMES,
11
+ )
12
+ from megfile.errors import (
13
+ HttpBodyIncompleteError,
14
+ http_should_retry,
15
+ patch_method,
16
+ )
17
+ from megfile.lib.base_prefetch_reader import BasePrefetchReader
18
+ from megfile.lib.webdav_memory_handler import _webdav_stat
19
+
20
+ DEFAULT_TIMEOUT = (60, 60 * 60 * 24)
21
+
22
+
23
+ class WebdavPrefetchReader(BasePrefetchReader):
24
+ """
25
+ Reader to fast read the http content, service must support Accept-Ranges.
26
+
27
+ This will divide the file content into equal parts of block_size size, and will use
28
+ LRU to cache at most blocks in max_buffer_size memory.
29
+
30
+ open(), seek() and read() will trigger prefetch read.
31
+
32
+ The prefetch will cached block_forward blocks of data from offset position
33
+ (the position after reading if the called function is read).
34
+ """
35
+
36
+ def __init__(
37
+ self,
38
+ remote_path: str,
39
+ *,
40
+ client: Optional[WebdavClient] = None,
41
+ block_size: int = READER_BLOCK_SIZE,
42
+ max_buffer_size: int = READER_MAX_BUFFER_SIZE,
43
+ block_forward: Optional[int] = None,
44
+ max_retries: int = WEBDAV_MAX_RETRY_TIMES,
45
+ max_workers: Optional[int] = None,
46
+ ):
47
+ self._urn = Urn(remote_path)
48
+ self._remote_path = remote_path
49
+ self._client = client or WebdavClient({})
50
+
51
+ super().__init__(
52
+ block_size=block_size,
53
+ max_buffer_size=max_buffer_size,
54
+ block_forward=block_forward,
55
+ max_retries=max_retries,
56
+ max_workers=max_workers,
57
+ )
58
+
59
+ def _get_content_size(self) -> int:
60
+ info = _webdav_stat(self._client, self._remote_path)
61
+ return int(info.get("size") or 0)
62
+
63
+ @property
64
+ def name(self) -> str:
65
+ return self._remote_path
66
+
67
+ def _fetch_response(
68
+ self, start: Optional[int] = None, end: Optional[int] = None
69
+ ) -> dict:
70
+ def fetch_response() -> dict:
71
+ if start is None or end is None:
72
+ with self._client.execute_request(
73
+ action="download", path=self._urn.quote()
74
+ ) as response:
75
+ return {
76
+ "Headers": response.headers,
77
+ "Cookies": response.cookies,
78
+ "StatusCode": response.status_code,
79
+ }
80
+
81
+ range_end = end
82
+ if self._content_size is not None:
83
+ range_end = min(range_end, self._content_size - 1)
84
+ headers_ext = [f"Range: bytes={start}-{range_end}"]
85
+ with self._client.execute_request(
86
+ action="download", path=self._urn.quote(), headers_ext=headers_ext
87
+ ) as response:
88
+ headers = response.headers
89
+ if (
90
+ "Content-Length" in headers
91
+ and len(response.content) != int(headers["Content-Length"])
92
+ and not headers.get("Content-Encoding")
93
+ ):
94
+ raise HttpBodyIncompleteError(
95
+ "The downloaded content is incomplete, "
96
+ "expected size: %s, actual size: %d"
97
+ % (
98
+ headers["Content-Length"],
99
+ len(response.content),
100
+ )
101
+ )
102
+ return {
103
+ "Body": BytesIO(response.content),
104
+ "Headers": response.headers,
105
+ "Cookies": response.cookies,
106
+ "StatusCode": response.status_code,
107
+ }
108
+
109
+ fetch_response = patch_method(
110
+ fetch_response,
111
+ max_retries=self._max_retries,
112
+ should_retry=http_should_retry,
113
+ )
114
+
115
+ return fetch_response()
megfile/pathlike.py CHANGED
@@ -22,7 +22,6 @@ from megfile.lib.compat import PathLike as _PathLike
22
22
  from megfile.lib.compat import fspath
23
23
  from megfile.lib.fnmatch import _compile_pattern
24
24
  from megfile.lib.joinpath import uri_join
25
- from megfile.utils import classproperty
26
25
 
27
26
  Self = TypeVar("Self")
28
27
 
@@ -556,15 +555,15 @@ class BasePath:
556
555
  ) as f:
557
556
  return f.write(data)
558
557
 
559
- @classproperty
558
+ @cached_property
560
559
  def drive(self) -> str:
561
560
  return ""
562
561
 
563
- @classproperty
562
+ @cached_property
564
563
  def root(self) -> str:
565
564
  return self.protocol + "://"
566
565
 
567
- @classproperty
566
+ @cached_property
568
567
  def anchor(self) -> str:
569
568
  return self.root # pyre-ignore[7]
570
569