PyPI - megfile - Versions diffs - 3.1.1__py3-none-any.whl → 3.1.3__py3-none-any.whl - Mend

megfile 3.1.1__py3-none-any.whl → 3.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (55) hide show

docs/conf.py +2 -4
megfile/__init__.py +394 -203
megfile/cli.py +258 -238
megfile/config.py +25 -21
megfile/errors.py +126 -114
megfile/fs.py +174 -140
megfile/fs_path.py +462 -354
megfile/hdfs.py +133 -101
megfile/hdfs_path.py +290 -236
megfile/http.py +15 -14
megfile/http_path.py +111 -107
megfile/interfaces.py +70 -65
megfile/lib/base_prefetch_reader.py +84 -65
megfile/lib/combine_reader.py +12 -12
megfile/lib/compare.py +17 -13
megfile/lib/compat.py +1 -5
megfile/lib/fnmatch.py +29 -30
megfile/lib/glob.py +46 -54
megfile/lib/hdfs_prefetch_reader.py +40 -25
megfile/lib/hdfs_tools.py +1 -3
megfile/lib/http_prefetch_reader.py +69 -46
megfile/lib/joinpath.py +5 -5
megfile/lib/lazy_handler.py +7 -3
megfile/lib/s3_buffered_writer.py +58 -51
megfile/lib/s3_cached_handler.py +13 -14
megfile/lib/s3_limited_seekable_writer.py +37 -28
megfile/lib/s3_memory_handler.py +34 -30
megfile/lib/s3_pipe_handler.py +24 -25
megfile/lib/s3_prefetch_reader.py +71 -52
megfile/lib/s3_share_cache_reader.py +37 -24
megfile/lib/shadow_handler.py +7 -3
megfile/lib/stdio_handler.py +9 -8
megfile/lib/url.py +3 -3
megfile/pathlike.py +259 -228
megfile/s3.py +220 -153
megfile/s3_path.py +977 -802
megfile/sftp.py +190 -156
megfile/sftp_path.py +540 -450
megfile/smart.py +397 -330
megfile/smart_path.py +100 -105
megfile/stdio.py +10 -9
megfile/stdio_path.py +32 -35
megfile/utils/__init__.py +73 -54
megfile/utils/mutex.py +11 -14
megfile/version.py +1 -1
{megfile-3.1.1.dist-info → megfile-3.1.3.dist-info}/METADATA +5 -8
megfile-3.1.3.dist-info/RECORD +55 -0
{megfile-3.1.1.dist-info → megfile-3.1.3.dist-info}/WHEEL +1 -1
scripts/convert_results_to_sarif.py +45 -78
scripts/generate_file.py +140 -64
megfile-3.1.1.dist-info/RECORD +0 -55
{megfile-3.1.1.dist-info → megfile-3.1.3.dist-info}/LICENSE +0 -0
{megfile-3.1.1.dist-info → megfile-3.1.3.dist-info}/LICENSE.pyre +0 -0
{megfile-3.1.1.dist-info → megfile-3.1.3.dist-info}/entry_points.txt +0 -0
{megfile-3.1.1.dist-info → megfile-3.1.3.dist-info}/top_level.txt +0 -0

megfile/http.py CHANGED Viewed

@@ -2,30 +2,31 @@ from megfile.http_path import HttpPath, get_http_session, http_open, is_http
 from megfile.interfaces import PathLike, StatResult
 __all__ = [
-    'get_http_session',
-    'is_http',
-    'http_open',
-    'http_stat',
-    'http_getsize',
-    'http_getmtime',
-    'http_exists',
+    "get_http_session",
+    "is_http",
+    "http_open",
+    "http_stat",
+    "http_getsize",
+    "http_getmtime",
+    "http_exists",
 ]
 def http_stat(path: PathLike, follow_symlinks=True) -> StatResult:
-    '''
-    Get StatResult of http_url response, including size and mtime, referring to http_getsize and http_getmtime
+    """
+    Get StatResult of http_url response, including size and mtime,
+    referring to http_getsize and http_getmtime
     :param path: Given path
     :param follow_symlinks: Ignore this parameter, just for compatibility
     :returns: StatResult
     :raises: HttpPermissionError, HttpFileNotFoundError
-    '''
+    """
     return HttpPath(path).stat(follow_symlinks)
 def http_getsize(path: PathLike, follow_symlinks: bool = False) -> int:
-    '''
+    """
     Get file size on the given http_url path.
     If http response header don't support Content-Length, will return None
@@ -34,12 +35,12 @@ def http_getsize(path: PathLike, follow_symlinks: bool = False) -> int:
     :param follow_symlinks: Ignore this parameter, just for compatibility
     :returns: File size (in bytes)
     :raises: HttpPermissionError, HttpFileNotFoundError
-    '''
+    """
     return HttpPath(path).getsize(follow_symlinks)
 def http_getmtime(path: PathLike, follow_symlinks: bool = False) -> float:
-    '''
+    """
     Get Last-Modified time of the http request on the given http_url path.
     If http response header don't support Last-Modified, will return None
@@ -48,7 +49,7 @@ def http_getmtime(path: PathLike, follow_symlinks: bool = False) -> float:
     :param follow_symlinks: Ignore this parameter, just for compatibility
     :returns: Last-Modified time (in Unix timestamp format)
     :raises: HttpPermissionError, HttpFileNotFoundError
-    '''
+    """
     return HttpPath(path).getmtime(follow_symlinks)

megfile/http_path.py CHANGED Viewed

@@ -16,17 +16,10 @@ from megfile.lib.compat import fspath
 from megfile.lib.http_prefetch_reader import DEFAULT_TIMEOUT, HttpPrefetchReader
 from megfile.lib.s3_buffered_writer import DEFAULT_MAX_BUFFER_SIZE
 from megfile.lib.url import get_url_scheme
-from megfile.pathlike import PathLike
 from megfile.smart_path import SmartPath
 from megfile.utils import _is_pickle, binary_open
-__all__ = [
-    'HttpPath',
-    'HttpsPath',
-    'get_http_session',
-    'is_http',
-    'http_open',
-]
+__all__ = ["HttpPath", "HttpsPath", "get_http_session", "is_http", "http_open"]
 _logger = get_logger(__name__)
 max_retries = HTTP_MAX_RETRY_TIMES
@@ -34,7 +27,7 @@ max_retries = HTTP_MAX_RETRY_TIMES
 def get_http_session(
     timeout: Optional[Union[int, Tuple[int, int]]] = DEFAULT_TIMEOUT,
-    status_forcelist: Iterable[int] = (500, 502, 503, 504)
+    status_forcelist: Iterable[int] = (500, 502, 503, 504),
 ) -> requests.Session:
     session = requests.Session()
@@ -45,8 +38,8 @@ def get_http_session(
     def before_callback(method, url, **kwargs):
         _logger.debug(
-            'send http request: %s %r, with parameters: %s', method, url,
-            kwargs)
+            "send http request: %s %r, with parameters: %s", method, url, kwargs
+        )
     def retry_callback(
         error,
@@ -68,36 +61,38 @@ def get_http_session(
         json=None,
         **kwargs,
     ):
-        if data and hasattr(data, 'seek'):
+        if data and hasattr(data, "seek"):
             data.seek(0)
         elif isinstance(data, Iterator):
-            _logger.warning(f'Can not retry http request with iterator data')
+            _logger.warning("Can not retry http request with iterator data")
             raise
         if files:
             def seek_or_reopen(file_object):
                 if isinstance(file_object, (str, bytes)):
                     return file_object
-                elif hasattr(file_object, 'seek'):
+                elif hasattr(file_object, "seek"):
                     file_object.seek(0)
                     return file_object
-                elif hasattr(file_object, 'name'):
-                    with SmartPath(file_object.name).open('rb') as f:
+                elif hasattr(file_object, "name"):
+                    with SmartPath(file_object.name).open("rb") as f:
                         return BytesIO(f.read())
                 else:
                     _logger.warning(
-                        f'Can not retry http request, because the file object is not seekable and not support "name"'
+                        "Can not retry http request, because the file object "
+                        'is not seekable and not support "name"'
                     )
                     raise
             for key, file_info in files.items():
-                if hasattr(file_info, 'seek'):
+                if hasattr(file_info, "seek"):
                     file_info.seek(0)
-                elif isinstance(file_info,
-                                (tuple, list)) and len(file_info) >= 2:
+                elif isinstance(file_info, (tuple, list)) and len(file_info) >= 2:
                     file_info = list(file_info)
-                    if isinstance(file_info[1],
-                                  (tuple, list)) and len(file_info[1]) >= 2:
+                    if (
+                        isinstance(file_info[1], (tuple, list))
+                        and len(file_info[1]) >= 2
+                    ):
                         file_info[1] = list(file_info[1])
                         file_info[1] = seek_or_reopen(file_info[1])
                     else:
@@ -116,47 +111,53 @@ def get_http_session(
 def is_http(path: PathLike) -> bool:
-    '''http scheme definition: http(s)://domain/path
+    """http scheme definition: http(s)://domain/path
     :param path: Path to be tested
     :returns: True if path is http url, else False
-    '''
+    """
     path = fspath(path)
-    if not isinstance(path, str) or not (path.startswith('http://') or
-                                         path.startswith('https://')):
+    if not isinstance(path, str) or not (
+        path.startswith("http://") or path.startswith("https://")
+    ):
         return False
     scheme = get_url_scheme(path)
-    return scheme == 'http' or scheme == 'https'
+    return scheme == "http" or scheme == "https"
 def http_open(
-        path: PathLike,
-        mode: str = 'rb',
-        *,
-        encoding: Optional[str] = None,
-        errors: Optional[str] = None,
-        max_concurrency: Optional[int] = None,
-        max_buffer_size: int = DEFAULT_MAX_BUFFER_SIZE,
-        forward_ratio: Optional[float] = None,
-        block_size: int = DEFAULT_BLOCK_SIZE,
-        **kwargs) -> Union[BufferedReader, HttpPrefetchReader]:
-    '''Open a BytesIO to read binary data of given http(s) url
+    path: PathLike,
+    mode: str = "rb",
+    *,
+    encoding: Optional[str] = None,
+    errors: Optional[str] = None,
+    max_concurrency: Optional[int] = None,
+    max_buffer_size: int = DEFAULT_MAX_BUFFER_SIZE,
+    forward_ratio: Optional[float] = None,
+    block_size: int = DEFAULT_BLOCK_SIZE,
+    **kwargs,
+) -> Union[BufferedReader, HttpPrefetchReader]:
+    """Open a BytesIO to read binary data of given http(s) url
     .. note ::
-        Essentially, it reads data of http(s) url to memory by requests, and then return BytesIO to user.
+        Essentially, it reads data of http(s) url to memory by requests,
+        and then return BytesIO to user.
     :param path: Given path
     :param mode: Only supports 'rb' mode now
-    :param encoding: encoding is the name of the encoding used to decode or encode the file. This should only be used in text mode.
-    :param errors: errors is an optional string that specifies how encoding and decoding errors are to be handled—this cannot be used in binary mode.
+    :param encoding: encoding is the name of the encoding used to decode or encode
+        the file. This should only be used in text mode.
+    :param errors: errors is an optional string that specifies how encoding and decoding
+        errors are to be handled—this cannot be used in binary mode.
     :param max_concurrency: Max download thread number, None by default
     :param max_buffer_size: Max cached buffer size in memory, 128MB by default
-    :param block_size: Size of single block, 8MB by default. Each block will be uploaded or downloaded by single thread.
+    :param block_size: Size of single block, 8MB by default. Each block will be uploaded
+        or downloaded by single thread.
     :return: BytesIO initialized with http(s) data
-    '''
+    """
     return HttpPath(path).open(
         mode,
         encoding=encoding,
@@ -164,68 +165,73 @@ def http_open(
         max_concurrency=max_concurrency,
         max_buffer_size=max_buffer_size,
         forward_ratio=forward_ratio,
-        block_size=block_size)
+        block_size=block_size,
+    )
 @SmartPath.register
 class HttpPath(URIPath):
     protocol = "http"
     def __init__(self, path: PathLike, *other_paths: PathLike):
         super().__init__(path, *other_paths)
-        if fspath(path).startswith('https://'):
-            self.protocol = 'https'
+        if fspath(path).startswith("https://"):
+            self.protocol = "https"
         self.request_kwargs = {}
     @binary_open
     def open(
-            self,
-            mode: str = 'rb',
-            *,
-            max_concurrency: Optional[int] = None,
-            max_buffer_size: int = DEFAULT_MAX_BUFFER_SIZE,
-            forward_ratio: Optional[float] = None,
-            block_size: int = DEFAULT_BLOCK_SIZE,
-            **kwargs) -> Union[BufferedReader, HttpPrefetchReader]:
-        '''Open a BytesIO to read binary data of given http(s) url
+        self,
+        mode: str = "rb",
+        *,
+        max_concurrency: Optional[int] = None,
+        max_buffer_size: int = DEFAULT_MAX_BUFFER_SIZE,
+        forward_ratio: Optional[float] = None,
+        block_size: int = DEFAULT_BLOCK_SIZE,
+        **kwargs,
+    ) -> Union[BufferedReader, HttpPrefetchReader]:
+        """Open a BytesIO to read binary data of given http(s) url
         .. note ::
-            Essentially, it reads data of http(s) url to memory by requests, and then return BytesIO to user.
+            Essentially, it reads data of http(s) url to memory by requests,
+            and then return BytesIO to user.
         :param mode: Only supports 'rb' mode now
-        :param encoding: encoding is the name of the encoding used to decode or encode the file. This should only be used in text mode.
-        :param errors: errors is an optional string that specifies how encoding and decoding errors are to be handled—this cannot be used in binary mode.
+        :param encoding: encoding is the name of the encoding used to decode or encode
+            the file. This should only be used in text mode.
+        :param errors: errors is an optional string that specifies how encoding and
+            decoding errors are to be handled—this cannot be used in binary mode.
         :param max_concurrency: Max download thread number, None by default
         :param max_buffer_size: Max cached buffer size in memory, 128MB by default
-        :param block_size: Size of single block, 8MB by default. Each block will be uploaded or downloaded by single thread.
+        :param block_size: Size of single block, 8MB by default. Each block will
+            be uploaded or downloaded by single thread.
         :return: BytesIO initialized with http(s) data
-        '''
-        if mode not in ('rb',):
-            raise ValueError('unacceptable mode: %r' % mode)
+        """
+        if mode not in ("rb",):
+            raise ValueError("unacceptable mode: %r" % mode)
         response = None
         request_kwargs = deepcopy(self.request_kwargs)
-        timeout = request_kwargs.pop('timeout', DEFAULT_TIMEOUT)
-        stream = request_kwargs.pop('stream', True)
+        timeout = request_kwargs.pop("timeout", DEFAULT_TIMEOUT)
+        stream = request_kwargs.pop("stream", True)
         try:
-            response = get_http_session(
-                timeout=timeout,
-                status_forcelist=(),
-            ).get(
-                self.path_with_protocol, stream=stream, **request_kwargs)
+            response = get_http_session(timeout=timeout, status_forcelist=()).get(
+                self.path_with_protocol, stream=stream, **request_kwargs
+            )
             response.raise_for_status()
         except Exception as error:
             if response:
                 response.close()
             raise translate_http_error(error, self.path_with_protocol)
-        content_size = int(response.headers['Content-Length'])
-        if (response.headers.get('Accept-Ranges') == 'bytes' and
-                content_size >= block_size * 2 and
-                not response.headers.get('Content-Encoding')):
+        content_size = int(response.headers["Content-Length"])
+        if (
+            response.headers.get("Accept-Ranges") == "bytes"
+            and content_size >= block_size * 2
+            and not response.headers.get("Content-Encoding")
+        ):
             response.close()
             block_capacity = max_buffer_size // block_size
@@ -248,56 +254,56 @@ class HttpPath(URIPath):
             return reader
         response.raw.name = self.path_with_protocol
-        # TODO: When python version must bigger than 3.10, use urllib3>=2.0.0 instead of 'Response'
+        # TODO: When python version must bigger than 3.10,
+        # use urllib3>=2.0.0 instead of 'Response'
         # response.raw.auto_close = False
         # response.raw.decode_content = True
         # return BufferedReader(response.raw)
         return BufferedReader(Response(response.raw))  # type: ignore
     def stat(self, follow_symlinks=True) -> StatResult:
-        '''
-        Get StatResult of http_url response, including size and mtime, referring to http_getsize and http_getmtime
+        """
+        Get StatResult of http_url response, including size and mtime,
+        referring to http_getsize and http_getmtime
         :param follow_symlinks: Ignore this parameter, just for compatibility
         :returns: StatResult
         :raises: HttpPermissionError, HttpFileNotFoundError
-        '''
+        """
         request_kwargs = deepcopy(self.request_kwargs)
-        timeout = request_kwargs.pop('timeout', DEFAULT_TIMEOUT)
-        stream = request_kwargs.pop('stream', True)
+        timeout = request_kwargs.pop("timeout", DEFAULT_TIMEOUT)
+        stream = request_kwargs.pop("stream", True)
         try:
             with get_http_session(timeout=timeout, status_forcelist=()).get(
-                    self.path_with_protocol, stream=stream,
-                    **request_kwargs) as response:
+                self.path_with_protocol, stream=stream, **request_kwargs
+            ) as response:
                 response.raise_for_status()
                 headers = response.headers
         except Exception as error:
             raise translate_http_error(error, self.path_with_protocol)
-        size = headers.get('Content-Length')
+        size = headers.get("Content-Length")
         if size:
             size = int(size)
         else:
             size = 0
-        last_modified = headers.get('Last-Modified')
+        last_modified = headers.get("Last-Modified")
         if last_modified:
             last_modified = time.mktime(
-                time.strptime(last_modified, "%a, %d %b %Y %H:%M:%S %Z"))
+                time.strptime(last_modified, "%a, %d %b %Y %H:%M:%S %Z")
+            )
         else:
             last_modified = 0.0
         return StatResult(
-            size=size,
-            mtime=last_modified,
-            isdir=False,
-            islnk=False,
-            extra=headers)
+            size=size, mtime=last_modified, isdir=False, islnk=False, extra=headers
+        )
     def getsize(self, follow_symlinks: bool = False) -> int:
-        '''
+        """
         Get file size on the given http_url path.
         If http response header don't support Content-Length, will return None
@@ -305,19 +311,19 @@ class HttpPath(URIPath):
         :param follow_symlinks: Ignore this parameter, just for compatibility
         :returns: File size (in bytes)
         :raises: HttpPermissionError, HttpFileNotFoundError
-        '''
+        """
         return self.stat().size
     def getmtime(self, follow_symlinks: bool = False) -> float:
-        '''
+        """
         Get Last-Modified time of the http request on the given http_url path.
         If http response header don't support Last-Modified, will return None
         :param follow_symlinks: Ignore this parameter, just for compatibility
         :returns: Last-Modified time (in Unix timestamp format)
         :raises: HttpPermissionError, HttpFileNotFoundError
-        '''
+        """
         return self.stat().mtime
     def exists(self, followlinks: bool = False) -> bool:
@@ -329,13 +335,13 @@ class HttpPath(URIPath):
         :rtype: bool
         """
         request_kwargs = deepcopy(self.request_kwargs)
-        timeout = request_kwargs.pop('timeout', DEFAULT_TIMEOUT)
-        stream = request_kwargs.pop('stream', True)
+        timeout = request_kwargs.pop("timeout", DEFAULT_TIMEOUT)
+        stream = request_kwargs.pop("stream", True)
         try:
             with get_http_session(timeout=timeout, status_forcelist=()).get(
-                    self.path_with_protocol, stream=stream,
-                    **request_kwargs) as response:
+                self.path_with_protocol, stream=stream, **request_kwargs
+            ) as response:
                 if response.status_code == 404:
                     return False
                 return True
@@ -345,12 +351,10 @@ class HttpPath(URIPath):
 @SmartPath.register
 class HttpsPath(HttpPath):
     protocol = "https"
 class Response(Readable[bytes]):
     def __init__(self, raw: HTTPResponse) -> None:
         super().__init__()
@@ -367,7 +371,7 @@ class Response(Readable[bytes]):
     @property
     def mode(self):
-        return 'rb'
+        return "rb"
     def tell(self) -> int:
         return self._offset
@@ -378,7 +382,7 @@ class Response(Readable[bytes]):
     def read(self, size: Optional[int] = None) -> bytes:
         if size == 0:
-            return b''
+            return b""
         if size is not None and size < 0:
             size = None
@@ -399,7 +403,7 @@ class Response(Readable[bytes]):
     def readline(self, size: Optional[int] = None) -> bytes:
         if size == 0:
-            return b''
+            return b""
         if size is not None and size < 0:
             size = None
@@ -407,11 +411,11 @@ class Response(Readable[bytes]):
             self._buffer.seek(0)
             buffer = self._buffer.read()
             self._clear_buffer()
-            if b'\n' in buffer:
-                content = buffer[:buffer.index(b'\n') + 1]
+            if b"\n" in buffer:
+                content = buffer[: buffer.index(b"\n") + 1]
                 if size:
                     content = content[:size]
-                self._buffer.write(buffer[len(content):])
+                self._buffer.write(buffer[len(content) :])
             elif size and len(buffer) >= size:
                 content = buffer[:size]
                 self._buffer.write(buffer[size:])

megfile 3.1.1__py3-none-any.whl → 3.1.3__py3-none-any.whl

megfile 3.1.1__py3-none-any.whl → 3.1.3__py3-none-any.whl