megfile 3.1.1__py3-none-any.whl → 3.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. docs/conf.py +2 -4
  2. megfile/__init__.py +394 -203
  3. megfile/cli.py +258 -238
  4. megfile/config.py +25 -21
  5. megfile/errors.py +124 -114
  6. megfile/fs.py +174 -140
  7. megfile/fs_path.py +462 -354
  8. megfile/hdfs.py +133 -101
  9. megfile/hdfs_path.py +290 -236
  10. megfile/http.py +15 -14
  11. megfile/http_path.py +111 -107
  12. megfile/interfaces.py +70 -65
  13. megfile/lib/base_prefetch_reader.py +84 -65
  14. megfile/lib/combine_reader.py +12 -12
  15. megfile/lib/compare.py +17 -13
  16. megfile/lib/compat.py +1 -5
  17. megfile/lib/fnmatch.py +29 -30
  18. megfile/lib/glob.py +46 -54
  19. megfile/lib/hdfs_prefetch_reader.py +40 -25
  20. megfile/lib/hdfs_tools.py +1 -3
  21. megfile/lib/http_prefetch_reader.py +69 -46
  22. megfile/lib/joinpath.py +5 -5
  23. megfile/lib/lazy_handler.py +7 -3
  24. megfile/lib/s3_buffered_writer.py +58 -51
  25. megfile/lib/s3_cached_handler.py +13 -14
  26. megfile/lib/s3_limited_seekable_writer.py +37 -28
  27. megfile/lib/s3_memory_handler.py +34 -30
  28. megfile/lib/s3_pipe_handler.py +24 -25
  29. megfile/lib/s3_prefetch_reader.py +71 -52
  30. megfile/lib/s3_share_cache_reader.py +37 -24
  31. megfile/lib/shadow_handler.py +7 -3
  32. megfile/lib/stdio_handler.py +9 -8
  33. megfile/lib/url.py +3 -3
  34. megfile/pathlike.py +259 -228
  35. megfile/s3.py +220 -153
  36. megfile/s3_path.py +977 -802
  37. megfile/sftp.py +190 -156
  38. megfile/sftp_path.py +540 -450
  39. megfile/smart.py +397 -330
  40. megfile/smart_path.py +100 -105
  41. megfile/stdio.py +10 -9
  42. megfile/stdio_path.py +32 -35
  43. megfile/utils/__init__.py +73 -54
  44. megfile/utils/mutex.py +11 -14
  45. megfile/version.py +1 -1
  46. {megfile-3.1.1.dist-info → megfile-3.1.2.dist-info}/METADATA +5 -8
  47. megfile-3.1.2.dist-info/RECORD +55 -0
  48. {megfile-3.1.1.dist-info → megfile-3.1.2.dist-info}/WHEEL +1 -1
  49. scripts/convert_results_to_sarif.py +45 -78
  50. scripts/generate_file.py +140 -64
  51. megfile-3.1.1.dist-info/RECORD +0 -55
  52. {megfile-3.1.1.dist-info → megfile-3.1.2.dist-info}/LICENSE +0 -0
  53. {megfile-3.1.1.dist-info → megfile-3.1.2.dist-info}/LICENSE.pyre +0 -0
  54. {megfile-3.1.1.dist-info → megfile-3.1.2.dist-info}/entry_points.txt +0 -0
  55. {megfile-3.1.1.dist-info → megfile-3.1.2.dist-info}/top_level.txt +0 -0
megfile/lib/combine_reader.py CHANGED
@@ -5,11 +5,10 @@ from typing import IO, AnyStr, List, Optional, Union
 from megfile.interfaces import Readable, Seekable
 from megfile.utils import get_content_size, get_mode, get_name, is_readable
 
-NEWLINE = ord('\n')
+NEWLINE = ord("\n")
 
 
 class CombineReader(Readable, Seekable):
-
     def __init__(self, file_objects: List[IO], name: str):
         self._file_objects = file_objects
         self._blocks_sizes = []
@@ -19,14 +18,15 @@ class CombineReader(Readable, Seekable):
         self._mode = None
         for file_object in self._file_objects:
             if not is_readable(file_object):
-                raise IOError('not readable: %r' % get_name(file_object))
+                raise IOError("not readable: %r" % get_name(file_object))
             mode = get_mode(file_object)
             if self._mode is None:
                 self._mode = mode
             if self._mode != mode:
                 raise IOError(
-                    'inconsistent mode: %r, expected: %r, got: %r' %
-                    (get_name(file_object), self._mode, mode))
+                    "inconsistent mode: %r, expected: %r, got: %r"
+                    % (get_name(file_object), self._mode, mode)
+                )
             self._blocks_sizes.append(self._content_size)
             self._content_size += get_content_size(file_object)
         self._blocks_sizes.append(self._content_size)
@@ -36,7 +36,7 @@ class CombineReader(Readable, Seekable):
         for index, size in enumerate(self._blocks_sizes):
             if self._offset < size:
                 return index - 1, self._offset - self._blocks_sizes[index - 1]
-        raise IOError('offset out of range: %d' % self._offset)
+        raise IOError("offset out of range: %d" % self._offset)
 
     @property
     def name(self) -> str:
@@ -50,12 +50,12 @@ class CombineReader(Readable, Seekable):
         return self._offset
 
     def _empty_bytes(self) -> AnyStr:  # pyre-ignore[34]
-        if 'b' in self._mode:
-            return b''  # pyre-ignore[7]
-        return ''  # pyre-ignore[7]
+        if "b" in self._mode:
+            return b""  # pyre-ignore[7]
+        return ""  # pyre-ignore[7]
 
     def _empty_buffer(self) -> Union[BytesIO, StringIO]:
-        if 'b' in self._mode:
+        if "b" in self._mode:
             return BytesIO()
         return StringIO()
 
@@ -107,10 +107,10 @@ class CombineReader(Readable, Seekable):
         elif whence == os.SEEK_END:
             target_offset = self._content_size + offset
         else:
-            raise ValueError('invalid whence: %r' % whence)
+            raise ValueError("invalid whence: %r" % whence)
 
         if target_offset < 0:
-            raise ValueError('negative seek value %r' % target_offset)
+            raise ValueError("negative seek value %r" % target_offset)
 
         self._offset = target_offset
         return self._offset
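
For orientation (the hunks above only change quoting and layout): CombineReader concatenates several readable file objects into one seekable stream, and the cumulative _blocks_sizes table is what maps a global offset to a (block index, offset-within-block) pair. A minimal standalone sketch of that mapping, mirroring the lookup logic visible in the diff (the helper name locate is ours; inside the class this sits behind self._offset and self._blocks_sizes):

    from typing import List, Tuple

    def locate(blocks_sizes: List[int], offset: int) -> Tuple[int, int]:
        # blocks_sizes is cumulative: [0, size0, size0 + size1, ...]
        for index, size in enumerate(blocks_sizes):
            if offset < size:
                return index - 1, offset - blocks_sizes[index - 1]
        raise IOError("offset out of range: %d" % offset)

    # Two files of 10 and 5 bytes give the cumulative table [0, 10, 15];
    # global byte 12 is byte 2 of the second file.
    assert locate([0, 10, 15], 12) == (1, 2)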
megfile/lib/compare.py CHANGED
@@ -5,17 +5,19 @@ from megfile.pathlike import StatResult
 
 
 def get_sync_type(src_protocol, dst_protocol):
-    if src_protocol == 's3' and dst_protocol != 's3':
-        return 'download'
-    elif src_protocol != 's3' and dst_protocol == 's3':
-        return 'upload'
+    if src_protocol == "s3" and dst_protocol != "s3":
+        return "download"
+    elif src_protocol != "s3" and dst_protocol == "s3":
+        return "upload"
     else:
-        return 'copy'
+        return "copy"
 
 
 def compare_time(
-        src_stat: Union[StatResult, stat_result],
-        dest_stat: Union[StatResult, stat_result], sync_type: str):
+    src_stat: Union[StatResult, stat_result],
+    dest_stat: Union[StatResult, stat_result],
+    sync_type: str,
+):
     """
     :returns: True if the file does not need updating based on time of
         last modification and type of operation.
@@ -35,7 +37,6 @@ def compare_time(
         # at the source location.
         return False
     elif sync_type == "download":
-
         if delta <= 0:
             return True
         else:
@@ -45,14 +46,17 @@
 
 
 def is_same_file(
-        src_stat: Union[StatResult, stat_result],
-        dest_stat: Union[StatResult, stat_result], sync_type: str):
+    src_stat: Union[StatResult, stat_result],
+    dest_stat: Union[StatResult, stat_result],
+    sync_type: str,
+):
     """
-    Determines whether or not the source and destination files should be synced based on a comparison of their size and last modified time.
+    Determines whether or not the source and destination files should be synced based on
+    a comparison of their size and last modified time.
 
-    :param src_stat: A Union[StatResult, stat_result] object representing the source file to be compared.
+    :param src_stat: A object representing the source file to be compared.
     :type src_stat: Union[StatResult, stat_result]
-    :param dest_stat: A Union[StatResult, stat_result] object representing the destination file to be compared.
+    :param dest_stat: A object representing the destination file to be compared.
     :type dest_stat: Union[StatResult, stat_result]
 
    :return: A boolean value indicating whether or not the files should be synced.
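
As the hunks show, get_sync_type classifies a transfer purely by whether each protocol string equals "s3". A quick illustration (protocol names other than "s3" are arbitrary here; assumes megfile is importable):

    from megfile.lib.compare import get_sync_type

    assert get_sync_type("s3", "file") == "download"  # from s3 to anything else
    assert get_sync_type("file", "s3") == "upload"    # from anything else to s3
    assert get_sync_type("s3", "s3") == "copy"        # same protocol on both sides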
megfile/lib/compat.py CHANGED
@@ -1,11 +1,7 @@
 import os
 from os import PathLike
 
-__all__ = [
-    'PathLike',
-    'fspath',
-    'copytree',
-]
+__all__ = ["PathLike", "fspath"]
 
 
 def fspath(path) -> str:
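
Besides the quote-style change, note that 'copytree' is dropped from __all__ here. The remaining fspath helper (signature shown above) coerces a path-like object to str; a hedged usage sketch, assuming it mirrors os.fspath() for str and PathLike inputs while always returning str:

    from pathlib import PurePosixPath
    from megfile.lib.compat import fspath

    # Assumption: behaves like os.fspath() for these inputs.
    assert fspath("a/b.txt") == "a/b.txt"
    assert fspath(PurePosixPath("a/b.txt")) == "a/b.txt"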
megfile/lib/fnmatch.py CHANGED
@@ -9,6 +9,7 @@ expression. They cache the compiled regular expressions for speed.
 The function translate(PATTERN) returns a regular expression
 corresponding to PATTERN. (It does not compile it.)
 """
+
 """Compared with the standard library, syntax '{seq1,seq2}' is supported"""
 
 import functools
@@ -67,7 +68,7 @@ def filter(names: List[str], pat: str) -> List[str]:
 
 
 def _compat(res: str) -> str:
-    return r'(?s:%s)\Z' % res
+    return r"(?s:%s)\Z" % res
 
 
 def translate(pat: str) -> str:
@@ -77,58 +78,56 @@ def translate(pat: str) -> str:
     """
 
     i, n = 0, len(pat)
-    res = ''
+    res = ""
     while i < n:
         c = pat[i]
         i = i + 1
-        if c == '*':
+        if c == "*":
             j = i
-            while j < n and pat[j] == '*':
+            while j < n and pat[j] == "*":
                 j = j + 1
             if j > i:
-                if (j < n and pat[j] == '/') and \
-                        (i <= 1 or pat[i - 2] == '/'):
+                if (j < n and pat[j] == "/") and (i <= 1 or pat[i - 2] == "/"):
                     # hit /**/ instead of /seq**/
                     j = j + 1
-                    res = res + r'(.*/)?'
+                    res = res + r"(.*/)?"
                 else:
-                    res = res + r'.*'
+                    res = res + r".*"
             else:
-                res = res + r'[^/]*'
+                res = res + r"[^/]*"
             i = j
-        elif c == '?':
-            res = res + r'.'
-        elif c == '[':
+        elif c == "?":
+            res = res + r"."
+        elif c == "[":
             j = i
-            if j < n and pat[j] == '!':
+            if j < n and pat[j] == "!":
                 j = j + 1
-            if j < n and pat[j] == ']':
+            if j < n and pat[j] == "]":
                 j = j + 1
-            while j < n and pat[j] != ']':
+            while j < n and pat[j] != "]":
                 j = j + 1
             if j >= n:
-                res = res + r'\['
+                res = res + r"\["
             else:
-                stuff = pat[i:j].replace('\\', r'\\')
+                stuff = pat[i:j].replace("\\", r"\\")
                 i = j + 1
-                if stuff[0] == '!':
-                    stuff = r'^' + stuff[1:]
-                elif stuff[0] == '^':
-                    stuff = '\\' + stuff
-                res = r'%s[%s]' % (res, stuff)
-        elif c == '{':
+                if stuff[0] == "!":
+                    stuff = r"^" + stuff[1:]
+                elif stuff[0] == "^":
+                    stuff = "\\" + stuff
+                res = r"%s[%s]" % (res, stuff)
+        elif c == "{":
             j = i
-            if j < n and pat[j] == '}':
+            if j < n and pat[j] == "}":
                 j = j + 1
-            while j < n and pat[j] != '}':
+            while j < n and pat[j] != "}":
                 j = j + 1
             if j >= n:
-                res = res + r'\{'
+                res = res + r"\{"
             else:
-                stuff = pat[i:j].replace('\\', r'\\')
-                stuff = r'|'.join(
-                    map(re.escape, stuff.split(',')))  # pyre-ignore[6]
-                res = r'%s(%s)' % (res, stuff)
+                stuff = pat[i:j].replace("\\", r"\\")
+                stuff = r"|".join(map(re.escape, stuff.split(",")))  # pyre-ignore[6]
+                res = r"%s(%s)" % (res, stuff)
             i = j + 1
         else:
             res = res + re.escape(c)
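
This fnmatch variant extends the stdlib syntax with '{seq1,seq2}' alternation, translating it to a regex group (seq1|seq2), while '*' compiles to [^/]* (it cannot cross a path separator) and a bare /**/ becomes (.*/)?. A small illustration using the module's filter() (signature visible in the hunk header above); the sample names are invented, and we assume filter() preserves input order like its stdlib counterpart:

    from megfile.lib.fnmatch import filter as fnmatch_filter

    names = ["data/train/a.txt", "data/test/b.txt", "data/val/c.txt"]

    # '{train,test}' expands to (train|test); '*' stays within one path
    # segment, so 'data/val/c.txt' does not match.
    assert fnmatch_filter(names, "data/{train,test}/*.txt") == [
        "data/train/a.txt",
        "data/test/b.txt",
    ]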
megfile/lib/glob.py CHANGED
@@ -1,4 +1,5 @@
 """Filename globbing utility."""
+
 """remove once py35 is dead"""
 
 import os
@@ -10,16 +11,16 @@ from typing import Iterator, List, Tuple
 from megfile.lib import fnmatch
 
 # Python 3.5+ Compatible
-'''
+"""
 class FSFunc(NamedTuple):
     exists: Callable[[str], bool]
     isdir: Callable[[str], bool]
     scandir: Callable[[str], Iterator[Tuple[str, bool]]]  # name, isdir
 
 in Python 3.6+
-'''
+"""
 
-FSFunc = NamedTuple('FSFunc', ['exists', 'isdir', 'scandir'])
+FSFunc = NamedTuple("FSFunc", ["exists", "isdir", "scandir"])
 
 
 def _exists(path: str) -> bool:
@@ -39,10 +40,8 @@ DEFAULT_FILESYSTEM_FUNC = FSFunc(_exists, _isdir, _scandir)
 
 
 def glob(
-        pathname: str,
-        *,
-        recursive: bool = False,
-        fs: FSFunc = DEFAULT_FILESYSTEM_FUNC) -> List[str]:
+    pathname: str, *, recursive: bool = False, fs: FSFunc = DEFAULT_FILESYSTEM_FUNC
+) -> List[str]:
     """Return a list of paths matching a pathname pattern.
 
     The pattern may contain simple shell-style wildcards a la
@@ -57,10 +56,8 @@
 
 
 def iglob(
-        pathname: str,
-        *,
-        recursive: bool = False,
-        fs: FSFunc = DEFAULT_FILESYSTEM_FUNC) -> Iterator[str]:
+    pathname: str, *, recursive: bool = False, fs: FSFunc = DEFAULT_FILESYSTEM_FUNC
+) -> Iterator[str]:
     """Return an iterator which yields the paths matching a pathname pattern.
 
     The pattern may contain simple shell-style wildcards a la
@@ -80,10 +77,9 @@
     return it
 
 
-def _iglob(pathname: str, recursive: bool, dironly: bool,
-           fs: FSFunc) -> Iterator[str]:
-    if '://' in pathname:
-        protocol, path_without_protocol = pathname.split('://', 1)
+def _iglob(pathname: str, recursive: bool, dironly: bool, fs: FSFunc) -> Iterator[str]:
+    if "://" in pathname:
+        protocol, path_without_protocol = pathname.split("://", 1)
     else:
         protocol, path_without_protocol = "", pathname
     dirname, basename = os.path.split(path_without_protocol)
@@ -92,8 +88,7 @@ def _iglob(pathname: str, recursive: bool, dironly: bool,
     if not has_magic(pathname):
         if dironly:
             # TODO: replace AssertionError with OSError in 4.0.0
-            raise AssertionError(
-                "can't use dironly with non-magic patterns in _iglob")
+            raise AssertionError("can't use dironly with non-magic patterns in _iglob")
         if basename:
             if fs.exists(pathname):
                 yield pathname
@@ -153,8 +148,7 @@ def _glob0(dirname: str, basename: str, dironly: bool, fs: FSFunc) -> List[str]:
 
 # This helper function recursively yields relative pathnames inside a literal
 # directory.
-def _glob2(dirname: str, pattern: str, dironly: bool,
-           fs: FSFunc) -> Iterator[str]:
+def _glob2(dirname: str, pattern: str, dironly: bool, fs: FSFunc) -> Iterator[str]:
     if not _isrecursive(pattern):
         # TODO: replace AssertionError with OSError in 4.0.0
         raise AssertionError("error call '_glob2' with non-glob pattern")
@@ -194,10 +188,10 @@ def _rlistdir(dirname: str, dironly: bool, fs: FSFunc) -> Iterator[str]:
             yield os.path.join(x, y)
 
 
-magic_check = re.compile(r'([*?[{])')
-magic_decheck = re.compile(r'\[(.)\]')
-brace_check = re.compile(r'(\{.*\})')
-unbrace_check = re.compile(r'([*?[])')
+magic_check = re.compile(r"([*?[{])")
+magic_decheck = re.compile(r"\[(.)\]")
+brace_check = re.compile(r"(\{.*\})")
+unbrace_check = re.compile(r"([*?[])")
 
 
 def has_magic(s: str) -> bool:
@@ -211,46 +205,44 @@ def has_magic_ignore_brace(s: str) -> bool:
 
 
 def _ishidden(path: str) -> bool:
-    return path[0] == '.'
+    return path[0] == "."
 
 
 def _isrecursive(pattern: str) -> bool:
-    return pattern == '**'
+    return pattern == "**"
 
 
 def escape(pathname):
-    """Escape all special characters.
-    """
+    """Escape all special characters."""
     # Escaping is done by wrapping any of "*?[" between square brackets.
     # Metacharacters do not work in the drive part and shouldn't be escaped.
     drive, pathname = os.path.splitdrive(pathname)
-    pathname = magic_check.sub(r'[\1]', pathname)
+    pathname = magic_check.sub(r"[\1]", pathname)
     return drive + pathname
 
 
 def unescape(pathname):
-    """Unescape all special characters.
-    """
+    """Unescape all special characters."""
     drive, pathname = os.path.splitdrive(pathname)
-    pathname = magic_decheck.sub(r'\1', pathname)
+    pathname = magic_decheck.sub(r"\1", pathname)
    return drive + pathname
 
 
-def _find_suffix(path_list: List[str], prefix: str,
-                 split_sign: str) -> List[str]:
+def _find_suffix(path_list: List[str], prefix: str, split_sign: str) -> List[str]:
     suffix = []
     temp_path_list = []
     for path_index in range(0, len(path_list)):
-        temp_path_list.append(
-            path_list[path_index][len(prefix):].split(split_sign))
+        temp_path_list.append(path_list[path_index][len(prefix) :].split(split_sign))
     i = 0
     while True:
         i = i - 1
         if len(temp_path_list[0]) <= abs(i):
             return suffix
         for path_index in range(1, len(path_list)):
-            if len(temp_path_list[path_index]) <= abs(
-                    i) or temp_path_list[path_index][i] != temp_path_list[0][i]:
+            if (
+                len(temp_path_list[path_index]) <= abs(i)
+                or temp_path_list[path_index][i] != temp_path_list[0][i]
+            ):
                 return suffix
             else:
                 suffix.insert(0, temp_path_list[0][i])
@@ -260,8 +252,8 @@ def globlize(path_list: List[str]) -> str:
     path_list = sorted(path_list)
     if path_list[0] == path_list[-1]:
         return path_list[0]
-    first_path = path_list[0].split('/')
-    last_path = path_list[-1].split('/')
+    first_path = path_list[0].split("/")
+    last_path = path_list[-1].split("/")
     prefix = []
 
     for i in range(0, min(len(first_path), len(last_path))):
@@ -272,46 +264,46 @@ def globlize(path_list: List[str]) -> str:
     if len(prefix) == 0:
         prefix = ""
     else:
-        prefix = '/'.join(prefix) + '/'
-    suffix = _find_suffix(path_list, prefix, '/')
+        prefix = "/".join(prefix) + "/"
+    suffix = _find_suffix(path_list, prefix, "/")
 
     if len(suffix) == 0:
-        suffix = _find_suffix(path_list, prefix, '.')
+        suffix = _find_suffix(path_list, prefix, ".")
         if len(suffix) == 0:
             suffix = ""
         else:
-            suffix = '.' + '.'.join(suffix)
+            suffix = "." + ".".join(suffix)
     else:
-        suffix = '/' + '/'.join(suffix)
+        suffix = "/" + "/".join(suffix)
 
     path = []
     for i in path_list:
-        if i[len(prefix):len(i) - len(suffix)] not in path:
-            path.append(unescape(i[len(prefix):len(i) - len(suffix)]))
-    return prefix + "{" + ','.join(path) + "}" + suffix
+        if i[len(prefix) : len(i) - len(suffix)] not in path:
+            path.append(unescape(i[len(prefix) : len(i) - len(suffix)]))
+    return prefix + "{" + ",".join(path) + "}" + suffix
 
 
 def ungloblize(glob: str) -> List[str]:
     path_list = [glob]
     while True:
         temp_path = path_list[0]
-        begin = temp_path.find('{')
-        end = temp_path.find('}', begin)
+        begin = temp_path.find("{")
+        end = temp_path.find("}", begin)
         if end == -1:
             break
         path_list.pop(0)
-        subpath_list = temp_path[begin + 1:end].split(',')
+        subpath_list = temp_path[begin + 1 : end].split(",")
         for subpath in subpath_list:
-            path = temp_path[:begin] + escape(subpath) + temp_path[end + 1:]
+            path = temp_path[:begin] + escape(subpath) + temp_path[end + 1 :]
             path_list.append(path)
     return path_list
 
 
 def get_non_glob_dir(glob: str):
     root_dir = []
-    if glob.startswith('/'):
-        root_dir.append('/')
-    for name in glob.split('/'):
+    if glob.startswith("/"):
+        root_dir.append("/")
+    for name in glob.split("/"):
         if has_magic(name):
             break
         root_dir.append(name)
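
globlize and ungloblize (bodies above) convert between a list of concrete paths and a single brace pattern by factoring out the shared prefix and suffix. A round trip with made-up paths, assuming megfile is importable:

    from megfile.lib.glob import globlize, ungloblize

    # The common prefix 'logs/' and common suffix '.txt' are factored out.
    assert globlize(["logs/a.txt", "logs/b.txt"]) == "logs/{a,b}.txt"

    # Each '{...}' group is expanded back into concrete paths.
    assert ungloblize("logs/{a,b}.txt") == ["logs/a.txt", "logs/b.txt"]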
megfile/lib/hdfs_prefetch_reader.py CHANGED
@@ -1,28 +1,38 @@
 from io import BytesIO
 from typing import Optional
 
-from megfile.config import DEFAULT_BLOCK_CAPACITY, DEFAULT_BLOCK_SIZE, HDFS_MAX_RETRY_TIMES
+from megfile.config import (
+    DEFAULT_BLOCK_CAPACITY,
+    DEFAULT_BLOCK_SIZE,
+    HDFS_MAX_RETRY_TIMES,
+)
 from megfile.errors import raise_hdfs_error
 from megfile.lib.base_prefetch_reader import BasePrefetchReader
 
 
 class HdfsPrefetchReader(BasePrefetchReader):
-    '''
-    Reader to fast read the hdfs content. This will divide the file content into equal parts of block_size size, and will use LRU to cache at most block_capacity blocks in memory.
-    open(), seek() and read() will trigger prefetch read. The prefetch will cached block_forward blocks of data from offset position (the position after reading if the called function is read).
-    '''
+    """
+    Reader to fast read the hdfs content. This will divide the file content into equal
+    parts of block_size size, and will use LRU to cache at most block_capacity blocks
+    in memory.
+
+    open(), seek() and read() will trigger prefetch read. The prefetch will cached
+    block_forward blocks of data from offset position (the position after reading
+    if the called function is read).
+    """
 
     def __init__(
-            self,
-            hdfs_path: str,
-            *,
-            client,
-            block_size: int = DEFAULT_BLOCK_SIZE,
-            block_capacity: int = DEFAULT_BLOCK_CAPACITY,
-            block_forward: Optional[int] = None,
-            max_retries: int = HDFS_MAX_RETRY_TIMES,
-            max_workers: Optional[int] = None,
-            profile_name: Optional[str] = None):
+        self,
+        hdfs_path: str,
+        *,
+        client,
+        block_size: int = DEFAULT_BLOCK_SIZE,
+        block_capacity: int = DEFAULT_BLOCK_CAPACITY,
+        block_forward: Optional[int] = None,
+        max_retries: int = HDFS_MAX_RETRY_TIMES,
+        max_workers: Optional[int] = None,
+        profile_name: Optional[str] = None,
+    ):
         self._path = hdfs_path
         self._client = client
         self._profile_name = profile_name
@@ -32,22 +42,27 @@ class HdfsPrefetchReader(BasePrefetchReader):
             block_capacity=block_capacity,
             block_forward=block_forward,
             max_retries=max_retries,
-            max_workers=max_workers)
+            max_workers=max_workers,
+        )
 
     def _get_content_size(self):
         with raise_hdfs_error(self._path):
-            return self._client.status(self._path)['length']
+            return self._client.status(self._path)["length"]
 
     @property
     def name(self) -> str:
-        return 'hdfs%s://%s' % (
-            f"+{self._profile_name}" if self._profile_name else "", self._path)
+        return "hdfs%s://%s" % (
+            f"+{self._profile_name}" if self._profile_name else "",
+            self._path,
+        )
 
     def _fetch_response(
-            self,
-            start: Optional[int] = None,
-            end: Optional[int] = None) -> dict:
+        self, start: Optional[int] = None, end: Optional[int] = None
+    ) -> dict:
         with raise_hdfs_error(self.name):
-            with self._client.read(self._path, offset=start or 0, length=end -
-                                   start if start and end else None) as f:
-                return {'Body': BytesIO(f.read())}
+            with self._client.read(
+                self._path,
+                offset=start or 0,
+                length=end - start if start and end else None,
+            ) as f:
+                return {"Body": BytesIO(f.read())}
megfile/lib/hdfs_tools.py CHANGED
@@ -3,9 +3,7 @@ try:
 except ImportError:  # pragma: no cover
     hdfs_api = None
 
-__all__ = [
-    'hdfs_api',
-]
+__all__ = ["hdfs_api"]
 
 if hdfs_api:
     _to_error = hdfs_api.client._to_error