megfile 3.1.6__py3-none-any.whl → 4.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- megfile/cli.py +12 -7
- megfile/config.py +34 -44
- megfile/fs.py +169 -11
- megfile/fs_path.py +183 -259
- megfile/hdfs.py +106 -5
- megfile/hdfs_path.py +34 -90
- megfile/http.py +50 -1
- megfile/http_path.py +27 -65
- megfile/interfaces.py +1 -8
- megfile/lib/base_prefetch_reader.py +62 -78
- megfile/lib/combine_reader.py +5 -0
- megfile/lib/glob.py +3 -6
- megfile/lib/hdfs_prefetch_reader.py +7 -7
- megfile/lib/http_prefetch_reader.py +6 -6
- megfile/lib/s3_buffered_writer.py +67 -64
- megfile/lib/s3_cached_handler.py +1 -2
- megfile/lib/s3_limited_seekable_writer.py +3 -7
- megfile/lib/s3_memory_handler.py +1 -2
- megfile/lib/s3_pipe_handler.py +1 -2
- megfile/lib/s3_prefetch_reader.py +15 -20
- megfile/lib/s3_share_cache_reader.py +8 -5
- megfile/pathlike.py +397 -401
- megfile/s3.py +118 -17
- megfile/s3_path.py +150 -224
- megfile/sftp.py +300 -10
- megfile/sftp_path.py +46 -322
- megfile/smart.py +33 -27
- megfile/smart_path.py +9 -14
- megfile/stdio.py +1 -1
- megfile/stdio_path.py +2 -2
- megfile/utils/__init__.py +11 -4
- megfile/version.py +1 -1
- {megfile-3.1.6.dist-info → megfile-4.0.0.dist-info}/METADATA +7 -7
- megfile-4.0.0.dist-info/RECORD +52 -0
- {megfile-3.1.6.dist-info → megfile-4.0.0.dist-info}/WHEEL +1 -1
- {megfile-3.1.6.dist-info → megfile-4.0.0.dist-info}/top_level.txt +0 -2
- docs/conf.py +0 -65
- megfile-3.1.6.dist-info/RECORD +0 -55
- scripts/convert_results_to_sarif.py +0 -91
- scripts/generate_file.py +0 -344
- {megfile-3.1.6.dist-info → megfile-4.0.0.dist-info}/LICENSE +0 -0
- {megfile-3.1.6.dist-info → megfile-4.0.0.dist-info}/LICENSE.pyre +0 -0
- {megfile-3.1.6.dist-info → megfile-4.0.0.dist-info}/entry_points.txt +0 -0
megfile/s3_path.py
CHANGED

@@ -7,18 +7,23 @@ from concurrent.futures import ThreadPoolExecutor
 from functools import cached_property, lru_cache, wraps
 from logging import getLogger as get_logger
 from typing import IO, Any, BinaryIO, Callable, Dict, Iterator, List, Optional, Tuple
+from urllib.parse import urlparse

 import boto3
 import botocore
-from
+from boto3.s3.transfer import TransferConfig
+from botocore.awsrequest import AWSPreparedRequest, AWSResponse

 from megfile.config import (
-    DEFAULT_BLOCK_SIZE,
-    DEFAULT_MAX_BLOCK_SIZE,
-    DEFAULT_MIN_BLOCK_SIZE,
     GLOBAL_MAX_WORKERS,
+    HTTP_AUTH_HEADERS,
+    READER_BLOCK_SIZE,
+    READER_MAX_BUFFER_SIZE,
     S3_CLIENT_CACHE_MODE,
     S3_MAX_RETRY_TIMES,
+    WRITER_BLOCK_SIZE,
+    WRITER_MAX_BUFFER_SIZE,
+    to_boolean,
 )
 from megfile.errors import (
     S3BucketNotFoundError,
@@ -59,7 +64,6 @@ from megfile.lib.fnmatch import translate
 from megfile.lib.glob import has_magic, has_magic_ignore_brace, ungloblize
 from megfile.lib.joinpath import uri_join
 from megfile.lib.s3_buffered_writer import (
-    DEFAULT_MAX_BUFFER_SIZE,
     S3BufferedWriter,
 )
 from megfile.lib.s3_cached_handler import S3CachedHandler
@@ -76,6 +80,7 @@ from megfile.utils import (
     generate_cache_path,
     get_binary_mode,
     get_content_offset,
+    is_domain_or_subdomain,
     is_readable,
     necessary_params,
     process_local,
@@ -98,26 +103,14 @@ __all__ = [
     "s3_share_cache_open",
     "s3_open",
     "S3Cacher",
-    "S3BufferedWriter",
-    "S3LimitedSeekableWriter",
-    "S3PrefetchReader",
-    "S3ShareCacheReader",
     "s3_upload",
     "s3_download",
     "s3_load_content",
-    "s3_readlink",
-    "s3_glob",
-    "s3_glob_stat",
-    "s3_iglob",
-    "s3_rename",
-    "s3_makedirs",
     "s3_concat",
-    "s3_lstat",
 ]
 _logger = get_logger(__name__)
 content_md5_header = "megfile-content-md5"
 endpoint_url = "https://s3.amazonaws.com"
-max_pool_connections = GLOBAL_MAX_WORKERS  # for compatibility
 max_retries = S3_MAX_RETRY_TIMES
 max_keys = 1000

@@ -162,24 +155,30 @@ def _patch_make_request(client: botocore.client.BaseClient, redirect: bool = Fal
         retry_callback=retry_callback,
     )

-    def
-    def
-
+    def patch_send(send):
+        def patched_send(request: AWSPreparedRequest) -> AWSResponse:
+            response: AWSResponse = send(request)
             if (
-
-                and
-                and "Location" in
+                request.method == "GET"  # only support GET method for now
+                and response.status_code in (301, 302, 307, 308)
+                and "Location" in response.headers
             ):
-
-
-
-
-
+                # Permit sending auth/cookie headers from "foo.com" to "sub.foo.com".
+                # See also: https://go.dev/src/net/http/client.go#L980
+                location = response.headers["Location"]
+                ihost = urlparse(request.url).hostname
+                dhost = urlparse(location).hostname
+                if not is_domain_or_subdomain(dhost, ihost):
+                    for name in HTTP_AUTH_HEADERS:
+                        request.headers.pop(name, None)
+                request.url = location
+                response = send(request)
+            return response
+
+        return patched_send

     if redirect:
-        client._endpoint.
-            client._endpoint._send_request
-        )
+        client._endpoint._send = patch_send(client._endpoint._send)

     return client

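The patched send hook above only follows GET redirects, and before re-sending it strips the credential headers listed in HTTP_AUTH_HEADERS whenever the Location target is not the same site. That decision is delegated to megfile.utils.is_domain_or_subdomain; the sketch below illustrates the rule the Go net/http reference describes, and is not the library's actual implementation.

from typing import Optional

def is_domain_or_subdomain(sub: Optional[str], parent: Optional[str]) -> bool:
    # True when `sub` is `parent` itself or a subdomain of it, e.g.
    # ("sub.foo.com", "foo.com") -> True, ("bar.com", "foo.com") -> False.
    if not sub or not parent:
        return False
    if sub == parent:
        return True
    return sub.endswith("." + parent)

# A redirect from foo.com to sub.foo.com keeps Authorization/Cookie headers;
# a redirect to another-site.com would have them stripped before re-sending.
assert is_domain_or_subdomain("sub.foo.com", "foo.com")
assert not is_domain_or_subdomain("another-site.com", "foo.com")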
@@ -257,7 +256,7 @@ def get_env_var(env_name: str, profile_name=None):
 def parse_boolean(value: Optional[str], default: bool = False) -> bool:
     if value is None:
         return default
-    return value
+    return to_boolean(value)


 def get_access_token(profile_name=None):
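The one-line change matters because parse_boolean is fed environment-variable strings: any non-empty string is truthy in Python, so the old return path treated "false" and "0" as enabled. to_boolean (now imported from megfile.config) centralizes the string-to-bool conversion; the stand-in below is a plausible sketch, not the library's actual code.

def to_boolean(value: str) -> bool:
    # Hypothetical stand-in for megfile.config.to_boolean; the real definition
    # lives in megfile/config.py.
    return value.strip().lower() in ("1", "true", "yes", "on")

assert to_boolean("TRUE") is True
assert to_boolean("false") is False   # old behavior: bool("false") was True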
@@ -629,7 +628,7 @@ def _s3_scan_pairs(
     src_url: PathLike, dst_url: PathLike
 ) -> Iterator[Tuple[PathLike, PathLike]]:
     for src_file_path in S3Path(src_url).scan():
-        content_path = src_file_path[len(src_url) :]
+        content_path = src_file_path[len(fspath(src_url)) :]
         if len(content_path) > 0:
             dst_file_path = s3_path_join(dst_url, content_path)
         else:
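The fspath() wrapper fixes the prefix slice when src_url arrives as a path object rather than a plain string, since len() is undefined on path-like objects. A minimal stdlib illustration of the same bug class, using a pathlib type as a stand-in for megfile's own path objects:

from os import fspath
from pathlib import PurePosixPath

src_url = PurePosixPath("bucket/prefix")        # stand-in for a path-like argument
src_file_path = "bucket/prefix/data/file.txt"

# len(src_url) would raise: object of type 'PurePosixPath' has no len()
content_path = src_file_path[len(fspath(src_url)):]
assert content_path == "/data/file.txt"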
@@ -689,8 +688,8 @@ def s3_prefetch_open(
     mode: str = "rb",
     followlinks: bool = False,
     *,
-
-
+    max_workers: Optional[int] = None,
+    block_size: int = READER_BLOCK_SIZE,
 ) -> S3PrefetchReader:
     """Open a asynchronous prefetch reader, to support fast sequential
     read and random read
@@ -701,11 +700,18 @@ def s3_prefetch_open(

     Supports context manager

-    Some parameter setting may perform well:
-
-
-    :param
-    :param
+    Some parameter setting may perform well: max_workers=10 or 20,
+    block_size=8 or 16 MB, default value None means using global thread pool
+
+    :param s3_url: s3 path
+    :param mode: only support "r" or "rb"
+    :param encoding: encoding is the name of the encoding used to decode or encode
+        the file. This should only be used in text mode.
+    :param errors: errors is an optional string that specifies how encoding and
+        decoding errors are to be handled—this cannot be used in binary mode.
+    :param followlinks: follow symbolic link, default `False`
+    :param max_workers: Max download thread number, `None` by default
+    :param block_size: Max data size downloaded by each thread, in bytes,
         8MB by default
     :returns: An opened S3PrefetchReader object
     :raises: S3FileNotFoundError
@@ -721,15 +727,15 @@ def s3_prefetch_open(
         pass

     bucket, key = parse_s3_url(s3_url.path_with_protocol)
-    config = botocore.config.Config(max_pool_connections=
+    config = botocore.config.Config(max_pool_connections=GLOBAL_MAX_WORKERS)
     client = get_s3_client_with_cache(config=config, profile_name=s3_url._profile_name)
     return S3PrefetchReader(
         bucket,
         key,
         s3_client=client,
         max_retries=max_retries,
-        max_workers=
-        block_size=
+        max_workers=max_workers,
+        block_size=block_size,
         profile_name=s3_url._profile_name,
     )

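With this change the prefetch reader is tuned directly through the two new keyword-only arguments. A usage sketch following the docstring's suggested values; the import path is assumed to match earlier releases and the object name is made up:

from megfile.s3_path import s3_prefetch_open

# 16 MB blocks fetched by 10 dedicated download threads; max_workers=None
# would fall back to the global thread pool instead.
with s3_prefetch_open(
    "s3://my-bucket/big-file.bin",   # hypothetical object
    max_workers=10,
    block_size=16 * 2**20,
) as reader:
    header = reader.read(1024)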
@@ -741,8 +747,8 @@ def s3_share_cache_open(
     followlinks: bool = False,
     *,
     cache_key: str = "lru",
-
-
+    max_workers: Optional[int] = None,
+    block_size: int = READER_BLOCK_SIZE,
 ) -> S3ShareCacheReader:
     """Open a asynchronous prefetch reader, to support fast sequential read and
     random read
@@ -753,11 +759,18 @@ def s3_share_cache_open(

     Supports context manager

-    Some parameter setting may perform well:
-
-
-    :param
-    :param
+    Some parameter setting may perform well: max_workers=10 or 20,
+    block_size=8 or 16 MB, default value None means using global thread pool
+
+    :param s3_url: s3 path
+    :param mode: only support "r" or "rb"
+    :param encoding: encoding is the name of the encoding used to decode or encode
+        the file. This should only be used in text mode.
+    :param errors: errors is an optional string that specifies how encoding and
+        decoding errors are to be handled—this cannot be used in binary mode.
+    :param followlinks: follow symbolic link, default `False`
+    :param max_workers: Max download thread number, None by default
+    :param block_size: Max data size downloaded by each thread, in bytes,
         8MB by default
     :returns: An opened S3ShareCacheReader object
     :raises: S3FileNotFoundError
@@ -774,7 +787,7 @@ def s3_share_cache_open(
         pass

     bucket, key = parse_s3_url(s3_url.path_with_protocol)
-    config = botocore.config.Config(max_pool_connections=
+    config = botocore.config.Config(max_pool_connections=GLOBAL_MAX_WORKERS)
     client = get_s3_client_with_cache(config=config, profile_name=s3_url._profile_name)
     return S3ShareCacheReader(
         bucket,
@@ -782,8 +795,8 @@ def s3_share_cache_open(
         cache_key=cache_key,
         s3_client=client,
         max_retries=max_retries,
-        max_workers=
-        block_size=
+        max_workers=max_workers,
+        block_size=block_size,
         profile_name=s3_url._profile_name,
     )

@@ -810,7 +823,13 @@ def s3_pipe_open(
     But asynchronous behavior can guarantee the file are successfully written,
     and frequent execution may cause thread and file handle exhaustion

-    :param
+    :param s3_url: s3 path
+    :param mode: Mode to open file, either "r", "rb", "w" or "wb"
+    :param encoding: encoding is the name of the encoding used to decode or encode
+        the file. This should only be used in text mode.
+    :param errors: errors is an optional string that specifies how encoding and
+        decoding errors are to be handled—this cannot be used in binary mode.
+    :param followlinks: follow symbolic link, default `False`
     :param join_thread: If wait after function execution until s3 finishes writing
     :returns: An opened BufferedReader / BufferedWriter object
     """
@@ -829,7 +848,7 @@ def s3_pipe_open(
         pass

     bucket, key = parse_s3_url(s3_url.path_with_protocol)
-    config = botocore.config.Config(max_pool_connections=
+    config = botocore.config.Config(max_pool_connections=GLOBAL_MAX_WORKERS)
     client = get_s3_client_with_cache(config=config, profile_name=s3_url._profile_name)
     return S3PipeHandler(
         bucket,
@@ -860,7 +879,14 @@ def s3_cached_open(
     cache_path can specify the path of cache file. Performance could be better
     if cache file path is on ssd or tmpfs

-    :param
+    :param s3_url: s3 path
+    :param mode: Mode to open file, could be one of "rb", "wb", "ab", "rb+", "wb+"
+        or "ab+"
+    :param encoding: encoding is the name of the encoding used to decode or encode
+        the file. This should only be used in text mode.
+    :param errors: errors is an optional string that specifies how encoding and
+        decoding errors are to be handled—this cannot be used in binary mode.
+    :param followlinks: follow symbolic link, default `False`
     :param cache_path: cache file path
     :returns: An opened BufferedReader / BufferedWriter object
     """
@@ -875,7 +901,7 @@ def s3_cached_open(
         pass

     bucket, key = parse_s3_url(s3_url.path_with_protocol)
-    config = botocore.config.Config(max_pool_connections=
+    config = botocore.config.Config(max_pool_connections=GLOBAL_MAX_WORKERS)
     client = get_s3_client_with_cache(config=config, profile_name=s3_url._profile_name)
     return S3CachedHandler(
         bucket,
@@ -893,16 +919,14 @@ def s3_buffered_open(
     mode: str,
     followlinks: bool = False,
     *,
-
-    max_buffer_size: int =
-
+    max_workers: Optional[int] = None,
+    max_buffer_size: Optional[int] = None,
+    block_forward: Optional[int] = None,
     block_size: Optional[int] = None,
     limited_seekable: bool = False,
     buffered: bool = False,
     share_cache_key: Optional[str] = None,
     cache_path: Optional[str] = None,
-    min_block_size: Optional[int] = None,
-    max_block_size: int = DEFAULT_MAX_BLOCK_SIZE,
 ) -> IO:
     """Open an asynchronous prefetch reader, to support fast sequential read

@@ -912,22 +936,30 @@ def s3_buffered_open(

     Supports context manager

-    Some parameter setting may perform well:
-
-
-    :param
-    :param
-
-
-
-
-
+    Some parameter setting may perform well: max_workers=10 or 20,
+    default value None means using global thread pool
+
+    :param s3_url: s3 path
+    :param mode: Mode to open file, could be one of "rb", "wb", "ab", "rb+", "wb+"
+        or "ab+"
+    :param encoding: encoding is the name of the encoding used to decode or encode
+        the file. This should only be used in text mode.
+    :param errors: errors is an optional string that specifies how encoding and
+        decoding errors are to be handled—this cannot be used in binary mode.
+    :param followlinks: follow symbolic link, default `False`
+    :param max_workers: Max download / upload thread number, `None` by default,
+        will use global thread pool with 8 threads.
+    :param max_buffer_size: Max cached buffer size in memory, 128MB by default.
+        Set to `0` will disable cache.
+    :param block_forward: How many blocks of data cached from offset position, only for
+        read mode.
+    :param block_size: Size of single block.
         Each block will be uploaded by single thread.
     :param limited_seekable: If write-handle supports limited seek
         (both file head part and tail part can seek block_size).
         Notes: This parameter are valid only for write-handle.
         Read-handle support arbitrary seek
-    :returns: An opened
+    :returns: An opened File object
     :raises: S3FileNotFoundError
     """
     if mode not in ("rb", "wb", "ab", "rb+", "wb+", "ab+"):
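The buffering knobs are now expressed directly in reader/writer terms (max_workers, max_buffer_size, block_forward, block_size) instead of the old min/max block-size parameters. A usage sketch under the documented defaults (8 MB blocks, 128 MB buffer); the import path is assumed unchanged and the bucket and keys are made up:

from megfile.s3_path import s3_buffered_open

# Read path: 8 MB blocks, prefetch 4 blocks ahead of the current offset,
# 16 dedicated worker threads instead of the global pool.
with s3_buffered_open(
    "s3://my-bucket/dataset.bin",    # hypothetical object
    "rb",
    max_workers=16,
    block_size=8 * 2**20,
    block_forward=4,
) as f:
    chunk = f.read(4096)

# Write path: cap the in-memory upload buffer at 64 MB (default is 128 MB).
with s3_buffered_open(
    "s3://my-bucket/output.bin",
    "wb",
    max_buffer_size=64 * 2**20,
) as f:
    f.write(b"hello")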
@@ -939,11 +971,8 @@ def s3_buffered_open(
             s3_url = s3_url.readlink()
         except S3NotALinkError:
             pass
-    min_block_size = min_block_size or block_size or DEFAULT_MIN_BLOCK_SIZE
-    block_size = block_size or DEFAULT_BLOCK_SIZE
-
     bucket, key = parse_s3_url(s3_url.path_with_protocol)
-    config = botocore.config.Config(max_pool_connections=
+    config = botocore.config.Config(max_pool_connections=GLOBAL_MAX_WORKERS)
     client = get_s3_client_with_cache(config=config, profile_name=s3_url._profile_name)

     if "a" in mode or "+" in mode:
@@ -961,13 +990,6 @@
         )

     if mode == "rb":
-        # A rough conversion algorithm to align 2 types of Reader / Writer parameters
-        # TODO: Optimize the conversion algorithm
-        block_capacity = max_buffer_size // block_size
-        if forward_ratio is None:
-            block_forward = None
-        else:
-            block_forward = max(int(block_capacity * forward_ratio), 1)
         if share_cache_key is not None:
             reader = S3ShareCacheReader(
                 bucket,
@@ -975,8 +997,8 @@
                 cache_key=share_cache_key,
                 s3_client=client,
                 max_retries=max_retries,
-                max_workers=
-                block_size=block_size,
+                max_workers=max_workers,
+                block_size=block_size or READER_BLOCK_SIZE,
                 block_forward=block_forward,
                 profile_name=s3_url._profile_name,
             )
@@ -986,10 +1008,10 @@
                 key,
                 s3_client=client,
                 max_retries=max_retries,
-                max_workers=
-
+                max_workers=max_workers,
+                max_buffer_size=max_buffer_size or READER_MAX_BUFFER_SIZE,
                 block_forward=block_forward,
-                block_size=block_size,
+                block_size=block_size or READER_BLOCK_SIZE,
                 profile_name=s3_url._profile_name,
             )
         if buffered or _is_pickle(reader):
@@ -1001,10 +1023,9 @@
             bucket,
             key,
             s3_client=client,
-            max_workers=
-            block_size=
-
-            max_buffer_size=max_buffer_size,
+            max_workers=max_workers,
+            block_size=block_size or WRITER_BLOCK_SIZE,
+            max_buffer_size=max_buffer_size or WRITER_MAX_BUFFER_SIZE,
             profile_name=s3_url._profile_name,
         )
     else:
@@ -1012,10 +1033,9 @@
             bucket,
             key,
             s3_client=client,
-            max_workers=
-            block_size=
-
-            max_buffer_size=max_buffer_size,
+            max_workers=max_workers,
+            block_size=block_size or WRITER_BLOCK_SIZE,
+            max_buffer_size=max_buffer_size or WRITER_MAX_BUFFER_SIZE,
             profile_name=s3_url._profile_name,
         )
     if buffered or _is_pickle(writer):
@@ -1050,7 +1070,7 @@ def s3_memory_open(
         pass

     bucket, key = parse_s3_url(s3_url.path_with_protocol)
-    config = botocore.config.Config(max_pool_connections=
+    config = botocore.config.Config(max_pool_connections=GLOBAL_MAX_WORKERS)
     client = get_s3_client_with_cache(config=config, profile_name=s3_url._profile_name)
     return S3MemoryHandler(
         bucket, key, mode, s3_client=client, profile_name=s3_url._profile_name
@@ -1109,7 +1129,7 @@ def s3_download(
     if not src_url.is_file():
         raise S3IsADirectoryError("Is a directory: %r" % src_url.path_with_protocol)

-    dst_directory = os.path.dirname(dst_path.path_without_protocol)
+    dst_directory = os.path.dirname(dst_path.path_without_protocol)  # pyre-ignore[6]
     if dst_directory != "":
         os.makedirs(dst_directory, exist_ok=True)

@@ -1117,9 +1137,21 @@
     download_file = patch_method(
         client.download_file, max_retries=max_retries, should_retry=s3_should_retry
     )
+
+    transfer_config = TransferConfig(
+        multipart_threshold=READER_BLOCK_SIZE,
+        max_concurrency=GLOBAL_MAX_WORKERS,
+        multipart_chunksize=READER_BLOCK_SIZE,
+        num_download_attempts=S3_MAX_RETRY_TIMES,
+        max_io_queue=max(READER_MAX_BUFFER_SIZE // READER_BLOCK_SIZE, 1),
+    )
     try:
         download_file(
-            src_bucket,
+            src_bucket,
+            src_key,
+            dst_path.path_without_protocol,
+            Callback=callback,
+            Config=transfer_config,
         )
     except Exception as error:
         error = translate_fs_error(error, dst_url)
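Downloads now delegate chunking and concurrency to boto3's TransferConfig instead of megfile's own reader, so the reader-side config constants map onto boto3 transfer settings. The snippet below re-creates that mapping with the defaults stated in the docstrings above (8 MB blocks, 128 MB buffer, 8 global workers); the retry count of 10 is an assumption for illustration, not taken from this diff:

from boto3.s3.transfer import TransferConfig

READER_BLOCK_SIZE = 8 * 2**20          # per-docstring default block size
READER_MAX_BUFFER_SIZE = 128 * 2**20   # per-docstring default buffer size
GLOBAL_MAX_WORKERS = 8                 # per-docstring global thread pool size
S3_MAX_RETRY_TIMES = 10                # assumed value for illustration

transfer_config = TransferConfig(
    multipart_threshold=READER_BLOCK_SIZE,       # switch to multipart beyond one block
    max_concurrency=GLOBAL_MAX_WORKERS,          # parallel part downloads
    multipart_chunksize=READER_BLOCK_SIZE,       # one block per part
    num_download_attempts=S3_MAX_RETRY_TIMES,
    max_io_queue=max(READER_MAX_BUFFER_SIZE // READER_BLOCK_SIZE, 1),  # 16 queued chunks
)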
@@ -1170,8 +1202,19 @@ def s3_upload(
         client.upload_fileobj, max_retries=max_retries, should_retry=s3_should_retry
     )

+    transfer_config = TransferConfig(
+        multipart_threshold=WRITER_BLOCK_SIZE,
+        max_concurrency=GLOBAL_MAX_WORKERS,
+        multipart_chunksize=WRITER_BLOCK_SIZE,
+    )
     with open(src_path.path_without_protocol, "rb") as src, raise_s3_error(dst_url):
-        upload_fileobj(
+        upload_fileobj(
+            src,
+            Bucket=dst_bucket,
+            Key=dst_key,
+            Callback=callback,
+            Config=transfer_config,
+        )


 def s3_load_content(
@@ -1217,27 +1260,6 @@ def s3_load_content(
     )(client, bucket, key, range_str)


-def s3_readlink(path) -> str:
-    """
-    Return a string representing the path to which the symbolic link points.
-
-    :returns: Return a string representing the path to which the symbolic link points.
-    :raises: S3NameTooLongError, S3BucketNotFoundError, S3IsADirectoryError,
-        S3NotALinkError
-    """
-    return S3Path(path).readlink().path_with_protocol
-
-
-def s3_rename(src_url: PathLike, dst_url: PathLike, overwrite: bool = True) -> None:
-    """
-    Move s3 file path from src_url to dst_url
-
-    :param dst_url: Given destination path
-    :param overwrite: whether or not overwrite file when exists
-    """
-    S3Path(src_url).rename(dst_url, overwrite)
-
-
 class S3Cacher(FileCacher):
     cache_path = None

@@ -1259,97 +1281,8 @@ class S3Cacher(FileCacher):
             os.unlink(self.cache_path)


-def s3_glob(
-    path: PathLike,
-    recursive: bool = True,
-    missing_ok: bool = True,
-    followlinks: bool = False,
-) -> List[str]:
-    """Return s3 path list in ascending alphabetical order,
-    in which path matches glob pattern
-
-    Notes: Only glob in bucket. If trying to match bucket with wildcard characters,
-    raise UnsupportedError
-
-    :param recursive: If False, `**` will not search directory recursively
-    :param missing_ok: If False and target path doesn't match any file,
-        raise FileNotFoundError
-    :raises: UnsupportedError, when bucket part contains wildcard characters
-    :returns: A list contains paths match `s3_pathname`
-    """
-    return list(
-        s3_iglob(
-            path=path,
-            recursive=recursive,
-            missing_ok=missing_ok,
-            followlinks=followlinks,
-        )
-    )
-
-
-def s3_glob_stat(
-    path: PathLike,
-    recursive: bool = True,
-    missing_ok: bool = True,
-    followlinks: bool = False,
-) -> Iterator[FileEntry]:
-    """Return a generator contains tuples of path and file stat,
-    in ascending alphabetical order, in which path matches glob pattern
-
-    Notes: Only glob in bucket. If trying to match bucket with wildcard characters,
-    raise UnsupportedError
-
-    :param recursive: If False, `**` will not search directory recursively
-    :param missing_ok: If False and target path doesn't match any file,
-        raise FileNotFoundError
-    :raises: UnsupportedError, when bucket part contains wildcard characters
-    :returns: A generator contains tuples of path and file stat,
-        in which paths match `s3_pathname`
-    """
-    return S3Path(path).glob_stat(
-        pattern="", recursive=recursive, missing_ok=missing_ok, followlinks=followlinks
-    )
-
-
-def s3_iglob(
-    path: PathLike,
-    recursive: bool = True,
-    missing_ok: bool = True,
-    followlinks: bool = False,
-) -> Iterator[str]:
-    """Return s3 path iterator in ascending alphabetical order,
-    in which path matches glob pattern
-
-    Notes: Only glob in bucket. If trying to match bucket with wildcard characters,
-    raise UnsupportedError
-
-    :param recursive: If False, `**` will not search directory recursively
-    :param missing_ok: If False and target path doesn't match any file,
-        raise FileNotFoundError
-    :raises: UnsupportedError, when bucket part contains wildcard characters
-    :returns: An iterator contains paths match `s3_pathname`
-    """
-    for path_obj in S3Path(path).iglob(
-        pattern="", recursive=recursive, missing_ok=missing_ok, followlinks=followlinks
-    ):
-        yield path_obj.path_with_protocol
-
-
-def s3_makedirs(path: PathLike, exist_ok: bool = False):
-    """
-    Create an s3 directory.
-    Purely creating directory is invalid because it's unavailable on OSS.
-    This function is to test the target bucket have WRITE access.
-
-    :param path: Given path
-    :param exist_ok: If False and target directory exists, raise S3FileExistsError
-    :raises: S3BucketNotFoundError, S3FileExistsError
-    """
-    return S3Path(path).mkdir(parents=True, exist_ok=exist_ok)
-
-
 def _group_src_paths_by_block(
-    src_paths: List[PathLike], block_size: int =
+    src_paths: List[PathLike], block_size: int = READER_BLOCK_SIZE
 ) -> List[List[Tuple[PathLike, Optional[str]]]]:
     groups = []
     current_group, current_group_size = [], 0
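With the block of module-level wrappers above removed (they were also dropped from __all__ earlier in this diff), callers move to the S3Path object API. The equivalences below are read straight off the deleted function bodies, with a made-up pattern for illustration:

from megfile.s3_path import S3Path

path = "s3://my-bucket/prefix/**/*.json"    # hypothetical glob pattern

# s3_glob(path)       -> collect matches from S3Path(path).iglob()
matches = [p.path_with_protocol for p in S3Path(path).iglob(pattern="")]

# s3_glob_stat(path)  -> S3Path(path).glob_stat(pattern="")
entries = list(S3Path(path).glob_stat(pattern=""))

# s3_rename(src, dst)         -> S3Path(src).rename(dst, overwrite)
# s3_makedirs(path, exist_ok) -> S3Path(path).mkdir(parents=True, exist_ok=exist_ok)
# s3_lstat(path)              -> S3Path(path).lstat()
# s3_readlink(path)           -> S3Path(path).readlink().path_with_protocol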
@@ -1395,7 +1328,7 @@ def _group_src_paths_by_block(
 def s3_concat(
     src_paths: List[PathLike],
     dst_path: PathLike,
-    block_size: int =
+    block_size: int = READER_BLOCK_SIZE,
     max_workers: int = GLOBAL_MAX_WORKERS,
 ) -> None:
     """Concatenate s3 files to one file.
@@ -1410,9 +1343,10 @@ def s3_concat(
     else:
         groups = _group_src_paths_by_block(src_paths, block_size=block_size)

-    with
-
-
+    with (
+        MultiPartWriter(client, dst_path) as writer,
+        ThreadPoolExecutor(max_workers=max_workers) as executor,
+    ):
         for index, group in enumerate(groups, start=1):
             if len(group) == 1:
                 executor.submit(
@@ -1422,14 +1356,6 @@
                 executor.submit(writer.upload_part_by_paths, index, group)


-def s3_lstat(path: PathLike) -> StatResult:
-    """
-    Like Path.stat() but, if the path points to a symbolic link,
-    return the symbolic link’s information rather than its target’s.
-    """
-    return S3Path(path).lstat()
-
-
 @SmartPath.register
 class S3Path(URIPath):
     protocol = "s3"