megfile 3.1.6__py3-none-any.whl → 4.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- megfile/cli.py +12 -7
- megfile/config.py +34 -44
- megfile/fs.py +169 -11
- megfile/fs_path.py +183 -259
- megfile/hdfs.py +106 -5
- megfile/hdfs_path.py +34 -90
- megfile/http.py +50 -1
- megfile/http_path.py +27 -65
- megfile/interfaces.py +1 -8
- megfile/lib/base_prefetch_reader.py +62 -78
- megfile/lib/combine_reader.py +5 -0
- megfile/lib/glob.py +3 -6
- megfile/lib/hdfs_prefetch_reader.py +7 -7
- megfile/lib/http_prefetch_reader.py +6 -6
- megfile/lib/s3_buffered_writer.py +67 -64
- megfile/lib/s3_cached_handler.py +1 -2
- megfile/lib/s3_limited_seekable_writer.py +3 -7
- megfile/lib/s3_memory_handler.py +1 -2
- megfile/lib/s3_pipe_handler.py +1 -2
- megfile/lib/s3_prefetch_reader.py +15 -20
- megfile/lib/s3_share_cache_reader.py +8 -5
- megfile/pathlike.py +397 -401
- megfile/s3.py +118 -17
- megfile/s3_path.py +150 -224
- megfile/sftp.py +300 -10
- megfile/sftp_path.py +46 -322
- megfile/smart.py +33 -27
- megfile/smart_path.py +9 -14
- megfile/stdio.py +1 -1
- megfile/stdio_path.py +2 -2
- megfile/utils/__init__.py +11 -4
- megfile/version.py +1 -1
- {megfile-3.1.6.dist-info → megfile-4.0.0.dist-info}/METADATA +7 -7
- megfile-4.0.0.dist-info/RECORD +52 -0
- {megfile-3.1.6.dist-info → megfile-4.0.0.dist-info}/WHEEL +1 -1
- {megfile-3.1.6.dist-info → megfile-4.0.0.dist-info}/top_level.txt +0 -2
- docs/conf.py +0 -65
- megfile-3.1.6.dist-info/RECORD +0 -55
- scripts/convert_results_to_sarif.py +0 -91
- scripts/generate_file.py +0 -344
- {megfile-3.1.6.dist-info → megfile-4.0.0.dist-info}/LICENSE +0 -0
- {megfile-3.1.6.dist-info → megfile-4.0.0.dist-info}/LICENSE.pyre +0 -0
- {megfile-3.1.6.dist-info → megfile-4.0.0.dist-info}/entry_points.txt +0 -0
|
@@ -3,12 +3,8 @@ from io import BytesIO
|
|
|
3
3
|
from typing import Optional
|
|
4
4
|
|
|
5
5
|
from megfile.config import (
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
DEFAULT_BLOCK_CAPACITY,
|
|
9
|
-
DEFAULT_BLOCK_SIZE,
|
|
10
|
-
GLOBAL_MAX_WORKERS,
|
|
11
|
-
NEWLINE,
|
|
6
|
+
READER_BLOCK_SIZE,
|
|
7
|
+
READER_MAX_BUFFER_SIZE,
|
|
12
8
|
S3_MAX_RETRY_TIMES,
|
|
13
9
|
)
|
|
14
10
|
from megfile.errors import (
|
|
@@ -21,12 +17,6 @@ from megfile.errors import (
|
|
|
21
17
|
from megfile.lib.base_prefetch_reader import BasePrefetchReader, LRUCacheFutureManager
|
|
22
18
|
|
|
23
19
|
__all__ = [
|
|
24
|
-
"DEFAULT_BLOCK_CAPACITY",
|
|
25
|
-
"DEFAULT_BLOCK_SIZE",
|
|
26
|
-
"GLOBAL_MAX_WORKERS",
|
|
27
|
-
"BACKOFF_INITIAL",
|
|
28
|
-
"BACKOFF_FACTOR",
|
|
29
|
-
"NEWLINE",
|
|
30
20
|
"S3PrefetchReader",
|
|
31
21
|
"LRUCacheFutureManager",
|
|
32
22
|
]
|
|
@@ -37,7 +27,7 @@ class S3PrefetchReader(BasePrefetchReader):
|
|
|
37
27
|
Reader to fast read the s3 content.
|
|
38
28
|
|
|
39
29
|
This will divide the file content into equalparts of block_size size,
|
|
40
|
-
and will use LRU to cache at most
|
|
30
|
+
and will use LRU to cache at most blocks in max_buffer_size memory.
|
|
41
31
|
|
|
42
32
|
open(), seek() and read() will trigger prefetch read.
|
|
43
33
|
The prefetch will cached block_forward blocks of data from offset position
|
|
@@ -50,8 +40,8 @@ class S3PrefetchReader(BasePrefetchReader):
|
|
|
50
40
|
key: str,
|
|
51
41
|
*,
|
|
52
42
|
s3_client,
|
|
53
|
-
block_size: int =
|
|
54
|
-
|
|
43
|
+
block_size: int = READER_BLOCK_SIZE,
|
|
44
|
+
max_buffer_size: int = READER_MAX_BUFFER_SIZE,
|
|
55
45
|
block_forward: Optional[int] = None,
|
|
56
46
|
max_retries: int = S3_MAX_RETRY_TIMES,
|
|
57
47
|
max_workers: Optional[int] = None,
|
|
@@ -66,7 +56,7 @@ class S3PrefetchReader(BasePrefetchReader):
|
|
|
66
56
|
|
|
67
57
|
super().__init__(
|
|
68
58
|
block_size=block_size,
|
|
69
|
-
|
|
59
|
+
max_buffer_size=max_buffer_size,
|
|
70
60
|
block_forward=block_forward,
|
|
71
61
|
max_retries=max_retries,
|
|
72
62
|
max_workers=max_workers,
|
|
@@ -76,16 +66,21 @@ class S3PrefetchReader(BasePrefetchReader):
|
|
|
76
66
|
try:
|
|
77
67
|
start, end = 0, self._block_size - 1
|
|
78
68
|
first_index_response = self._fetch_response(start=start, end=end)
|
|
79
|
-
|
|
69
|
+
if "ContentRange" in first_index_response:
|
|
70
|
+
content_size = int(first_index_response["ContentRange"].split("/")[-1])
|
|
71
|
+
else:
|
|
72
|
+
# usually when read a file only have one block
|
|
73
|
+
content_size = int(first_index_response["ContentLength"])
|
|
80
74
|
except S3InvalidRangeError:
|
|
81
75
|
# usually when read a empty file
|
|
82
76
|
# can use minio test empty file: https://hub.docker.com/r/minio/minio
|
|
83
77
|
first_index_response = self._fetch_response()
|
|
84
78
|
content_size = int(first_index_response["ContentLength"])
|
|
85
79
|
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
80
|
+
if self._block_capacity > 0:
|
|
81
|
+
first_future = Future()
|
|
82
|
+
first_future.set_result(first_index_response["Body"])
|
|
83
|
+
self._insert_futures(index=0, future=first_future)
|
|
89
84
|
self._content_etag = first_index_response["ETag"]
|
|
90
85
|
self._content_info = first_index_response
|
|
91
86
|
return content_size
|
|
@@ -4,8 +4,8 @@ from logging import getLogger as get_logger
|
|
|
4
4
|
from typing import Optional
|
|
5
5
|
|
|
6
6
|
from megfile.config import (
|
|
7
|
-
|
|
8
|
-
|
|
7
|
+
READER_BLOCK_SIZE,
|
|
8
|
+
READER_MAX_BUFFER_SIZE,
|
|
9
9
|
S3_MAX_RETRY_TIMES,
|
|
10
10
|
)
|
|
11
11
|
from megfile.lib.s3_prefetch_reader import LRUCacheFutureManager, S3PrefetchReader
|
|
@@ -14,6 +14,9 @@ from megfile.utils import thread_local
|
|
|
14
14
|
_logger = get_logger(__name__)
|
|
15
15
|
|
|
16
16
|
|
|
17
|
+
DEFAULT_BLOCK_CAPACITY = max(READER_MAX_BUFFER_SIZE // READER_BLOCK_SIZE, 1)
|
|
18
|
+
|
|
19
|
+
|
|
17
20
|
class S3ShareCacheReader(S3PrefetchReader):
|
|
18
21
|
"""
|
|
19
22
|
Reader to fast read the s3 content.
|
|
@@ -32,8 +35,8 @@ class S3ShareCacheReader(S3PrefetchReader):
|
|
|
32
35
|
key: str,
|
|
33
36
|
*,
|
|
34
37
|
s3_client,
|
|
35
|
-
block_size: int =
|
|
36
|
-
|
|
38
|
+
block_size: int = READER_BLOCK_SIZE,
|
|
39
|
+
max_buffer_size: int = READER_MAX_BUFFER_SIZE,
|
|
37
40
|
block_forward: Optional[int] = None,
|
|
38
41
|
max_retries: int = S3_MAX_RETRY_TIMES,
|
|
39
42
|
cache_key: str = "lru",
|
|
@@ -47,7 +50,7 @@ class S3ShareCacheReader(S3PrefetchReader):
|
|
|
47
50
|
key,
|
|
48
51
|
s3_client=s3_client,
|
|
49
52
|
block_size=block_size,
|
|
50
|
-
|
|
53
|
+
max_buffer_size=max_buffer_size,
|
|
51
54
|
block_forward=block_forward,
|
|
52
55
|
max_retries=max_retries,
|
|
53
56
|
max_workers=max_workers,
|