megfile 4.0.1__py3-none-any.whl → 4.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- megfile/cli.py +13 -9
- megfile/config.py +21 -3
- megfile/lib/s3_prefetch_reader.py +23 -25
- megfile/s3_path.py +10 -10
- megfile/smart.py +2 -1
- megfile/version.py +1 -1
- {megfile-4.0.1.dist-info → megfile-4.0.3.dist-info}/METADATA +4 -1
- {megfile-4.0.1.dist-info → megfile-4.0.3.dist-info}/RECORD +23 -13
- {megfile-4.0.1.dist-info → megfile-4.0.3.dist-info}/top_level.txt +1 -0
- scripts/benchmark/code/iopath_read.py +29 -0
- scripts/benchmark/code/iopath_write.py +30 -0
- scripts/benchmark/code/megfile_read.py +13 -0
- scripts/benchmark/code/megfile_write.py +14 -0
- scripts/benchmark/code/pyarrow_read.py +17 -0
- scripts/benchmark/code/pyarrow_write.py +18 -0
- scripts/benchmark/code/s3fs_read.py +21 -0
- scripts/benchmark/code/s3fs_write.py +22 -0
- scripts/benchmark/code/smart_open_read.py +25 -0
- scripts/benchmark/code/smart_open_write.py +26 -0
- {megfile-4.0.1.dist-info → megfile-4.0.3.dist-info}/LICENSE +0 -0
- {megfile-4.0.1.dist-info → megfile-4.0.3.dist-info}/LICENSE.pyre +0 -0
- {megfile-4.0.1.dist-info → megfile-4.0.3.dist-info}/WHEEL +0 -0
- {megfile-4.0.1.dist-info → megfile-4.0.3.dist-info}/entry_points.txt +0 -0
megfile/cli.py
CHANGED

@@ -1,5 +1,4 @@
 import configparser
-import logging
 import os
 import shutil
 import sys
@@ -10,7 +9,7 @@ from functools import partial
 import click
 from tqdm import tqdm
 
-from megfile.config import READER_BLOCK_SIZE, SFTP_HOST_KEY_POLICY
+from megfile.config import READER_BLOCK_SIZE, SFTP_HOST_KEY_POLICY, set_log_level
 from megfile.hdfs_path import DEFAULT_HDFS_TIMEOUT
 from megfile.interfaces import FileEntry
 from megfile.lib.glob import get_non_glob_dir, has_magic
@@ -45,29 +44,34 @@ from megfile.smart_path import SmartPath
 from megfile.utils import get_human_size
 from megfile.version import VERSION
 
-
-
-DEBUG = False
+options = {}
+set_log_level()
 
 
 @click.group()
 @click.option("--debug", is_flag=True, help="Enable debug mode.")
-def cli(debug):
+@click.option(
+    "--log-level",
+    type=click.Choice(["DEBUG", "INFO", "WARNING", "ERROR"]),
+    help="Set logging level.",
+)
+def cli(debug, log_level):
     """
     Client for megfile.
 
     If you install megfile with ``--user``,
     you also need configure ``$HOME/.local/bin`` into ``$PATH``.
     """
-    global DEBUG
-    DEBUG = debug
+    options["debug"] = debug
+    options["log_level"] = log_level or ("DEBUG" if debug else "INFO")
+    set_log_level(options["log_level"])
 
 
 def safe_cli(): # pragma: no cover
     try:
         cli()
     except Exception as e:
-        if DEBUG:
+        if options.get("debug", False):
             raise
         else:
             click.echo(f"\n[{type(e).__name__}] {e}", err=True)
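Net effect of the cli.py change: the module-global DEBUG flag becomes an `options` dict, and verbosity is resolved once in `cli()` and applied via `set_log_level`. A minimal sketch of the precedence logic visible in the diff (the helper name `resolve_log_level` is ours, not megfile's):

from typing import Optional

def resolve_log_level(debug: bool, log_level: Optional[str]) -> str:
    # mirrors cli(): explicit --log-level wins; --debug alone implies DEBUG
    return log_level or ("DEBUG" if debug else "INFO")

assert resolve_log_level(False, None) == "INFO"
assert resolve_log_level(True, None) == "DEBUG"
assert resolve_log_level(True, "ERROR") == "ERROR"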
megfile/config.py
CHANGED

@@ -1,3 +1,4 @@
+import logging
 import os
 import typing as T
 
@@ -61,10 +62,24 @@ def parse_quantity(quantity: T.Union[str, int]) -> int:
     return number * (base**exponent)  # pytype: disable=bad-return-type
 
 
-def to_boolean(value: str):
+def parse_boolean(value: T.Optional[str], default: bool = False):
+    if value is None:
+        return default
     return value.lower() in ("true", "yes", "1")
 
 
+def set_log_level(level: T.Optional[T.Union[int, str]] = None):
+    logging.basicConfig(
+        level=logging.ERROR,
+        format=(
+            "%(asctime)s | %(levelname)-8s | "
+            "%(name)s:%(funcName)s:%(lineno)d - %(message)s"
+        ),
+    )
+    level = level or os.getenv("MEGFILE_LOG_LEVEL") or logging.INFO
+    logging.getLogger("megfile").setLevel(level)
+
+
 READER_BLOCK_SIZE = parse_quantity(os.getenv("MEGFILE_READER_BLOCK_SIZE") or 8 * 2**20)
 if READER_BLOCK_SIZE <= 0:
     raise ValueError(
@@ -87,8 +102,8 @@ WRITER_MAX_BUFFER_SIZE = parse_quantity(
 )
 DEFAULT_WRITER_BLOCK_AUTOSCALE = not os.getenv("MEGFILE_WRITER_BLOCK_SIZE")
 if os.getenv("MEGFILE_WRITER_BLOCK_AUTOSCALE"):
-    DEFAULT_WRITER_BLOCK_AUTOSCALE = to_boolean(
-        os.environ["MEGFILE_WRITER_BLOCK_AUTOSCALE"]
+    DEFAULT_WRITER_BLOCK_AUTOSCALE = parse_boolean(
+        os.environ["MEGFILE_WRITER_BLOCK_AUTOSCALE"]
     )
 
 GLOBAL_MAX_WORKERS = int(os.getenv("MEGFILE_MAX_WORKERS") or 8)
@@ -114,3 +129,6 @@ SFTP_MAX_RETRY_TIMES = int(
 SFTP_HOST_KEY_POLICY = os.getenv("MEGFILE_SFTP_HOST_KEY_POLICY")
 
 HTTP_AUTH_HEADERS = ("Authorization", "Www-Authenticate", "Cookie", "Cookie2")
+
+if os.getenv("MEGFILE_LOG_LEVEL"):
+    set_log_level()
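A quick sanity check of the two helpers added above, assuming they behave exactly as the added lines show; `parse_boolean` treats `None` as unset, and `set_log_level` configures handlers once at ERROR and then adjusts only the `megfile` logger:

from megfile.config import parse_boolean, set_log_level

assert parse_boolean(None) is False              # unset -> default
assert parse_boolean(None, default=True) is True
assert parse_boolean("TRUE") is True             # accepts true/yes/1, case-insensitive
assert parse_boolean("no") is False

set_log_level("WARNING")  # roughly the same effect as exporting MEGFILE_LOG_LEVEL=WARNING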
megfile/lib/s3_prefetch_reader.py
CHANGED

@@ -62,31 +62,29 @@ class S3PrefetchReader(BasePrefetchReader):
         )
 
     def _get_content_size(self):
-        if self._block_capacity > 0:
-            try:
-                start, end = 0, self._block_size - 1
-                first_index_response = self._fetch_response(start=start, end=end)
-                if "ContentRange" in first_index_response:
-                    content_size = int(
-                        first_index_response["ContentRange"].split("/")[-1]
-                    )
-                else:
-                    # usually when read a file only have one block
-                    content_size = int(first_index_response["ContentLength"])
-            except S3InvalidRangeError:
-                # usually when read a empty file
-                # can use minio test empty file: https://hub.docker.com/r/minio/minio
-                first_index_response = self._fetch_response()
-                content_size = int(first_index_response["ContentLength"])
-
-            first_future = Future()
-            first_future.set_result(first_index_response["Body"])
-            self._insert_futures(index=0, future=first_future)
-            self._content_etag = first_index_response["ETag"]
-        else:
+        if self._block_capacity <= 0:
             response = self._client.head_object(Bucket=self._bucket, Key=self._key)
-            self._content_etag = response.get("ETag", None)
-            content_size = int(response["ContentLength"])
+            self._content_etag = response.get("ETag")
+            return int(response["ContentLength"])
+
+        try:
+            start, end = 0, self._block_size - 1
+            first_index_response = self._fetch_response(start=start, end=end)
+            if "ContentRange" in first_index_response:
+                content_size = int(first_index_response["ContentRange"].split("/")[-1])
+            else:
+                # usually when read a file only have one block
+                content_size = int(first_index_response["ContentLength"])
+        except S3InvalidRangeError:
+            # usually when read a empty file
+            # can use minio test empty file: https://hub.docker.com/r/minio/minio
+            first_index_response = self._fetch_response()
+            content_size = int(first_index_response["ContentLength"])
+
+        first_future = Future()
+        first_future.set_result(first_index_response["Body"])
+        self._insert_futures(index=0, future=first_future)
+        self._content_etag = first_index_response.get("ETag")
         return content_size
 
     @property
@@ -122,7 +120,7 @@ class S3PrefetchReader(BasePrefetchReader):
         start, end = index * self._block_size, (index + 1) * self._block_size - 1
         response = self._fetch_response(start=start, end=end)
         etag = response.get("ETag", None)
-        if etag is not None and etag != self._content_etag:
+        if self._content_etag and etag and etag != self._content_etag:
            raise S3FileChangedError(
                 "File changed: %r, etag before: %s, after: %s"
                 % (self.name, self._content_etag, etag)
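The `_fetch_buffer` guard is easiest to read as a truth table: both the ETag recorded at open time and the ETag of the current block must exist before they are compared, so S3-compatible backends that omit ETags no longer trigger a spurious `S3FileChangedError`. A standalone restatement of the 4.0.3 condition (not megfile code):

def looks_changed(content_etag, etag):
    # the new condition from the diff above
    return bool(content_etag and etag and etag != content_etag)

assert looks_changed('"abc"', '"xyz"') is True   # genuine change: raise
assert looks_changed(None, '"xyz"') is False     # no ETag recorded at open time
assert looks_changed('"abc"', None) is False     # backend omitted ETag for this block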
megfile/s3_path.py
CHANGED

@@ -23,7 +23,7 @@ from megfile.config import (
     S3_MAX_RETRY_TIMES,
     WRITER_BLOCK_SIZE,
     WRITER_MAX_BUFFER_SIZE,
-    to_boolean,
+    parse_boolean,
 )
 from megfile.errors import (
     S3BucketNotFoundError,
@@ -253,12 +253,6 @@ def get_env_var(env_name: str, profile_name=None):
     return os.getenv(env_name.upper())
 
 
-def parse_boolean(value: Optional[str], default: bool = False) -> bool:
-    if value is None:
-        return default
-    return to_boolean(value)
-
-
 def get_access_token(profile_name=None):
     access_key = get_env_var("AWS_ACCESS_KEY_ID", profile_name=profile_name)
     secret_key = get_env_var("AWS_SECRET_ACCESS_KEY", profile_name=profile_name)
@@ -1003,13 +997,15 @@ def s3_buffered_open(
             profile_name=s3_url._profile_name,
         )
     else:
+        if max_buffer_size is None:
+            max_buffer_size = READER_MAX_BUFFER_SIZE
         reader = S3PrefetchReader(
             bucket,
             key,
             s3_client=client,
             max_retries=max_retries,
             max_workers=max_workers,
-            max_buffer_size=max_buffer_size or READER_MAX_BUFFER_SIZE,
+            max_buffer_size=max_buffer_size,
             block_forward=block_forward,
             block_size=block_size or READER_BLOCK_SIZE,
             profile_name=s3_url._profile_name,
@@ -1019,23 +1015,27 @@ def s3_buffered_open(
         return reader
 
     if limited_seekable:
+        if max_buffer_size is None:
+            max_buffer_size = WRITER_MAX_BUFFER_SIZE
         writer = S3LimitedSeekableWriter(
             bucket,
             key,
             s3_client=client,
             max_workers=max_workers,
             block_size=block_size or WRITER_BLOCK_SIZE,
-            max_buffer_size=max_buffer_size or WRITER_MAX_BUFFER_SIZE,
+            max_buffer_size=max_buffer_size,
             profile_name=s3_url._profile_name,
         )
     else:
+        if max_buffer_size is None:
+            max_buffer_size = WRITER_MAX_BUFFER_SIZE
         writer = S3BufferedWriter(
             bucket,
             key,
             s3_client=client,
             max_workers=max_workers,
             block_size=block_size or WRITER_BLOCK_SIZE,
-            max_buffer_size=max_buffer_size or WRITER_MAX_BUFFER_SIZE,
+            max_buffer_size=max_buffer_size,
             profile_name=s3_url._profile_name,
         )
     if buffered or _is_pickle(writer):
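The `max_buffer_size` edits matter when a caller passes a falsy but deliberate value such as 0: `value or DEFAULT` silently replaces it, while an explicit `None` check does not. A sketch of the difference, assuming (as the truncated removed lines suggest) that the old code used `or`:

WRITER_MAX_BUFFER_SIZE = 128 * 2**20  # stand-in for the configured default

max_buffer_size = 0  # caller explicitly asks for a zero buffer

old_value = max_buffer_size or WRITER_MAX_BUFFER_SIZE  # 0 gets overridden
new_value = max_buffer_size
if new_value is None:  # only None now means "use the default"
    new_value = WRITER_MAX_BUFFER_SIZE

assert old_value == WRITER_MAX_BUFFER_SIZE
assert new_value == 0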
megfile/smart.py
CHANGED

@@ -1006,7 +1006,8 @@ def smart_load_content(
         offset = -1
         if stop is not None:
             offset = stop - (start or 0) # start may be None
-
+            if offset < 0:
+                raise ValueError("stop should be greater than start")
         return fd.read(offset) # pytype: disable=bad-return-type
 
 
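With the added check, an inverted range fails fast instead of passing a negative offset to `fd.read()`. A usage sketch (the path is hypothetical):

from megfile.smart import smart_load_content

data = smart_load_content("s3://bucket/key.bin", start=0, stop=10)  # first 10 bytes

try:
    smart_load_content("s3://bucket/key.bin", start=10, stop=5)  # stop < start
except ValueError as exc:
    print(exc)  # "stop should be greater than start" in 4.0.3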
megfile/version.py
CHANGED

@@ -1 +1 @@
-VERSION = "4.0.1"
+VERSION = "4.0.3"
{megfile-4.0.1.dist-info → megfile-4.0.3.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: megfile
-Version: 4.0.1
+Version: 4.0.3
 Summary: Megvii file operation library
 Author-email: megvii <megfile@megvii.com>
 Project-URL: Homepage, https://github.com/megvii-research/megfile
@@ -212,6 +212,9 @@ You can get the configuration from `~/.config/megfile/aliases.conf`, like:
 protocol = s3+tos
 ```
 
+## Benchmark
+[](https://megvii-research.github.io/megfile/benchmark.html)
+[](https://megvii-research.github.io/megfile/benchmark.html)
 
 ## How to Contribute
 * We welcome everyone to contribute code to the `megfile` project, but the contributed code needs to meet the following conditions as much as possible:
{megfile-4.0.1.dist-info → megfile-4.0.3.dist-info}/RECORD
CHANGED

@@ -1,6 +1,6 @@
 megfile/__init__.py,sha256=7oEfu410CFKzDWZ9RjL5xEJ1gtkJkTfvPrL_7TWdJuY,7366
-megfile/cli.py,sha256=
-megfile/config.py,sha256=
+megfile/cli.py,sha256=e3VVr8oe8iR7L_PtpNtyqAvQL_WgJzzEz8oewSAlgX4,24887
+megfile/config.py,sha256=_6HiGeXEyk6RjPdjA0eEj1unq9iLJV_vQJBzQ-eHNvs,4185
 megfile/errors.py,sha256=a55qKQgyfiLmV-qnojUFzq2gu9JXpj3ZiC2qVaWyUTA,14160
 megfile/fs.py,sha256=bPGbapv41FzME87X3MhSNQRjNmHrI23FuFnjPT0ukQs,18239
 megfile/fs_path.py,sha256=ZK-po1xqhHocMb9Vrxf5K9tDx3zxQmGxNIHY3Z7Akp8,39085
@@ -11,14 +11,14 @@ megfile/http_path.py,sha256=c-xAu5wDxcTevmIUmrNEy-m-QiCfDJToaVI7y8SVIUI,14492
 megfile/interfaces.py,sha256=p4UvVZpeLx5djd6bqqDaygIx_s-_AxIVj-gudTch4JE,8467
 megfile/pathlike.py,sha256=vfuTBqSTIciRxkkqMfLfnBxWTEl9yns1yR8zgK4Raw0,31268
 megfile/s3.py,sha256=zqAegH5tijcztEKcfHXmOYhAR880nTxaAzc2O0JJnjc,16661
-megfile/s3_path.py,sha256=
+megfile/s3_path.py,sha256=lpUKy4n5DTf6hK6TvPhMjt_ZgdIXO4vcyK_VLaGkvhg,93395
 megfile/sftp.py,sha256=0ZnQlmhgvs7pYjFTcvvOyxTo2IUurE-hp1GN0hnIrdQ,26473
 megfile/sftp_path.py,sha256=4rcbn3wqcOEs71W6qWu1efcj6MZUgrZm6U0Jan-eB70,43604
-megfile/smart.py,sha256=
+megfile/smart.py,sha256=Ps8acPx6jeG1UJnRD8xL2aQjRp7IMW8sV6VFkMF0TQk,36910
 megfile/smart_path.py,sha256=Bqg95T2-XZrRXWhH7GT-jMCYzD7i1SIXdczQxtOxiPs,7583
 megfile/stdio.py,sha256=C_cGID_npthpwoPcsJMMEqqbVUPUnDxxJV9jLY2_D7c,635
 megfile/stdio_path.py,sha256=L8ODNIwO79UIv13YYc2OTr6f4XTv4ZPyvBeRk83-AjA,2700
-megfile/version.py,sha256=
+megfile/version.py,sha256=aJJATI8dDtIUcu59WVztQ3az97BkUrCKd86VVa6W_aM,19
 megfile/lib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 megfile/lib/base_prefetch_reader.py,sha256=6Dy2ZwlowqAvyUUa7bpQLCKOclmmUDhqEF-_CDDp0Og,13100
 megfile/lib/combine_reader.py,sha256=nKGAug29lOpNIZuLKu7_qVrJJRpXL_J4jxLglWbGJ1w,4808
@@ -36,17 +36,27 @@ megfile/lib/s3_cached_handler.py,sha256=X8PdeRC-BY6eSmOO5f2BeyjTPxyEwNtHgmAm9Vgm
 megfile/lib/s3_limited_seekable_writer.py,sha256=mUeoTS98LHluwDN7zxdCVcsjOGBT1bOYV8nRvi9QMGE,6212
 megfile/lib/s3_memory_handler.py,sha256=4uzBzz2jfRI_u6jl0CpOGAhpNJhDQo18FSAweauCUFs,4136
 megfile/lib/s3_pipe_handler.py,sha256=dm7NnZd1Ym5ABS1GvOQtoCJEO_CB8e6p4sUhLiid0go,3622
-megfile/lib/s3_prefetch_reader.py,sha256=
+megfile/lib/s3_prefetch_reader.py,sha256=R37-y_L9l8IKJhpT8HwBrZEbo2X72vCqEV6fvqPCBug,4437
 megfile/lib/s3_share_cache_reader.py,sha256=LVWKxHdHo0_zUIW4o8yqNvplqqwezUPeYEt02Vj-WNM,3754
 megfile/lib/shadow_handler.py,sha256=TntewlvIW9ZxCfmqASDQREHoiZ8v42faOe9sovQYQz0,2779
 megfile/lib/stdio_handler.py,sha256=IDdgENLQlhigEwkLL4zStueVSzdWg7xVcTF_koof_Ek,1987
 megfile/lib/url.py,sha256=ER32pWy9Q2MAk3TraAaNEBWIqUeBmLuM57ol2cs7-Ks,103
 megfile/utils/__init__.py,sha256=sATf_NlsSTYIMEiA8-gM6K1M-Q1K6_7rx2VM31hrqaA,10838
 megfile/utils/mutex.py,sha256=asb8opGLgK22RiuBJUnfsvB8LnMmodP8KzCVHKmQBWA,2561
-megfile-4.0.1.dist-info/LICENSE,sha256=
-megfile-4.0.1.dist-info/LICENSE.pyre,sha256=
-megfile-4.0.1.dist-info/METADATA,sha256=
-megfile-4.0.1.dist-info/WHEEL,sha256=
-megfile-4.0.1.dist-info/entry_points.txt,sha256=
-megfile-4.0.1.dist-info/top_level.txt,sha256=
-megfile-4.0.1.dist-info/RECORD,,
+scripts/benchmark/code/iopath_read.py,sha256=O1Qs3mpvD9S_XCuRH2A2FpGWxCOSw6qZvEBrtPeRL1E,661
+scripts/benchmark/code/iopath_write.py,sha256=Mm0efW1J09RJ_CK5i1xtG2hJuaaslikin8qVpuRFP_Q,704
+scripts/benchmark/code/megfile_read.py,sha256=sAMebUiMColHDv3JEkXplImAHvn_IF1-g3BIJxhcQYE,239
+scripts/benchmark/code/megfile_write.py,sha256=bzn-i2bGH4eRcsVvkhXK35KsQkX2v2oEsOJ0Ft5saj0,257
+scripts/benchmark/code/pyarrow_read.py,sha256=2QBGKjGV2Dvl2ukOntLSag33pF55l3tfZ2Z6dLUjovw,305
+scripts/benchmark/code/pyarrow_write.py,sha256=U1puLh-ljSXv772bZYAyhzmxhPOq4aR4j-QtwdM9hG0,328
+scripts/benchmark/code/s3fs_read.py,sha256=XiTA-qrYblUs-jQWXSnvNg5Wo722C_g47aMMfo5XJBY,380
+scripts/benchmark/code/s3fs_write.py,sha256=gdXKkWXYGjLJlRT_J64pJN85XvRg3bZexcAJQEMXwtw,402
+scripts/benchmark/code/smart_open_read.py,sha256=SA02jHwS9Y31yFtV9CoJcfND5dR0eA_HsGmGNUrpQls,515
+scripts/benchmark/code/smart_open_write.py,sha256=jDxFJdY97yNH889jz3pawBoei3yaqy8pEMvC_ymHFtM,537
+megfile-4.0.3.dist-info/LICENSE,sha256=WNHhf_5RCaeuKWyq_K39vmp9F28LxKsB4SpomwSZ2L0,11357
+megfile-4.0.3.dist-info/LICENSE.pyre,sha256=9lf5nT-5ZH25JijpYAequ0bl8E8z5JmZB1qrjiUMp84,1080
+megfile-4.0.3.dist-info/METADATA,sha256=At2cDDQKTUSj0QmjmPAjPRpqG_M5TZdefe-_2xMNjX8,9573
+megfile-4.0.3.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
+megfile-4.0.3.dist-info/entry_points.txt,sha256=M6ZWSSv5_5_QtIpZafy3vq7WuOJ_5dSGQQnEZbByt2Q,49
+megfile-4.0.3.dist-info/top_level.txt,sha256=fVg49lk5B9L7jyfWUXWxb0DDSuw5pbr0OU62Tvx8J8M,44
+megfile-4.0.3.dist-info/RECORD,,
scripts/benchmark/code/iopath_read.py
ADDED

@@ -0,0 +1,29 @@
+import os
+import time
+
+import boto3
+from iopath.common.file_io import PathManager
+from iopath.common.s3 import S3PathHandler
+
+times = 10240
+s3_path = "s3://bucketA/large.txt"
+
+start = time.time()
+
+path_manager = PathManager()
+
+session = boto3.Session(
+    aws_access_key_id=os.environ["AWS_ACCESS_KEY_ID"],
+    aws_secret_access_key=os.environ["AWS_SECRET_ACCESS_KEY"],
+)
+client = session.client("s3", endpoint_url=os.environ["OSS_ENDPOINT"])
+handler = S3PathHandler()
+handler.client = client
+
+path_manager.register_handler(handler)
+
+with path_manager.open(s3_path, "rb") as f:
+    for i in range(times):
+        f.read(1024 * 1024)
+
+print(time.time() - start)
scripts/benchmark/code/iopath_write.py
ADDED

@@ -0,0 +1,30 @@
+import os
+import time
+
+import boto3
+from iopath.common.file_io import PathManager
+from iopath.common.s3 import S3PathHandler
+
+times = 10240
+s3_path = "s3://bucketA/large.txt"
+block = b"1" * 1024 * 1024
+
+start = time.time()
+
+path_manager = PathManager()
+
+session = boto3.Session(
+    aws_access_key_id=os.environ["AWS_ACCESS_KEY_ID"],
+    aws_secret_access_key=os.environ["AWS_SECRET_ACCESS_KEY"],
+)
+client = session.client("s3", endpoint_url=os.environ["OSS_ENDPOINT"])
+handler = S3PathHandler()
+handler.client = client
+
+path_manager.register_handler(handler)
+
+with path_manager.open(s3_path, "wb") as f:
+    for i in range(times):
+        f.write(block)
+
+print(time.time() - start) # write 10GB 91.642
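scripts/benchmark/code/megfile_read.py is listed in the summary (+13), but its hunk is not shown in this extract. By symmetry with megfile_write.py below, it presumably reads the same 10240 one-MiB blocks; treat this 13-line sketch as a reconstruction, not the shipped file:

import time

from megfile import smart_open

times = 10240
s3_path = "s3://bucketA/large.txt"

start = time.time()
with smart_open(s3_path, "rb") as f:
    for i in range(times):
        f.read(1024 * 1024)

print(time.time() - start)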
scripts/benchmark/code/megfile_write.py
ADDED

@@ -0,0 +1,14 @@
+import time
+
+from megfile import smart_open
+
+times = 10240
+s3_path = "s3://bucketA/large.txt"
+block = b"1" * 1024 * 1024
+
+start = time.time()
+with smart_open(s3_path, "wb") as f:
+    for i in range(times):
+        f.write(block)
+
+print(time.time() - start)
scripts/benchmark/code/pyarrow_read.py
ADDED

@@ -0,0 +1,17 @@
+import os
+import time
+
+from pyarrow import fs
+
+times = 10240
+s3_path = "bucketA/large.txt"
+
+start = time.time()
+
+s3 = fs.S3FileSystem(endpoint_override=os.environ["OSS_ENDPOINT"])
+
+with s3.open_input_stream(s3_path) as f:
+    for i in range(times):
+        f.read(1024 * 1024)
+
+print(time.time() - start)
scripts/benchmark/code/pyarrow_write.py
ADDED

@@ -0,0 +1,18 @@
+import os
+import time
+
+from pyarrow import fs
+
+times = 10240
+block = b"1" * 1024 * 1024
+s3_path = "bucketA/large.txt"
+
+start = time.time()
+
+s3 = fs.S3FileSystem(endpoint_override=os.environ["OSS_ENDPOINT"])
+
+with s3.open_output_stream(s3_path) as f:
+    for i in range(times):
+        f.write(block)
+
+print(time.time() - start)
scripts/benchmark/code/s3fs_read.py
ADDED

@@ -0,0 +1,21 @@
+import os
+import time
+
+import s3fs
+
+times = 10240
+s3_path = "bucketA/large.txt"
+
+start = time.time()
+
+s3 = s3fs.S3FileSystem(
+    endpoint_url=os.environ["OSS_ENDPOINT"],
+    key=os.environ["AWS_ACCESS_KEY_ID"],
+    secret=os.environ["AWS_SECRET_ACCESS_KEY"],
+)
+
+with s3.open(s3_path, "rb") as f:
+    for i in range(times):
+        f.read(1024 * 1024)
+
+print(time.time() - start)
scripts/benchmark/code/s3fs_write.py
ADDED

@@ -0,0 +1,22 @@
+import os
+import time
+
+import s3fs
+
+times = 10240
+block = b"1" * 1024 * 1024
+s3_path = "bucketA/large.txt"
+
+start = time.time()
+
+s3 = s3fs.S3FileSystem(
+    endpoint_url=os.environ["OSS_ENDPOINT"],
+    key=os.environ["AWS_ACCESS_KEY_ID"],
+    secret=os.environ["AWS_SECRET_ACCESS_KEY"],
+)
+
+with s3.open(s3_path, "wb") as f:
+    for i in range(times):
+        f.write(block)
+
+print(time.time() - start)
scripts/benchmark/code/smart_open_read.py
ADDED

@@ -0,0 +1,25 @@
+import os
+import time
+
+import boto3
+from smart_open import open
+
+times = 10240
+s3_path = "s3://bucketA/large.txt"
+
+start = time.time()
+session = boto3.Session(
+    aws_access_key_id=os.environ["AWS_ACCESS_KEY_ID"],
+    aws_secret_access_key=os.environ["AWS_SECRET_ACCESS_KEY"],
+)
+with open(
+    s3_path,
+    "rb",
+    transport_params={
+        "client": session.client("s3", endpoint_url=os.environ["OSS_ENDPOINT"])
+    },
+) as f:
+    for i in range(times):
+        f.read(1024 * 1024)
+
+print(time.time() - start)
scripts/benchmark/code/smart_open_write.py
ADDED

@@ -0,0 +1,26 @@
+import os
+import time
+
+import boto3
+from smart_open import open
+
+times = 10240
+s3_path = "s3://bucketA/large.txt"
+block = b"1" * 1024 * 1024
+
+start = time.time()
+session = boto3.Session(
+    aws_access_key_id=os.environ["AWS_ACCESS_KEY_ID"],
+    aws_secret_access_key=os.environ["AWS_SECRET_ACCESS_KEY"],
+)
+with open(
+    s3_path,
+    "wb",
+    transport_params={
+        "client": session.client("s3", endpoint_url=os.environ["OSS_ENDPOINT"])
+    },
+) as f:
+    for i in range(times):
+        f.write(block)
+
+print(time.time() - start)
{megfile-4.0.1.dist-info → megfile-4.0.3.dist-info}/LICENSE
File without changes

{megfile-4.0.1.dist-info → megfile-4.0.3.dist-info}/LICENSE.pyre
File without changes

{megfile-4.0.1.dist-info → megfile-4.0.3.dist-info}/WHEEL
File without changes

{megfile-4.0.1.dist-info → megfile-4.0.3.dist-info}/entry_points.txt
File without changes