megfile 4.0.1__py3-none-any.whl → 4.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
megfile/cli.py CHANGED
@@ -1,5 +1,4 @@
 import configparser
-import logging
 import os
 import shutil
 import sys
@@ -10,7 +9,7 @@ from functools import partial
 import click
 from tqdm import tqdm

-from megfile.config import READER_BLOCK_SIZE, SFTP_HOST_KEY_POLICY
+from megfile.config import READER_BLOCK_SIZE, SFTP_HOST_KEY_POLICY, set_log_level
 from megfile.hdfs_path import DEFAULT_HDFS_TIMEOUT
 from megfile.interfaces import FileEntry
 from megfile.lib.glob import get_non_glob_dir, has_magic
@@ -45,29 +44,34 @@ from megfile.smart_path import SmartPath
 from megfile.utils import get_human_size
 from megfile.version import VERSION

-logging.basicConfig(level=logging.ERROR)
-logging.getLogger("megfile").setLevel(level=logging.INFO)
-DEBUG = False
+options = {}
+set_log_level()


 @click.group()
 @click.option("--debug", is_flag=True, help="Enable debug mode.")
-def cli(debug):
+@click.option(
+    "--log-level",
+    type=click.Choice(["DEBUG", "INFO", "WARNING", "ERROR"]),
+    help="Set logging level.",
+)
+def cli(debug, log_level):
     """
     Client for megfile.

     If you install megfile with ``--user``,
     you also need configure ``$HOME/.local/bin`` into ``$PATH``.
     """
-    global DEBUG
-    DEBUG = debug
+    options["debug"] = debug
+    options["log_level"] = log_level or ("DEBUG" if debug else "INFO")
+    set_log_level(options["log_level"])


 def safe_cli():  # pragma: no cover
     try:
         cli()
     except Exception as e:
-        if DEBUG:
+        if options.get("debug", False):
             raise
         else:
            click.echo(f"\n[{type(e).__name__}] {e}", err=True)
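The CLI change replaces the module-level `DEBUG` global with an `options` dict and adds a `--log-level` flag layered on top of the new `set_log_level` helper. A minimal sketch of exercising the flag through Click's test runner; the `ls` subcommand and bucket path here are placeholder arguments for illustration:

```python
from click.testing import CliRunner

from megfile.cli import cli

# --log-level overrides the default level (INFO, or DEBUG when --debug is set).
runner = CliRunner()
result = runner.invoke(cli, ["--log-level", "DEBUG", "ls", "s3://bucket/prefix"])
print(result.exit_code, result.output)
```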
megfile/config.py CHANGED
@@ -1,3 +1,4 @@
+import logging
 import os
 import typing as T

@@ -61,10 +62,24 @@ def parse_quantity(quantity: T.Union[str, int]) -> int:
     return number * (base**exponent)  # pytype: disable=bad-return-type


-def to_boolean(value):
+def parse_boolean(value: T.Optional[str], default: bool = False):
+    if value is None:
+        return default
     return value.lower() in ("true", "yes", "1")


+def set_log_level(level: T.Optional[T.Union[int, str]] = None):
+    logging.basicConfig(
+        level=logging.ERROR,
+        format=(
+            "%(asctime)s | %(levelname)-8s | "
+            "%(name)s:%(funcName)s:%(lineno)d - %(message)s"
+        ),
+    )
+    level = level or os.getenv("MEGFILE_LOG_LEVEL") or logging.INFO
+    logging.getLogger("megfile").setLevel(level)
+
+
 READER_BLOCK_SIZE = parse_quantity(os.getenv("MEGFILE_READER_BLOCK_SIZE") or 8 * 2**20)
 if READER_BLOCK_SIZE <= 0:
     raise ValueError(
@@ -87,8 +102,8 @@ WRITER_MAX_BUFFER_SIZE = parse_quantity(
 )
 DEFAULT_WRITER_BLOCK_AUTOSCALE = not os.getenv("MEGFILE_WRITER_BLOCK_SIZE")
 if os.getenv("MEGFILE_WRITER_BLOCK_AUTOSCALE"):
-    DEFAULT_WRITER_BLOCK_AUTOSCALE = to_boolean(
-        os.environ["MEGFILE_WRITER_BLOCK_AUTOSCALE"].lower()
+    DEFAULT_WRITER_BLOCK_AUTOSCALE = parse_boolean(
+        os.environ["MEGFILE_WRITER_BLOCK_AUTOSCALE"]
    )

 GLOBAL_MAX_WORKERS = int(os.getenv("MEGFILE_MAX_WORKERS") or 8)
@@ -114,3 +129,6 @@ SFTP_MAX_RETRY_TIMES = int(
 SFTP_HOST_KEY_POLICY = os.getenv("MEGFILE_SFTP_HOST_KEY_POLICY")

 HTTP_AUTH_HEADERS = ("Authorization", "Www-Authenticate", "Cookie", "Cookie2")
+
+if os.getenv("MEGFILE_LOG_LEVEL"):
+    set_log_level()
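Taken together, these hunks move None-tolerant boolean parsing and log-level setup into `megfile.config`, with `MEGFILE_LOG_LEVEL` honored both at import time and as the fallback inside `set_log_level`. A minimal sketch of how the two helpers behave, per the code above:

```python
import logging
import os

from megfile.config import parse_boolean, set_log_level

# parse_boolean now accepts None (returning the default) and handles
# case-folding itself, so callers no longer pre-lowercase or None-check.
assert parse_boolean(None, default=True) is True
assert parse_boolean("YES") is True
assert parse_boolean("0") is False

# set_log_level falls back to MEGFILE_LOG_LEVEL, then INFO; an explicit
# argument (string or int) always wins.
os.environ["MEGFILE_LOG_LEVEL"] = "WARNING"
set_log_level()               # megfile logger -> WARNING (from the env var)
set_log_level(logging.DEBUG)  # megfile logger -> DEBUG (explicit override)
```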
megfile/lib/s3_prefetch_reader.py CHANGED
@@ -62,31 +62,29 @@ class S3PrefetchReader(BasePrefetchReader):
         )

     def _get_content_size(self):
-        if self._block_capacity > 0:
-            try:
-                start, end = 0, self._block_size - 1
-                first_index_response = self._fetch_response(start=start, end=end)
-                if "ContentRange" in first_index_response:
-                    content_size = int(
-                        first_index_response["ContentRange"].split("/")[-1]
-                    )
-                else:
-                    # usually when read a file only have one block
-                    content_size = int(first_index_response["ContentLength"])
-            except S3InvalidRangeError:
-                # usually when read a empty file
-                # can use minio test empty file: https://hub.docker.com/r/minio/minio
-                first_index_response = self._fetch_response()
-                content_size = int(first_index_response["ContentLength"])
-
-            first_future = Future()
-            first_future.set_result(first_index_response["Body"])
-            self._insert_futures(index=0, future=first_future)
-            self._content_etag = first_index_response["ETag"]
-        else:
+        if self._block_capacity <= 0:
             response = self._client.head_object(Bucket=self._bucket, Key=self._key)
-            self._content_etag = response["ETag"]
-            content_size = int(response["ContentLength"])
+            self._content_etag = response.get("ETag")
+            return int(response["ContentLength"])
+
+        try:
+            start, end = 0, self._block_size - 1
+            first_index_response = self._fetch_response(start=start, end=end)
+            if "ContentRange" in first_index_response:
+                content_size = int(first_index_response["ContentRange"].split("/")[-1])
+            else:
+                # usually when read a file only have one block
+                content_size = int(first_index_response["ContentLength"])
+        except S3InvalidRangeError:
+            # usually when read a empty file
+            # can use minio test empty file: https://hub.docker.com/r/minio/minio
+            first_index_response = self._fetch_response()
+            content_size = int(first_index_response["ContentLength"])
+
+        first_future = Future()
+        first_future.set_result(first_index_response["Body"])
+        self._insert_futures(index=0, future=first_future)
+        self._content_etag = first_index_response.get("ETag")
         return content_size

     @property
@@ -122,7 +120,7 @@ class S3PrefetchReader(BasePrefetchReader):
         start, end = index * self._block_size, (index + 1) * self._block_size - 1
         response = self._fetch_response(start=start, end=end)
         etag = response.get("ETag", None)
-        if etag is not None and etag != self._content_etag:
+        if self._content_etag and etag and etag != self._content_etag:
             raise S3FileChangedError(
                 "File changed: %r, etag before: %s, after: %s"
                 % (self.name, self._content_etag, etag)
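The net effect of these hunks is a more tolerant change-detection rule: ETags are now read with `.get()`, and a missing ETag on either side (some S3-compatible endpoints omit it) no longer triggers `S3FileChangedError`. A distilled restatement of the new condition, with the helper name being illustrative rather than megfile's API:

```python
def file_changed(content_etag, block_etag):
    # Mirrors the updated check in S3PrefetchReader: only flag a change when
    # both etags are known and they disagree.
    return bool(content_etag) and bool(block_etag) and block_etag != content_etag

assert file_changed("abc", "xyz") is True    # both known, different: raise
assert file_changed("abc", "abc") is False   # unchanged
assert file_changed(None, "xyz") is False    # no baseline etag: previously raised
assert file_changed("abc", None) is False    # response without ETag: skipped in both
```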
megfile/s3_path.py CHANGED
@@ -23,7 +23,7 @@ from megfile.config import (
     S3_MAX_RETRY_TIMES,
     WRITER_BLOCK_SIZE,
     WRITER_MAX_BUFFER_SIZE,
-    to_boolean,
+    parse_boolean,
 )
 from megfile.errors import (
     S3BucketNotFoundError,
@@ -253,12 +253,6 @@ def get_env_var(env_name: str, profile_name=None):
     return os.getenv(env_name.upper())


-def parse_boolean(value: Optional[str], default: bool = False) -> bool:
-    if value is None:
-        return default
-    return to_boolean(value)
-
-
 def get_access_token(profile_name=None):
     access_key = get_env_var("AWS_ACCESS_KEY_ID", profile_name=profile_name)
     secret_key = get_env_var("AWS_SECRET_ACCESS_KEY", profile_name=profile_name)
@@ -1003,13 +997,15 @@ def s3_buffered_open(
             profile_name=s3_url._profile_name,
         )
     else:
+        if max_buffer_size is None:
+            max_buffer_size = READER_MAX_BUFFER_SIZE
         reader = S3PrefetchReader(
             bucket,
             key,
             s3_client=client,
             max_retries=max_retries,
             max_workers=max_workers,
-            max_buffer_size=max_buffer_size or READER_MAX_BUFFER_SIZE,
+            max_buffer_size=max_buffer_size,
             block_forward=block_forward,
             block_size=block_size or READER_BLOCK_SIZE,
             profile_name=s3_url._profile_name,
@@ -1019,23 +1015,27 @@ def s3_buffered_open(
         return reader

     if limited_seekable:
+        if max_buffer_size is None:
+            max_buffer_size = WRITER_MAX_BUFFER_SIZE
         writer = S3LimitedSeekableWriter(
             bucket,
             key,
             s3_client=client,
             max_workers=max_workers,
             block_size=block_size or WRITER_BLOCK_SIZE,
-            max_buffer_size=max_buffer_size or WRITER_MAX_BUFFER_SIZE,
+            max_buffer_size=max_buffer_size,
             profile_name=s3_url._profile_name,
         )
     else:
+        if max_buffer_size is None:
+            max_buffer_size = WRITER_MAX_BUFFER_SIZE
         writer = S3BufferedWriter(
             bucket,
             key,
             s3_client=client,
             max_workers=max_workers,
             block_size=block_size or WRITER_BLOCK_SIZE,
-            max_buffer_size=max_buffer_size or WRITER_MAX_BUFFER_SIZE,
+            max_buffer_size=max_buffer_size,
             profile_name=s3_url._profile_name,
         )
     if buffered or _is_pickle(writer):
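The `s3_buffered_open` hunks all make the same behavioral fix: the default buffer size now applies only when `max_buffer_size` is `None`, so an explicit `0` is respected instead of being silently replaced by the truthiness of `or`. A minimal sketch of the difference, using a stand-in constant value:

```python
READER_MAX_BUFFER_SIZE = 128 * 2**20  # stand-in value for illustration

def old_default(max_buffer_size):
    # `or` treats every falsy value as "unset", so 0 became the default too
    return max_buffer_size or READER_MAX_BUFFER_SIZE

def new_default(max_buffer_size):
    # only None means "unset"; 0 is preserved as an explicit choice
    if max_buffer_size is None:
        max_buffer_size = READER_MAX_BUFFER_SIZE
    return max_buffer_size

assert old_default(0) == READER_MAX_BUFFER_SIZE
assert new_default(0) == 0
assert new_default(None) == READER_MAX_BUFFER_SIZE
```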
megfile/smart.py CHANGED
@@ -1006,7 +1006,8 @@ def smart_load_content(
     offset = -1
     if stop is not None:
         offset = stop - (start or 0)  # start may be None
-        assert offset >= 0, "stop should be greater than start"
+        if offset < 0:
+            raise ValueError("stop should be greater than start")
     return fd.read(offset)  # pytype: disable=bad-return-type


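Swapping the `assert` for an explicit `raise` matters for two reasons: asserts are compiled out under `python -O`, and `ValueError` is the conventional, catchable type for bad arguments. A small sketch of the resulting behavior; the function name is illustrative, not megfile's API:

```python
def check_range(start, stop):
    # Same validation as the new smart_load_content code path.
    offset = stop - (start or 0)
    if offset < 0:
        raise ValueError("stop should be greater than start")
    return offset

try:
    check_range(10, 5)
except ValueError as exc:
    print(exc)  # -> stop should be greater than start
```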
megfile/version.py CHANGED
@@ -1 +1 @@
-VERSION = "4.0.1"
+VERSION = "4.0.3"
megfile-{4.0.1 → 4.0.3}.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: megfile
-Version: 4.0.1
+Version: 4.0.3
 Summary: Megvii file operation library
 Author-email: megvii <megfile@megvii.com>
 Project-URL: Homepage, https://github.com/megvii-research/megfile
@@ -212,6 +212,9 @@ You can get the configuration from `~/.config/megfile/aliases.conf`, like:
 protocol = s3+tos
 ```

+## Benchmark
+[![10GiB](https://github.com/megvii-research/megfile/blob/main/scripts/benchmark/10GiB.png?raw=true)](https://megvii-research.github.io/megfile/benchmark.html)
+[![10MiB](https://github.com/megvii-research/megfile/blob/main/scripts/benchmark/10MiB.png?raw=true)](https://megvii-research.github.io/megfile/benchmark.html)

 ## How to Contribute
 * We welcome everyone to contribute code to the `megfile` project, but the contributed code needs to meet the following conditions as much as possible:
megfile-{4.0.1 → 4.0.3}.dist-info/RECORD CHANGED
@@ -1,6 +1,6 @@
 megfile/__init__.py,sha256=7oEfu410CFKzDWZ9RjL5xEJ1gtkJkTfvPrL_7TWdJuY,7366
-megfile/cli.py,sha256=FuwxjU7-Z_pIU8sX8Jf-0QNSLCR8JqLD4fqXyj7TrQI,24704
-megfile/config.py,sha256=4aXs2fw59ep1a6PfMT3Vq-AaOxTwaS8yfgMNcb3Cvw8,3652
+megfile/cli.py,sha256=e3VVr8oe8iR7L_PtpNtyqAvQL_WgJzzEz8oewSAlgX4,24887
+megfile/config.py,sha256=_6HiGeXEyk6RjPdjA0eEj1unq9iLJV_vQJBzQ-eHNvs,4185
 megfile/errors.py,sha256=a55qKQgyfiLmV-qnojUFzq2gu9JXpj3ZiC2qVaWyUTA,14160
 megfile/fs.py,sha256=bPGbapv41FzME87X3MhSNQRjNmHrI23FuFnjPT0ukQs,18239
 megfile/fs_path.py,sha256=ZK-po1xqhHocMb9Vrxf5K9tDx3zxQmGxNIHY3Z7Akp8,39085
@@ -11,14 +11,14 @@ megfile/http_path.py,sha256=c-xAu5wDxcTevmIUmrNEy-m-QiCfDJToaVI7y8SVIUI,14492
 megfile/interfaces.py,sha256=p4UvVZpeLx5djd6bqqDaygIx_s-_AxIVj-gudTch4JE,8467
 megfile/pathlike.py,sha256=vfuTBqSTIciRxkkqMfLfnBxWTEl9yns1yR8zgK4Raw0,31268
 megfile/s3.py,sha256=zqAegH5tijcztEKcfHXmOYhAR880nTxaAzc2O0JJnjc,16661
-megfile/s3_path.py,sha256=oBA9GdOseEtQJmh7LMDOf1sGamsEERs6Sm1jHpdksO8,93343
+megfile/s3_path.py,sha256=lpUKy4n5DTf6hK6TvPhMjt_ZgdIXO4vcyK_VLaGkvhg,93395
 megfile/sftp.py,sha256=0ZnQlmhgvs7pYjFTcvvOyxTo2IUurE-hp1GN0hnIrdQ,26473
 megfile/sftp_path.py,sha256=4rcbn3wqcOEs71W6qWu1efcj6MZUgrZm6U0Jan-eB70,43604
-megfile/smart.py,sha256=h42U8WQvCy9UXOX0X3V1eOZ500wDeZy8YKfr-uygTaA,36881
+megfile/smart.py,sha256=Ps8acPx6jeG1UJnRD8xL2aQjRp7IMW8sV6VFkMF0TQk,36910
 megfile/smart_path.py,sha256=Bqg95T2-XZrRXWhH7GT-jMCYzD7i1SIXdczQxtOxiPs,7583
 megfile/stdio.py,sha256=C_cGID_npthpwoPcsJMMEqqbVUPUnDxxJV9jLY2_D7c,635
 megfile/stdio_path.py,sha256=L8ODNIwO79UIv13YYc2OTr6f4XTv4ZPyvBeRk83-AjA,2700
-megfile/version.py,sha256=AvvDxCXX9bKN1IZtn0pXhbfwx56BkY-VhwBBiNl-1JE,19
+megfile/version.py,sha256=aJJATI8dDtIUcu59WVztQ3az97BkUrCKd86VVa6W_aM,19
 megfile/lib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 megfile/lib/base_prefetch_reader.py,sha256=6Dy2ZwlowqAvyUUa7bpQLCKOclmmUDhqEF-_CDDp0Og,13100
 megfile/lib/combine_reader.py,sha256=nKGAug29lOpNIZuLKu7_qVrJJRpXL_J4jxLglWbGJ1w,4808
@@ -36,17 +36,27 @@ megfile/lib/s3_cached_handler.py,sha256=X8PdeRC-BY6eSmOO5f2BeyjTPxyEwNtHgmAm9Vgm
 megfile/lib/s3_limited_seekable_writer.py,sha256=mUeoTS98LHluwDN7zxdCVcsjOGBT1bOYV8nRvi9QMGE,6212
 megfile/lib/s3_memory_handler.py,sha256=4uzBzz2jfRI_u6jl0CpOGAhpNJhDQo18FSAweauCUFs,4136
 megfile/lib/s3_pipe_handler.py,sha256=dm7NnZd1Ym5ABS1GvOQtoCJEO_CB8e6p4sUhLiid0go,3622
-megfile/lib/s3_prefetch_reader.py,sha256=ujqQMXAxsaNtWA_VdqfwqcEKBJFFv0GauAaj2WGLTCs,4552
+megfile/lib/s3_prefetch_reader.py,sha256=R37-y_L9l8IKJhpT8HwBrZEbo2X72vCqEV6fvqPCBug,4437
 megfile/lib/s3_share_cache_reader.py,sha256=LVWKxHdHo0_zUIW4o8yqNvplqqwezUPeYEt02Vj-WNM,3754
 megfile/lib/shadow_handler.py,sha256=TntewlvIW9ZxCfmqASDQREHoiZ8v42faOe9sovQYQz0,2779
 megfile/lib/stdio_handler.py,sha256=IDdgENLQlhigEwkLL4zStueVSzdWg7xVcTF_koof_Ek,1987
 megfile/lib/url.py,sha256=ER32pWy9Q2MAk3TraAaNEBWIqUeBmLuM57ol2cs7-Ks,103
 megfile/utils/__init__.py,sha256=sATf_NlsSTYIMEiA8-gM6K1M-Q1K6_7rx2VM31hrqaA,10838
 megfile/utils/mutex.py,sha256=asb8opGLgK22RiuBJUnfsvB8LnMmodP8KzCVHKmQBWA,2561
-megfile-4.0.1.dist-info/LICENSE,sha256=WNHhf_5RCaeuKWyq_K39vmp9F28LxKsB4SpomwSZ2L0,11357
-megfile-4.0.1.dist-info/LICENSE.pyre,sha256=9lf5nT-5ZH25JijpYAequ0bl8E8z5JmZB1qrjiUMp84,1080
-megfile-4.0.1.dist-info/METADATA,sha256=GVa381PxFA9AaKcRRWpUdxvOwBxBSbPhZud6_HP_1eE,9240
-megfile-4.0.1.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
-megfile-4.0.1.dist-info/entry_points.txt,sha256=M6ZWSSv5_5_QtIpZafy3vq7WuOJ_5dSGQQnEZbByt2Q,49
-megfile-4.0.1.dist-info/top_level.txt,sha256=IaHHoRXeemLL6kTM5YuC3H0UyOnTdZH9J324TkeBneo,36
-megfile-4.0.1.dist-info/RECORD,,
+scripts/benchmark/code/iopath_read.py,sha256=O1Qs3mpvD9S_XCuRH2A2FpGWxCOSw6qZvEBrtPeRL1E,661
+scripts/benchmark/code/iopath_write.py,sha256=Mm0efW1J09RJ_CK5i1xtG2hJuaaslikin8qVpuRFP_Q,704
+scripts/benchmark/code/megfile_read.py,sha256=sAMebUiMColHDv3JEkXplImAHvn_IF1-g3BIJxhcQYE,239
+scripts/benchmark/code/megfile_write.py,sha256=bzn-i2bGH4eRcsVvkhXK35KsQkX2v2oEsOJ0Ft5saj0,257
+scripts/benchmark/code/pyarrow_read.py,sha256=2QBGKjGV2Dvl2ukOntLSag33pF55l3tfZ2Z6dLUjovw,305
+scripts/benchmark/code/pyarrow_write.py,sha256=U1puLh-ljSXv772bZYAyhzmxhPOq4aR4j-QtwdM9hG0,328
+scripts/benchmark/code/s3fs_read.py,sha256=XiTA-qrYblUs-jQWXSnvNg5Wo722C_g47aMMfo5XJBY,380
+scripts/benchmark/code/s3fs_write.py,sha256=gdXKkWXYGjLJlRT_J64pJN85XvRg3bZexcAJQEMXwtw,402
+scripts/benchmark/code/smart_open_read.py,sha256=SA02jHwS9Y31yFtV9CoJcfND5dR0eA_HsGmGNUrpQls,515
+scripts/benchmark/code/smart_open_write.py,sha256=jDxFJdY97yNH889jz3pawBoei3yaqy8pEMvC_ymHFtM,537
+megfile-4.0.3.dist-info/LICENSE,sha256=WNHhf_5RCaeuKWyq_K39vmp9F28LxKsB4SpomwSZ2L0,11357
+megfile-4.0.3.dist-info/LICENSE.pyre,sha256=9lf5nT-5ZH25JijpYAequ0bl8E8z5JmZB1qrjiUMp84,1080
+megfile-4.0.3.dist-info/METADATA,sha256=At2cDDQKTUSj0QmjmPAjPRpqG_M5TZdefe-_2xMNjX8,9573
+megfile-4.0.3.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
+megfile-4.0.3.dist-info/entry_points.txt,sha256=M6ZWSSv5_5_QtIpZafy3vq7WuOJ_5dSGQQnEZbByt2Q,49
+megfile-4.0.3.dist-info/top_level.txt,sha256=fVg49lk5B9L7jyfWUXWxb0DDSuw5pbr0OU62Tvx8J8M,44
+megfile-4.0.3.dist-info/RECORD,,
megfile-{4.0.1 → 4.0.3}.dist-info/top_level.txt CHANGED
@@ -3,3 +3,4 @@ docs
 html_cov
 html_doc
 megfile
+scripts
scripts/benchmark/code/iopath_read.py ADDED
@@ -0,0 +1,29 @@
+import os
+import time
+
+import boto3
+from iopath.common.file_io import PathManager
+from iopath.common.s3 import S3PathHandler
+
+times = 10240
+s3_path = "s3://bucketA/large.txt"
+
+start = time.time()
+
+path_manager = PathManager()
+
+session = boto3.Session(
+    aws_access_key_id=os.environ["AWS_ACCESS_KEY_ID"],
+    aws_secret_access_key=os.environ["AWS_SECRET_ACCESS_KEY"],
+)
+client = session.client("s3", endpoint_url=os.environ["OSS_ENDPOINT"])
+handler = S3PathHandler()
+handler.client = client
+
+path_manager.register_handler(handler)
+
+with path_manager.open(s3_path, "rb") as f:
+    for i in range(times):
+        f.read(1024 * 1024)
+
+print(time.time() - start)
scripts/benchmark/code/iopath_write.py ADDED
@@ -0,0 +1,30 @@
+import os
+import time
+
+import boto3
+from iopath.common.file_io import PathManager
+from iopath.common.s3 import S3PathHandler
+
+times = 10240
+s3_path = "s3://bucketA/large.txt"
+block = b"1" * 1024 * 1024
+
+start = time.time()
+
+path_manager = PathManager()
+
+session = boto3.Session(
+    aws_access_key_id=os.environ["AWS_ACCESS_KEY_ID"],
+    aws_secret_access_key=os.environ["AWS_SECRET_ACCESS_KEY"],
+)
+client = session.client("s3", endpoint_url=os.environ["OSS_ENDPOINT"])
+handler = S3PathHandler()
+handler.client = client
+
+path_manager.register_handler(handler)
+
+with path_manager.open(s3_path, "wb") as f:
+    for i in range(times):
+        f.write(block)
+
+print(time.time() - start)  # write 10GB 91.642
scripts/benchmark/code/megfile_read.py ADDED
@@ -0,0 +1,13 @@
+import time
+
+from megfile import smart_open
+
+times = 10240
+s3_path = "s3://bucketA/large.txt"
+
+start = time.time()
+with smart_open(s3_path, "rb") as f:
+    for i in range(times):
+        f.read(1024 * 1024 * 1)
+
+print(time.time() - start)
scripts/benchmark/code/megfile_write.py ADDED
@@ -0,0 +1,14 @@
+import time
+
+from megfile import smart_open
+
+times = 10240
+s3_path = "s3://bucketA/large.txt"
+block = b"1" * 1024 * 1024
+
+start = time.time()
+with smart_open(s3_path, "wb") as f:
+    for i in range(times):
+        f.write(block)
+
+print(time.time() - start)
scripts/benchmark/code/pyarrow_read.py ADDED
@@ -0,0 +1,17 @@
+import os
+import time
+
+from pyarrow import fs
+
+times = 10240
+s3_path = "bucketA/large.txt"
+
+start = time.time()
+
+s3 = fs.S3FileSystem(endpoint_override=os.environ["OSS_ENDPOINT"])
+
+with s3.open_input_stream(s3_path) as f:
+    for i in range(times):
+        f.read(1024 * 1024)
+
+print(time.time() - start)
scripts/benchmark/code/pyarrow_write.py ADDED
@@ -0,0 +1,18 @@
+import os
+import time
+
+from pyarrow import fs
+
+times = 10240
+block = b"1" * 1024 * 1024
+s3_path = "bucketA/large.txt"
+
+start = time.time()
+
+s3 = fs.S3FileSystem(endpoint_override=os.environ["OSS_ENDPOINT"])
+
+with s3.open_output_stream(s3_path) as f:
+    for i in range(times):
+        f.write(block)
+
+print(time.time() - start)
scripts/benchmark/code/s3fs_read.py ADDED
@@ -0,0 +1,21 @@
+import os
+import time
+
+import s3fs
+
+times = 10240
+s3_path = "bucketA/large.txt"
+
+start = time.time()
+
+s3 = s3fs.S3FileSystem(
+    endpoint_url=os.environ["OSS_ENDPOINT"],
+    key=os.environ["AWS_ACCESS_KEY_ID"],
+    secret=os.environ["AWS_SECRET_ACCESS_KEY"],
+)
+
+with s3.open(s3_path, "rb") as f:
+    for i in range(times):
+        f.read(1024 * 1024)
+
+print(time.time() - start)
scripts/benchmark/code/s3fs_write.py ADDED
@@ -0,0 +1,22 @@
+import os
+import time
+
+import s3fs
+
+times = 10240
+block = b"1" * 1024 * 1024
+s3_path = "bucketA/large.txt"
+
+start = time.time()
+
+s3 = s3fs.S3FileSystem(
+    endpoint_url=os.environ["OSS_ENDPOINT"],
+    key=os.environ["AWS_ACCESS_KEY_ID"],
+    secret=os.environ["AWS_SECRET_ACCESS_KEY"],
+)
+
+with s3.open(s3_path, "wb") as f:
+    for i in range(times):
+        f.write(block)
+
+print(time.time() - start)
scripts/benchmark/code/smart_open_read.py ADDED
@@ -0,0 +1,25 @@
+import os
+import time
+
+import boto3
+from smart_open import open
+
+times = 10240
+s3_path = "s3://bucketA/large.txt"
+
+start = time.time()
+session = boto3.Session(
+    aws_access_key_id=os.environ["AWS_ACCESS_KEY_ID"],
+    aws_secret_access_key=os.environ["AWS_SECRET_ACCESS_KEY"],
+)
+with open(
+    s3_path,
+    "rb",
+    transport_params={
+        "client": session.client("s3", endpoint_url=os.environ["OSS_ENDPOINT"])
+    },
+) as f:
+    for i in range(times):
+        f.read(1024 * 1024)
+
+print(time.time() - start)
scripts/benchmark/code/smart_open_write.py ADDED
@@ -0,0 +1,26 @@
+import os
+import time
+
+import boto3
+from smart_open import open
+
+times = 10240
+s3_path = "s3://bucketA/large.txt"
+block = b"1" * 1024 * 1024
+
+start = time.time()
+session = boto3.Session(
+    aws_access_key_id=os.environ["AWS_ACCESS_KEY_ID"],
+    aws_secret_access_key=os.environ["AWS_SECRET_ACCESS_KEY"],
+)
+with open(
+    s3_path,
+    "wb",
+    transport_params={
+        "client": session.client("s3", endpoint_url=os.environ["OSS_ENDPOINT"])
+    },
+) as f:
+    for i in range(times):
+        f.write(block)
+
+print(time.time() - start)