megfile 4.1.3__py3-none-any.whl → 4.1.3.post2__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in their public registry.
megfile/s3_path.py CHANGED
@@ -366,12 +366,9 @@ def _list_all_buckets(profile_name: Optional[str] = None) -> List[str]:
     return [content["Name"] for content in response["Buckets"]]
 
 
-def _parse_s3_url_ignore_brace(s3_url: str) -> Tuple[str, str]:
-    s3_url = fspath(s3_url)
-    s3_scheme, right_part = s3_url[:5], s3_url[5:]
-    if s3_scheme != "s3://":
-        raise ValueError("Not a s3 url: %r" % s3_url)
+def _parse_s3_url_ignore_brace(s3_pathname: str) -> Tuple[str, str]:
     left_brace = False
+    right_part = s3_pathname.split("://", maxsplit=1)[1]
     for current_index, current_character in enumerate(right_part):
         if current_character == "/" and left_brace is False:
             return right_part[:current_index], right_part[current_index + 1 :]
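Note that the simplified parser no longer validates the scheme, so any "scheme://" prefix passes through. A minimal stand-alone sketch of the brace-aware split (hypothetical name; the loop body that toggles left_brace on "{" and "}" is unchanged context not shown in this hunk, so it is assumed here):

    from typing import Tuple

    def parse_ignore_brace(right_part: str) -> Tuple[str, str]:
        # Assumed behavior of the elided loop body: braces suppress splitting.
        left_brace = False
        for current_index, current_character in enumerate(right_part):
            if current_character == "/" and left_brace is False:
                # First slash outside braces separates bucket from key.
                return right_part[:current_index], right_part[current_index + 1:]
            elif current_character == "{":
                left_brace = True
            elif current_character == "}":
                left_brace = False
        return right_part, ""

    assert parse_ignore_brace("{bucket-a,bucket-b}/dir/key") == ("{bucket-a,bucket-b}", "dir/key")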
@@ -382,9 +379,14 @@ def _parse_s3_url_ignore_brace(s3_url: str) -> Tuple[str, str]:
     return right_part, ""
 
 
-def _group_s3path_by_bucket(
-    s3_pathname: str, profile_name: Optional[str] = None
-) -> List[str]:
+def _parse_s3_url_profile(s3_pathname: str) -> Tuple[str, Optional[str]]:
+    protocol = s3_pathname.split("://", maxsplit=1)[0]
+    profile_name = protocol[3:] if protocol.startswith("s3+") else None
+    return protocol, profile_name
+
+
+def _group_s3path_by_bucket(s3_pathname: str) -> List[str]:
+    protocol, profile_name = _parse_s3_url_profile(s3_pathname)
     bucket, key = _parse_s3_url_ignore_brace(s3_pathname)
     if not bucket:
         if not key:
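The new _parse_s3_url_profile helper is small enough to verify in isolation. A sketch with its body copied verbatim from the hunk above and hypothetical example URLs:

    from typing import Optional, Tuple

    def _parse_s3_url_profile(s3_pathname: str) -> Tuple[str, Optional[str]]:
        # The scheme before "://" is the protocol; an "s3+<name>" scheme
        # carries a profile name, a plain "s3" scheme does not.
        protocol = s3_pathname.split("://", maxsplit=1)[0]
        profile_name = protocol[3:] if protocol.startswith("s3+") else None
        return protocol, profile_name

    assert _parse_s3_url_profile("s3://bucket/key") == ("s3", None)
    assert _parse_s3_url_profile("s3+dev://bucket/key") == ("s3+dev", "dev")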
@@ -395,8 +397,8 @@ def _group_s3path_by_bucket(
 
     def generate_s3_path(bucket: str, key: str) -> str:
         if key:
-            return "s3://%s/%s" % (bucket, key)
-        return "s3://%s%s" % (bucket, "/" if s3_pathname.endswith("/") else "")
+            return f"{protocol}://{bucket}/{key}"
+        return f"{protocol}://{bucket}{'/' if s3_pathname.endswith('/') else ''}"
 
     all_bucket = lru_cache(maxsize=1)(_list_all_buckets)
     for bucket_name in ungloblize(bucket):
@@ -418,23 +420,11 @@ def _group_s3path_by_bucket(
 
 
 def _s3_split_magic_ignore_brace(s3_pathname: str) -> Tuple[str, str]:
-    if not s3_pathname:
-        raise ValueError("s3_pathname: %s", s3_pathname)
-
-    has_protocol = False
-    if s3_pathname.startswith("s3://"):
-        has_protocol = True
-        s3_pathname = s3_pathname[5:]
-
-    has_delimiter = False
-    if s3_pathname.endswith("/"):
-        has_delimiter = True
-        s3_pathname = s3_pathname[:-1]
-
-    normal_parts = []
-    magic_parts = []
-    left_brace = False
-    left_index = 0
+    left_brace, left_index = False, 0
+    normal_parts, magic_parts = [], []
+    s3_pathname_with_suffix = s3_pathname
+    s3_pathname = s3_pathname.rstrip("/")
+    suffix = (len(s3_pathname_with_suffix) - len(s3_pathname)) * "/"
     for current_index, current_character in enumerate(s3_pathname):
         if current_character == "/" and left_brace is False:
             if has_magic_ignore_brace(s3_pathname[left_index:current_index]):
@@ -454,18 +444,13 @@ def _s3_split_magic_ignore_brace(s3_pathname: str) -> Tuple[str, str]:
         magic_parts.append(s3_pathname[left_index:])
     else:
         normal_parts.append(s3_pathname[left_index:])
-
-    if has_protocol and normal_parts:
-        normal_parts.insert(0, "s3:/")
-    elif has_protocol:
-        magic_parts.insert(0, "s3:/")
-
-    if has_delimiter and magic_parts:
-        magic_parts.append("")
-    elif has_delimiter:
-        normal_parts.append("")
-
-    return "/".join(normal_parts), "/".join(magic_parts)
+    top_dir, magic_part = "/".join(normal_parts), "/".join(magic_parts)
+    if suffix:
+        if magic_part:
+            magic_part += suffix
+        else:
+            top_dir += suffix
+    return top_dir, magic_part
 
 
 def _group_s3path_by_prefix(s3_pathname: str) -> List[str]:
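Net effect of the two hunks above: instead of tracking has_protocol/has_delimiter flags and re-inserting "s3:/" fragments, the rewrite leaves the protocol in place and only strips, then re-appends, trailing slashes. A sketch of the suffix bookkeeping with a hypothetical path:

    # Trailing "/" characters are stripped before the split and re-attached
    # afterwards (to the magic part if there is one, else to the top dir).
    s3_pathname_with_suffix = "s3://bucket/{a,b}/dir//"  # hypothetical input
    s3_pathname = s3_pathname_with_suffix.rstrip("/")
    suffix = (len(s3_pathname_with_suffix) - len(s3_pathname)) * "/"
    assert s3_pathname == "s3://bucket/{a,b}/dir"
    assert suffix == "//"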
@@ -493,17 +478,15 @@ def _become_prefix(prefix: str) -> str:
 def _s3_split_magic(s3_pathname: str) -> Tuple[str, str]:
     if not has_magic(s3_pathname):
         return s3_pathname, ""
-    delimiter = "/"
     normal_parts = []
     magic_parts = []
-    all_parts = s3_pathname.split(delimiter)
+    all_parts = s3_pathname.split("/")
     for i, part in enumerate(all_parts):
-        if not has_magic(part):
-            normal_parts.append(part)
-        else:
+        if has_magic(part):
             magic_parts = all_parts[i:]
             break
-    return delimiter.join(normal_parts), delimiter.join(magic_parts)
+        normal_parts.append(part)
+    return "/".join(normal_parts), "/".join(magic_parts)
 
 
 def _list_objects_recursive(s3_client, bucket: str, prefix: str, delimiter: str = ""):
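The restructured _s3_split_magic reads top-down: collect non-magic components, then hand everything from the first magic component onward to the wildcard side. A self-contained sketch (the real has_magic comes from megfile's glob helpers; the regex stand-in here is an assumption):

    import re
    from typing import Tuple

    def has_magic(s: str) -> bool:
        # Stand-in for megfile's helper: treat *, ? and [ as glob magic.
        return re.search(r"[*?\[]", s) is not None

    def _s3_split_magic(s3_pathname: str) -> Tuple[str, str]:
        if not has_magic(s3_pathname):
            return s3_pathname, ""
        normal_parts = []
        magic_parts = []
        all_parts = s3_pathname.split("/")
        for i, part in enumerate(all_parts):
            if has_magic(part):
                # Everything from the first magic component onward is wildcard.
                magic_parts = all_parts[i:]
                break
            normal_parts.append(part)
        return "/".join(normal_parts), "/".join(magic_parts)

    assert _s3_split_magic("s3://bucket/logs/2024-*/part") == ("s3://bucket/logs", "2024-*/part")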
@@ -574,12 +557,12 @@ def _s3_glob_stat_single_path(
     recursive: bool = True,
     missing_ok: bool = True,
     followlinks: bool = False,
-    profile_name: Optional[str] = None,
 ) -> Iterator[FileEntry]:
     s3_pathname = fspath(s3_pathname)
     if not recursive:
         # If not recursive, replace ** with *
         s3_pathname = re.sub(r"\*{2,}", "*", s3_pathname)
+    protocol, profile_name = _parse_s3_url_profile(s3_pathname)
     top_dir, wildcard_part = _s3_split_magic(s3_pathname)
     search_dir = wildcard_part.endswith("/")
 
@@ -596,7 +579,7 @@ def _s3_glob_stat_single_path(
         if not has_magic(_s3_pathname):
             _s3_pathname_obj = S3Path(_s3_pathname)
             if _s3_pathname_obj.is_file():
-                stat = S3Path(_s3_pathname).stat(follow_symlinks=followlinks)
+                stat = _s3_pathname_obj.stat(follow_symlinks=followlinks)
                 yield FileEntry(_s3_pathname_obj.name, _s3_pathname_obj.path, stat)
             if _s3_pathname_obj.is_dir():
                 yield FileEntry(
@@ -616,7 +599,7 @@ def _s3_glob_stat_single_path(
         with raise_s3_error(_s3_pathname, S3BucketNotFoundError):
             for resp in _list_objects_recursive(client, bucket, prefix, delimiter):
                 for content in resp.get("Contents", []):
-                    path = s3_path_join("s3://", bucket, content["Key"])
+                    path = s3_path_join(f"{protocol}://", bucket, content["Key"])
                     if not search_dir and pattern.match(path):
                         yield FileEntry(S3Path(path).name, path, _make_stat(content))
                     dirname = os.path.dirname(path)
@@ -629,7 +612,9 @@ def _s3_glob_stat_single_path(
                     )
                     dirname = os.path.dirname(dirname)
                 for common_prefix in resp.get("CommonPrefixes", []):
-                    path = s3_path_join("s3://", bucket, common_prefix["Prefix"])
+                    path = s3_path_join(
+                        f"{protocol}://", bucket, common_prefix["Prefix"]
+                    )
                     dirname = os.path.dirname(path)
                     if dirname not in dirnames and dirname != top_dir:
                         dirnames.add(dirname)
@@ -1595,26 +1580,19 @@ class S3Path(URIPath):
         :returns: A generator contains tuples of path and file stat,
             in which paths match `s3_pathname`
         """
-        glob_path = self._s3_path
+        glob_path = self.path_with_protocol
         if pattern:
-            glob_path = self.joinpath(pattern)._s3_path
+            glob_path = self.joinpath(pattern).path_with_protocol
         s3_pathname = fspath(glob_path)
 
         def create_generator():
-            for group_s3_pathname_1 in _group_s3path_by_bucket(
-                s3_pathname, self._profile_name
-            ):
+            for group_s3_pathname_1 in _group_s3path_by_bucket(s3_pathname):
                 for group_s3_pathname_2 in _group_s3path_by_prefix(group_s3_pathname_1):
                     for file_entry in _s3_glob_stat_single_path(
                         group_s3_pathname_2,
                         recursive,
                         missing_ok,
-                        profile_name=self._profile_name,
                     ):
-                        if self._profile_name:
-                            file_entry = file_entry._replace(
-                                path=f"{self._protocol_with_profile}://{file_entry.path[5:]}"
-                            )
                         yield file_entry
 
         return _create_missing_ok_generator(
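From the caller's side nothing changes, except that profile-qualified paths now come back with the right protocol directly instead of being patched up with _replace after the fact. A hypothetical use (assuming this hunk's method is S3Path.glob_stat, with placeholder bucket and profile names and credentials already configured):

    from megfile.s3_path import S3Path

    # Entries now carry "s3+myprofile://..." paths natively.
    for entry in S3Path("s3+myprofile://my-bucket/data").glob_stat("*.txt"):
        print(entry.path)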
@@ -2044,7 +2022,7 @@ class S3Path(URIPath):
             for content in response["Buckets"]:
                 yield FileEntry(
                     content["Name"],
-                    f"s3://{content['Name']}",
+                    f"{self._protocol_with_profile}://{content['Name']}",
                     StatResult(
                         ctime=content["CreationDate"].timestamp(),
                         isdir=True,
megfile/version.py CHANGED
@@ -1 +1 @@
-VERSION = "4.1.3"
+VERSION = "4.1.3.post2"
{megfile-4.1.3.dist-info → megfile-4.1.3.post2.dist-info}/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: megfile
-Version: 4.1.3
+Version: 4.1.3.post2
 Summary: Megvii file operation library
 Author-email: megvii <megfile@megvii.com>
 Project-URL: Homepage, https://github.com/megvii-research/megfile
{megfile-4.1.3.dist-info → megfile-4.1.3.post2.dist-info}/RECORD RENAMED
@@ -11,14 +11,14 @@ megfile/http_path.py,sha256=yRIk-fNbrsY8rUS5KVOfocS_PS520dX5KOs8lImpLaY,14173
 megfile/interfaces.py,sha256=p4UvVZpeLx5djd6bqqDaygIx_s-_AxIVj-gudTch4JE,8467
 megfile/pathlike.py,sha256=3Hnw-fn6RcIe9iPrJt00QdHSA--UfDyxnVBuZ_ymYYQ,31278
 megfile/s3.py,sha256=abBxnI7RIyn7n7qjGszP1VruYd6Gi9I8QnUOvsHkx1Y,16325
-megfile/s3_path.py,sha256=zelXhlRVOVSWBE6HJz0vXrrcRzSuj6Cnjd9HLGwPbCM,93644
+megfile/s3_path.py,sha256=Pkz5TjtY7-hh8Gtbd6cxef9c9EwYJCW5BqFo1rI1oog,93235
 megfile/sftp.py,sha256=uBcLQs-j6Q-q-sWAdd-pgi5Qmb_kq7boJM-0sCfcNO0,26540
 megfile/sftp_path.py,sha256=Wz4VcQ0pBUuWDGMSxPpPbutrT09mnY6jZNiAqTi5tO4,43840
 megfile/smart.py,sha256=Sae2KJzaU0k_qV_Bk0YifOMq8WsV5qQ2pGInDRF546I,36411
 megfile/smart_path.py,sha256=Up_6xNZ2019iSzMn_JAU_1H--z-AP6O7SxdXGdeTG0c,7659
 megfile/stdio.py,sha256=ZwxsnJNJYIT7Iyg5pIw4qiyH8bszG6oAhEJuR-hXGG4,658
 megfile/stdio_path.py,sha256=cxaDr8rtisTPnN-rjtaEpqQnshwiqwXFUJBM9xWY7Cg,2711
-megfile/version.py,sha256=VxmH81mTbRqT3UnqXh5OXAB3IfXRrcnMl0YgLgOqhPw,19
+megfile/version.py,sha256=SMg7F3UjVjLLIP3ibFt2p-0gK8lmy7rF2Y0Ehz9wx9M,25
 megfile/lib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 megfile/lib/base_prefetch_reader.py,sha256=uxVwYknOjc8hLF7q_T2QKMsBqFcrf411ZsuK25CN1eQ,12848
 megfile/lib/combine_reader.py,sha256=Kp2wEloOUpTlIU7dve87MBpSzmIM-F9OtpTawAjFkiU,4828
@@ -43,20 +43,10 @@ megfile/lib/stdio_handler.py,sha256=IDdgENLQlhigEwkLL4zStueVSzdWg7xVcTF_koof_Ek,
 megfile/lib/url.py,sha256=ER32pWy9Q2MAk3TraAaNEBWIqUeBmLuM57ol2cs7-Ks,103
 megfile/utils/__init__.py,sha256=xAzmICA0MtAbg-I2yPfeHjA1N4CiMP4sBrC9BgrfZLw,10151
 megfile/utils/mutex.py,sha256=asb8opGLgK22RiuBJUnfsvB8LnMmodP8KzCVHKmQBWA,2561
-megfile-4.1.3.dist-info/licenses/LICENSE,sha256=WNHhf_5RCaeuKWyq_K39vmp9F28LxKsB4SpomwSZ2L0,11357
-megfile-4.1.3.dist-info/licenses/LICENSE.pyre,sha256=9lf5nT-5ZH25JijpYAequ0bl8E8z5JmZB1qrjiUMp84,1080
-scripts/benchmark/code/iopath_read.py,sha256=O1Qs3mpvD9S_XCuRH2A2FpGWxCOSw6qZvEBrtPeRL1E,661
-scripts/benchmark/code/iopath_write.py,sha256=Mm0efW1J09RJ_CK5i1xtG2hJuaaslikin8qVpuRFP_Q,704
-scripts/benchmark/code/megfile_read.py,sha256=sAMebUiMColHDv3JEkXplImAHvn_IF1-g3BIJxhcQYE,239
-scripts/benchmark/code/megfile_write.py,sha256=bzn-i2bGH4eRcsVvkhXK35KsQkX2v2oEsOJ0Ft5saj0,257
-scripts/benchmark/code/pyarrow_read.py,sha256=2QBGKjGV2Dvl2ukOntLSag33pF55l3tfZ2Z6dLUjovw,305
-scripts/benchmark/code/pyarrow_write.py,sha256=U1puLh-ljSXv772bZYAyhzmxhPOq4aR4j-QtwdM9hG0,328
-scripts/benchmark/code/s3fs_read.py,sha256=XiTA-qrYblUs-jQWXSnvNg5Wo722C_g47aMMfo5XJBY,380
-scripts/benchmark/code/s3fs_write.py,sha256=gdXKkWXYGjLJlRT_J64pJN85XvRg3bZexcAJQEMXwtw,402
-scripts/benchmark/code/smart_open_read.py,sha256=SA02jHwS9Y31yFtV9CoJcfND5dR0eA_HsGmGNUrpQls,515
-scripts/benchmark/code/smart_open_write.py,sha256=jDxFJdY97yNH889jz3pawBoei3yaqy8pEMvC_ymHFtM,537
-megfile-4.1.3.dist-info/METADATA,sha256=hKZKmMsgZAysVrZJ3aRQtiGRrvHnRfEPx68ayMEt3EM,9595
-megfile-4.1.3.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
-megfile-4.1.3.dist-info/entry_points.txt,sha256=M6ZWSSv5_5_QtIpZafy3vq7WuOJ_5dSGQQnEZbByt2Q,49
-megfile-4.1.3.dist-info/top_level.txt,sha256=TR6xUw8Px5Ms_UENhEmLNmxOdfHAhTlSt9yTw9LRgsQ,35
-megfile-4.1.3.dist-info/RECORD,,
+megfile-4.1.3.post2.dist-info/licenses/LICENSE,sha256=WNHhf_5RCaeuKWyq_K39vmp9F28LxKsB4SpomwSZ2L0,11357
+megfile-4.1.3.post2.dist-info/licenses/LICENSE.pyre,sha256=9lf5nT-5ZH25JijpYAequ0bl8E8z5JmZB1qrjiUMp84,1080
+megfile-4.1.3.post2.dist-info/METADATA,sha256=c4_nmj6sTfOJDTLB9kKn0c-st_kCklsMRXKOuQi6v2o,9601
+megfile-4.1.3.post2.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
+megfile-4.1.3.post2.dist-info/entry_points.txt,sha256=M6ZWSSv5_5_QtIpZafy3vq7WuOJ_5dSGQQnEZbByt2Q,49
+megfile-4.1.3.post2.dist-info/top_level.txt,sha256=i3rMgdU1ZAJekAceojhA-bkm3749PzshtRmLTbeLUPQ,8
+megfile-4.1.3.post2.dist-info/RECORD,,
megfile-4.1.3.post2.dist-info/top_level.txt ADDED
@@ -0,0 +1 @@
+megfile
megfile-4.1.3.dist-info/top_level.txt DELETED
@@ -1,5 +0,0 @@
-dist
-docs
-html_cov
-megfile
-scripts
scripts/benchmark/code/iopath_read.py DELETED
@@ -1,29 +0,0 @@
-import os
-import time
-
-import boto3
-from iopath.common.file_io import PathManager
-from iopath.common.s3 import S3PathHandler
-
-times = 10240
-s3_path = "s3://bucketA/large.txt"
-
-start = time.time()
-
-path_manager = PathManager()
-
-session = boto3.Session(
-    aws_access_key_id=os.environ["AWS_ACCESS_KEY_ID"],
-    aws_secret_access_key=os.environ["AWS_SECRET_ACCESS_KEY"],
-)
-client = session.client("s3", endpoint_url=os.environ["OSS_ENDPOINT"])
-handler = S3PathHandler()
-handler.client = client
-
-path_manager.register_handler(handler)
-
-with path_manager.open(s3_path, "rb") as f:
-    for i in range(times):
-        f.read(1024 * 1024)
-
-print(time.time() - start)
scripts/benchmark/code/iopath_write.py DELETED
@@ -1,30 +0,0 @@
-import os
-import time
-
-import boto3
-from iopath.common.file_io import PathManager
-from iopath.common.s3 import S3PathHandler
-
-times = 10240
-s3_path = "s3://bucketA/large.txt"
-block = b"1" * 1024 * 1024
-
-start = time.time()
-
-path_manager = PathManager()
-
-session = boto3.Session(
-    aws_access_key_id=os.environ["AWS_ACCESS_KEY_ID"],
-    aws_secret_access_key=os.environ["AWS_SECRET_ACCESS_KEY"],
-)
-client = session.client("s3", endpoint_url=os.environ["OSS_ENDPOINT"])
-handler = S3PathHandler()
-handler.client = client
-
-path_manager.register_handler(handler)
-
-with path_manager.open(s3_path, "wb") as f:
-    for i in range(times):
-        f.write(block)
-
-print(time.time() - start)  # write 10GB 91.642
scripts/benchmark/code/megfile_read.py DELETED
@@ -1,13 +0,0 @@
-import time
-
-from megfile import smart_open
-
-times = 10240
-s3_path = "s3://bucketA/large.txt"
-
-start = time.time()
-with smart_open(s3_path, "rb") as f:
-    for i in range(times):
-        f.read(1024 * 1024 * 1)
-
-print(time.time() - start)
scripts/benchmark/code/megfile_write.py DELETED
@@ -1,14 +0,0 @@
-import time
-
-from megfile import smart_open
-
-times = 10240
-s3_path = "s3://bucketA/large.txt"
-block = b"1" * 1024 * 1024
-
-start = time.time()
-with smart_open(s3_path, "wb") as f:
-    for i in range(times):
-        f.write(block)
-
-print(time.time() - start)
scripts/benchmark/code/pyarrow_read.py DELETED
@@ -1,17 +0,0 @@
-import os
-import time
-
-from pyarrow import fs
-
-times = 10240
-s3_path = "bucketA/large.txt"
-
-start = time.time()
-
-s3 = fs.S3FileSystem(endpoint_override=os.environ["OSS_ENDPOINT"])
-
-with s3.open_input_stream(s3_path) as f:
-    for i in range(times):
-        f.read(1024 * 1024)
-
-print(time.time() - start)
scripts/benchmark/code/pyarrow_write.py DELETED
@@ -1,18 +0,0 @@
-import os
-import time
-
-from pyarrow import fs
-
-times = 10240
-block = b"1" * 1024 * 1024
-s3_path = "bucketA/large.txt"
-
-start = time.time()
-
-s3 = fs.S3FileSystem(endpoint_override=os.environ["OSS_ENDPOINT"])
-
-with s3.open_output_stream(s3_path) as f:
-    for i in range(times):
-        f.write(block)
-
-print(time.time() - start)
scripts/benchmark/code/s3fs_read.py DELETED
@@ -1,21 +0,0 @@
-import os
-import time
-
-import s3fs
-
-times = 10240
-s3_path = "bucketA/large.txt"
-
-start = time.time()
-
-s3 = s3fs.S3FileSystem(
-    endpoint_url=os.environ["OSS_ENDPOINT"],
-    key=os.environ["AWS_ACCESS_KEY_ID"],
-    secret=os.environ["AWS_SECRET_ACCESS_KEY"],
-)
-
-with s3.open(s3_path, "rb") as f:
-    for i in range(times):
-        f.read(1024 * 1024)
-
-print(time.time() - start)
scripts/benchmark/code/s3fs_write.py DELETED
@@ -1,22 +0,0 @@
-import os
-import time
-
-import s3fs
-
-times = 10240
-block = b"1" * 1024 * 1024
-s3_path = "bucketA/large.txt"
-
-start = time.time()
-
-s3 = s3fs.S3FileSystem(
-    endpoint_url=os.environ["OSS_ENDPOINT"],
-    key=os.environ["AWS_ACCESS_KEY_ID"],
-    secret=os.environ["AWS_SECRET_ACCESS_KEY"],
-)
-
-with s3.open(s3_path, "wb") as f:
-    for i in range(times):
-        f.write(block)
-
-print(time.time() - start)
scripts/benchmark/code/smart_open_read.py DELETED
@@ -1,25 +0,0 @@
-import os
-import time
-
-import boto3
-from smart_open import open
-
-times = 10240
-s3_path = "s3://bucketA/large.txt"
-
-start = time.time()
-session = boto3.Session(
-    aws_access_key_id=os.environ["AWS_ACCESS_KEY_ID"],
-    aws_secret_access_key=os.environ["AWS_SECRET_ACCESS_KEY"],
-)
-with open(
-    s3_path,
-    "rb",
-    transport_params={
-        "client": session.client("s3", endpoint_url=os.environ["OSS_ENDPOINT"])
-    },
-) as f:
-    for i in range(times):
-        f.read(1024 * 1024)
-
-print(time.time() - start)
scripts/benchmark/code/smart_open_write.py DELETED
@@ -1,26 +0,0 @@
-import os
-import time
-
-import boto3
-from smart_open import open
-
-times = 10240
-s3_path = "s3://bucketA/large.txt"
-block = b"1" * 1024 * 1024
-
-start = time.time()
-session = boto3.Session(
-    aws_access_key_id=os.environ["AWS_ACCESS_KEY_ID"],
-    aws_secret_access_key=os.environ["AWS_SECRET_ACCESS_KEY"],
-)
-with open(
-    s3_path,
-    "wb",
-    transport_params={
-        "client": session.client("s3", endpoint_url=os.environ["OSS_ENDPOINT"])
-    },
-) as f:
-    for i in range(times):
-        f.write(block)
-
-print(time.time() - start)