megfile 4.1.3__py3-none-any.whl → 4.1.3.post2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- megfile/s3_path.py +38 -60
- megfile/version.py +1 -1
- {megfile-4.1.3.dist-info → megfile-4.1.3.post2.dist-info}/METADATA +1 -1
- {megfile-4.1.3.dist-info → megfile-4.1.3.post2.dist-info}/RECORD +9 -19
- megfile-4.1.3.post2.dist-info/top_level.txt +1 -0
- megfile-4.1.3.dist-info/top_level.txt +0 -5
- scripts/benchmark/code/iopath_read.py +0 -29
- scripts/benchmark/code/iopath_write.py +0 -30
- scripts/benchmark/code/megfile_read.py +0 -13
- scripts/benchmark/code/megfile_write.py +0 -14
- scripts/benchmark/code/pyarrow_read.py +0 -17
- scripts/benchmark/code/pyarrow_write.py +0 -18
- scripts/benchmark/code/s3fs_read.py +0 -21
- scripts/benchmark/code/s3fs_write.py +0 -22
- scripts/benchmark/code/smart_open_read.py +0 -25
- scripts/benchmark/code/smart_open_write.py +0 -26
- {megfile-4.1.3.dist-info → megfile-4.1.3.post2.dist-info}/WHEEL +0 -0
- {megfile-4.1.3.dist-info → megfile-4.1.3.post2.dist-info}/entry_points.txt +0 -0
- {megfile-4.1.3.dist-info → megfile-4.1.3.post2.dist-info}/licenses/LICENSE +0 -0
- {megfile-4.1.3.dist-info → megfile-4.1.3.post2.dist-info}/licenses/LICENSE.pyre +0 -0
megfile/s3_path.py
CHANGED

```diff
@@ -366,12 +366,9 @@ def _list_all_buckets(profile_name: Optional[str] = None) -> List[str]:
     return [content["Name"] for content in response["Buckets"]]
 
 
-def _parse_s3_url_ignore_brace(s3_url: str) -> Tuple[str, str]:
-    s3_url = fspath(s3_url)
-    s3_scheme, right_part = s3_url[:5], s3_url[5:]
-    if s3_scheme != "s3://":
-        raise ValueError("Not a s3 url: %r" % s3_url)
+def _parse_s3_url_ignore_brace(s3_pathname: str) -> Tuple[str, str]:
     left_brace = False
+    right_part = s3_pathname.split("://", maxsplit=1)[1]
     for current_index, current_character in enumerate(right_part):
         if current_character == "/" and left_brace is False:
             return right_part[:current_index], right_part[current_index + 1 :]
```
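The rewritten parser no longer insists on a literal `s3://` prefix and no longer raises `ValueError` for other schemes: it keeps whatever protocol precedes `://`, which is what lets profile-qualified URLs such as `s3+prod://...` flow through unchanged. A minimal standalone sketch of the split; the `{`/`}` toggling is assumed from the function's name and the surrounding hunks, since the diff does not show the rest of the loop:

```python
from typing import Tuple

def parse_s3_url_ignore_brace(s3_pathname: str) -> Tuple[str, str]:
    """Split '<protocol>://<bucket>/<key>' at the first '/' outside braces."""
    right_part = s3_pathname.split("://", maxsplit=1)[1]
    left_brace = False
    for current_index, current_character in enumerate(right_part):
        if current_character == "{":    # assumed: entering a brace expansion
            left_brace = True
        elif current_character == "}":  # assumed: leaving a brace expansion
            left_brace = False
        elif current_character == "/" and left_brace is False:
            return right_part[:current_index], right_part[current_index + 1 :]
    return right_part, ""

assert parse_s3_url_ignore_brace("s3://bucket/a/b.txt") == ("bucket", "a/b.txt")
assert parse_s3_url_ignore_brace("s3+prod://bucket/a.txt") == ("bucket", "a.txt")
assert parse_s3_url_ignore_brace("s3://{b1,b2}/key") == ("{b1,b2}", "key")
```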
```diff
@@ -382,9 +379,14 @@ def _parse_s3_url_ignore_brace(s3_url: str) -> Tuple[str, str]:
     return right_part, ""
 
 
-def _group_s3path_by_bucket(
-    s3_pathname: str, profile_name: Optional[str] = None
-) -> List[str]:
+def _parse_s3_url_profile(s3_pathname: str) -> Tuple[str, Optional[str]]:
+    protocol = s3_pathname.split("://", maxsplit=1)[0]
+    profile_name = protocol[3:] if protocol.startswith("s3+") else None
+    return protocol, profile_name
+
+
+def _group_s3path_by_bucket(s3_pathname: str) -> List[str]:
+    protocol, profile_name = _parse_s3_url_profile(s3_pathname)
     bucket, key = _parse_s3_url_ignore_brace(s3_pathname)
     if not bucket:
         if not key:
```
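The new `_parse_s3_url_profile` helper is the pivot of this release: instead of threading `profile_name` keyword arguments through every call, the profile is recovered from the URL's own protocol. A small sketch of that logic in isolation:

```python
from typing import Optional, Tuple

def parse_s3_url_profile(s3_pathname: str) -> Tuple[str, Optional[str]]:
    # Same two lines as the new helper: the profile is whatever
    # follows "s3+" in the protocol part of the URL.
    protocol = s3_pathname.split("://", maxsplit=1)[0]
    profile_name = protocol[3:] if protocol.startswith("s3+") else None
    return protocol, profile_name

assert parse_s3_url_profile("s3://bucket/key") == ("s3", None)
assert parse_s3_url_profile("s3+prod://bucket/key") == ("s3+prod", "prod")
```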
```diff
@@ -395,8 +397,8 @@ def _group_s3path_by_bucket(
 
     def generate_s3_path(bucket: str, key: str) -> str:
         if key:
-            return "
-        return "
+            return f"{protocol}://{bucket}/{key}"
+        return f"{protocol}://{bucket}{'/' if s3_pathname.endswith('/') else ''}"
 
     all_bucket = lru_cache(maxsize=1)(_list_all_buckets)
     for bucket_name in ungloblize(bucket):
```
```diff
@@ -418,23 +420,11 @@ def _group_s3path_by_bucket(
 
 
 def _s3_split_magic_ignore_brace(s3_pathname: str) -> Tuple[str, str]:
-
-
-
-
-
-        has_protocol = True
-        s3_pathname = s3_pathname[5:]
-
-    has_delimiter = False
-    if s3_pathname.endswith("/"):
-        has_delimiter = True
-        s3_pathname = s3_pathname[:-1]
-
-    normal_parts = []
-    magic_parts = []
-    left_brace = False
-    left_index = 0
+    left_brace, left_index = False, 0
+    normal_parts, magic_parts = [], []
+    s3_pathname_with_suffix = s3_pathname
+    s3_pathname = s3_pathname.rstrip("/")
+    suffix = (len(s3_pathname_with_suffix) - len(s3_pathname)) * "/"
     for current_index, current_character in enumerate(s3_pathname):
         if current_character == "/" and left_brace is False:
             if has_magic_ignore_brace(s3_pathname[left_index:current_index]):
```
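The rewrite drops the `has_protocol`/`has_delimiter` bookkeeping: trailing slashes are stripped up front, remembered as `suffix`, and re-attached to the correct half of the result after the split (see the next hunk). The mechanics, using the same three lines as the diff:

```python
# Strip the trailing slashes, but remember them for later.
s3_pathname_with_suffix = "bucket/*/logs/"
s3_pathname = s3_pathname_with_suffix.rstrip("/")
suffix = (len(s3_pathname_with_suffix) - len(s3_pathname)) * "/"
assert (s3_pathname, suffix) == ("bucket/*/logs", "/")
```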
```diff
@@ -454,18 +444,13 @@ def _s3_split_magic_ignore_brace(s3_pathname: str) -> Tuple[str, str]:
         magic_parts.append(s3_pathname[left_index:])
     else:
         normal_parts.append(s3_pathname[left_index:])
-
-    if
-
-
-
-
-
-        magic_parts.append("")
-    elif has_delimiter:
-        normal_parts.append("")
-
-    return "/".join(normal_parts), "/".join(magic_parts)
+    top_dir, magic_part = "/".join(normal_parts), "/".join(magic_parts)
+    if suffix:
+        if magic_part:
+            magic_part += suffix
+        else:
+            top_dir += suffix
+    return top_dir, magic_part
 
 
 def _group_s3path_by_prefix(s3_pathname: str) -> List[str]:
```
```diff
@@ -493,17 +478,15 @@ def _become_prefix(prefix: str) -> str:
 def _s3_split_magic(s3_pathname: str) -> Tuple[str, str]:
     if not has_magic(s3_pathname):
         return s3_pathname, ""
-    delimiter = "/"
     normal_parts = []
     magic_parts = []
-    all_parts = s3_pathname.split(delimiter)
+    all_parts = s3_pathname.split("/")
     for i, part in enumerate(all_parts):
-        if not has_magic(part):
-            normal_parts.append(part)
-        else:
+        if has_magic(part):
             magic_parts = all_parts[i:]
             break
-    return delimiter.join(normal_parts), delimiter.join(magic_parts)
+        normal_parts.append(part)
+    return "/".join(normal_parts), "/".join(magic_parts)
 
 
 def _list_objects_recursive(s3_client, bucket: str, prefix: str, delimiter: str = ""):
```
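Behaviour is unchanged here; the loop is just restructured to break out at the first wildcard component, and the single-use `delimiter` variable is inlined. A runnable sketch, with a stand-in `has_magic` modelled on the stdlib `glob` module (megfile's own implementation is not shown in this diff):

```python
import re
from typing import Tuple

_magic_check = re.compile(r"[*?[]")  # stand-in; same idea as glob.has_magic

def has_magic(s: str) -> bool:
    return _magic_check.search(s) is not None

def s3_split_magic(s3_pathname: str) -> Tuple[str, str]:
    # Everything before the first wildcard component can be listed
    # directly; the rest has to be pattern-matched.
    if not has_magic(s3_pathname):
        return s3_pathname, ""
    normal_parts = []
    magic_parts = []
    all_parts = s3_pathname.split("/")
    for i, part in enumerate(all_parts):
        if has_magic(part):
            magic_parts = all_parts[i:]
            break
        normal_parts.append(part)
    return "/".join(normal_parts), "/".join(magic_parts)

assert s3_split_magic("s3://bucket/data/*.csv") == ("s3://bucket/data", "*.csv")
```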
```diff
@@ -574,12 +557,12 @@ def _s3_glob_stat_single_path(
     recursive: bool = True,
     missing_ok: bool = True,
     followlinks: bool = False,
-    profile_name: Optional[str] = None,
 ) -> Iterator[FileEntry]:
     s3_pathname = fspath(s3_pathname)
     if not recursive:
         # If not recursive, replace ** with *
         s3_pathname = re.sub(r"\*{2,}", "*", s3_pathname)
+    protocol, profile_name = _parse_s3_url_profile(s3_pathname)
     top_dir, wildcard_part = _s3_split_magic(s3_pathname)
     search_dir = wildcard_part.endswith("/")
 
```
```diff
@@ -596,7 +579,7 @@ def _s3_glob_stat_single_path(
         if not has_magic(_s3_pathname):
             _s3_pathname_obj = S3Path(_s3_pathname)
             if _s3_pathname_obj.is_file():
-                stat =
+                stat = _s3_pathname_obj.stat(follow_symlinks=followlinks)
                 yield FileEntry(_s3_pathname_obj.name, _s3_pathname_obj.path, stat)
             if _s3_pathname_obj.is_dir():
                 yield FileEntry(
```
```diff
@@ -616,7 +599,7 @@ def _s3_glob_stat_single_path(
         with raise_s3_error(_s3_pathname, S3BucketNotFoundError):
             for resp in _list_objects_recursive(client, bucket, prefix, delimiter):
                 for content in resp.get("Contents", []):
-                    path = s3_path_join("s3://", bucket, content["Key"])
+                    path = s3_path_join(f"{protocol}://", bucket, content["Key"])
                     if not search_dir and pattern.match(path):
                         yield FileEntry(S3Path(path).name, path, _make_stat(content))
                     dirname = os.path.dirname(path)
```
```diff
@@ -629,7 +612,9 @@ def _s3_glob_stat_single_path(
                         )
                         dirname = os.path.dirname(dirname)
                 for common_prefix in resp.get("CommonPrefixes", []):
-                    path = s3_path_join("s3://", bucket, common_prefix["Prefix"])
+                    path = s3_path_join(
+                        f"{protocol}://", bucket, common_prefix["Prefix"]
+                    )
                     dirname = os.path.dirname(path)
                     if dirname not in dirnames and dirname != top_dir:
                         dirnames.add(dirname)
```
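For both `Contents` and `CommonPrefixes`, intermediate "directories" are synthesised by walking `os.path.dirname` upward until the already-listed `top_dir` is reached; since the paths are URL-shaped, `dirname` simply peels off the last `/`-separated component. An illustration of that climb:

```python
import os.path

top_dir = "s3://bucket/a"
path = "s3://bucket/a/b/c/d.txt"
dirnames = set()
dirname = os.path.dirname(path)
while dirname != top_dir:
    dirnames.add(dirname)  # each parent is recorded exactly once
    dirname = os.path.dirname(dirname)
assert dirnames == {"s3://bucket/a/b", "s3://bucket/a/b/c"}
```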
```diff
@@ -1595,26 +1580,19 @@ class S3Path(URIPath):
         :returns: A generator contains tuples of path and file stat,
             in which paths match `s3_pathname`
         """
-        glob_path = self.
+        glob_path = self.path_with_protocol
         if pattern:
-            glob_path = self.joinpath(pattern).
+            glob_path = self.joinpath(pattern).path_with_protocol
         s3_pathname = fspath(glob_path)
 
         def create_generator():
-            for group_s3_pathname_1 in _group_s3path_by_bucket(
-                s3_pathname, self._profile_name
-            ):
+            for group_s3_pathname_1 in _group_s3path_by_bucket(s3_pathname):
                 for group_s3_pathname_2 in _group_s3path_by_prefix(group_s3_pathname_1):
                     for file_entry in _s3_glob_stat_single_path(
                         group_s3_pathname_2,
                         recursive,
                         missing_ok,
-                        profile_name=self._profile_name,
                     ):
-                        if self._profile_name:
-                            file_entry = file_entry._replace(
-                                path=f"{self._protocol_with_profile}://{file_entry.path[5:]}"
-                            )
                         yield file_entry
 
         return _create_missing_ok_generator(
```
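This is where the old design's cost showed: every entry came back as `s3://...` and, when a profile was active, was patched afterwards with `_replace`, splicing the profile protocol onto the path minus its first five characters. With the protocol carried through path construction, that fixup step disappears. The two approaches side by side, as a sketch:

```python
# Old (removed): build "s3://..." paths, then splice the profile back in.
file_entry_path = "s3://bucket/key"
patched = f"s3+prod://{file_entry_path[5:]}"  # [5:] drops the "s3://"
assert patched == "s3+prod://bucket/key"

# New: the protocol (profile included) is part of the path from the start.
protocol = "s3+prod"
built = f"{protocol}://bucket/key"
assert built == patched
```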
```diff
@@ -2044,7 +2022,7 @@ class S3Path(URIPath):
         for content in response["Buckets"]:
             yield FileEntry(
                 content["Name"],
-                f"s3://{content['Name']}",
+                f"{self._protocol_with_profile}://{content['Name']}",
                 StatResult(
                     ctime=content["CreationDate"].timestamp(),
                     isdir=True,
```
megfile/version.py
CHANGED

```diff
@@ -1 +1 @@
-VERSION = "4.1.3"
+VERSION = "4.1.3.post2"
```
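`.post2` is a PEP 440 post-release segment: it sorts after `4.1.3` without implying new functionality, which suits a quick follow-up fix to an already-published version. For instance, with the third-party `packaging` library:

```python
from packaging.version import Version

assert Version("4.1.3.post2") > Version("4.1.3")
assert Version("4.1.3.post2").is_postrelease
```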
{megfile-4.1.3.dist-info → megfile-4.1.3.post2.dist-info}/RECORD
CHANGED

```diff
@@ -11,14 +11,14 @@ megfile/http_path.py,sha256=yRIk-fNbrsY8rUS5KVOfocS_PS520dX5KOs8lImpLaY,14173
 megfile/interfaces.py,sha256=p4UvVZpeLx5djd6bqqDaygIx_s-_AxIVj-gudTch4JE,8467
 megfile/pathlike.py,sha256=3Hnw-fn6RcIe9iPrJt00QdHSA--UfDyxnVBuZ_ymYYQ,31278
 megfile/s3.py,sha256=abBxnI7RIyn7n7qjGszP1VruYd6Gi9I8QnUOvsHkx1Y,16325
-megfile/s3_path.py,sha256=
+megfile/s3_path.py,sha256=Pkz5TjtY7-hh8Gtbd6cxef9c9EwYJCW5BqFo1rI1oog,93235
 megfile/sftp.py,sha256=uBcLQs-j6Q-q-sWAdd-pgi5Qmb_kq7boJM-0sCfcNO0,26540
 megfile/sftp_path.py,sha256=Wz4VcQ0pBUuWDGMSxPpPbutrT09mnY6jZNiAqTi5tO4,43840
 megfile/smart.py,sha256=Sae2KJzaU0k_qV_Bk0YifOMq8WsV5qQ2pGInDRF546I,36411
 megfile/smart_path.py,sha256=Up_6xNZ2019iSzMn_JAU_1H--z-AP6O7SxdXGdeTG0c,7659
 megfile/stdio.py,sha256=ZwxsnJNJYIT7Iyg5pIw4qiyH8bszG6oAhEJuR-hXGG4,658
 megfile/stdio_path.py,sha256=cxaDr8rtisTPnN-rjtaEpqQnshwiqwXFUJBM9xWY7Cg,2711
-megfile/version.py,sha256=
+megfile/version.py,sha256=SMg7F3UjVjLLIP3ibFt2p-0gK8lmy7rF2Y0Ehz9wx9M,25
 megfile/lib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 megfile/lib/base_prefetch_reader.py,sha256=uxVwYknOjc8hLF7q_T2QKMsBqFcrf411ZsuK25CN1eQ,12848
 megfile/lib/combine_reader.py,sha256=Kp2wEloOUpTlIU7dve87MBpSzmIM-F9OtpTawAjFkiU,4828
```
```diff
@@ -43,20 +43,10 @@ megfile/lib/stdio_handler.py,sha256=IDdgENLQlhigEwkLL4zStueVSzdWg7xVcTF_koof_Ek,
 megfile/lib/url.py,sha256=ER32pWy9Q2MAk3TraAaNEBWIqUeBmLuM57ol2cs7-Ks,103
 megfile/utils/__init__.py,sha256=xAzmICA0MtAbg-I2yPfeHjA1N4CiMP4sBrC9BgrfZLw,10151
 megfile/utils/mutex.py,sha256=asb8opGLgK22RiuBJUnfsvB8LnMmodP8KzCVHKmQBWA,2561
-megfile-4.1.3.dist-info/licenses/LICENSE,sha256=WNHhf_5RCaeuKWyq_K39vmp9F28LxKsB4SpomwSZ2L0,11357
-megfile-4.1.3.dist-info/licenses/LICENSE.pyre,sha256=9lf5nT-5ZH25JijpYAequ0bl8E8z5JmZB1qrjiUMp84,1080
-scripts/benchmark/code/iopath_read.py,sha256=
-scripts/benchmark/code/iopath_write.py,sha256=
-scripts/benchmark/code/megfile_read.py,sha256=
-scripts/benchmark/code/megfile_write.py,sha256=
-scripts/benchmark/code/pyarrow_read.py,sha256=
-scripts/benchmark/code/pyarrow_write.py,sha256=U1puLh-ljSXv772bZYAyhzmxhPOq4aR4j-QtwdM9hG0,328
-scripts/benchmark/code/s3fs_read.py,sha256=XiTA-qrYblUs-jQWXSnvNg5Wo722C_g47aMMfo5XJBY,380
-scripts/benchmark/code/s3fs_write.py,sha256=gdXKkWXYGjLJlRT_J64pJN85XvRg3bZexcAJQEMXwtw,402
-scripts/benchmark/code/smart_open_read.py,sha256=SA02jHwS9Y31yFtV9CoJcfND5dR0eA_HsGmGNUrpQls,515
-scripts/benchmark/code/smart_open_write.py,sha256=jDxFJdY97yNH889jz3pawBoei3yaqy8pEMvC_ymHFtM,537
-megfile-4.1.3.dist-info/METADATA,sha256=hKZKmMsgZAysVrZJ3aRQtiGRrvHnRfEPx68ayMEt3EM,9595
-megfile-4.1.3.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
-megfile-4.1.3.dist-info/entry_points.txt,sha256=M6ZWSSv5_5_QtIpZafy3vq7WuOJ_5dSGQQnEZbByt2Q,49
-megfile-4.1.3.dist-info/top_level.txt,sha256=TR6xUw8Px5Ms_UENhEmLNmxOdfHAhTlSt9yTw9LRgsQ,35
-megfile-4.1.3.dist-info/RECORD,,
+megfile-4.1.3.post2.dist-info/licenses/LICENSE,sha256=WNHhf_5RCaeuKWyq_K39vmp9F28LxKsB4SpomwSZ2L0,11357
+megfile-4.1.3.post2.dist-info/licenses/LICENSE.pyre,sha256=9lf5nT-5ZH25JijpYAequ0bl8E8z5JmZB1qrjiUMp84,1080
+megfile-4.1.3.post2.dist-info/METADATA,sha256=c4_nmj6sTfOJDTLB9kKn0c-st_kCklsMRXKOuQi6v2o,9601
+megfile-4.1.3.post2.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
+megfile-4.1.3.post2.dist-info/entry_points.txt,sha256=M6ZWSSv5_5_QtIpZafy3vq7WuOJ_5dSGQQnEZbByt2Q,49
+megfile-4.1.3.post2.dist-info/top_level.txt,sha256=i3rMgdU1ZAJekAceojhA-bkm3749PzshtRmLTbeLUPQ,8
+megfile-4.1.3.post2.dist-info/RECORD,,
```
megfile-4.1.3.post2.dist-info/top_level.txt
ADDED

```diff
@@ -0,0 +1 @@
+megfile
```
scripts/benchmark/code/iopath_read.py
REMOVED

```diff
@@ -1,29 +0,0 @@
-import os
-import time
-
-import boto3
-from iopath.common.file_io import PathManager
-from iopath.common.s3 import S3PathHandler
-
-times = 10240
-s3_path = "s3://bucketA/large.txt"
-
-start = time.time()
-
-path_manager = PathManager()
-
-session = boto3.Session(
-    aws_access_key_id=os.environ["AWS_ACCESS_KEY_ID"],
-    aws_secret_access_key=os.environ["AWS_SECRET_ACCESS_KEY"],
-)
-client = session.client("s3", endpoint_url=os.environ["OSS_ENDPOINT"])
-handler = S3PathHandler()
-handler.client = client
-
-path_manager.register_handler(handler)
-
-with path_manager.open(s3_path, "rb") as f:
-    for i in range(times):
-        f.read(1024 * 1024)
-
-print(time.time() - start)
```
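Every script in this deleted benchmark suite follows the same skeleton: open one object, perform `times` sequential 1 MiB reads or writes, and print the elapsed wall-clock time. A distilled sketch of that shared pattern (`open_func` is a stand-in for whichever library's open call is under test):

```python
import time

times = 10240                # 10240 x 1 MiB = 10 GiB moved per run
block = b"1" * 1024 * 1024   # 1 MiB payload

def benchmark(open_func, path: str, mode: str) -> float:
    """Time `times` sequential 1 MiB operations through `open_func`."""
    start = time.time()
    with open_func(path, mode) as f:
        for _ in range(times):
            if "w" in mode:
                f.write(block)
            else:
                f.read(len(block))
    return time.time() - start
```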
scripts/benchmark/code/iopath_write.py
REMOVED

```diff
@@ -1,30 +0,0 @@
-import os
-import time
-
-import boto3
-from iopath.common.file_io import PathManager
-from iopath.common.s3 import S3PathHandler
-
-times = 10240
-s3_path = "s3://bucketA/large.txt"
-block = b"1" * 1024 * 1024
-
-start = time.time()
-
-path_manager = PathManager()
-
-session = boto3.Session(
-    aws_access_key_id=os.environ["AWS_ACCESS_KEY_ID"],
-    aws_secret_access_key=os.environ["AWS_SECRET_ACCESS_KEY"],
-)
-client = session.client("s3", endpoint_url=os.environ["OSS_ENDPOINT"])
-handler = S3PathHandler()
-handler.client = client
-
-path_manager.register_handler(handler)
-
-with path_manager.open(s3_path, "wb") as f:
-    for i in range(times):
-        f.write(block)
-
-print(time.time() - start) # write 10GB 91.642
```
scripts/benchmark/code/megfile_write.py
REMOVED

```diff
@@ -1,14 +0,0 @@
-import time
-
-from megfile import smart_open
-
-times = 10240
-s3_path = "s3://bucketA/large.txt"
-block = b"1" * 1024 * 1024
-
-start = time.time()
-with smart_open(s3_path, "wb") as f:
-    for i in range(times):
-        f.write(block)
-
-print(time.time() - start)
```
scripts/benchmark/code/pyarrow_read.py
REMOVED

```diff
@@ -1,17 +0,0 @@
-import os
-import time
-
-from pyarrow import fs
-
-times = 10240
-s3_path = "bucketA/large.txt"
-
-start = time.time()
-
-s3 = fs.S3FileSystem(endpoint_override=os.environ["OSS_ENDPOINT"])
-
-with s3.open_input_stream(s3_path) as f:
-    for i in range(times):
-        f.read(1024 * 1024)
-
-print(time.time() - start)
```
scripts/benchmark/code/pyarrow_write.py
REMOVED

```diff
@@ -1,18 +0,0 @@
-import os
-import time
-
-from pyarrow import fs
-
-times = 10240
-block = b"1" * 1024 * 1024
-s3_path = "bucketA/large.txt"
-
-start = time.time()
-
-s3 = fs.S3FileSystem(endpoint_override=os.environ["OSS_ENDPOINT"])
-
-with s3.open_output_stream(s3_path) as f:
-    for i in range(times):
-        f.write(block)
-
-print(time.time() - start)
```
scripts/benchmark/code/s3fs_read.py
REMOVED

```diff
@@ -1,21 +0,0 @@
-import os
-import time
-
-import s3fs
-
-times = 10240
-s3_path = "bucketA/large.txt"
-
-start = time.time()
-
-s3 = s3fs.S3FileSystem(
-    endpoint_url=os.environ["OSS_ENDPOINT"],
-    key=os.environ["AWS_ACCESS_KEY_ID"],
-    secret=os.environ["AWS_SECRET_ACCESS_KEY"],
-)
-
-with s3.open(s3_path, "rb") as f:
-    for i in range(times):
-        f.read(1024 * 1024)
-
-print(time.time() - start)
```
scripts/benchmark/code/s3fs_write.py
REMOVED

```diff
@@ -1,22 +0,0 @@
-import os
-import time
-
-import s3fs
-
-times = 10240
-block = b"1" * 1024 * 1024
-s3_path = "bucketA/large.txt"
-
-start = time.time()
-
-s3 = s3fs.S3FileSystem(
-    endpoint_url=os.environ["OSS_ENDPOINT"],
-    key=os.environ["AWS_ACCESS_KEY_ID"],
-    secret=os.environ["AWS_SECRET_ACCESS_KEY"],
-)
-
-with s3.open(s3_path, "wb") as f:
-    for i in range(times):
-        f.write(block)
-
-print(time.time() - start)
```
scripts/benchmark/code/smart_open_read.py
REMOVED

```diff
@@ -1,25 +0,0 @@
-import os
-import time
-
-import boto3
-from smart_open import open
-
-times = 10240
-s3_path = "s3://bucketA/large.txt"
-
-start = time.time()
-session = boto3.Session(
-    aws_access_key_id=os.environ["AWS_ACCESS_KEY_ID"],
-    aws_secret_access_key=os.environ["AWS_SECRET_ACCESS_KEY"],
-)
-with open(
-    s3_path,
-    "rb",
-    transport_params={
-        "client": session.client("s3", endpoint_url=os.environ["OSS_ENDPOINT"])
-    },
-) as f:
-    for i in range(times):
-        f.read(1024 * 1024)
-
-print(time.time() - start)
```
scripts/benchmark/code/smart_open_write.py
REMOVED

```diff
@@ -1,26 +0,0 @@
-import os
-import time
-
-import boto3
-from smart_open import open
-
-times = 10240
-s3_path = "s3://bucketA/large.txt"
-block = b"1" * 1024 * 1024
-
-start = time.time()
-session = boto3.Session(
-    aws_access_key_id=os.environ["AWS_ACCESS_KEY_ID"],
-    aws_secret_access_key=os.environ["AWS_SECRET_ACCESS_KEY"],
-)
-with open(
-    s3_path,
-    "wb",
-    transport_params={
-        "client": session.client("s3", endpoint_url=os.environ["OSS_ENDPOINT"])
-    },
-) as f:
-    for i in range(times):
-        f.write(block)
-
-print(time.time() - start)
```
{megfile-4.1.3.dist-info → megfile-4.1.3.post2.dist-info}/WHEEL
File without changes

{megfile-4.1.3.dist-info → megfile-4.1.3.post2.dist-info}/entry_points.txt
File without changes

{megfile-4.1.3.dist-info → megfile-4.1.3.post2.dist-info}/licenses/LICENSE
File without changes

{megfile-4.1.3.dist-info → megfile-4.1.3.post2.dist-info}/licenses/LICENSE.pyre
File without changes