megfile 4.0.0.post1__py3-none-any.whl → 4.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- megfile/__init__.py +2 -0
- megfile/cli.py +67 -9
- megfile/config.py +89 -7
- megfile/lib/s3_prefetch_reader.py +9 -7
- megfile/s3_path.py +10 -10
- megfile/sftp.py +77 -0
- megfile/sftp_path.py +1 -0
- megfile/smart.py +5 -3
- megfile/version.py +1 -1
- {megfile-4.0.0.post1.dist-info → megfile-4.0.2.dist-info}/METADATA +6 -3
- {megfile-4.0.0.post1.dist-info → megfile-4.0.2.dist-info}/RECORD +26 -16
- {megfile-4.0.0.post1.dist-info → megfile-4.0.2.dist-info}/top_level.txt +1 -0
- scripts/speed_test/code/iopath_read.py +29 -0
- scripts/speed_test/code/iopath_write.py +30 -0
- scripts/speed_test/code/megfile_read.py +13 -0
- scripts/speed_test/code/megfile_write.py +14 -0
- scripts/speed_test/code/pyarrow_read.py +17 -0
- scripts/speed_test/code/pyarrow_write.py +18 -0
- scripts/speed_test/code/s3fs_read.py +21 -0
- scripts/speed_test/code/s3fs_write.py +22 -0
- scripts/speed_test/code/smart_open_read.py +25 -0
- scripts/speed_test/code/smart_open_write.py +26 -0
- {megfile-4.0.0.post1.dist-info → megfile-4.0.2.dist-info}/LICENSE +0 -0
- {megfile-4.0.0.post1.dist-info → megfile-4.0.2.dist-info}/LICENSE.pyre +0 -0
- {megfile-4.0.0.post1.dist-info → megfile-4.0.2.dist-info}/WHEEL +0 -0
- {megfile-4.0.0.post1.dist-info → megfile-4.0.2.dist-info}/entry_points.txt +0 -0
megfile/__init__.py
CHANGED

@@ -121,6 +121,7 @@ from megfile.s3_path import S3Path
 from megfile.sftp import (
     is_sftp,
     sftp_absolute,
+    sftp_add_host_key,
     sftp_chmod,
     sftp_concat,
     sftp_copy,
@@ -371,6 +372,7 @@ __all__ = [
     "sftp_copy",
     "sftp_sync",
     "sftp_concat",
+    "sftp_add_host_key",
     "is_hdfs",
     "hdfs_exists",
     "hdfs_stat",
megfile/cli.py
CHANGED

@@ -1,5 +1,4 @@
 import configparser
-import logging
 import os
 import shutil
 import sys
@@ -10,10 +9,11 @@ from functools import partial
 import click
 from tqdm import tqdm

-from megfile.config import READER_BLOCK_SIZE
+from megfile.config import READER_BLOCK_SIZE, SFTP_HOST_KEY_POLICY, set_log_level
 from megfile.hdfs_path import DEFAULT_HDFS_TIMEOUT
 from megfile.interfaces import FileEntry
 from megfile.lib.glob import get_non_glob_dir, has_magic
+from megfile.sftp import sftp_add_host_key
 from megfile.smart import (
     _smart_sync_single_file,
     smart_copy,
@@ -44,29 +44,34 @@ from megfile.smart_path import SmartPath
 from megfile.utils import get_human_size
 from megfile.version import VERSION

-
-
-DEBUG = False
+options = {}
+set_log_level()


 @click.group()
 @click.option("--debug", is_flag=True, help="Enable debug mode.")
-def cli(debug):
+@click.option(
+    "--log-level",
+    type=click.Choice(["DEBUG", "INFO", "WARNING", "ERROR"]),
+    help="Set logging level.",
+)
+def cli(debug, log_level):
     """
     Client for megfile.

     If you install megfile with ``--user``,
     you also need configure ``$HOME/.local/bin`` into ``$PATH``.
     """
-    global DEBUG
-    DEBUG = debug
+    options["debug"] = debug
+    options["log_level"] = log_level or ("DEBUG" if debug else "INFO")
+    set_log_level(options["log_level"])


 def safe_cli():  # pragma: no cover
     try:
         cli()
     except Exception as e:
-        if DEBUG:
+        if options.get("debug", False):
             raise
         else:
             click.echo(f"\n[{type(e).__name__}] {e}", err=True)
@@ -110,6 +115,23 @@ def smart_list_stat(path)
     yield from smart_scandir(path)


+def _sftp_prompt_host_key(path):
+    if SFTP_HOST_KEY_POLICY == "auto":
+        return
+
+    path = SmartPath(path)
+    if path.protocol == "sftp":
+        hostname = (
+            path.pathlike._urlsplit_parts.hostname  # pytype: disable=attribute-error
+        )
+        port = path.pathlike._urlsplit_parts.port  # pytype: disable=attribute-error
+        sftp_add_host_key(
+            hostname=hostname,
+            port=port,
+            prompt=True,
+        )
+
+
 def _ls(path: str, long: bool, recursive: bool, human_readable: bool):
     base_path = path
     full_path = False
@@ -121,6 +143,9 @@ def _ls(path: str, long: bool, recursive: bool, human_readable: bool):
         scan_func = smart_scan_stat
     else:
         scan_func = smart_list_stat
+
+    _sftp_prompt_host_key(base_path)
+
     if long:
         if human_readable:
             echo_func = human_echo
@@ -209,6 +234,10 @@ def cp(
 ):
     if not no_target_directory and (dst_path.endswith("/") or smart_isdir(dst_path)):
         dst_path = smart_path_join(dst_path, os.path.basename(src_path))
+
+    _sftp_prompt_host_key(src_path)
+    _sftp_prompt_host_key(dst_path)
+
     if recursive:
         with ThreadPoolExecutor(max_workers=(os.cpu_count() or 1) * 2) as executor:
             if progress_bar:
@@ -274,6 +303,10 @@ def mv(
 ):
     if not no_target_directory and (dst_path.endswith("/") or smart_isdir(dst_path)):
         dst_path = smart_path_join(dst_path, os.path.basename(src_path))
+
+    _sftp_prompt_host_key(src_path)
+    _sftp_prompt_host_key(dst_path)
+
     if progress_bar:
         src_protocol, _ = SmartPath._extract_protocol(src_path)
         dst_protocol, _ = SmartPath._extract_protocol(dst_path)
@@ -324,6 +357,8 @@
     "under the specified directory or prefix.",
 )
 def rm(path: str, recursive: bool):
+    _sftp_prompt_host_key(path)
+
     remove_func = smart_remove if recursive else smart_unlink
     remove_func(path)

@@ -349,6 +384,9 @@ def sync(
     quiet: bool,
     skip: bool,
 ):
+    _sftp_prompt_host_key(src_path)
+    _sftp_prompt_host_key(dst_path)
+
     if not smart_exists(dst_path):
         force = True

@@ -434,18 +472,24 @@
 @cli.command(short_help="Make the path if it doesn't already exist.")
 @click.argument("path")
 def mkdir(path: str):
+    _sftp_prompt_host_key(path)
+
     smart_makedirs(path)


 @cli.command(short_help="Make the file if it doesn't already exist.")
 @click.argument("path")
 def touch(path: str):
+    _sftp_prompt_host_key(path)
+
     smart_touch(path)


 @cli.command(short_help="Concatenate any files and send them to stdout.")
 @click.argument("path")
 def cat(path: str):
+    _sftp_prompt_host_key(path)
+
     with smart_open(path, "rb") as f:
         shutil.copyfileobj(f, sys.stdout.buffer)  # pytype: disable=wrong-arg-types

@@ -458,6 +502,8 @@ def cat(path: str):
     "-n", "--lines", type=click.INT, default=10, help="print the first NUM lines"
 )
 def head(path: str, lines: int):
+    _sftp_prompt_host_key(path)
+
     with smart_open(path, "rb") as f:
         for _ in range(lines):
             try:
@@ -480,6 +526,8 @@ def head(path: str, lines: int):
     "-f", "--follow", is_flag=True, help="output appended data as the file grows"
 )
 def tail(path: str, lines: int, follow: bool):
+    _sftp_prompt_host_key(path)
+
     line_list = []
     with smart_open(path, "rb") as f:
         f.seek(0, os.SEEK_END)
@@ -524,6 +572,8 @@ def tail(path: str, lines: int, follow: bool):
 @click.option("-a", "--append", is_flag=True, help="Append to the given file")
 @click.option("-o", "--stdout", is_flag=True, help="File content to standard output")
 def to(path: str, append: bool, stdout: bool):
+    _sftp_prompt_host_key(path)
+
     mode = "wb"
     if append:
         mode = "ab"
@@ -545,24 +595,32 @@ def to(path: str, append: bool, stdout: bool):
 @cli.command(short_help="Produce an md5sum file for all the objects in the path.")
 @click.argument("path")
 def md5sum(path: str):
+    _sftp_prompt_host_key(path)
+
     click.echo(smart_getmd5(path, recalculate=True))


 @cli.command(short_help="Return the total size and number of objects in remote:path.")
 @click.argument("path")
 def size(path: str):
+    _sftp_prompt_host_key(path)
+
     click.echo(smart_getsize(path))


 @cli.command(short_help="Return the mtime and number of objects in remote:path.")
 @click.argument("path")
 def mtime(path: str):
+    _sftp_prompt_host_key(path)
+
     click.echo(smart_getmtime(path))


 @cli.command(short_help="Return the stat and number of objects in remote:path.")
 @click.argument("path")
 def stat(path: str):
+    _sftp_prompt_host_key(path)
+
     click.echo(smart_stat(path))
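For readers tracing the hunks above: the CLI now resolves its effective log level from the new `--log-level` option, falling back to `DEBUG` when `--debug` is set and to `INFO` otherwise. A minimal sketch of that resolution rule (the helper name is illustrative, not part of megfile):

```python
from typing import Optional


def resolve_log_level(debug: bool, log_level: Optional[str]) -> str:
    # Mirrors: options["log_level"] = log_level or ("DEBUG" if debug else "INFO")
    return log_level or ("DEBUG" if debug else "INFO")


assert resolve_log_level(debug=False, log_level=None) == "INFO"
assert resolve_log_level(debug=True, log_level=None) == "DEBUG"
assert resolve_log_level(debug=True, log_level="WARNING") == "WARNING"
```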
megfile/config.py
CHANGED

@@ -1,30 +1,109 @@
+import logging
 import os
+import typing as T


-def to_boolean(value: str):
+def parse_quantity(quantity: T.Union[str, int]) -> int:
+    """
+    Parse kubernetes canonical form quantity like 200Mi to a int number.
+    Supported SI suffixes:
+    base1024: Ki | Mi | Gi | Ti | Pi | Ei
+    base1000: "" | k | M | G | T | P | E
+
+    (International System of units; See: http://physics.nist.gov/cuu/Units/binary.html)
+
+    Input:
+    quantity: string. kubernetes canonical form quantity
+
+    Returns:
+    Int
+
+    Raises:
+    ValueError on invalid or unknown input
+    """
+    if isinstance(quantity, int):
+        return quantity
+
+    exponents = {"K": 1, "k": 1, "M": 2, "G": 3, "T": 4, "P": 5, "E": 6}
+
+    number = quantity
+    suffix = None
+    if len(quantity) >= 2 and quantity[-1] == "i":
+        if quantity[-2] in exponents:
+            number = quantity[:-2]
+            suffix = quantity[-2:]
+    elif len(quantity) >= 1 and quantity[-1] in exponents:
+        number = quantity[:-1]
+        suffix = quantity[-1:]
+
+    try:
+        number = int(number)
+    except ValueError:
+        raise ValueError("Invalid number format: {}".format(number))
+
+    if suffix is None:
+        return number
+
+    if suffix.endswith("i"):
+        base = 1024
+    elif len(suffix) == 1:
+        base = 1000
+    else:
+        raise ValueError("{} has unknown suffix".format(quantity))
+
+    # handle SI inconsistency
+    if suffix == "ki":
+        raise ValueError("{} has unknown suffix".format(quantity))
+
+    if suffix[0] not in exponents:
+        raise ValueError("{} has unknown suffix".format(quantity))
+
+    exponent = int(exponents[suffix[0]])
+    return number * (base**exponent)  # pytype: disable=bad-return-type
+
+
+def parse_boolean(value: T.Optional[str], default: bool = False):
+    if value is None:
+        return default
     return value.lower() in ("true", "yes", "1")


-READER_BLOCK_SIZE = int(os.getenv("MEGFILE_READER_BLOCK_SIZE") or 8 * 2**20)
+def set_log_level(level: T.Optional[T.Union[int, str]] = None):
+    logging.basicConfig(
+        level=logging.ERROR,
+        format=(
+            "%(asctime)s | %(levelname)-8s | "
+            "%(name)s:%(funcName)s:%(lineno)d - %(message)s"
+        ),
+    )
+    level = level or os.getenv("MEGFILE_LOG_LEVEL") or logging.INFO
+    logging.getLogger("megfile").setLevel(level)
+
+
+READER_BLOCK_SIZE = parse_quantity(os.getenv("MEGFILE_READER_BLOCK_SIZE") or 8 * 2**20)
 if READER_BLOCK_SIZE <= 0:
     raise ValueError(
         f"'MEGFILE_READER_BLOCK_SIZE' must bigger than 0, got {READER_BLOCK_SIZE}"
     )
-READER_MAX_BUFFER_SIZE = int(os.getenv("MEGFILE_READER_MAX_BUFFER_SIZE") or 128 * 2**20)
+READER_MAX_BUFFER_SIZE = parse_quantity(
+    os.getenv("MEGFILE_READER_MAX_BUFFER_SIZE") or 128 * 2**20
+)

 # Multi-upload in aws s3 has a maximum of 10,000 parts,
 # so the maximum supported file size is MEGFILE_WRITE_BLOCK_SIZE * 10,000,
 # the largest object that can be uploaded in a single PUT is 5 TB in aws s3.
-WRITER_BLOCK_SIZE = int(os.getenv("MEGFILE_WRITER_BLOCK_SIZE") or 8 * 2**20)
+WRITER_BLOCK_SIZE = parse_quantity(os.getenv("MEGFILE_WRITER_BLOCK_SIZE") or 8 * 2**20)
 if WRITER_BLOCK_SIZE <= 0:
     raise ValueError(
         f"'MEGFILE_WRITER_BLOCK_SIZE' must bigger than 0, got {WRITER_BLOCK_SIZE}"
     )
-WRITER_MAX_BUFFER_SIZE = int(os.getenv("MEGFILE_WRITER_MAX_BUFFER_SIZE") or 128 * 2**20)
+WRITER_MAX_BUFFER_SIZE = parse_quantity(
+    os.getenv("MEGFILE_WRITER_MAX_BUFFER_SIZE") or 128 * 2**20
+)
 DEFAULT_WRITER_BLOCK_AUTOSCALE = not os.getenv("MEGFILE_WRITER_BLOCK_SIZE")
 if os.getenv("MEGFILE_WRITER_BLOCK_AUTOSCALE"):
-    DEFAULT_WRITER_BLOCK_AUTOSCALE = to_boolean(
-        os.environ["MEGFILE_WRITER_BLOCK_AUTOSCALE"]
+    DEFAULT_WRITER_BLOCK_AUTOSCALE = parse_boolean(
+        os.environ["MEGFILE_WRITER_BLOCK_AUTOSCALE"]
     )

 GLOBAL_MAX_WORKERS = int(os.getenv("MEGFILE_MAX_WORKERS") or 8)
@@ -50,3 +129,6 @@ SFTP_MAX_RETRY_TIMES = int(
 SFTP_HOST_KEY_POLICY = os.getenv("MEGFILE_SFTP_HOST_KEY_POLICY")

 HTTP_AUTH_HEADERS = ("Authorization", "Www-Authenticate", "Cookie", "Cookie2")
+
+if os.getenv("MEGFILE_LOG_LEVEL"):
+    set_log_level()
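The practical effect of `parse_quantity` is that the size-related environment variables (`MEGFILE_READER_BLOCK_SIZE`, `MEGFILE_WRITER_BLOCK_SIZE` and the `*_MAX_BUFFER_SIZE` variants) now accept Kubernetes-style quantities as well as plain byte counts. Expected behavior, derived from the function body above:

```python
from megfile.config import parse_quantity

# Plain integers and numeric strings pass through unchanged.
assert parse_quantity(8 * 2**20) == 8388608
assert parse_quantity("8388608") == 8388608

# Base-1024 suffixes: Ki | Mi | Gi | Ti | Pi | Ei
assert parse_quantity("8Mi") == 8 * 1024**2

# Base-1000 suffixes: k | M | G | T | P | E
assert parse_quantity("8M") == 8 * 1000**2

# "ki" is rejected on purpose (SI inconsistency), as are unknown suffixes.
for bad in ("8ki", "8Xi", "abcMi"):
    try:
        parse_quantity(bad)
    except ValueError:
        pass
```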
megfile/lib/s3_prefetch_reader.py
CHANGED

@@ -52,7 +52,6 @@ class S3PrefetchReader(BasePrefetchReader):
         self._client = s3_client
         self._profile_name = profile_name
         self._content_etag = None
-        self._content_info = None

         super().__init__(
             block_size=block_size,
@@ -63,6 +62,11 @@
         )

     def _get_content_size(self):
+        if self._block_capacity <= 0:
+            response = self._client.head_object(Bucket=self._bucket, Key=self._key)
+            self._content_etag = response["ETag"]
+            return int(response["ContentLength"])
+
         try:
             start, end = 0, self._block_size - 1
             first_index_response = self._fetch_response(start=start, end=end)
@@ -77,12 +81,10 @@
             first_index_response = self._fetch_response()
             content_size = int(first_index_response["ContentLength"])

-
-
-
-        self._insert_futures(index=0, future=first_future)
+        first_future = Future()
+        first_future.set_result(first_index_response["Body"])
+        self._insert_futures(index=0, future=first_future)
         self._content_etag = first_index_response["ETag"]
-        self._content_info = first_index_response
         return content_size

     @property
@@ -121,7 +123,7 @@
         if etag is not None and etag != self._content_etag:
             raise S3FileChangedError(
                 "File changed: %r, etag before: %s, after: %s"
-                % (self.name, self.
+                % (self.name, self._content_etag, etag)
             )

         return response["Body"]
megfile/s3_path.py
CHANGED

@@ -23,7 +23,7 @@ from megfile.config import (
     S3_MAX_RETRY_TIMES,
     WRITER_BLOCK_SIZE,
     WRITER_MAX_BUFFER_SIZE,
-    to_boolean,
+    parse_boolean,
 )
 from megfile.errors import (
     S3BucketNotFoundError,
@@ -253,12 +253,6 @@ def get_env_var(env_name: str, profile_name=None):
     return os.getenv(env_name.upper())


-def parse_boolean(value: Optional[str], default: bool = False) -> bool:
-    if value is None:
-        return default
-    return to_boolean(value)
-
-
 def get_access_token(profile_name=None):
     access_key = get_env_var("AWS_ACCESS_KEY_ID", profile_name=profile_name)
     secret_key = get_env_var("AWS_SECRET_ACCESS_KEY", profile_name=profile_name)
@@ -1003,13 +997,15 @@ def s3_buffered_open(
             profile_name=s3_url._profile_name,
         )
     else:
+        if max_buffer_size is None:
+            max_buffer_size = READER_MAX_BUFFER_SIZE
         reader = S3PrefetchReader(
             bucket,
             key,
             s3_client=client,
             max_retries=max_retries,
             max_workers=max_workers,
-            max_buffer_size=max_buffer_size or READER_MAX_BUFFER_SIZE,
+            max_buffer_size=max_buffer_size,
             block_forward=block_forward,
             block_size=block_size or READER_BLOCK_SIZE,
             profile_name=s3_url._profile_name,
@@ -1019,23 +1015,27 @@
         return reader

     if limited_seekable:
+        if max_buffer_size is None:
+            max_buffer_size = WRITER_MAX_BUFFER_SIZE
         writer = S3LimitedSeekableWriter(
             bucket,
             key,
             s3_client=client,
             max_workers=max_workers,
             block_size=block_size or WRITER_BLOCK_SIZE,
-            max_buffer_size=max_buffer_size or WRITER_MAX_BUFFER_SIZE,
+            max_buffer_size=max_buffer_size,
             profile_name=s3_url._profile_name,
         )
     else:
+        if max_buffer_size is None:
+            max_buffer_size = WRITER_MAX_BUFFER_SIZE
         writer = S3BufferedWriter(
             bucket,
             key,
             s3_client=client,
             max_workers=max_workers,
             block_size=block_size or WRITER_BLOCK_SIZE,
-            max_buffer_size=max_buffer_size or WRITER_MAX_BUFFER_SIZE,
+            max_buffer_size=max_buffer_size,
             profile_name=s3_url._profile_name,
         )
     if buffered or _is_pickle(writer):
megfile/sftp.py
CHANGED

@@ -1,7 +1,11 @@
+import base64
+import hashlib
 import os
 from logging import getLogger as get_logger
 from typing import IO, BinaryIO, Callable, Iterator, List, Optional, Tuple

+import paramiko
+
 from megfile.interfaces import FileEntry, PathLike, StatResult
 from megfile.lib.compat import fspath
 from megfile.lib.joinpath import uri_join
@@ -52,6 +56,7 @@ __all__ = [
     "sftp_rmdir",
     "sftp_copy",
     "sftp_sync",
+    "sftp_add_host_key",
 ]


@@ -739,3 +744,75 @@ def sftp_sync(
     :param overwrite: whether or not overwrite file when exists, default is True
     """
     return SftpPath(src_path).sync(dst_path, followlinks, force, overwrite)
+
+
+def _check_input(input_str: str, fingerprint: str, times: int = 0) -> bool:
+    answers = input_str.strip()
+    if answers.lower() in ("yes", "y") or answers == fingerprint:
+        return True
+    elif answers.lower() in ("no", "n"):
+        return False
+    elif times >= 10:
+        _logger.warning("Retried more than 10 times, give up")
+        return False
+    else:
+        input_str = input("Please type 'yes', 'no' or the fingerprint: ")
+        return _check_input(input_str, fingerprint, times=times + 1)
+
+
+def _prompt_add_to_known_hosts(hostname, key) -> bool:
+    fingerprint = hashlib.sha256(key.asbytes()).digest()
+    fingerprint = f"SHA256:{base64.b64encode(fingerprint).decode('utf-8')}"
+    answers = input(f"""The authenticity of host '{hostname}' can't be established.
+{key.get_name().upper()} key fingerprint is {fingerprint}.
+This key is not known by any other names.
+Are you sure you want to continue connecting (yes/no/[fingerprint])? """)
+    return _check_input(answers, fingerprint)
+
+
+def sftp_add_host_key(
+    hostname: str,
+    port: int = 22,
+    prompt: bool = False,
+    host_key_path: Optional["str"] = None,
+):
+    """Add a host key to known_hosts.
+
+    :param hostname: hostname
+    :param port: port, default is 22
+    :param prompt: If True, requires user input of 'yes' or 'no' to decide whether to
+        add this host key
+    :param host_key_path: path of known_hosts, default is ~/.ssh/known_hosts
+    """
+    if not host_key_path:
+        host_key_path = os.path.expanduser("~/.ssh/known_hosts")
+
+    if not os.path.exists(host_key_path):
+        dirname = os.path.dirname(host_key_path)
+        if dirname and dirname != ".":
+            os.makedirs(dirname, exist_ok=True, mode=0o700)
+        with open(host_key_path, "w"):
+            pass
+        os.chmod(host_key_path, 0o600)
+
+    host_key = paramiko.hostkeys.HostKeys(host_key_path)
+    if host_key.lookup(hostname):
+        return
+
+    transport = paramiko.Transport(
+        (
+            hostname,
+            port,
+        )
+    )
+    transport.connect()
+    key = transport.get_remote_server_key()
+    transport.close()
+
+    if prompt:
+        result = _prompt_add_to_known_hosts(hostname, key)
+        if not result:
+            return
+
+    host_key.add(hostname, key.get_name(), key)
+    host_key.save(host_key_path)
megfile/sftp_path.py
CHANGED

@@ -208,6 +208,7 @@ def _get_ssh_client(
     policy = policies.get(SFTP_HOST_KEY_POLICY, default_policy)()  # pyre-ignore[29]

     ssh_client = paramiko.SSHClient()
+    ssh_client.load_system_host_keys()
     ssh_client.set_missing_host_key_policy(policy)
     max_unauth_connections = int(os.getenv(SFTP_MAX_UNAUTH_CONN, 10))
     try:
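The single added line means paramiko now consults the user's and system's known_hosts before the missing-host-key policy is applied, so hosts trusted via `sftp_add_host_key` (or by `ssh` itself) connect cleanly regardless of `MEGFILE_SFTP_HOST_KEY_POLICY`. The equivalent bare-paramiko pattern:

```python
import paramiko

client = paramiko.SSHClient()
client.load_system_host_keys()  # reads ~/.ssh/known_hosts and the system file
# Only hosts absent from the loaded keys reach this policy.
client.set_missing_host_key_policy(paramiko.RejectPolicy())
```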
megfile/smart.py
CHANGED

@@ -1001,11 +1001,13 @@ def smart_load_content(
         return s3_load_content(path, start, stop)

     with smart_open(path, "rb") as fd:
-        if start:
+        if start is not None:
             fd.seek(start)
         offset = -1
-        if stop:
-            offset = stop - start
+        if stop is not None:
+            offset = stop - (start or 0)  # start may be None
+            if offset < 0:
+                raise ValueError("stop should be greater than start")
         return fd.read(offset)  # pytype: disable=bad-return-type
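With this fix, calling `smart_load_content` with `stop` but no `start` no longer fails (the old `offset = stop - start` would subtract from `None`), and an inverted range raises `ValueError` up front. A usage sketch (`/tmp/demo.bin` is a placeholder path):

```python
from megfile import smart_load_content

head = smart_load_content("/tmp/demo.bin", start=0, stop=16)  # first 16 bytes
prefix = smart_load_content("/tmp/demo.bin", stop=16)         # start may be omitted

# stop < start now raises ValueError instead of reading a negative offset.
try:
    smart_load_content("/tmp/demo.bin", start=16, stop=8)
except ValueError:
    pass
```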
megfile/version.py
CHANGED

@@ -1 +1 @@
-VERSION = "4.0.0.post1"
+VERSION = "4.0.2"
{megfile-4.0.0.post1.dist-info → megfile-4.0.2.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: megfile
-Version: 4.0.0.post1
+Version: 4.0.2
 Summary: Megvii file operation library
 Author-email: megvii <megfile@megvii.com>
 Project-URL: Homepage, https://github.com/megvii-research/megfile
@@ -200,10 +200,10 @@ s3 =
 $ megfile config s3 accesskey secretkey \
     --addressing-style virtual \
     --endpoint-url https://tos-s3-cn-beijing.ivolces.com \
-    --profile tos
+    --profile-name tos

 # create alias
-$ megfile alias tos s3+tos
+$ megfile config alias tos s3+tos
 ```
@@ -212,6 +212,9 @@ You can get the configuration from `~/.config/megfile/aliases.conf`, like:
 protocol = s3+tos
 ```

+## Speed Test
+[](https://megvii-research.github.io/megfile/speed_test.html)
+[](https://megvii-research.github.io/megfile/speed_test.html)

 ## How to Contribute
 * We welcome everyone to contribute code to the `megfile` project, but the contributed code needs to meet the following conditions as much as possible:
{megfile-4.0.0.post1.dist-info → megfile-4.0.2.dist-info}/RECORD
CHANGED

@@ -1,6 +1,6 @@
-megfile/__init__.py,sha256=
-megfile/cli.py,sha256=
-megfile/config.py,sha256=
+megfile/__init__.py,sha256=7oEfu410CFKzDWZ9RjL5xEJ1gtkJkTfvPrL_7TWdJuY,7366
+megfile/cli.py,sha256=e3VVr8oe8iR7L_PtpNtyqAvQL_WgJzzEz8oewSAlgX4,24887
+megfile/config.py,sha256=_6HiGeXEyk6RjPdjA0eEj1unq9iLJV_vQJBzQ-eHNvs,4185
 megfile/errors.py,sha256=a55qKQgyfiLmV-qnojUFzq2gu9JXpj3ZiC2qVaWyUTA,14160
 megfile/fs.py,sha256=bPGbapv41FzME87X3MhSNQRjNmHrI23FuFnjPT0ukQs,18239
 megfile/fs_path.py,sha256=ZK-po1xqhHocMb9Vrxf5K9tDx3zxQmGxNIHY3Z7Akp8,39085
@@ -11,14 +11,14 @@ megfile/http_path.py,sha256=c-xAu5wDxcTevmIUmrNEy-m-QiCfDJToaVI7y8SVIUI,14492
 megfile/interfaces.py,sha256=p4UvVZpeLx5djd6bqqDaygIx_s-_AxIVj-gudTch4JE,8467
 megfile/pathlike.py,sha256=vfuTBqSTIciRxkkqMfLfnBxWTEl9yns1yR8zgK4Raw0,31268
 megfile/s3.py,sha256=zqAegH5tijcztEKcfHXmOYhAR880nTxaAzc2O0JJnjc,16661
-megfile/s3_path.py,sha256=
-megfile/sftp.py,sha256=
-megfile/sftp_path.py,sha256=
-megfile/smart.py,sha256=
+megfile/s3_path.py,sha256=lpUKy4n5DTf6hK6TvPhMjt_ZgdIXO4vcyK_VLaGkvhg,93395
+megfile/sftp.py,sha256=0ZnQlmhgvs7pYjFTcvvOyxTo2IUurE-hp1GN0hnIrdQ,26473
+megfile/sftp_path.py,sha256=4rcbn3wqcOEs71W6qWu1efcj6MZUgrZm6U0Jan-eB70,43604
+megfile/smart.py,sha256=Ps8acPx6jeG1UJnRD8xL2aQjRp7IMW8sV6VFkMF0TQk,36910
 megfile/smart_path.py,sha256=Bqg95T2-XZrRXWhH7GT-jMCYzD7i1SIXdczQxtOxiPs,7583
 megfile/stdio.py,sha256=C_cGID_npthpwoPcsJMMEqqbVUPUnDxxJV9jLY2_D7c,635
 megfile/stdio_path.py,sha256=L8ODNIwO79UIv13YYc2OTr6f4XTv4ZPyvBeRk83-AjA,2700
-megfile/version.py,sha256=
+megfile/version.py,sha256=secXeY4K_CdlbaUs9G--LGWfWlf1VqdtopDxqRzq2JQ,19
 megfile/lib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 megfile/lib/base_prefetch_reader.py,sha256=6Dy2ZwlowqAvyUUa7bpQLCKOclmmUDhqEF-_CDDp0Og,13100
 megfile/lib/combine_reader.py,sha256=nKGAug29lOpNIZuLKu7_qVrJJRpXL_J4jxLglWbGJ1w,4808
@@ -36,17 +36,27 @@ megfile/lib/s3_cached_handler.py,sha256=X8PdeRC-BY6eSmOO5f2BeyjTPxyEwNtHgmAm9Vgm
 megfile/lib/s3_limited_seekable_writer.py,sha256=mUeoTS98LHluwDN7zxdCVcsjOGBT1bOYV8nRvi9QMGE,6212
 megfile/lib/s3_memory_handler.py,sha256=4uzBzz2jfRI_u6jl0CpOGAhpNJhDQo18FSAweauCUFs,4136
 megfile/lib/s3_pipe_handler.py,sha256=dm7NnZd1Ym5ABS1GvOQtoCJEO_CB8e6p4sUhLiid0go,3622
-megfile/lib/s3_prefetch_reader.py,sha256=
+megfile/lib/s3_prefetch_reader.py,sha256=dHltiM5Ui-SY4pqhvIsmC0iNmprXwlczDD4lNHB5WrQ,4418
 megfile/lib/s3_share_cache_reader.py,sha256=LVWKxHdHo0_zUIW4o8yqNvplqqwezUPeYEt02Vj-WNM,3754
 megfile/lib/shadow_handler.py,sha256=TntewlvIW9ZxCfmqASDQREHoiZ8v42faOe9sovQYQz0,2779
 megfile/lib/stdio_handler.py,sha256=IDdgENLQlhigEwkLL4zStueVSzdWg7xVcTF_koof_Ek,1987
 megfile/lib/url.py,sha256=ER32pWy9Q2MAk3TraAaNEBWIqUeBmLuM57ol2cs7-Ks,103
 megfile/utils/__init__.py,sha256=sATf_NlsSTYIMEiA8-gM6K1M-Q1K6_7rx2VM31hrqaA,10838
 megfile/utils/mutex.py,sha256=asb8opGLgK22RiuBJUnfsvB8LnMmodP8KzCVHKmQBWA,2561
-
-
-
-
-
-
-
+scripts/speed_test/code/iopath_read.py,sha256=O1Qs3mpvD9S_XCuRH2A2FpGWxCOSw6qZvEBrtPeRL1E,661
+scripts/speed_test/code/iopath_write.py,sha256=Mm0efW1J09RJ_CK5i1xtG2hJuaaslikin8qVpuRFP_Q,704
+scripts/speed_test/code/megfile_read.py,sha256=sAMebUiMColHDv3JEkXplImAHvn_IF1-g3BIJxhcQYE,239
+scripts/speed_test/code/megfile_write.py,sha256=bzn-i2bGH4eRcsVvkhXK35KsQkX2v2oEsOJ0Ft5saj0,257
+scripts/speed_test/code/pyarrow_read.py,sha256=2QBGKjGV2Dvl2ukOntLSag33pF55l3tfZ2Z6dLUjovw,305
+scripts/speed_test/code/pyarrow_write.py,sha256=U1puLh-ljSXv772bZYAyhzmxhPOq4aR4j-QtwdM9hG0,328
+scripts/speed_test/code/s3fs_read.py,sha256=XiTA-qrYblUs-jQWXSnvNg5Wo722C_g47aMMfo5XJBY,380
+scripts/speed_test/code/s3fs_write.py,sha256=gdXKkWXYGjLJlRT_J64pJN85XvRg3bZexcAJQEMXwtw,402
+scripts/speed_test/code/smart_open_read.py,sha256=SA02jHwS9Y31yFtV9CoJcfND5dR0eA_HsGmGNUrpQls,515
+scripts/speed_test/code/smart_open_write.py,sha256=jDxFJdY97yNH889jz3pawBoei3yaqy8pEMvC_ymHFtM,537
+megfile-4.0.2.dist-info/LICENSE,sha256=WNHhf_5RCaeuKWyq_K39vmp9F28LxKsB4SpomwSZ2L0,11357
+megfile-4.0.2.dist-info/LICENSE.pyre,sha256=9lf5nT-5ZH25JijpYAequ0bl8E8z5JmZB1qrjiUMp84,1080
+megfile-4.0.2.dist-info/METADATA,sha256=A8TXdy5RWnQFzA7e2r9h757InExc7l7JaSN3O3felgk,9578
+megfile-4.0.2.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
+megfile-4.0.2.dist-info/entry_points.txt,sha256=M6ZWSSv5_5_QtIpZafy3vq7WuOJ_5dSGQQnEZbByt2Q,49
+megfile-4.0.2.dist-info/top_level.txt,sha256=fVg49lk5B9L7jyfWUXWxb0DDSuw5pbr0OU62Tvx8J8M,44
+megfile-4.0.2.dist-info/RECORD,,
scripts/speed_test/code/iopath_read.py
ADDED

@@ -0,0 +1,29 @@
+import os
+import time
+
+import boto3
+from iopath.common.file_io import PathManager
+from iopath.common.s3 import S3PathHandler
+
+times = 10240
+s3_path = "s3://bucketA/large.txt"
+
+start = time.time()
+
+path_manager = PathManager()
+
+session = boto3.Session(
+    aws_access_key_id=os.environ["AWS_ACCESS_KEY_ID"],
+    aws_secret_access_key=os.environ["AWS_SECRET_ACCESS_KEY"],
+)
+client = session.client("s3", endpoint_url=os.environ["OSS_ENDPOINT"])
+handler = S3PathHandler()
+handler.client = client
+
+path_manager.register_handler(handler)
+
+with path_manager.open(s3_path, "rb") as f:
+    for i in range(times):
+        f.read(1024 * 1024)
+
+print(time.time() - start)
scripts/speed_test/code/iopath_write.py
ADDED

@@ -0,0 +1,30 @@
+import os
+import time
+
+import boto3
+from iopath.common.file_io import PathManager
+from iopath.common.s3 import S3PathHandler
+
+times = 10240
+s3_path = "s3://bucketA/large.txt"
+block = b"1" * 1024 * 1024
+
+start = time.time()
+
+path_manager = PathManager()
+
+session = boto3.Session(
+    aws_access_key_id=os.environ["AWS_ACCESS_KEY_ID"],
+    aws_secret_access_key=os.environ["AWS_SECRET_ACCESS_KEY"],
+)
+client = session.client("s3", endpoint_url=os.environ["OSS_ENDPOINT"])
+handler = S3PathHandler()
+handler.client = client
+
+path_manager.register_handler(handler)
+
+with path_manager.open(s3_path, "wb") as f:
+    for i in range(times):
+        f.write(block)
+
+print(time.time() - start)  # write 10GB 91.642
scripts/speed_test/code/megfile_write.py
ADDED

@@ -0,0 +1,14 @@
+import time
+
+from megfile import smart_open
+
+times = 10240
+s3_path = "s3://bucketA/large.txt"
+block = b"1" * 1024 * 1024
+
+start = time.time()
+with smart_open(s3_path, "wb") as f:
+    for i in range(times):
+        f.write(block)
+
+print(time.time() - start)
scripts/speed_test/code/pyarrow_read.py
ADDED

@@ -0,0 +1,17 @@
+import os
+import time
+
+from pyarrow import fs
+
+times = 10240
+s3_path = "bucketA/large.txt"
+
+start = time.time()
+
+s3 = fs.S3FileSystem(endpoint_override=os.environ["OSS_ENDPOINT"])
+
+with s3.open_input_stream(s3_path) as f:
+    for i in range(times):
+        f.read(1024 * 1024)
+
+print(time.time() - start)
scripts/speed_test/code/pyarrow_write.py
ADDED

@@ -0,0 +1,18 @@
+import os
+import time
+
+from pyarrow import fs
+
+times = 10240
+block = b"1" * 1024 * 1024
+s3_path = "bucketA/large.txt"
+
+start = time.time()
+
+s3 = fs.S3FileSystem(endpoint_override=os.environ["OSS_ENDPOINT"])
+
+with s3.open_output_stream(s3_path) as f:
+    for i in range(times):
+        f.write(block)
+
+print(time.time() - start)
scripts/speed_test/code/s3fs_read.py
ADDED

@@ -0,0 +1,21 @@
+import os
+import time
+
+import s3fs
+
+times = 10240
+s3_path = "bucketA/large.txt"
+
+start = time.time()
+
+s3 = s3fs.S3FileSystem(
+    endpoint_url=os.environ["OSS_ENDPOINT"],
+    key=os.environ["AWS_ACCESS_KEY_ID"],
+    secret=os.environ["AWS_SECRET_ACCESS_KEY"],
+)
+
+with s3.open(s3_path, "rb") as f:
+    for i in range(times):
+        f.read(1024 * 1024)
+
+print(time.time() - start)
scripts/speed_test/code/s3fs_write.py
ADDED

@@ -0,0 +1,22 @@
+import os
+import time
+
+import s3fs
+
+times = 10240
+block = b"1" * 1024 * 1024
+s3_path = "bucketA/large.txt"
+
+start = time.time()
+
+s3 = s3fs.S3FileSystem(
+    endpoint_url=os.environ["OSS_ENDPOINT"],
+    key=os.environ["AWS_ACCESS_KEY_ID"],
+    secret=os.environ["AWS_SECRET_ACCESS_KEY"],
+)
+
+with s3.open(s3_path, "wb") as f:
+    for i in range(times):
+        f.write(block)
+
+print(time.time() - start)
scripts/speed_test/code/smart_open_read.py
ADDED

@@ -0,0 +1,25 @@
+import os
+import time
+
+import boto3
+from smart_open import open
+
+times = 10240
+s3_path = "s3://bucketA/large.txt"
+
+start = time.time()
+session = boto3.Session(
+    aws_access_key_id=os.environ["AWS_ACCESS_KEY_ID"],
+    aws_secret_access_key=os.environ["AWS_SECRET_ACCESS_KEY"],
+)
+with open(
+    s3_path,
+    "rb",
+    transport_params={
+        "client": session.client("s3", endpoint_url=os.environ["OSS_ENDPOINT"])
+    },
+) as f:
+    for i in range(times):
+        f.read(1024 * 1024)
+
+print(time.time() - start)
scripts/speed_test/code/smart_open_write.py
ADDED

@@ -0,0 +1,26 @@
+import os
+import time
+
+import boto3
+from smart_open import open
+
+times = 10240
+s3_path = "s3://bucketA/large.txt"
+block = b"1" * 1024 * 1024
+
+start = time.time()
+session = boto3.Session(
+    aws_access_key_id=os.environ["AWS_ACCESS_KEY_ID"],
+    aws_secret_access_key=os.environ["AWS_SECRET_ACCESS_KEY"],
+)
+with open(
+    s3_path,
+    "wb",
+    transport_params={
+        "client": session.client("s3", endpoint_url=os.environ["OSS_ENDPOINT"])
+    },
+) as f:
+    for i in range(times):
+        f.write(block)
+
+print(time.time() - start)
{megfile-4.0.0.post1.dist-info → megfile-4.0.2.dist-info}/LICENSE
File without changes

{megfile-4.0.0.post1.dist-info → megfile-4.0.2.dist-info}/LICENSE.pyre
File without changes

{megfile-4.0.0.post1.dist-info → megfile-4.0.2.dist-info}/WHEEL
File without changes

{megfile-4.0.0.post1.dist-info → megfile-4.0.2.dist-info}/entry_points.txt
File without changes