megfile 4.0.3__py3-none-any.whl → 4.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- megfile/cli.py +28 -25
- megfile/config.py +1 -6
- megfile/errors.py +15 -10
- megfile/fs.py +1 -1
- megfile/fs_path.py +33 -23
- megfile/hdfs.py +7 -7
- megfile/hdfs_path.py +43 -26
- megfile/http_path.py +1 -8
- megfile/lib/base_prefetch_reader.py +7 -13
- megfile/lib/combine_reader.py +1 -1
- megfile/lib/glob.py +6 -16
- megfile/lib/s3_cached_handler.py +3 -3
- megfile/lib/s3_limited_seekable_writer.py +1 -1
- megfile/lib/s3_memory_handler.py +3 -3
- megfile/lib/s3_pipe_handler.py +4 -4
- megfile/pathlike.py +5 -5
- megfile/s3.py +11 -17
- megfile/s3_path.py +180 -165
- megfile/sftp.py +7 -4
- megfile/sftp_path.py +38 -28
- megfile/smart.py +3 -4
- megfile/stdio.py +2 -1
- megfile/stdio_path.py +1 -0
- megfile/utils/__init__.py +3 -19
- megfile/version.py +1 -1
- {megfile-4.0.3.dist-info → megfile-4.1.0.dist-info}/METADATA +2 -2
- {megfile-4.0.3.dist-info → megfile-4.1.0.dist-info}/RECORD +32 -32
- {megfile-4.0.3.dist-info → megfile-4.1.0.dist-info}/WHEEL +1 -1
- {megfile-4.0.3.dist-info → megfile-4.1.0.dist-info}/LICENSE +0 -0
- {megfile-4.0.3.dist-info → megfile-4.1.0.dist-info}/LICENSE.pyre +0 -0
- {megfile-4.0.3.dist-info → megfile-4.1.0.dist-info}/entry_points.txt +0 -0
- {megfile-4.0.3.dist-info → megfile-4.1.0.dist-info}/top_level.txt +0 -0
megfile/cli.py
CHANGED
@@ -46,6 +46,7 @@ from megfile.version import VERSION
 
 options = {}
 set_log_level()
+max_file_object_catch_count = 1024 * 128
 
 
 @click.group()
@@ -124,12 +125,15 @@ def _sftp_prompt_host_key(path):
    hostname = (
        path.pathlike._urlsplit_parts.hostname  # pytype: disable=attribute-error
    )
-    port = path.pathlike._urlsplit_parts.port  # pytype: disable=attribute-error
-    sftp_add_host_key(
-        hostname=hostname,
-        port=port,
-        prompt=True,
+    port = (
+        path.pathlike._urlsplit_parts.port or 22  # pytype: disable=attribute-error
    )
+    if hostname:
+        sftp_add_host_key(
+            hostname=hostname,
+            port=port,
+            prompt=True,
+        )
 
 
 def _ls(path: str, long: bool, recursive: bool, human_readable: bool):
@@ -161,11 +165,7 @@ def _ls(path: str, long: bool, recursive: bool, human_readable: bool):
        total_count += 1
        output = echo_func(file_stat, base_path, full_path=full_path)
        if file_stat.is_symlink():
-            try:
-                link = smart_readlink(file_stat.path)
-            except FileNotFoundError as e:
-                link = repr(e)
-            output += " -> %s" % link
+            output += " -> %s" % smart_readlink(file_stat.path)
        click.echo(output)
    if long:
        click.echo(f"total({total_count}): {get_human_size(total_size)}")
@@ -417,7 +417,7 @@ def sync(
    file_entries = []
    total_count = total_size = 0
    for total_count, file_entry in enumerate(scan_func(src_path), start=1):
-        if total_count > 1024 * 128:
+        if total_count > max_file_object_catch_count:
            file_entries = []
        else:
            file_entries.append(file_entry)
@@ -506,15 +506,21 @@ def head(path: str, lines: int):
 
    with smart_open(path, "rb") as f:
        for _ in range(lines):
-            try:
-                content = f.readline()
-                if not content:
-                    break
-            except EOFError:
+            content = f.readline()
+            if not content:
                break
            click.echo(content.strip(b"\n"))
 
 
+def _tail_follow_content(path, offset):
+    with smart_open(path, "rb") as f:
+        f.seek(offset)
+        for line in f.readlines():
+            click.echo(line, nl=False)
+        offset = f.tell()
+    return offset
+
+
 @cli.command(
    short_help="Concatenate any files and send last n lines of them to stdout."
 )
@@ -554,17 +560,14 @@ def tail(path: str, lines: int, follow: bool):
    if line_list:
        click.echo(line_list[-1], nl=False)
 
-    if follow:
+    if follow:  # pragma: no cover
        offset = file_size
        while True:
-            with smart_open(path, "rb") as f:
-                f.seek(offset)
-                line = f.readline()
-                offset = f.tell()
-            if not line:
-                time.sleep(1)
-                continue
-            click.echo(line, nl=False)
+            new_offset = _tail_follow_content(path, offset)
+            if new_offset == offset:
+                time.sleep(1)
+            else:
+                offset = new_offset
 
 
 @cli.command(short_help="Write bytes from stdin to file.")
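The `tail --follow` rewrite above replaces an inline read loop with the `_tail_follow_content` helper: each poll re-opens the file, seeks to the last known offset, echoes any new lines, and returns the new offset, while the caller sleeps only when the offset did not advance. A minimal runnable sketch of that polling pattern, using plain `open()` in place of megfile's `smart_open` and illustrative names:

    import os
    import tempfile

    def _tail_follow_content(path, offset):
        # Echo everything past `offset`, then report the new offset.
        with open(path, "rb") as f:
            f.seek(offset)
            for line in f.readlines():
                print(line.decode(), end="")
            offset = f.tell()
        return offset

    fd, path = tempfile.mkstemp()
    os.write(fd, b"first\n")
    offset = _tail_follow_content(path, 0)       # prints "first"
    os.write(fd, b"second\n")
    offset = _tail_follow_content(path, offset)  # prints only "second"
    assert offset == len(b"first\nsecond\n")
    os.close(fd)
    os.remove(path)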
megfile/config.py
CHANGED
@@ -46,18 +46,13 @@ def parse_quantity(quantity: T.Union[str, int]) -> int:
 
    if suffix.endswith("i"):
        base = 1024
-    elif len(suffix) == 1:
-        base = 1000
    else:
-        raise ValueError("{} has unknown suffix".format(quantity))
+        base = 1000
 
    # handle SI inconsistency
    if suffix == "ki":
        raise ValueError("{} has unknown suffix".format(quantity))
 
-    if suffix[0] not in exponents:
-        raise ValueError("{} has unknown suffix".format(quantity))
-
    exponent = int(exponents[suffix[0]])
    return number * (base**exponent)  # pytype: disable=bad-return-type
 
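With the simplified branch above, any suffix not ending in "i" is treated as decimal (base 1000), and the explicit unknown-suffix guard is gone, so an unrecognized suffix now surfaces from the `exponents` lookup itself. A hedged re-implementation sketch, assuming a kubernetes-style exponent table (`k`=1, `M`=2, ...) that this diff does not show:

    exponents = {"k": 1, "K": 1, "M": 2, "G": 3, "T": 4, "P": 5, "E": 6}

    def parse_quantity(quantity) -> int:
        number, suffix = str(quantity), ""
        for i, ch in enumerate(str(quantity)):
            if not (ch.isdigit() or ch in ".-"):
                number, suffix = str(quantity)[:i], str(quantity)[i:]
                break
        base = 1024 if suffix.endswith("i") else 1000  # "Ki" binary, "K" decimal
        if suffix == "ki":  # SI inconsistency: lowercase "ki" is rejected
            raise ValueError("{} has unknown suffix".format(quantity))
        exponent = exponents[suffix[0]] if suffix else 0
        return int(number) * base**exponent

    assert parse_quantity("128Mi") == 128 * 1024 * 1024
    assert parse_quantity("1k") == 1000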
megfile/errors.py
CHANGED
@@ -177,17 +177,19 @@ def patch_method(
 
 def _create_missing_ok_generator(generator, missing_ok: bool, error: Exception):
    if missing_ok:
-        yield from generator
-        return
-
-    zero_elem = True
-    for item in generator:
-        zero_elem = False
-        yield item
+        return generator
 
-    if zero_elem:
+    try:
+        first = next(generator)
+    except StopIteration:
        raise error
 
+    def create_generator():
+        yield first
+        yield from generator
+
+    return create_generator()
+
 
 class UnknownError(Exception):
    def __init__(self, error: Exception, path: PathLike, extra: Optional[str] = None):
@@ -411,11 +413,14 @@ def translate_http_error(http_error: Exception, http_url: str) -> Exception:
 
 
 @contextmanager
-def raise_s3_error(s3_url: PathLike):
+def raise_s3_error(s3_url: PathLike, suppress_error_callback=None):
    try:
        yield
    except Exception as error:
-        raise translate_s3_error(error, s3_url)
+        error = translate_s3_error(error, s3_url)
+        if suppress_error_callback and suppress_error_callback(error):
+            return
+        raise error
 
 
 def s3_error_code_should_retry(error: str) -> bool:
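The `_create_missing_ok_generator` rewrite changes when the missing-file error surfaces: the old version was itself a generator function, so `error` was raised only once the caller started iterating; the new version calls `next()` immediately and therefore raises at call time. A small demonstration of that behavior (the function body is copied from the diff; the call sites are illustrative):

    def _create_missing_ok_generator(generator, missing_ok, error):
        if missing_ok:
            return generator
        try:
            first = next(generator)
        except StopIteration:
            raise error

        def create_generator():
            yield first
            yield from generator

        return create_generator()

    try:
        _create_missing_ok_generator(iter([]), False, FileNotFoundError("no match"))
    except FileNotFoundError:
        pass  # raised immediately, before any iteration
    else:
        raise AssertionError("expected FileNotFoundError")

    gen = _create_missing_ok_generator(iter([1, 2]), False, FileNotFoundError())
    assert list(gen) == [1, 2]  # the consumed first element is re-yielded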
megfile/fs.py
CHANGED
@@ -317,7 +317,7 @@ def fs_walk(
    return FSPath(path).walk(followlinks)
 
 
-def fs_getmd5(path: PathLike, recalculate: bool = False, followlinks: bool = …
+def fs_getmd5(path: PathLike, recalculate: bool = False, followlinks: bool = False):
    """
    Calculate the md5 value of the file
 
megfile/fs_path.py
CHANGED
@@ -392,13 +392,15 @@ class FSPath(URIPath):
 
    def iterdir(self) -> Iterator["FSPath"]:
        """
-        Get all contents of given fs path.
-        The result is in ascending alphabetical order.
+        Get all contents of given fs path. The order of result is in arbitrary order.
 
-        :returns: All contents have in the path
+        :returns: All contents have in the path.
        """
-        for path in sorted(pathlib.Path(self.path_without_protocol).iterdir()):
-            yield self.from_path(fspath(path))
+        self._check_int_path()
+        for path in pathlib.Path(
+            self.path_without_protocol  # pyre-ignore[6]
+        ).iterdir():
+            yield self.from_path(fspath(path))
 
    def load(self) -> BinaryIO:
        """Read all content on specified path and write into memory
@@ -469,6 +471,8 @@ class FSPath(URIPath):
        src_path, dst_path = fspath(self.path_without_protocol), fspath(dst_path)
        if os.path.isfile(src_path):
            _fs_rename_file(src_path, dst_path, overwrite)
+            if os.path.exists(src_path):
+                os.remove(src_path)
            return self.from_path(dst_path)
        else:
            os.makedirs(dst_path, exist_ok=True)
@@ -485,10 +489,7 @@ class FSPath(URIPath):
            else:
                _fs_rename_file(src_file_path, dst_file_path, overwrite)
 
-        if os.path.isdir(src_path):
-            shutil.rmtree(src_path)
-        else:
-            os.remove(src_path)
+        shutil.rmtree(src_path, ignore_errors=True)
 
        return self.from_path(dst_path)
 
@@ -518,6 +519,8 @@ class FSPath(URIPath):
    def _scan(
        self, missing_ok: bool = True, followlinks: bool = False
    ) -> Iterator[str]:
+        self._check_int_path()
+
        if self.is_file(followlinks=followlinks):
            path = fspath(self.path_without_protocol)
            yield path
@@ -567,12 +570,13 @@ class FSPath(URIPath):
                "No match any file in: %r" % self.path_without_protocol
            )
 
-    def scandir(self) -> …
+    def scandir(self) -> ContextIterator:
        """
        Get all content of given file path.
 
        :returns: An iterator contains all contents have prefix path
        """
+        self._check_int_path()
 
        def create_generator():
            with os.scandir(self.path_without_protocol) as entries:
@@ -702,7 +706,7 @@ class FSPath(URIPath):
            )
        )
 
-    def md5(self, recalculate: bool = False, followlinks: bool = …
+    def md5(self, recalculate: bool = False, followlinks: bool = False):
        """
        Calculate the md5 value of the file
 
@@ -711,11 +715,11 @@ class FSPath(URIPath):
 
        returns: md5 of file
        """
-        if …
+        if self.is_dir():
            hash_md5 = hashlib.md5()  # nosec
            for file_name in self.listdir():
                chunk = (
-                    …
+                    self.joinpath(file_name)
                    .md5(recalculate=recalculate, followlinks=followlinks)
                    .encode()
                )
@@ -740,9 +744,8 @@ class FSPath(URIPath):
                if not buf:
                    break
                fdst.write(buf)
-                if callback is None:
-                    continue
-                callback(len(buf))
+                if callback:
+                    callback(len(buf))
        else:
            shutil.copy2(
                self.path_without_protocol,  # pyre-ignore[6]
@@ -793,8 +796,13 @@ class FSPath(URIPath):
        except FileNotFoundError as error:
            # Prevent the dst_path directory from being created when src_path does not
            # exist
-            …
-            …
+            dst_parent_dir = os.path.dirname(dst_path)
+            if (
+                dst_parent_dir
+                and dst_parent_dir != "."
+                and error.filename in (dst_path, dst_parent_dir)
+            ):
+                self.from_path(dst_parent_dir).mkdir(parents=True, exist_ok=True)
                self._copyfile(dst_path, callback=callback, followlinks=followlinks)
            else:
                raise
@@ -814,15 +822,15 @@ class FSPath(URIPath):
            priority is higher than 'overwrite', default is False
        :param overwrite: whether or not overwrite file when exists, default is True
        """
+        self._check_int_path()
+
        if self.is_dir(followlinks=followlinks):
 
            def ignore_same_file(src: str, names: List[str]) -> List[str]:
                ignore_files = []
                for name in names:
                    dst_obj = self.from_path(dst_path).joinpath(name)
-                    if force:
-                        pass
-                    elif not overwrite and dst_obj.exists():
+                    if not overwrite and dst_obj.exists():
                        ignore_files.append(name)
                    elif dst_obj.exists() and is_same_file(
                        self.joinpath(name).stat(), dst_obj.stat(), "copy"
@@ -833,11 +841,11 @@ class FSPath(URIPath):
            shutil.copytree(
                self.path_without_protocol,  # pyre-ignore[6]
                dst_path,
-                ignore=ignore_same_file,
+                ignore=None if force else ignore_same_file,
                dirs_exist_ok=True,
            )
        else:
-            self.copy(dst_path, followlinks=followlinks, overwrite=overwrite)
+            self.copy(dst_path, followlinks=followlinks, overwrite=force or overwrite)
 
    def symlink(self, dst_path: PathLike) -> None:
        """
@@ -893,6 +901,8 @@ class FSPath(URIPath):
        return self.from_path(os.path.expanduser("~"))
 
    def joinpath(self, *other_paths: PathLike) -> "FSPath":
+        self._check_int_path()
+
        path = fspath(self)
        if path == ".":
            path = ""
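In the `copytree` hunk above, `force=True` now bypasses the skip logic by passing `ignore=None` to `shutil.copytree` instead of short-circuiting inside the callback, and the single-file branch treats `force` as implying `overwrite`. For readers unfamiliar with the `ignore` contract: `shutil.copytree` calls it once per visited directory with `(dirpath, names)` and skips whatever names it returns. A self-contained sketch with hypothetical file names:

    import os
    import shutil
    import tempfile

    src = tempfile.mkdtemp()
    dst = tempfile.mkdtemp()
    open(os.path.join(src, "a.txt"), "w").close()

    def ignore_same_file(src_dir, names):
        # Pretend "a.txt" is already up to date at the destination.
        return [name for name in names if name == "a.txt"]

    force = False
    shutil.copytree(src, dst, ignore=None if force else ignore_same_file,
                    dirs_exist_ok=True)
    assert not os.path.exists(os.path.join(dst, "a.txt"))  # skipped
    shutil.rmtree(src), shutil.rmtree(dst)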
megfile/hdfs.py
CHANGED
@@ -125,7 +125,7 @@ def hdfs_isfile(path: PathLike, followlinks: bool = False) -> bool:
    return HdfsPath(path).is_file(followlinks)
 
 
-def hdfs_listdir(path: PathLike, followlinks: bool = False) -> List[str]:
+def hdfs_listdir(path: PathLike) -> List[str]:
    """
    Get all contents of given path.
 
@@ -133,10 +133,10 @@ def hdfs_listdir(path: PathLike, followlinks: bool = False) -> List[str]:
    :returns: All contents have prefix of path.
    :raises: FileNotFoundError, NotADirectoryError
    """
-    return HdfsPath(path).listdir(followlinks)
+    return HdfsPath(path).listdir()
 
 
-def hdfs_load_from(path: PathLike, followlinks: bool = False) -> BinaryIO:
+def hdfs_load_from(path: PathLike) -> BinaryIO:
    """Read all content in binary on specified path and write into memory
 
    User should close the BinaryIO manually
@@ -144,7 +144,7 @@ def hdfs_load_from(path: PathLike, followlinks: bool = False) -> BinaryIO:
    :param path: Given path
    :returns: BinaryIO
    """
-    return HdfsPath(path).load(followlinks)
+    return HdfsPath(path).load()
 
 
 def hdfs_move(src_path: PathLike, dst_path: PathLike, overwrite: bool = True) -> None:
@@ -209,15 +209,15 @@ def hdfs_scan_stat(
    return HdfsPath(path).scan_stat(missing_ok, followlinks)
 
 
-def hdfs_scandir(path: PathLike, followlinks: bool = False) -> Iterator[FileEntry]:
+def hdfs_scandir(path: PathLike) -> Iterator[FileEntry]:
    """
-    Get all contents of given path, the order of result is …
+    Get all contents of given path, the order of result is in arbitrary order.
 
    :param path: Given path
    :returns: All contents have prefix of path
    :raises: FileNotFoundError, NotADirectoryError
    """
-    return HdfsPath(path).scandir(followlinks)
+    return HdfsPath(path).scandir()
 
 
 def hdfs_unlink(path: PathLike, missing_ok: bool = False) -> None:
megfile/hdfs_path.py
CHANGED
@@ -12,7 +12,7 @@ from megfile.config import (
    READER_MAX_BUFFER_SIZE,
 )
 from megfile.errors import _create_missing_ok_generator, raise_hdfs_error
-from megfile.interfaces import FileEntry, PathLike, StatResult, URIPath
+from megfile.interfaces import ContextIterator, FileEntry, PathLike, StatResult, URIPath
 from megfile.lib.compat import fspath
 from megfile.lib.glob import FSFunc, iglob
 from megfile.lib.hdfs_prefetch_reader import HdfsPrefetchReader
@@ -299,7 +299,12 @@ class HdfsPath(URIPath):
        Because hdfs symlink not support dir.
        :returns: True if path is hdfs directory, else False
        """
-        …
+        try:
+            stat = self.stat(follow_symlinks=followlinks)
+            return stat.is_dir()
+        except FileNotFoundError:
+            pass
+        return False
 
    def is_file(self, followlinks: bool = False) -> bool:
        """
@@ -307,9 +312,14 @@ class HdfsPath(URIPath):
 
        :returns: True if path is hdfs file, else False
        """
-        …
+        try:
+            stat = self.stat(follow_symlinks=followlinks)
+            return stat.is_file()
+        except FileNotFoundError:
+            pass
+        return False
 
-    def listdir(self, followlinks: bool = False) -> List[str]:
+    def listdir(self) -> List[str]:
        """
        Get all contents of given path.
 
@@ -319,19 +329,19 @@ class HdfsPath(URIPath):
        if not self.is_dir():
            raise NotADirectoryError("Not a directory: %r" % self.path)
        with raise_hdfs_error(self.path_with_protocol):
-            return self._client.list(self.path_without_protocol)
+            return sorted(self._client.list(self.path_without_protocol))
 
-    def iterdir(self, followlinks: bool = False) -> Iterator["HdfsPath"]:
+    def iterdir(self) -> Iterator["HdfsPath"]:
        """
        Get all contents of given path.
 
        :returns: All contents have prefix of path.
        :raises: FileNotFoundError, NotADirectoryError
        """
-        for filename in self.listdir(followlinks):
+        for filename in self.listdir():
            yield self.joinpath(filename)
 
-    def load(self, followlinks: bool = False) -> BinaryIO:
+    def load(self) -> BinaryIO:
        """Read all content in binary on specified path and write into memory
 
        User should close the BinaryIO manually
@@ -372,7 +382,10 @@ class HdfsPath(URIPath):
        dst_path = self.from_path(dst_path)
        if self.is_dir():
            for filename in self.iterdir():
-                …
+                filename.rename(
+                    dst_path.joinpath(filename.relative_to(self.path_with_protocol)),
+                    overwrite=overwrite,
+                )
        else:
            if overwrite:
                dst_path.remove(missing_ok=True)
@@ -463,28 +476,32 @@ class HdfsPath(URIPath):
            ),
        )
 
-    def scandir(self, followlinks: bool = False) -> Iterator[FileEntry]:
+    def scandir(self) -> ContextIterator:
        """
-        Get all contents of given path, the order of result is …
+        Get all contents of given path, the order of result is in arbitrary order.
 
        :returns: All contents have prefix of path
        :raises: FileNotFoundError, NotADirectoryError
        """
-        …
+
+        def create_generator():
+            with raise_hdfs_error(self.path_with_protocol):
+                for filename, stat_data in self._client.list(
+                    self.path_without_protocol, status=True
+                ):
+                    yield FileEntry(
+                        name=filename,
+                        path=self.joinpath(filename).path_with_protocol,
+                        stat=StatResult(
+                            size=stat_data["length"],
+                            mtime=stat_data["modificationTime"] / 1000,
+                            isdir=stat_data["type"] == "DIRECTORY",
+                            islnk=False,
+                            extra=stat_data,
+                        ),
+                    )
+
+        return ContextIterator(create_generator())
 
    def unlink(self, missing_ok: bool = False) -> None:
        """
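`scandir` now returns a `ContextIterator` wrapping a generator that only starts on first use, so listing errors surface during iteration and the result can double as a context manager. A minimal sketch of what such a wrapper needs to support (megfile's real `ContextIterator` lives in `megfile.interfaces`; this stand-in only mirrors the usage pattern):

    class ContextIterator:
        def __init__(self, iterable):
            self._iterable = iterable

        def __iter__(self):
            return iter(self._iterable)

        def __enter__(self):
            return self._iterable

        def __exit__(self, *exc):
            close = getattr(self._iterable, "close", None)
            if close is not None:
                close()  # close the generator promptly

    def entries():
        yield "a"
        yield "b"

    with ContextIterator(entries()) as it:
        assert next(it) == "a"
    assert list(ContextIterator(entries())) == ["a", "b"]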
megfile/http_path.py
CHANGED
@@ -96,14 +96,7 @@ def get_http_session(
            file_info.seek(0)
        elif isinstance(file_info, (tuple, list)) and len(file_info) >= 2:
            file_info = list(file_info)
-            if (
-                isinstance(file_info[1], (tuple, list))
-                and len(file_info[1]) >= 2
-            ):
-                file_info[1] = list(file_info[1])
-                file_info[1] = seek_or_reopen(file_info[1])
-            else:
-                file_info[1] = seek_or_reopen(file_info[1])
+            file_info[1] = seek_or_reopen(file_info[1])
            files[key] = file_info
 
    session.request = patch_method(
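The collapsed branch above applies `seek_or_reopen` to slot 1 of every requests-style `files` value of the form `(filename, fileobj, ...)`, instead of special-casing nested tuples. An illustrative stand-in for the helper; only the rewind behavior its name implies is sketched here:

    from io import BytesIO

    def seek_or_reopen(fileobj):
        # Rewind a seekable file object so a retried request re-reads it.
        if hasattr(fileobj, "seek"):
            fileobj.seek(0)
        return fileobj

    file_info = ("report.txt", BytesIO(b"payload"))
    file_info = list(file_info)
    file_info[1] = seek_or_reopen(file_info[1])
    assert file_info[1].read() == b"payload"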
megfile/lib/base_prefetch_reader.py
CHANGED
@@ -5,7 +5,6 @@ from concurrent.futures import Future, ThreadPoolExecutor
 from io import BytesIO
 from logging import getLogger as get_logger
 from math import ceil
-from statistics import mean
 from typing import Optional
 
 from megfile.config import (
@@ -92,7 +91,7 @@ class BasePrefetchReader(Readable[bytes], Seekable, ABC):
 
    @abstractmethod
    def _get_content_size(self):
-        pass
+        pass  # pragma: no cover
 
    @property
    def _futures(self) -> "LRUCacheFutureManager":
@@ -104,7 +103,7 @@ class BasePrefetchReader(Readable[bytes], Seekable, ABC):
    @property
    @abstractmethod
    def name(self) -> str:
-        pass
+        pass  # pragma: no cover
 
    @property
    def mode(self) -> str:
@@ -238,13 +237,7 @@ class BasePrefetchReader(Readable[bytes], Seekable, ABC):
 
        if self._block_forward == 0:
            block_index = self._offset // self._block_size
-            if self._seek_history:
-                mean_read_count = mean(item.read_count for item in self._seek_history)
-            else:
-                mean_read_count = 0
-            if block_index not in self._futures and mean_read_count < 3:
-                # No using LRP will be better if read() are always called less than 3
-                # times after seek()
+            if block_index not in self._futures:
                buffer[:size] = self._read(size)
                return size
 
@@ -329,8 +322,9 @@ class BasePrefetchReader(Readable[bytes], Seekable, ABC):
            history.append(item)
        history.append(SeekRecord(index))
        self._seek_history = history
-        self._block_forward = max(
-            self._block_capacity // len(self._seek_history), 0
+        self._block_forward = min(
+            max(self._block_capacity // len(self._seek_history), 0),
+            self._block_capacity - 1,
        )
        if self._block_forward == 0:
            self._is_auto_scaling = False
@@ -343,7 +337,7 @@ class BasePrefetchReader(Readable[bytes], Seekable, ABC):
    def _fetch_response(
        self, start: Optional[int] = None, end: Optional[int] = None
    ) -> dict:
-        pass
+        pass  # pragma: no cover
 
    def _fetch_buffer(self, index: int) -> BytesIO:
        start, end = index * self._block_size, (index + 1) * self._block_size - 1
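The `_block_forward` change adds an upper clamp so the prefetch window can never claim the full block cache. Worked numbers, assuming a cache of 8 blocks and a single recorded seek: the old `max(capacity // len(history), 0)` gave 8, the new form caps it at 7:

    block_capacity = 8
    seek_history_len = 1
    old_forward = max(block_capacity // seek_history_len, 0)
    new_forward = min(max(block_capacity // seek_history_len, 0), block_capacity - 1)
    assert (old_forward, new_forward) == (8, 7)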
megfile/lib/combine_reader.py
CHANGED
@@ -36,7 +36,7 @@ class CombineReader(Readable, Seekable):
        for index, size in enumerate(self._blocks_sizes):
            if self._offset < size:
                return index - 1, self._offset - self._blocks_sizes[index - 1]
-        raise IOError("offset out of range: %d" % self._offset)
+        raise IOError("offset out of range: %d" % self._offset)  # pragma: no cover
 
    @property
    def name(self) -> str:
megfile/lib/glob.py
CHANGED
@@ -5,22 +5,15 @@
 import os
 import re
 from collections import OrderedDict
-from …
-from typing import Iterator, List, Tuple
+from typing import Callable, Iterator, List, NamedTuple, Tuple
 
 from megfile.lib import fnmatch
 
-
-"""
+
 class FSFunc(NamedTuple):
    exists: Callable[[str], bool]
    isdir: Callable[[str], bool]
-    scandir: Callable[[str], Iterator[Tuple[str, bool]]]
-
-in Python 3.6+
-"""
-
-FSFunc = NamedTuple("FSFunc", ["exists", "isdir", "scandir"])
+    scandir: Callable[[str], Iterator[Tuple[str, bool]]]
 
 
 def _exists(path: str) -> bool:
@@ -72,7 +65,7 @@ def iglob(
    if recursive and _isrecursive(pathname):
        s = next(it)  # skip empty string
        if s:
-            raise OSError("iglob with recursive=True error")
+            raise OSError("iglob with recursive=True error")  # pragma: no cover
    return it
 
 
@@ -161,11 +154,8 @@ def _iterdir(dirname: str, dironly: bool, fs: FSFunc) -> Iterator[str]:
    try:
        # dirname may be non-existent, raise OSError
        for name, isdir in fs.scandir(dirname):
-            try:
-                if not dironly or isdir:
-                    yield name
-            except OSError:
-                pass
+            if not dironly or isdir:
+                yield name
    except OSError:
        return
 
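With the typed `FSFunc` above now the single definition (the old functional `NamedTuple("FSFunc", ...)` fallback is gone), a filesystem backend is just three callables, where `scandir` yields `(name, is_dir)` pairs. An illustrative construction over the local filesystem; the wrapper class is restated so the sketch is self-contained:

    import os
    from typing import Callable, Iterator, NamedTuple, Tuple

    class FSFunc(NamedTuple):
        exists: Callable[[str], bool]
        isdir: Callable[[str], bool]
        scandir: Callable[[str], Iterator[Tuple[str, bool]]]

    def _scandir(dirname: str) -> Iterator[Tuple[str, bool]]:
        for entry in os.scandir(dirname or "."):
            yield entry.name, entry.is_dir()

    fs = FSFunc(exists=os.path.exists, isdir=os.path.isdir, scandir=_scandir)
    assert fs.isdir(".")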
megfile/lib/s3_cached_handler.py
CHANGED
@@ -18,15 +18,15 @@ class S3CachedHandler(S3MemoryHandler):
        remove_cache_when_open: bool = True,
        profile_name: Optional[str] = None,
    ):
-        if mode not in ("rb", "wb", "ab", "rb+", "wb+", "ab+"):
-            raise ValueError("unacceptable mode: %r" % mode)
-
        self._bucket = bucket
        self._key = key
        self._mode = mode
        self._client = s3_client
        self._profile_name = profile_name
 
+        if mode not in ("rb", "wb", "ab", "rb+", "wb+", "ab+"):
+            raise ValueError("unacceptable mode: %r" % mode)
+
        if cache_path is None:
            cache_path = generate_cache_path(self.name)
 
megfile/lib/s3_limited_seekable_writer.py
CHANGED
@@ -137,7 +137,7 @@ class S3LimitedSeekableWriter(S3BufferedWriter, Seekable):
    def _submit_futures(self):
        content = self._buffer.getvalue()
        if len(content) == 0:
-            return
+            return  # pragma: no cover
        offset = len(content) - self._tail_block_size
        self._buffer = BytesIO(content[offset:])
        self._buffer.seek(0, os.SEEK_END)