megfile 3.0.5__py3-none-any.whl → 3.0.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- megfile/cli.py +21 -10
- megfile/errors.py +26 -18
- megfile/fs.py +9 -5
- megfile/fs_path.py +72 -15
- megfile/hdfs.py +3 -2
- megfile/hdfs_path.py +16 -6
- megfile/http_path.py +30 -11
- megfile/lib/http_prefetch_reader.py +26 -10
- megfile/pathlike.py +10 -3
- megfile/s3.py +14 -7
- megfile/s3_path.py +55 -25
- megfile/sftp.py +18 -9
- megfile/sftp_path.py +61 -30
- megfile/smart.py +60 -16
- megfile/version.py +1 -1
- {megfile-3.0.5.dist-info → megfile-3.0.6.dist-info}/METADATA +1 -1
- {megfile-3.0.5.dist-info → megfile-3.0.6.dist-info}/RECORD +22 -22
- {megfile-3.0.5.dist-info → megfile-3.0.6.dist-info}/LICENSE +0 -0
- {megfile-3.0.5.dist-info → megfile-3.0.6.dist-info}/LICENSE.pyre +0 -0
- {megfile-3.0.5.dist-info → megfile-3.0.6.dist-info}/WHEEL +0 -0
- {megfile-3.0.5.dist-info → megfile-3.0.6.dist-info}/entry_points.txt +0 -0
- {megfile-3.0.5.dist-info → megfile-3.0.6.dist-info}/top_level.txt +0 -0
megfile/cli.py
CHANGED
@@ -174,12 +174,14 @@ def ll(path: str, recursive: bool):
     is_flag=True,
     help='treat dst_path as a normal file.')
 @click.option('-g', '--progress-bar', is_flag=True, help='Show progress bar.')
+@click.option('--skip', is_flag=True, help='Skip existed files.')
 def cp(
         src_path: str,
         dst_path: str,
         recursive: bool,
         no_target_directory: bool,
         progress_bar: bool,
+        skip: bool,
 ):
     if not no_target_directory and (dst_path.endswith('/') or
                                     smart_isdir(dst_path)):
@@ -193,14 +195,16 @@ def cp(
                     dst_path,
                     followlinks=True,
                     map_func=executor.map,
-                    force=True)
+                    force=True,
+                    overwrite=not skip)
             else:
                 smart_sync(
                     src_path,
                     dst_path,
                     followlinks=True,
                     map_func=executor.map,
-                    force=True)
+                    force=True,
+                    overwrite=not skip)
     else:
         if progress_bar:
             file_size = smart_stat(src_path).size
@@ -214,10 +218,11 @@ def cp(
             def callback(length: int):
                 sbar.update(length)

-            smart_copy(src_path, dst_path, callback=callback)
+            smart_copy(
+                src_path, dst_path, callback=callback, overwrite=not skip)
             sbar.close()
         else:
-            smart_copy(src_path, dst_path)
+            smart_copy(src_path, dst_path, overwrite=not skip)


 @cli.command(short_help='Move files from source to dest.')
@@ -236,12 +241,14 @@ def cp(
     is_flag=True,
     help='treat dst_path as a normal file.')
 @click.option('-g', '--progress-bar', is_flag=True, help='Show progress bar.')
+@click.option('--skip', is_flag=True, help='Skip existed files.')
 def mv(
         src_path: str,
         dst_path: str,
         recursive: bool,
         no_target_directory: bool,
         progress_bar: bool,
+        skip: bool,
 ):
     if not no_target_directory and (dst_path.endswith('/') or
                                     smart_isdir(dst_path)):
@@ -253,15 +260,16 @@ def mv(
         if recursive:
             if src_protocol == dst_protocol:
                 with tqdm(total=1) as t:
-                    SmartPath(src_path).rename(dst_path)
+                    SmartPath(src_path).rename(dst_path, overwrite=not skip)
                     t.update(1)
             else:
-                smart_sync_with_progress(src_path, dst_path, followlinks=True)
+                smart_sync_with_progress(
+                    src_path, dst_path, followlinks=True, overwrite=not skip)
                 smart_remove(src_path)
         else:
             if src_protocol == dst_protocol:
                 with tqdm(total=1) as t:
-                    SmartPath(src_path).rename(dst_path)
+                    SmartPath(src_path).rename(dst_path, overwrite=not skip)
                     t.update(1)
             else:
                 file_size = smart_stat(src_path).size
@@ -275,12 +283,13 @@ def mv(
                 def callback(length: int):
                     sbar.update(length)

-                smart_copy(src_path, dst_path, callback=callback)
+                smart_copy(
+                    src_path, dst_path, callback=callback, overwrite=not skip)
                 smart_unlink(src_path)
                 sbar.close()
     else:
         move_func = smart_move if recursive else smart_rename
-        move_func(src_path, dst_path)
+        move_func(src_path, dst_path, overwrite=not skip)


 @cli.command(short_help='Remove files from path.')
@@ -314,9 +323,10 @@ def rm(path: str, recursive: bool):
     is_flag=True,
     help='Copy files forcely, ignore same files.')
 @click.option('-q', '--quiet', is_flag=True, help='Not show any progress log.')
+@click.option('--skip', is_flag=True, help='Skip existed files.')
 def sync(
         src_path: str, dst_path: str, progress_bar: bool, worker: int,
-        force: bool, quiet: bool):
+        force: bool, quiet: bool, skip: bool):
     with ThreadPoolExecutor(max_workers=worker) as executor:
         if has_magic(src_path):
             src_root_path = get_non_glob_dir(src_path)
@@ -384,6 +394,7 @@ def sync(
                     followlinks=True,
                     callback_after_copy_file=callback_after_copy_file,
                     force=force,
+                    overwrite=not skip,
                 ) for file_entry in file_entries)
             list(executor.map(_smart_sync_single_file, params_iter))
             if not quiet:
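The new `--skip` flag on `cp`, `mv` and `sync` is forwarded as `overwrite=not skip`, so files that already exist at the destination are left untouched. A minimal sketch of the equivalent library calls (the paths are illustrative, and this assumes the 3.0.6 `smart_*` functions accept `overwrite`, as the calls above do):

    from megfile import smart_copy, smart_sync

    # '--skip' on the CLI becomes overwrite=False in the smart_* API:
    # an existing destination file is kept instead of being rewritten.
    smart_copy('/tmp/src.txt', '/tmp/dst.txt', overwrite=False)
    smart_sync('/tmp/src-dir', '/tmp/dst-dir', overwrite=False)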
megfile/errors.py
CHANGED
@@ -25,6 +25,9 @@ __all__ = [
     'UnsupportedError',
     'HttpPermissionError',
     'HttpFileNotFoundError',
+    'HttpBodyIncompleteError',
+    'HttpUnknownError',
+    'HttpException',
     'ProtocolExistsError',
     'ProtocolNotFoundError',
     'S3UnknownError',
@@ -124,24 +127,6 @@ def s3_should_retry(error: Exception) -> bool:
     return False


-http_retry_exceptions = (
-    requests.exceptions.ReadTimeout,
-    requests.exceptions.ConnectTimeout,
-    requests.exceptions.ChunkedEncodingError,
-    requests.exceptions.HTTPError,
-    requests.exceptions.ProxyError,
-    urllib3.exceptions.IncompleteRead,
-    urllib3.exceptions.ProtocolError,
-    urllib3.exceptions.ReadTimeoutError,
-)
-
-
-def http_should_retry(error: Exception) -> bool:
-    if isinstance(error, http_retry_exceptions):
-        return True
-    return False
-
-
 def patch_method(
         func: Callable,
         max_retries: int,
@@ -301,6 +286,29 @@ class HttpUnknownError(HttpException, UnknownError):
     pass


+class HttpBodyIncompleteError(HttpException):
+    pass
+
+
+http_retry_exceptions = (
+    requests.exceptions.ReadTimeout,
+    requests.exceptions.ConnectTimeout,
+    requests.exceptions.ChunkedEncodingError,
+    requests.exceptions.HTTPError,
+    requests.exceptions.ProxyError,
+    urllib3.exceptions.IncompleteRead,
+    urllib3.exceptions.ProtocolError,
+    urllib3.exceptions.ReadTimeoutError,
+    HttpBodyIncompleteError,
+)
+
+
+def http_should_retry(error: Exception) -> bool:
+    if isinstance(error, http_retry_exceptions):
+        return True
+    return False
+
+
 class ProtocolExistsError(Exception):
     pass

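With the retry tuple now defined after the HTTP exception classes, the new HttpBodyIncompleteError is itself retryable. A small sketch of how http_should_retry classifies it (the message text is illustrative):

    from megfile.errors import HttpBodyIncompleteError, http_should_retry

    # Truncated response bodies are now retried like timeouts and protocol errors...
    assert http_should_retry(HttpBodyIncompleteError('body shorter than Content-Length'))
    # ...while unrelated exceptions still are not.
    assert not http_should_retry(ValueError('no retry'))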
megfile/fs.py
CHANGED
@@ -311,7 +311,8 @@ def fs_copy(
         src_path: PathLike,
         dst_path: PathLike,
         callback: Optional[Callable[[int], None]] = None,
-        followlinks: bool = False):
+        followlinks: bool = False,
+        overwrite: bool = True):
     ''' File copy on file system
     Copy content (excluding meta date) of file on `src_path` to `dst_path`. `dst_path` must be a complete file name

@@ -331,23 +332,26 @@ def fs_copy(
     :param dst_path: Target file path
     :param callback: Called periodically during copy, and the input parameter is the data size (in bytes) of copy since the last call
     :param followlinks: False if regard symlink as file, else True
+    :param overwrite: whether or not overwrite file when exists, default is True
     '''
-    return FSPath(src_path).copy(dst_path, callback, followlinks)
+    return FSPath(src_path).copy(dst_path, callback, followlinks, overwrite)


 def fs_sync(
         src_path: PathLike,
         dst_path: PathLike,
         followlinks: bool = False,
-        force: bool = False) -> None:
+        force: bool = False,
+        overwrite: bool = True) -> None:
     '''Force write of everything to disk.

     :param src_path: Given path
     :param dst_path: Target file path
     :param followlinks: False if regard symlink as file, else True
-    :param force: Sync file forcely, do not ignore same files
+    :param force: Sync file forcely, do not ignore same files, priority is higher than 'overwrite', default is False
+    :param overwrite: whether or not overwrite file when exists, default is True
     '''
-    return FSPath(src_path).sync(dst_path, followlinks, force)
+    return FSPath(src_path).sync(dst_path, followlinks, force, overwrite)


 def fs_symlink(src_path: PathLike, dst_path: PathLike) -> None:
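A short sketch of the new keyword on the module-level helpers; the paths are placeholders and must exist for the calls to succeed:

    from megfile.fs import fs_copy, fs_sync

    # With overwrite=False an existing destination file is left as-is.
    fs_copy('/tmp/a.txt', '/tmp/b.txt', overwrite=False)
    # force takes priority over overwrite: force=True re-copies even identical files.
    fs_sync('/tmp/src-dir', '/tmp/dst-dir', force=False, overwrite=False)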
megfile/fs_path.py
CHANGED
@@ -3,7 +3,6 @@ import io
 import os
 import pathlib
 import shutil
-from shutil import copytree
 from stat import S_ISDIR as stat_isdir
 from stat import S_ISLNK as stat_islnk
 from typing import IO, AnyStr, BinaryIO, Callable, Iterator, List, Optional, Tuple, Union
@@ -158,24 +157,69 @@ def fs_glob_stat(
         os.path.basename(path), path, _make_stat(os.lstat(path)))


-def fs_rename(src_path: PathLike, dst_path: PathLike) -> None:
+def _fs_rename_file(
+        src_path: PathLike, dst_path: PathLike, overwrite: bool = True) -> None:
     '''
     rename file on fs

     :param src_path: Given path
     :param dst_path: Given destination path
+    :param overwrite: whether or not overwrite file when exists
     '''
+    src_path, dst_path = fspath(src_path), fspath(dst_path)
+
+    if not overwrite and os.path.exists(dst_path):
+        return
+
+    dst_dir = os.path.dirname(dst_path)
+    if dst_dir and dst_dir != ".":
+        os.makedirs(dst_dir, exist_ok=True)
     shutil.move(src_path, dst_path)


-def fs_move(src_path: PathLike, dst_path: PathLike) -> None:
+def fs_rename(
+        src_path: PathLike, dst_path: PathLike, overwrite: bool = True) -> None:
+    '''
+    rename file on fs
+
+    :param src_path: Given path
+    :param dst_path: Given destination path
+    :param overwrite: whether or not overwrite file when exists
+    '''
+    src_path, dst_path = fspath(src_path), fspath(dst_path)
+    if os.path.isfile(src_path):
+        return _fs_rename_file(src_path, dst_path, overwrite)
+    else:
+        os.makedirs(dst_path, exist_ok=True)
+
+        with os.scandir(src_path) as entries:
+            for file_entry in entries:
+                src_file_path = file_entry.path
+                dst_file_path = dst_path
+                relative_path = os.path.relpath(src_file_path, start=src_path)
+                if relative_path and relative_path != '.':
+                    dst_file_path = os.path.join(dst_file_path, relative_path)
+                if os.path.exists(dst_file_path) and file_entry.is_dir():
+                    fs_rename(src_file_path, dst_file_path, overwrite)
+                else:
+                    _fs_rename_file(src_file_path, dst_file_path, overwrite)
+
+    if os.path.isdir(src_path):
+        shutil.rmtree(src_path)
+    else:
+        os.remove(src_path)
+
+
+def fs_move(
+        src_path: PathLike, dst_path: PathLike, overwrite: bool = True) -> None:
     '''
     rename file on fs

     :param src_path: Given path
     :param dst_path: Given destination path
+    :param overwrite: whether or not overwrite file when exists
     '''
-    fs_rename(src_path, dst_path)
+    return fs_rename(src_path, dst_path, overwrite)


 def fs_resolve(path: PathLike) -> str:
@@ -491,22 +535,24 @@ class FSPath(URIPath):
         '''
         return fspath(os.path.relpath(self.path_without_protocol, start=start))

-    def rename(self, dst_path: PathLike) -> 'FSPath':
+    def rename(self, dst_path: PathLike, overwrite: bool = True) -> 'FSPath':
         '''
         rename file on fs

         :param dst_path: Given destination path
+        :param overwrite: whether or not overwrite file when exists
         '''
-        fs_rename(self.path_without_protocol, dst_path)
+        fs_rename(self.path_without_protocol, dst_path, overwrite)
         return self.from_path(dst_path)

-    def replace(self, dst_path: PathLike) -> 'FSPath':
+    def replace(self, dst_path: PathLike, overwrite: bool = True) -> 'FSPath':
         '''
         move file on fs

         :param dst_path: Given destination path
+        :param overwrite: whether or not overwrite file when exists
         '''
-        return self.rename(dst_path=dst_path)
+        return self.rename(dst_path=dst_path, overwrite=overwrite)

     def remove(self, missing_ok: bool = False) -> None:
         '''
@@ -678,7 +724,7 @@ class FSPath(URIPath):
         :rtype: FSPath
         '''
         return self.from_path(
-            str(
+            fspath(
                 pathlib.Path(
                     self.path_without_protocol).resolve(strict=strict)))

@@ -718,7 +764,8 @@ class FSPath(URIPath):
             self,
             dst_path: PathLike,
             callback: Optional[Callable[[int], None]] = None,
-            followlinks: bool = False):
+            followlinks: bool = False,
+            overwrite: bool = True):
         ''' File copy on file system
         Copy content (excluding meta date) of file on `src_path` to `dst_path`. `dst_path` must be a complete file name

@@ -737,7 +784,11 @@ class FSPath(URIPath):
         :param dst_path: Target file path
         :param callback: Called periodically during copy, and the input parameter is the data size (in bytes) of copy since the last call
         :param followlinks: False if regard symlink as file, else True
+        :param overwrite: whether or not overwrite file when exists, default is True
         '''
+        if not overwrite and os.path.exists((dst_path)):
+            return
+
         try:
             self._copyfile(dst_path, callback=callback, followlinks=followlinks)
         except FileNotFoundError as error:
@@ -754,12 +805,14 @@ class FSPath(URIPath):
             self,
             dst_path: PathLike,
             followlinks: bool = False,
-            force: bool = False) -> None:
+            force: bool = False,
+            overwrite: bool = True) -> None:
         '''Force write of everything to disk.

         :param dst_path: Target file path
         :param followlinks: False if regard symlink as file, else True
-        :param force: Sync file forcely, do not ignore same files
+        :param force: Sync file forcely, do not ignore same files, priority is higher than 'overwrite', default is False
+        :param overwrite: whether or not overwrite file when exists, default is True
         '''
         if self.is_dir(followlinks=followlinks):

@@ -767,18 +820,22 @@ class FSPath(URIPath):
                 ignore_files = []
                 for name in names:
                     dst_obj = self.from_path(dst_path).joinpath(name)
-                    if not force and dst_obj.exists() and is_same_file(
+                    if force:
+                        pass
+                    elif not overwrite and dst_obj.exists():
+                        ignore_files.append(name)
+                    elif dst_obj.exists() and is_same_file(
                             self.joinpath(name).stat(), dst_obj.stat(), 'copy'):
                         ignore_files.append(name)
                 return ignore_files

-            copytree(
+            shutil.copytree(
                 self.path_without_protocol,
                 dst_path,
                 ignore=ignore_same_file,
                 dirs_exist_ok=True)
         else:
-            self.copy(dst_path, followlinks=followlinks)
+            self.copy(dst_path, followlinks=followlinks, overwrite=overwrite)

     def symlink(self, dst_path: PathLike) -> None:
         '''
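Renaming is now split into a file-level helper and a directory-aware fs_rename that merges into an existing destination instead of failing. A minimal sketch with hypothetical paths (the source directory must exist):

    from megfile.fs_path import fs_rename

    # Entries inside /tmp/src-dir are moved into /tmp/dst-dir one by one;
    # with overwrite=False, entries already present in the destination are kept.
    fs_rename('/tmp/src-dir', '/tmp/dst-dir', overwrite=False)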
megfile/hdfs.py
CHANGED
@@ -131,14 +131,15 @@ def hdfs_load_from(path: PathLike, followlinks: bool = False) -> BinaryIO:
     return HdfsPath(path).load(followlinks)


-def hdfs_move(src_path: PathLike, dst_path: PathLike) -> None:
+def hdfs_move(
+        src_path: PathLike, dst_path: PathLike, overwrite: bool = True) -> None:
     '''
     Move file/directory path from src_path to dst_path

     :param src_path: Given path
     :param dst_path: Given destination path
     '''
-    return HdfsPath(src_path).move(dst_path)
+    return HdfsPath(src_path).move(dst_path, overwrite)


 def hdfs_remove(path: PathLike, missing_ok: bool = False) -> None:
megfile/hdfs_path.py
CHANGED
@@ -405,25 +405,35 @@ class HdfsPath(URIPath):
         with raise_hdfs_error(self.path_with_protocol):
             self._client.makedirs(self.path_without_protocol, permission=mode)

-    def rename(self, dst_path: PathLike) -> 'HdfsPath':
+    def rename(self, dst_path: PathLike, overwrite: bool = True) -> 'HdfsPath':
         '''
         Move hdfs file path from src_path to dst_path

         :param dst_path: Given destination path
+        :param overwrite: whether or not overwrite file when exists
         '''
         dst_path = self.from_path(dst_path)
-
-        self.
-        self.
+        if self.is_dir():
+            for filename in self.iterdir():
+                self.joinpath(filename).rename(dst_path.joinpath(filename))  # pytype: disable=attribute-error
+        else:
+            if overwrite:
+                dst_path.remove(missing_ok=True)
+            if overwrite or not dst_path.exists():
+                with raise_hdfs_error(self.path_with_protocol):
+                    self._client.rename(
+                        self.path_without_protocol,
+                        dst_path.path_without_protocol)
+        self.remove(missing_ok=True)
         return dst_path

-    def move(self, dst_path: PathLike) -> None:
+    def move(self, dst_path: PathLike, overwrite: bool = True) -> None:
         '''
         Move file/directory path from src_path to dst_path

         :param dst_path: Given destination path
         '''
-        self.rename(dst_path=dst_path)
+        self.rename(dst_path=dst_path, overwrite=overwrite)

     def remove(self, missing_ok: bool = False) -> None:
         '''
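HdfsPath.rename now recurses into directories and honours the flag for single files; with overwrite=False the client-side rename only happens when the destination does not already exist. A sketch with hypothetical paths (a configured HDFS client is assumed):

    from megfile.hdfs_path import HdfsPath

    HdfsPath('hdfs://tmp/src-dir').rename('hdfs://tmp/dst-dir', overwrite=False)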
megfile/http_path.py
CHANGED
@@ -1,4 +1,5 @@
 import time
+from copy import deepcopy
 from functools import partial
 from io import BufferedReader, BytesIO
 from logging import getLogger as get_logger
@@ -12,7 +13,7 @@ from megfile.config import DEFAULT_BLOCK_SIZE, HTTP_MAX_RETRY_TIMES
 from megfile.errors import http_should_retry, patch_method, translate_http_error
 from megfile.interfaces import PathLike, Readable, StatResult, URIPath
 from megfile.lib.compat import fspath
-from megfile.lib.http_prefetch_reader import HttpPrefetchReader
+from megfile.lib.http_prefetch_reader import DEFAULT_TIMEOUT, HttpPrefetchReader
 from megfile.lib.s3_buffered_writer import DEFAULT_MAX_BUFFER_SIZE
 from megfile.lib.url import get_url_scheme
 from megfile.pathlike import PathLike
@@ -32,7 +33,7 @@ max_retries = HTTP_MAX_RETRY_TIMES


 def get_http_session(
-        timeout: Union[int, Tuple[int, int]] = …,
+        timeout: Optional[Union[int, Tuple[int, int]]] = DEFAULT_TIMEOUT,
         status_forcelist: Iterable[int] = (500, 502, 503, 504)
 ) -> requests.Session:
     session = requests.Session()
@@ -172,10 +173,12 @@ class HttpPath(URIPath):
     protocol = "http"

     def __init__(self, path: PathLike, *other_paths: PathLike):
-        if str(path).startswith('https://'):
-            self.protocol = 'https'
         super().__init__(path, *other_paths)

+        if fspath(path).startswith('https://'):
+            self.protocol = 'https'
+        self.request_kwargs = {}
+
     @binary_open
     def open(
             self,
@@ -204,9 +207,15 @@ class HttpPath(URIPath):
             raise ValueError('unacceptable mode: %r' % mode)

         response = None
+        request_kwargs = deepcopy(self.request_kwargs)
+        timeout = request_kwargs.pop('timeout', DEFAULT_TIMEOUT)
+        stream = request_kwargs.pop('stream', True)
         try:
-            response = get_http_session(
-                …
+            response = get_http_session(
+                timeout=timeout,
+                status_forcelist=(),
+            ).get(
+                self.path_with_protocol, stream=stream, **request_kwargs)
             response.raise_for_status()
         except Exception as error:
             if response:
@@ -226,7 +235,7 @@ class HttpPath(URIPath):
         block_forward = max(int(block_capacity * forward_ratio), 1)

         reader = HttpPrefetchReader(
-            self.path_with_protocol,
+            self,
             content_size=content_size,
             max_retries=max_retries,
             max_workers=max_concurrency,
@@ -254,9 +263,14 @@ class HttpPath(URIPath):
         :raises: HttpPermissionError, HttpFileNotFoundError
         '''

+        request_kwargs = deepcopy(self.request_kwargs)
+        timeout = request_kwargs.pop('timeout', DEFAULT_TIMEOUT)
+        stream = request_kwargs.pop('stream', True)
+
         try:
-            with get_http_session(status_forcelist=()).get(
-                    self.path_with_protocol, stream=…) as response:
+            with get_http_session(timeout=timeout, status_forcelist=()).get(
+                    self.path_with_protocol, stream=stream,
+                    **request_kwargs) as response:
                 response.raise_for_status()
                 headers = response.headers
         except Exception as error:
@@ -307,9 +321,14 @@ class HttpPath(URIPath):
         :return: return True if exists
         :rtype: bool
         """
+        request_kwargs = deepcopy(self.request_kwargs)
+        timeout = request_kwargs.pop('timeout', DEFAULT_TIMEOUT)
+        stream = request_kwargs.pop('stream', True)
+
         try:
-            with get_http_session(status_forcelist=()).get(
-                    self.path_with_protocol, stream=…) as response:
+            with get_http_session(timeout=timeout, status_forcelist=()).get(
+                    self.path_with_protocol, stream=stream,
+                    **request_kwargs) as response:
                 if response.status_code == 404:
                     return False
                 return True
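HttpPath instances now carry a request_kwargs dict; open, stat and exists pop 'timeout' and 'stream' from it and forward the rest to requests. A sketch with an illustrative URL (the server must support range requests for prefetch reads to work):

    from megfile.http_path import HttpPath

    path = HttpPath('https://example.com/big-file.bin')
    # 'timeout' and 'stream' are popped before the request; everything else
    # (e.g. headers) is passed straight through to requests.get.
    path.request_kwargs = {
        'timeout': (10, 600),
        'headers': {'User-Agent': 'megfile-example'},
    }
    with path.open('rb') as reader:
        first_block = reader.read(1024)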
megfile/lib/http_prefetch_reader.py
CHANGED
@@ -1,12 +1,15 @@
-import os
 from io import BytesIO
 from typing import Optional

 import requests

 from megfile.config import DEFAULT_BLOCK_CAPACITY, DEFAULT_BLOCK_SIZE, HTTP_MAX_RETRY_TIMES
-from megfile.errors import UnsupportedError, http_should_retry, patch_method
+from megfile.errors import HttpBodyIncompleteError, UnsupportedError, http_should_retry, patch_method
 from megfile.lib.base_prefetch_reader import BasePrefetchReader
+from megfile.lib.compat import fspath
+from megfile.pathlike import PathLike
+
+DEFAULT_TIMEOUT = (60, 60 * 60 * 24)


 class HttpPrefetchReader(BasePrefetchReader):
@@ -19,7 +22,7 @@ class HttpPrefetchReader(BasePrefetchReader):

     def __init__(
             self,
-            url: str,
+            url: PathLike,
             *,
             content_size: Optional[int] = None,
             block_size: int = DEFAULT_BLOCK_SIZE,
@@ -46,22 +49,28 @@ class HttpPrefetchReader(BasePrefetchReader):
         if first_index_response['Headers'].get('Accept-Ranges') != 'bytes':
             raise UnsupportedError(
                 f'Unsupported server, server must support Accept-Ranges: {self._url}',
-                path=self._url,
+                path=fspath(self._url),
             )
         return first_index_response['Headers']['Content-Length']

     @property
     def name(self) -> str:
-        return self._url
+        return fspath(self._url)

     def _fetch_response(
             self, start: Optional[int] = None,
             end: Optional[int] = None) -> dict:

         def fetch_response() -> dict:
+            request_kwargs = {}
+            if hasattr(self._url, 'request_kwargs'):
+                request_kwargs = self._url.request_kwargs
+            timeout = request_kwargs.pop('timeout', DEFAULT_TIMEOUT)
+            stream = request_kwargs.pop('stream', True)
+
             if start is None or end is None:
-                with requests.get(self._url, timeout=…,
-                                  stream=…) as response:
+                with requests.get(fspath(self._url), timeout=timeout,
+                                  stream=stream, **request_kwargs) as response:
                     return {
                         'Headers': response.headers,
                         'Cookies': response.cookies,
@@ -71,9 +80,16 @@ class HttpPrefetchReader(BasePrefetchReader):
             range_end = end
             if self._content_size is not None:
                 range_end = min(range_end, self._content_size - 1)
-            headers = {'Range': f'bytes={start}-{range_end}'}
-            with requests.get(self._url, timeout=…,
-                              headers=headers, stream=…) as response:
+            headers = request_kwargs.pop('headers', {})
+            headers["Range"] = f"bytes={start}-{range_end}"
+            with requests.get(fspath(self._url), timeout=timeout,
+                              headers=headers, stream=stream,
+                              **request_kwargs) as response:
+                if len(response.content) != int(
+                        response.headers['Content-Length']):
+                    raise HttpBodyIncompleteError(
+                        f"The downloaded content is incomplete, expected size: {response.headers['Content-Length']}, actual size: {len(response.content)}",
+                    )
                 return {
                     'Body': BytesIO(response.content),
                     'Headers': response.headers,
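The range branch now verifies that the body it received matches the advertised Content-Length and raises the retryable HttpBodyIncompleteError otherwise. The check in isolation looks roughly like this (illustrative URL; real servers may omit the header):

    import requests

    from megfile.errors import HttpBodyIncompleteError

    with requests.get('https://example.com/data.bin',
                      headers={'Range': 'bytes=0-1023'},
                      timeout=(60, 60 * 60 * 24), stream=True) as response:
        expected = int(response.headers['Content-Length'])
        if len(response.content) != expected:
            raise HttpBodyIncompleteError(
                f'incomplete body: expected {expected}, got {len(response.content)}')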
megfile/pathlike.py
CHANGED
@@ -730,16 +730,23 @@ class URIPath(BaseURIPath):
         with self.open(mode='r') as f:
             return f.read()

-    def rename(self, dst_path: PathLike) -> 'URIPath':
+    def rename(self, dst_path: PathLike, overwrite: bool = True) -> 'URIPath':
+        '''
+        rename file
+
+        :param dst_path: Given destination path
+        :param overwrite: whether or not overwrite file when exists
+        '''
         raise NotImplementedError(f"'rename' is unsupported on '{type(self)}'")

-    def replace(self, dst_path: PathLike) -> 'URIPath':
+    def replace(self, dst_path: PathLike, overwrite: bool = True) -> 'URIPath':
         '''
         move file

         :param dst_path: Given destination path
+        :param overwrite: whether or not overwrite file when exists
         '''
-        return self.rename(dst_path=dst_path)
+        return self.rename(dst_path=dst_path, overwrite=overwrite)

     def rglob(self, pattern) -> List['URIPath']:
         '''