megfile 3.0.5__py3-none-any.whl → 3.0.6.post1__py3-none-any.whl

megfile/cli.py CHANGED
@@ -174,12 +174,14 @@ def ll(path: str, recursive: bool):
     is_flag=True,
     help='treat dst_path as a normal file.')
 @click.option('-g', '--progress-bar', is_flag=True, help='Show progress bar.')
+@click.option('--skip', is_flag=True, help='Skip existed files.')
 def cp(
         src_path: str,
         dst_path: str,
         recursive: bool,
         no_target_directory: bool,
         progress_bar: bool,
+        skip: bool,
 ):
     if not no_target_directory and (dst_path.endswith('/') or
                                     smart_isdir(dst_path)):
@@ -193,14 +195,16 @@ def cp(
                 dst_path,
                 followlinks=True,
                 map_func=executor.map,
-                force=True)
+                force=True,
+                overwrite=not skip)
         else:
             smart_sync(
                 src_path,
                 dst_path,
                 followlinks=True,
                 map_func=executor.map,
-                force=True)
+                force=True,
+                overwrite=not skip)
     else:
         if progress_bar:
             file_size = smart_stat(src_path).size
@@ -214,10 +218,11 @@ def cp(
             def callback(length: int):
                 sbar.update(length)

-            smart_copy(src_path, dst_path, callback=callback)
+            smart_copy(
+                src_path, dst_path, callback=callback, overwrite=not skip)
             sbar.close()
         else:
-            smart_copy(src_path, dst_path)
+            smart_copy(src_path, dst_path, overwrite=not skip)


 @cli.command(short_help='Move files from source to dest.')
@@ -236,12 +241,14 @@ def cp(
     is_flag=True,
     help='treat dst_path as a normal file.')
 @click.option('-g', '--progress-bar', is_flag=True, help='Show progress bar.')
+@click.option('--skip', is_flag=True, help='Skip existed files.')
 def mv(
         src_path: str,
         dst_path: str,
         recursive: bool,
         no_target_directory: bool,
         progress_bar: bool,
+        skip: bool,
 ):
     if not no_target_directory and (dst_path.endswith('/') or
                                     smart_isdir(dst_path)):
@@ -253,15 +260,16 @@ def mv(
         if recursive:
             if src_protocol == dst_protocol:
                 with tqdm(total=1) as t:
-                    SmartPath(src_path).rename(dst_path)
+                    SmartPath(src_path).rename(dst_path, overwrite=not skip)
                     t.update(1)
             else:
-                smart_sync_with_progress(src_path, dst_path, followlinks=True)
+                smart_sync_with_progress(
+                    src_path, dst_path, followlinks=True, overwrite=not skip)
                 smart_remove(src_path)
         else:
             if src_protocol == dst_protocol:
                 with tqdm(total=1) as t:
-                    SmartPath(src_path).rename(dst_path)
+                    SmartPath(src_path).rename(dst_path, overwrite=not skip)
                     t.update(1)
             else:
                 file_size = smart_stat(src_path).size
@@ -275,12 +283,13 @@ def mv(
                 def callback(length: int):
                     sbar.update(length)

-                smart_copy(src_path, dst_path, callback=callback)
+                smart_copy(
+                    src_path, dst_path, callback=callback, overwrite=not skip)
                 smart_unlink(src_path)
                 sbar.close()
     else:
         move_func = smart_move if recursive else smart_rename
-        move_func(src_path, dst_path)
+        move_func(src_path, dst_path, overwrite=not skip)


 @cli.command(short_help='Remove files from path.')
@@ -314,9 +323,10 @@ def rm(path: str, recursive: bool):
     is_flag=True,
     help='Copy files forcely, ignore same files.')
 @click.option('-q', '--quiet', is_flag=True, help='Not show any progress log.')
+@click.option('--skip', is_flag=True, help='Skip existed files.')
 def sync(
         src_path: str, dst_path: str, progress_bar: bool, worker: int,
-        force: bool, quiet: bool):
+        force: bool, quiet: bool, skip: bool):
     with ThreadPoolExecutor(max_workers=worker) as executor:
         if has_magic(src_path):
             src_root_path = get_non_glob_dir(src_path)
@@ -384,6 +394,7 @@ def sync(
                 followlinks=True,
                 callback_after_copy_file=callback_after_copy_file,
                 force=force,
+                overwrite=not skip,
             ) for file_entry in file_entries)
         list(executor.map(_smart_sync_single_file, params_iter))
         if not quiet:
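
The new --skip flag on cp, mv and sync is passed down as overwrite=not skip to the underlying smart_* calls. A minimal sketch of the equivalent library usage (bucket and key names below are hypothetical):

    from megfile import smart_copy, smart_sync

    # Equivalent of `megfile cp --skip` / `megfile sync --skip`:
    # an existing destination file is left untouched instead of overwritten.
    smart_copy('s3://bucket/src.bin', 's3://bucket/dst.bin', overwrite=False)
    smart_sync('s3://bucket/src-dir/', 's3://bucket/dst-dir/', overwrite=False)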
megfile/errors.py CHANGED
@@ -25,6 +25,9 @@ __all__ = [
     'UnsupportedError',
     'HttpPermissionError',
     'HttpFileNotFoundError',
+    'HttpBodyIncompleteError',
+    'HttpUnknownError',
+    'HttpException',
     'ProtocolExistsError',
     'ProtocolNotFoundError',
     'S3UnknownError',
@@ -124,24 +127,6 @@ def s3_should_retry(error: Exception) -> bool:
     return False


-http_retry_exceptions = (
-    requests.exceptions.ReadTimeout,
-    requests.exceptions.ConnectTimeout,
-    requests.exceptions.ChunkedEncodingError,
-    requests.exceptions.HTTPError,
-    requests.exceptions.ProxyError,
-    urllib3.exceptions.IncompleteRead,
-    urllib3.exceptions.ProtocolError,
-    urllib3.exceptions.ReadTimeoutError,
-)
-
-
-def http_should_retry(error: Exception) -> bool:
-    if isinstance(error, http_retry_exceptions):
-        return True
-    return False
-
-
 def patch_method(
         func: Callable,
         max_retries: int,
@@ -301,6 +286,29 @@ class HttpUnknownError(HttpException, UnknownError):
     pass


+class HttpBodyIncompleteError(HttpException):
+    pass
+
+
+http_retry_exceptions = (
+    requests.exceptions.ReadTimeout,
+    requests.exceptions.ConnectTimeout,
+    requests.exceptions.ChunkedEncodingError,
+    requests.exceptions.HTTPError,
+    requests.exceptions.ProxyError,
+    urllib3.exceptions.IncompleteRead,
+    urllib3.exceptions.ProtocolError,
+    urllib3.exceptions.ReadTimeoutError,
+    HttpBodyIncompleteError,
+)
+
+
+def http_should_retry(error: Exception) -> bool:
+    if isinstance(error, http_retry_exceptions):
+        return True
+    return False
+
+
 class ProtocolExistsError(Exception):
     pass

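Moving this block below the exception classes lets the new HttpBodyIncompleteError be included in http_retry_exceptions, so a truncated response body is treated as a retryable error. A quick check of the predicate:

    from megfile.errors import HttpBodyIncompleteError, http_should_retry

    # Truncated-body errors are now classified as retryable; unrelated errors are not.
    assert http_should_retry(HttpBodyIncompleteError('incomplete body'))
    assert not http_should_retry(ValueError('unrelated'))
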
megfile/fs.py CHANGED
@@ -311,7 +311,8 @@ def fs_copy(
         src_path: PathLike,
         dst_path: PathLike,
         callback: Optional[Callable[[int], None]] = None,
-        followlinks: bool = False):
+        followlinks: bool = False,
+        overwrite: bool = True):
     ''' File copy on file system
     Copy content (excluding meta date) of file on `src_path` to `dst_path`. `dst_path` must be a complete file name

@@ -331,23 +332,26 @@ def fs_copy(
     :param dst_path: Target file path
     :param callback: Called periodically during copy, and the input parameter is the data size (in bytes) of copy since the last call
     :param followlinks: False if regard symlink as file, else True
+    :param overwrite: whether or not overwrite file when exists, default is True
     '''
-    return FSPath(src_path).copy(dst_path, callback, followlinks)
+    return FSPath(src_path).copy(dst_path, callback, followlinks, overwrite)


 def fs_sync(
         src_path: PathLike,
         dst_path: PathLike,
         followlinks: bool = False,
-        force: bool = False) -> None:
+        force: bool = False,
+        overwrite: bool = True) -> None:
     '''Force write of everything to disk.

     :param src_path: Given path
     :param dst_path: Target file path
     :param followlinks: False if regard symlink as file, else True
-    :param force: Sync file forcely, do not ignore same files
+    :param force: Sync file forcely, do not ignore same files, priority is higher than 'overwrite', default is False
+    :param overwrite: whether or not overwrite file when exists, default is True
     '''
-    return FSPath(src_path).sync(dst_path, followlinks, force)
+    return FSPath(src_path).sync(dst_path, followlinks, force, overwrite)


 def fs_symlink(src_path: PathLike, dst_path: PathLike) -> None:
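
Both wrappers simply forward the new keyword to FSPath.copy / FSPath.sync. A short sketch with hypothetical local paths:

    from megfile.fs import fs_copy, fs_sync

    # overwrite=False leaves an already-existing destination file untouched;
    # on fs_sync, force=True still takes precedence and copies regardless.
    fs_copy('a.txt', 'backup/a.txt', overwrite=False)
    fs_sync('data', 'mirror', force=False, overwrite=False)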
megfile/fs_path.py CHANGED
@@ -3,7 +3,6 @@ import io
 import os
 import pathlib
 import shutil
-from shutil import copytree
 from stat import S_ISDIR as stat_isdir
 from stat import S_ISLNK as stat_islnk
 from typing import IO, AnyStr, BinaryIO, Callable, Iterator, List, Optional, Tuple, Union
@@ -158,24 +157,69 @@ def fs_glob_stat(
             os.path.basename(path), path, _make_stat(os.lstat(path)))


-def fs_rename(src_path: PathLike, dst_path: PathLike) -> None:
+def _fs_rename_file(
+        src_path: PathLike, dst_path: PathLike, overwrite: bool = True) -> None:
     '''
     rename file on fs

     :param src_path: Given path
     :param dst_path: Given destination path
+    :param overwrite: whether or not overwrite file when exists
     '''
+    src_path, dst_path = fspath(src_path), fspath(dst_path)
+
+    if not overwrite and os.path.exists(dst_path):
+        return
+
+    dst_dir = os.path.dirname(dst_path)
+    if dst_dir and dst_dir != ".":
+        os.makedirs(dst_dir, exist_ok=True)
     shutil.move(src_path, dst_path)


-def fs_move(src_path: PathLike, dst_path: PathLike) -> None:
+def fs_rename(
+        src_path: PathLike, dst_path: PathLike, overwrite: bool = True) -> None:
+    '''
+    rename file on fs
+
+    :param src_path: Given path
+    :param dst_path: Given destination path
+    :param overwrite: whether or not overwrite file when exists
+    '''
+    src_path, dst_path = fspath(src_path), fspath(dst_path)
+    if os.path.isfile(src_path):
+        return _fs_rename_file(src_path, dst_path, overwrite)
+    else:
+        os.makedirs(dst_path, exist_ok=True)
+
+    with os.scandir(src_path) as entries:
+        for file_entry in entries:
+            src_file_path = file_entry.path
+            dst_file_path = dst_path
+            relative_path = os.path.relpath(src_file_path, start=src_path)
+            if relative_path and relative_path != '.':
+                dst_file_path = os.path.join(dst_file_path, relative_path)
+            if os.path.exists(dst_file_path) and file_entry.is_dir():
+                fs_rename(src_file_path, dst_file_path, overwrite)
+            else:
+                _fs_rename_file(src_file_path, dst_file_path, overwrite)
+
+    if os.path.isdir(src_path):
+        shutil.rmtree(src_path)
+    else:
+        os.remove(src_path)
+
+
+def fs_move(
+        src_path: PathLike, dst_path: PathLike, overwrite: bool = True) -> None:
     '''
     rename file on fs

     :param src_path: Given path
     :param dst_path: Given destination path
+    :param overwrite: whether or not overwrite file when exists
     '''
-    fs_rename(src_path, dst_path)
+    return fs_rename(src_path, dst_path, overwrite)


 def fs_resolve(path: PathLike) -> str:
@@ -491,22 +535,24 @@ class FSPath(URIPath):
         '''
         return fspath(os.path.relpath(self.path_without_protocol, start=start))

-    def rename(self, dst_path: PathLike) -> 'FSPath':
+    def rename(self, dst_path: PathLike, overwrite: bool = True) -> 'FSPath':
         '''
         rename file on fs

         :param dst_path: Given destination path
+        :param overwrite: whether or not overwrite file when exists
         '''
-        fs_rename(self.path_without_protocol, dst_path)
+        fs_rename(self.path_without_protocol, dst_path, overwrite)
         return self.from_path(dst_path)

-    def replace(self, dst_path: PathLike) -> 'FSPath':
+    def replace(self, dst_path: PathLike, overwrite: bool = True) -> 'FSPath':
         '''
         move file on fs

         :param dst_path: Given destination path
+        :param overwrite: whether or not overwrite file when exists
         '''
-        return self.rename(dst_path=dst_path)
+        return self.rename(dst_path=dst_path, overwrite=overwrite)

     def remove(self, missing_ok: bool = False) -> None:
         '''
@@ -678,7 +724,7 @@ class FSPath(URIPath):
         :rtype: FSPath
         '''
         return self.from_path(
-            str(
+            fspath(
                 pathlib.Path(
                     self.path_without_protocol).resolve(strict=strict)))

@@ -718,7 +764,8 @@ class FSPath(URIPath):
             self,
             dst_path: PathLike,
             callback: Optional[Callable[[int], None]] = None,
-            followlinks: bool = False):
+            followlinks: bool = False,
+            overwrite: bool = True):
         ''' File copy on file system
         Copy content (excluding meta date) of file on `src_path` to `dst_path`. `dst_path` must be a complete file name

@@ -737,7 +784,11 @@ class FSPath(URIPath):
         :param dst_path: Target file path
         :param callback: Called periodically during copy, and the input parameter is the data size (in bytes) of copy since the last call
         :param followlinks: False if regard symlink as file, else True
+        :param overwrite: whether or not overwrite file when exists, default is True
         '''
+        if not overwrite and os.path.exists((dst_path)):
+            return
+
         try:
             self._copyfile(dst_path, callback=callback, followlinks=followlinks)
         except FileNotFoundError as error:
@@ -754,12 +805,14 @@ class FSPath(URIPath):
             self,
             dst_path: PathLike,
             followlinks: bool = False,
-            force: bool = False) -> None:
+            force: bool = False,
+            overwrite: bool = True) -> None:
         '''Force write of everything to disk.

         :param dst_path: Target file path
         :param followlinks: False if regard symlink as file, else True
-        :param force: Sync file forcely, do not ignore same files
+        :param force: Sync file forcely, do not ignore same files, priority is higher than 'overwrite', default is False
+        :param overwrite: whether or not overwrite file when exists, default is True
         '''
         if self.is_dir(followlinks=followlinks):

@@ -767,18 +820,22 @@ class FSPath(URIPath):
                 ignore_files = []
                 for name in names:
                     dst_obj = self.from_path(dst_path).joinpath(name)
-                    if not force and dst_obj.exists() and is_same_file(
+                    if force:
+                        pass
+                    elif not overwrite and dst_obj.exists():
+                        ignore_files.append(name)
+                    elif dst_obj.exists() and is_same_file(
                             self.joinpath(name).stat(), dst_obj.stat(), 'copy'):
                         ignore_files.append(name)
                 return ignore_files

-            copytree(
+            shutil.copytree(
                 self.path_without_protocol,
                 dst_path,
                 ignore=ignore_same_file,
                 dirs_exist_ok=True)
         else:
-            self.copy(dst_path, followlinks=followlinks)
+            self.copy(dst_path, followlinks=followlinks, overwrite=overwrite)

     def symlink(self, dst_path: PathLike) -> None:
         '''
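
fs_rename now walks a source directory itself instead of handing everything to shutil.move, so it can merge into an existing destination and honour overwrite per file. A sketch with hypothetical directories:

    from megfile.fs_path import fs_rename

    # Entries of src/ are moved into dst/ one by one; with overwrite=False a file
    # already present in dst/ is kept, and the source tree is removed afterwards.
    fs_rename('src', 'dst', overwrite=False)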
megfile/hdfs.py CHANGED
@@ -131,14 +131,15 @@ def hdfs_load_from(path: PathLike, followlinks: bool = False) -> BinaryIO:
     return HdfsPath(path).load(followlinks)


-def hdfs_move(src_path: PathLike, dst_path: PathLike) -> None:
+def hdfs_move(
+        src_path: PathLike, dst_path: PathLike, overwrite: bool = True) -> None:
     '''
     Move file/directory path from src_path to dst_path

     :param src_path: Given path
     :param dst_path: Given destination path
     '''
-    return HdfsPath(src_path).move(dst_path)
+    return HdfsPath(src_path).move(dst_path, overwrite)


 def hdfs_remove(path: PathLike, missing_ok: bool = False) -> None:
megfile/hdfs_path.py CHANGED
@@ -405,25 +405,35 @@ class HdfsPath(URIPath):
         with raise_hdfs_error(self.path_with_protocol):
             self._client.makedirs(self.path_without_protocol, permission=mode)

-    def rename(self, dst_path: PathLike) -> 'HdfsPath':
+    def rename(self, dst_path: PathLike, overwrite: bool = True) -> 'HdfsPath':
         '''
         Move hdfs file path from src_path to dst_path

         :param dst_path: Given destination path
+        :param overwrite: whether or not overwrite file when exists
         '''
         dst_path = self.from_path(dst_path)
-        with raise_hdfs_error(self.path_with_protocol):
-            self._client.rename(
-                self.path_without_protocol, dst_path.path_without_protocol)
+        if self.is_dir():
+            for filename in self.iterdir():
+                self.joinpath(filename).rename(dst_path.joinpath(filename))  # pytype: disable=attribute-error
+        else:
+            if overwrite:
+                dst_path.remove(missing_ok=True)
+            if overwrite or not dst_path.exists():
+                with raise_hdfs_error(self.path_with_protocol):
+                    self._client.rename(
+                        self.path_without_protocol,
+                        dst_path.path_without_protocol)
+        self.remove(missing_ok=True)
         return dst_path

-    def move(self, dst_path: PathLike) -> None:
+    def move(self, dst_path: PathLike, overwrite: bool = True) -> None:
         '''
         Move file/directory path from src_path to dst_path

         :param dst_path: Given destination path
         '''
-        self.rename(dst_path=dst_path)
+        self.rename(dst_path=dst_path, overwrite=overwrite)

     def remove(self, missing_ok: bool = False) -> None:
         '''
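
On HDFS, directories are now renamed entry by entry and a plain file is only renamed when overwrite=True or the destination does not exist. A sketch with a hypothetical cluster path:

    from megfile.hdfs_path import HdfsPath

    # Skipped if dst.txt already exists (overwrite=False); otherwise the file is
    # renamed via the HDFS client and the source is removed.
    HdfsPath('hdfs://user/me/src.txt').rename(
        'hdfs://user/me/dst.txt', overwrite=False)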
megfile/http_path.py CHANGED
@@ -1,4 +1,5 @@
 import time
+from copy import deepcopy
 from functools import partial
 from io import BufferedReader, BytesIO
 from logging import getLogger as get_logger
@@ -12,7 +13,7 @@ from megfile.config import DEFAULT_BLOCK_SIZE, HTTP_MAX_RETRY_TIMES
 from megfile.errors import http_should_retry, patch_method, translate_http_error
 from megfile.interfaces import PathLike, Readable, StatResult, URIPath
 from megfile.lib.compat import fspath
-from megfile.lib.http_prefetch_reader import HttpPrefetchReader
+from megfile.lib.http_prefetch_reader import DEFAULT_TIMEOUT, HttpPrefetchReader
 from megfile.lib.s3_buffered_writer import DEFAULT_MAX_BUFFER_SIZE
 from megfile.lib.url import get_url_scheme
 from megfile.pathlike import PathLike
@@ -32,7 +33,7 @@ max_retries = HTTP_MAX_RETRY_TIMES


 def get_http_session(
-        timeout: Union[int, Tuple[int, int]] = (9, 60),
+        timeout: Optional[Union[int, Tuple[int, int]]] = DEFAULT_TIMEOUT,
         status_forcelist: Iterable[int] = (500, 502, 503, 504)
 ) -> requests.Session:
     session = requests.Session()
@@ -172,10 +173,12 @@ class HttpPath(URIPath):
     protocol = "http"

     def __init__(self, path: PathLike, *other_paths: PathLike):
-        if str(path).startswith('https://'):
-            self.protocol = 'https'
         super().__init__(path, *other_paths)

+        if fspath(path).startswith('https://'):
+            self.protocol = 'https'
+        self.request_kwargs = {}
+
     @binary_open
     def open(
             self,
@@ -204,9 +207,15 @@ class HttpPath(URIPath):
             raise ValueError('unacceptable mode: %r' % mode)

         response = None
+        request_kwargs = deepcopy(self.request_kwargs)
+        timeout = request_kwargs.pop('timeout', DEFAULT_TIMEOUT)
+        stream = request_kwargs.pop('stream', True)
         try:
-            response = get_http_session(status_forcelist=()).get(
-                self.path_with_protocol, stream=True)
+            response = get_http_session(
+                timeout=timeout,
+                status_forcelist=(),
+            ).get(
+                self.path_with_protocol, stream=stream, **request_kwargs)
             response.raise_for_status()
         except Exception as error:
             if response:
@@ -226,7 +235,7 @@ class HttpPath(URIPath):
         block_forward = max(int(block_capacity * forward_ratio), 1)

         reader = HttpPrefetchReader(
-            self.path_with_protocol,
+            self,
             content_size=content_size,
             max_retries=max_retries,
             max_workers=max_concurrency,
@@ -254,9 +263,14 @@ class HttpPath(URIPath):
         :raises: HttpPermissionError, HttpFileNotFoundError
         '''

+        request_kwargs = deepcopy(self.request_kwargs)
+        timeout = request_kwargs.pop('timeout', DEFAULT_TIMEOUT)
+        stream = request_kwargs.pop('stream', True)
+
         try:
-            with get_http_session(status_forcelist=()).get(
-                    self.path_with_protocol, stream=True) as response:
+            with get_http_session(timeout=timeout, status_forcelist=()).get(
+                    self.path_with_protocol, stream=stream,
+                    **request_kwargs) as response:
                 response.raise_for_status()
                 headers = response.headers
         except Exception as error:
@@ -307,9 +321,14 @@ class HttpPath(URIPath):
         :return: return True if exists
         :rtype: bool
         """
+        request_kwargs = deepcopy(self.request_kwargs)
+        timeout = request_kwargs.pop('timeout', DEFAULT_TIMEOUT)
+        stream = request_kwargs.pop('stream', True)
+
         try:
-            with get_http_session(status_forcelist=()).get(
-                    self.path_with_protocol, stream=True) as response:
+            with get_http_session(timeout=timeout, status_forcelist=()).get(
+                    self.path_with_protocol, stream=stream,
+                    **request_kwargs) as response:
                 if response.status_code == 404:
                     return False
                 return True
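
Every HttpPath now carries a request_kwargs dict that open(), stat() and exists() deep-copy and forward to requests.get, with timeout and stream popped out first. A sketch with a hypothetical URL and token:

    from megfile.http_path import HttpPath

    path = HttpPath('https://example.com/data.bin')  # hypothetical URL
    path.request_kwargs = {
        'timeout': (5, 30),                              # overrides DEFAULT_TIMEOUT
        'headers': {'Authorization': 'Bearer <token>'},  # forwarded to requests.get
    }
    with path.open('rb') as f:
        header = f.read(16)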
megfile/lib/http_prefetch_reader.py CHANGED
@@ -1,12 +1,15 @@
-import os
 from io import BytesIO
 from typing import Optional

 import requests

 from megfile.config import DEFAULT_BLOCK_CAPACITY, DEFAULT_BLOCK_SIZE, HTTP_MAX_RETRY_TIMES
-from megfile.errors import UnsupportedError, http_should_retry, patch_method
+from megfile.errors import HttpBodyIncompleteError, UnsupportedError, http_should_retry, patch_method
 from megfile.lib.base_prefetch_reader import BasePrefetchReader
+from megfile.lib.compat import fspath
+from megfile.pathlike import PathLike
+
+DEFAULT_TIMEOUT = (60, 60 * 60 * 24)


 class HttpPrefetchReader(BasePrefetchReader):
@@ -19,7 +22,7 @@ class HttpPrefetchReader(BasePrefetchReader):

     def __init__(
             self,
-            url: str,
+            url: PathLike,
             *,
             content_size: Optional[int] = None,
             block_size: int = DEFAULT_BLOCK_SIZE,
@@ -46,22 +49,28 @@ class HttpPrefetchReader(BasePrefetchReader):
         if first_index_response['Headers'].get('Accept-Ranges') != 'bytes':
             raise UnsupportedError(
                 f'Unsupported server, server must support Accept-Ranges: {self._url}',
-                path=self._url,
+                path=fspath(self._url),
             )
         return first_index_response['Headers']['Content-Length']

     @property
     def name(self) -> str:
-        return self._url
+        return fspath(self._url)

     def _fetch_response(
             self, start: Optional[int] = None,
             end: Optional[int] = None) -> dict:

         def fetch_response() -> dict:
+            request_kwargs = {}
+            if hasattr(self._url, 'request_kwargs'):
+                request_kwargs = self._url.request_kwargs
+            timeout = request_kwargs.pop('timeout', DEFAULT_TIMEOUT)
+            stream = request_kwargs.pop('stream', True)
+
             if start is None or end is None:
-                with requests.get(self._url, timeout=10,
-                                  stream=True) as response:
+                with requests.get(fspath(self._url), timeout=timeout,
+                                  stream=stream, **request_kwargs) as response:
                     return {
                         'Headers': response.headers,
                         'Cookies': response.cookies,
@@ -71,9 +80,16 @@ class HttpPrefetchReader(BasePrefetchReader):
                 range_end = end
                 if self._content_size is not None:
                     range_end = min(range_end, self._content_size - 1)
-                headers = {"Range": f"bytes={start}-{range_end}"}
-                with requests.get(self._url, timeout=10, headers=headers,
-                                  stream=True) as response:
+                headers = request_kwargs.pop('headers', {})
+                headers["Range"] = f"bytes={start}-{range_end}"
+                with requests.get(fspath(self._url), timeout=timeout,
+                                  headers=headers, stream=stream,
+                                  **request_kwargs) as response:
+                    if len(response.content) != int(
+                            response.headers['Content-Length']):
+                        raise HttpBodyIncompleteError(
+                            f"The downloaded content is incomplete, expected size: {response.headers['Content-Length']}, actual size: {len(response.content)}",
+                        )
                     return {
                         'Body': BytesIO(response.content),
                         'Headers': response.headers,
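
The reader now receives the HttpPath itself rather than a URL string, so per-path request_kwargs ride along with every range request, and fspath() recovers the plain URL wherever a string is still needed. A small illustration (URL hypothetical):

    from megfile.http_path import HttpPath
    from megfile.lib.compat import fspath
    from megfile.lib.http_prefetch_reader import DEFAULT_TIMEOUT

    # DEFAULT_TIMEOUT is a (connect, read) tuple in seconds: one minute / one day.
    assert DEFAULT_TIMEOUT == (60, 60 * 60 * 24)

    path = HttpPath('https://example.com/large.bin')
    path.request_kwargs = {'headers': {'Accept-Encoding': 'identity'}}
    print(fspath(path))  # the plain URL string passed to requests.get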
megfile/pathlike.py CHANGED
@@ -730,16 +730,23 @@ class URIPath(BaseURIPath):
         with self.open(mode='r') as f:
             return f.read()

-    def rename(self, dst_path: PathLike) -> 'URIPath':
+    def rename(self, dst_path: PathLike, overwrite: bool = True) -> 'URIPath':
+        '''
+        rename file
+
+        :param dst_path: Given destination path
+        :param overwrite: whether or not overwrite file when exists
+        '''
         raise NotImplementedError(f"'rename' is unsupported on '{type(self)}'")

-    def replace(self, dst_path: PathLike) -> 'URIPath':
+    def replace(self, dst_path: PathLike, overwrite: bool = True) -> 'URIPath':
         '''
         move file

         :param dst_path: Given destination path
+        :param overwrite: whether or not overwrite file when exists
         '''
-        return self.rename(dst_path=dst_path)
+        return self.rename(dst_path=dst_path, overwrite=overwrite)

     def rglob(self, pattern) -> List['URIPath']:
         '''