megfile 4.1.0.post2__py3-none-any.whl → 4.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
megfile/cli.py CHANGED
@@ -1,10 +1,12 @@
1
1
  import configparser
2
2
  import os
3
3
  import shutil
4
+ import signal
4
5
  import sys
5
6
  import time
6
7
  from concurrent.futures import ThreadPoolExecutor
7
8
  from functools import partial
9
+ from queue import Queue
8
10
 
9
11
  import click
10
12
  from tqdm import tqdm
@@ -69,10 +71,13 @@ def cli(debug, log_level):
69
71
 
70
72
 
71
73
  def safe_cli(): # pragma: no cover
74
+ debug = options.get("debug", False)
75
+ if not debug:
76
+ signal.signal(signal.SIGINT, signal.SIG_DFL)
72
77
  try:
73
78
  cli()
74
79
  except Exception as e:
75
- if options.get("debug", False):
80
+ if debug:
76
81
  raise
77
82
  else:
78
83
  click.echo(f"\n[{type(e).__name__}] {e}", err=True)
@@ -246,7 +251,6 @@ def cp(
246
251
  dst_path,
247
252
  followlinks=True,
248
253
  map_func=executor.map,
249
- force=True,
250
254
  overwrite=not skip,
251
255
  )
252
256
  else:
@@ -255,7 +259,6 @@ def cp(
255
259
  dst_path,
256
260
  followlinks=True,
257
261
  map_func=executor.map,
258
- force=True,
259
262
  overwrite=not skip,
260
263
  )
261
264
  else:
@@ -366,23 +369,25 @@ def rm(path: str, recursive: bool):
366
369
  @cli.command(short_help="Make source and dest identical, modifying destination only.")
367
370
  @click.argument("src_path")
368
371
  @click.argument("dst_path")
369
- @click.option("-g", "--progress-bar", is_flag=True, help="Show progress bar.")
370
372
  @click.option(
371
- "-w", "--worker", type=click.INT, default=8, help="Number of concurrent workers."
373
+ "-f", "--force", is_flag=True, help="Copy files forcible, ignore same files."
372
374
  )
375
+ @click.option("--skip", is_flag=True, help="Skip existed files.")
373
376
  @click.option(
374
- "-f", "--force", is_flag=True, help="Copy files forcible, ignore same files."
377
+ "-w", "--worker", type=click.INT, default=-1, help="Number of concurrent workers."
375
378
  )
379
+ @click.option("-g", "--progress-bar", is_flag=True, help="Show progress bar.")
380
+ @click.option("-v", "--verbose", is_flag=True, help="Show more progress log.")
376
381
  @click.option("-q", "--quiet", is_flag=True, help="Not show any progress log.")
377
- @click.option("--skip", is_flag=True, help="Skip existed files.")
378
382
  def sync(
379
383
  src_path: str,
380
384
  dst_path: str,
381
- progress_bar: bool,
382
- worker: int,
383
385
  force: bool,
384
- quiet: bool,
385
386
  skip: bool,
387
+ worker: int,
388
+ progress_bar: bool,
389
+ verbose: bool,
390
+ quiet: bool,
386
391
  ):
387
392
  _sftp_prompt_host_key(src_path)
388
393
  _sftp_prompt_host_key(dst_path)
@@ -390,7 +395,8 @@ def sync(
390
395
  if not smart_exists(dst_path):
391
396
  force = True
392
397
 
393
- with ThreadPoolExecutor(max_workers=worker) as executor:
398
+ max_workers = worker if worker > 0 else (os.cpu_count() or 1) * 2
399
+ with ThreadPoolExecutor(max_workers=max_workers + 1) as executor: # +1 for scan
394
400
  if has_magic(src_path):
395
401
  src_root_path = get_non_glob_dir(src_path)
396
402
  if not smart_exists(src_root_path):
@@ -412,42 +418,66 @@ def sync(
412
418
  src_root_path = src_path
413
419
  scan_func = partial(smart_scan_stat, followlinks=True)
414
420
 
415
- if progress_bar and not quiet:
416
- print("building progress bar", end="\r")
417
- file_entries = []
418
- total_count = total_size = 0
419
- for total_count, file_entry in enumerate(scan_func(src_path), start=1):
420
- if total_count > max_file_object_catch_count:
421
- file_entries = []
422
- else:
423
- file_entries.append(file_entry)
424
- total_size += file_entry.stat.size
425
- print(f"building progress bar, find {total_count} files", end="\r")
426
-
427
- if not file_entries:
428
- file_entries = scan_func(src_path)
429
- else:
430
- total_count = total_size = None
431
- file_entries = scan_func(src_path)
432
-
433
421
  if quiet:
422
+ progress_bar = False
423
+ verbose = False
424
+
425
+ if not progress_bar:
434
426
  callback = callback_after_copy_file = None
427
+
428
+ if verbose:
429
+
430
+ def callback_after_copy_file(src_file_path, dst_file_path):
431
+ print(f"copy {src_file_path} to {dst_file_path} done")
432
+
433
+ file_entries = scan_func(src_path)
435
434
  else:
436
- tbar = tqdm(total=total_count, ascii=True)
435
+ tbar = tqdm(
436
+ total=0,
437
+ ascii=True,
438
+ desc="Files (scaning)",
439
+ )
437
440
  sbar = tqdm(
438
- unit="B",
441
+ total=0,
439
442
  ascii=True,
443
+ unit="B",
440
444
  unit_scale=True,
441
445
  unit_divisor=1024,
442
- total=total_size,
446
+ desc="File size (scaning)",
443
447
  )
444
448
 
445
- def callback(_filename: str, length: int):
446
- sbar.update(length)
447
-
448
449
  def callback_after_copy_file(src_file_path, dst_file_path):
450
+ if verbose:
451
+ tqdm.write(f"copy {src_file_path} to {dst_file_path} done")
449
452
  tbar.update(1)
450
453
 
454
+ def callback(src_file_path: str, length: int):
455
+ sbar.update(length)
456
+
457
+ file_entry_queue = Queue(maxsize=max_file_object_catch_count)
458
+
459
+ def scan_and_put_file_entry_to_queue():
460
+ for file_entry in scan_func(src_path):
461
+ tbar.total += 1
462
+ sbar.total += file_entry.stat.size
463
+ tbar.refresh()
464
+ sbar.refresh()
465
+ file_entry_queue.put(file_entry)
466
+ file_entry_queue.put(None)
467
+ tbar.set_description_str("Files")
468
+ sbar.set_description_str("File size")
469
+
470
+ executor.submit(scan_and_put_file_entry_to_queue)
471
+
472
+ def get_file_entry_from_queue():
473
+ while True:
474
+ file_entry = file_entry_queue.get()
475
+ if file_entry is None:
476
+ break
477
+ yield file_entry
478
+
479
+ file_entries = get_file_entry_from_queue()
480
+
451
481
  params_iter = (
452
482
  dict(
453
483
  src_root_path=src_root_path,
@@ -462,10 +492,10 @@ def sync(
462
492
  for file_entry in file_entries
463
493
  )
464
494
  list(executor.map(_smart_sync_single_file, params_iter))
465
- if not quiet:
495
+
496
+ if progress_bar:
497
+ sbar.update(sbar.total - sbar.n)
466
498
  tbar.close()
467
- if progress_bar:
468
- sbar.update(sbar.total - sbar.n)
469
499
  sbar.close()
470
500
 
471
501
 
megfile/errors.py CHANGED
@@ -114,6 +114,8 @@ s3_retry_exceptions = tuple(s3_retry_exceptions) # pyre-ignore[9]
114
114
  def s3_should_retry(error: Exception) -> bool:
115
115
  if isinstance(error, s3_retry_exceptions): # pyre-ignore[6]
116
116
  return True
117
+ if isinstance(error, botocore.exceptions.SSLError):
118
+ return "EOF" in str(error)
117
119
  if isinstance(error, botocore.exceptions.ClientError):
118
120
  return client_error_code(error) in (
119
121
  "429", # noqa: E501 # TOS ExceedAccountQPSLimit
@@ -128,6 +130,7 @@ def s3_should_retry(error: Exception) -> bool:
128
130
  "ContextCanceled",
129
131
  "Timeout", # noqa: E501 # TOS Timeout
130
132
  "RequestTimeout",
133
+ "RequestTimeTooSkewed",
131
134
  "ExceedAccountQPSLimit",
132
135
  "ExceedAccountRateLimit",
133
136
  "ExceedBucketQPSLimit",
megfile/fs.py CHANGED
@@ -1,4 +1,6 @@
1
1
  import os
2
+ from stat import S_ISDIR as stat_isdir
3
+ from stat import S_ISLNK as stat_islnk
2
4
  from typing import BinaryIO, Callable, Iterator, List, Optional, Tuple
3
5
 
4
6
  from megfile.fs_path import (
@@ -7,7 +9,7 @@ from megfile.fs_path import (
7
9
  fs_path_join,
8
10
  is_fs,
9
11
  )
10
- from megfile.interfaces import Access, FileEntry, PathLike, StatResult
12
+ from megfile.interfaces import Access, ContextIterator, FileEntry, PathLike, StatResult
11
13
 
12
14
  __all__ = [
13
15
  "is_fs",
@@ -169,7 +171,7 @@ def fs_isfile(path: PathLike, followlinks: bool = False) -> bool:
169
171
  return FSPath(path).is_file(followlinks)
170
172
 
171
173
 
172
- def fs_listdir(path: PathLike) -> List[str]:
174
+ def fs_listdir(path: Optional[PathLike] = None) -> List[str]:
173
175
  """
174
176
  Get all contents of given fs path.
175
177
  The result is in ascending alphabetical order.
@@ -177,6 +179,8 @@ def fs_listdir(path: PathLike) -> List[str]:
177
179
  :param path: Given path
178
180
  :returns: All contents have in the path in ascending alphabetical order
179
181
  """
182
+ if path is None:
183
+ return sorted(os.listdir(path))
180
184
  return FSPath(path).listdir()
181
185
 
182
186
 
@@ -255,13 +259,34 @@ def fs_scan_stat(
255
259
  return FSPath(path).scan_stat(missing_ok, followlinks)
256
260
 
257
261
 
258
- def fs_scandir(path: PathLike) -> Iterator[FileEntry]:
262
+ def fs_scandir(path: Optional[PathLike] = None) -> Iterator[FileEntry]:
259
263
  """
260
264
  Get all content of given file path.
261
265
 
262
266
  :param path: Given path
263
267
  :returns: An iterator contains all contents have prefix path
264
268
  """
269
+ if path is None:
270
+
271
+ def create_generator():
272
+ with os.scandir(None) as entries:
273
+ for entry in entries:
274
+ stat = entry.stat()
275
+ yield FileEntry(
276
+ entry.name,
277
+ entry.path,
278
+ StatResult(
279
+ size=stat.st_size,
280
+ ctime=stat.st_ctime,
281
+ mtime=stat.st_mtime,
282
+ isdir=stat_isdir(stat.st_mode),
283
+ islnk=stat_islnk(stat.st_mode),
284
+ extra=stat,
285
+ ),
286
+ )
287
+
288
+ return ContextIterator(create_generator())
289
+
265
290
  return FSPath(path).scandir()
266
291
 
267
292
 
megfile/smart.py CHANGED
@@ -1,8 +1,6 @@
1
1
  import os
2
2
  from collections import defaultdict
3
3
  from functools import partial
4
- from stat import S_ISDIR as stat_isdir
5
- from stat import S_ISLNK as stat_islnk
6
4
  from typing import (
7
5
  IO,
8
6
  Any,
@@ -18,10 +16,14 @@ from typing import (
18
16
  from tqdm import tqdm
19
17
 
20
18
  from megfile.errors import S3UnknownError
21
- from megfile.fs import fs_copy, is_fs
19
+ from megfile.fs import (
20
+ fs_copy,
21
+ fs_listdir,
22
+ fs_scandir,
23
+ is_fs,
24
+ )
22
25
  from megfile.interfaces import (
23
26
  Access,
24
- ContextIterator,
25
27
  FileCacher,
26
28
  FileEntry,
27
29
  NullCacher,
@@ -170,7 +172,7 @@ def smart_listdir(path: Optional[PathLike] = None) -> List[str]:
170
172
  :raises: FileNotFoundError, NotADirectoryError
171
173
  """
172
174
  if path is None:
173
- return sorted(os.listdir(path))
175
+ return fs_listdir()
174
176
  return SmartPath(path).listdir()
175
177
 
176
178
 
@@ -183,25 +185,7 @@ def smart_scandir(path: Optional[PathLike] = None) -> Iterator[FileEntry]:
183
185
  :raises: FileNotFoundError, NotADirectoryError
184
186
  """
185
187
  if path is None:
186
-
187
- def create_generator():
188
- with os.scandir(None) as entries:
189
- for entry in entries:
190
- stat = entry.stat()
191
- yield FileEntry(
192
- entry.name,
193
- entry.path,
194
- StatResult(
195
- size=stat.st_size,
196
- ctime=stat.st_ctime,
197
- mtime=stat.st_mtime,
198
- isdir=stat_isdir(stat.st_mode),
199
- islnk=stat_islnk(stat.st_mode),
200
- extra=stat,
201
- ),
202
- )
203
-
204
- return ContextIterator(create_generator())
188
+ return fs_scandir()
205
189
  return SmartPath(path).scandir()
206
190
 
207
191
 
@@ -404,7 +388,7 @@ def _smart_sync_single_file(items: dict):
404
388
  force = items["force"]
405
389
  overwrite = items["overwrite"]
406
390
 
407
- content_path = os.path.relpath(src_file_path, start=src_root_path)
391
+ content_path = smart_relpath(src_file_path, start=src_root_path)
408
392
  if len(content_path) and content_path != ".":
409
393
  content_path = content_path.lstrip("/")
410
394
  dst_abs_file_path = smart_path_join(dst_root_path, content_path)
@@ -438,6 +422,8 @@ def _smart_sync_single_file(items: dict):
438
422
  callback=copy_callback,
439
423
  followlinks=followlinks,
440
424
  )
425
+ elif callback:
426
+ callback(src_file_path, src_file_stat.size)
441
427
  if callback_after_copy_file:
442
428
  callback_after_copy_file(src_file_path, dst_abs_file_path)
443
429
  return should_sync
@@ -580,19 +566,21 @@ def smart_sync_with_progress(
580
566
  def callback_after_copy_file(src_file_path, dst_file_path):
581
567
  tbar.update(1)
582
568
 
583
- smart_sync(
584
- src_path,
585
- dst_path,
586
- callback=tqdm_callback,
587
- followlinks=followlinks,
588
- callback_after_copy_file=callback_after_copy_file,
589
- src_file_stats=file_stats,
590
- map_func=map_func,
591
- force=force,
592
- overwrite=overwrite,
593
- )
594
- tbar.close()
595
- sbar.close()
569
+ try:
570
+ smart_sync(
571
+ src_path,
572
+ dst_path,
573
+ callback=tqdm_callback,
574
+ followlinks=followlinks,
575
+ callback_after_copy_file=callback_after_copy_file,
576
+ src_file_stats=file_stats,
577
+ map_func=map_func,
578
+ force=force,
579
+ overwrite=overwrite,
580
+ )
581
+ finally:
582
+ tbar.close()
583
+ sbar.close()
596
584
 
597
585
 
598
586
  def smart_remove(path: PathLike, missing_ok: bool = False) -> None:
megfile/smart_path.py CHANGED
@@ -210,4 +210,4 @@ class SmartPath(BasePath):
210
210
 
211
211
 
212
212
  def get_traditional_path(path: PathLike) -> str:
213
- return fspath(SmartPath(path).path)
213
+ return fspath(SmartPath(path).pathlike.path)
megfile/version.py CHANGED
@@ -1 +1 @@
1
- VERSION = "4.1.0.post2"
1
+ VERSION = "4.1.1"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: megfile
3
- Version: 4.1.0.post2
3
+ Version: 4.1.1
4
4
  Summary: Megvii file operation library
5
5
  Author-email: megvii <megfile@megvii.com>
6
6
  Project-URL: Homepage, https://github.com/megvii-research/megfile
@@ -1,8 +1,8 @@
1
1
  megfile/__init__.py,sha256=7oEfu410CFKzDWZ9RjL5xEJ1gtkJkTfvPrL_7TWdJuY,7366
2
- megfile/cli.py,sha256=Y7ZyWsYmzMUjUrJ1Vz-WMWJQcpK-4og_5xyRzfKI91U,24957
2
+ megfile/cli.py,sha256=X9nWGke-VM-FCFdRoEoxVH_SNFkKQesBxhSrRv7jAsI,25884
3
3
  megfile/config.py,sha256=2MMj5QkhlDJQFZRbCQL2c9iDdeMAVctiaPszRBkg5vM,3988
4
- megfile/errors.py,sha256=qSIiqiJmDLb2k1qKiTLdNQzj4VNgfM9bHkSClyCp1Kg,14385
5
- megfile/fs.py,sha256=TJ0ifJ57IEL-gFgQhJD_WVEqmf28zC5kAvmH6c1QzbU,18240
4
+ megfile/errors.py,sha256=ZHp9zEltrmuUqP4Jtg8SY31cQeU0UKzKp1h9Jj8197E,14512
5
+ megfile/fs.py,sha256=KMEqAE35alpcxiy6du5nPFYcaorhUM_kPJMah3q76ng,19160
6
6
  megfile/fs_path.py,sha256=Hozl9LAJ8EMuSWBSZXGj2GNmPZ1sJp9PZs-7hPrLgm8,39341
7
7
  megfile/hdfs.py,sha256=owXr4d3j1frCvlbhmhENcSBnKKDky5cJZzWLOF4ZJMo,13251
8
8
  megfile/hdfs_path.py,sha256=OmUe3vA3Qoxnqtcq0Rs3ygBvzAtqUz3fGo8iP5sWneE,26058
@@ -14,11 +14,11 @@ megfile/s3.py,sha256=abBxnI7RIyn7n7qjGszP1VruYd6Gi9I8QnUOvsHkx1Y,16325
14
14
  megfile/s3_path.py,sha256=zelXhlRVOVSWBE6HJz0vXrrcRzSuj6Cnjd9HLGwPbCM,93644
15
15
  megfile/sftp.py,sha256=uBcLQs-j6Q-q-sWAdd-pgi5Qmb_kq7boJM-0sCfcNO0,26540
16
16
  megfile/sftp_path.py,sha256=Wz4VcQ0pBUuWDGMSxPpPbutrT09mnY6jZNiAqTi5tO4,43840
17
- megfile/smart.py,sha256=YXjCWp000wimSFXE8oqI6dIE8y0RZJo9I046zIkh0ag,37014
18
- megfile/smart_path.py,sha256=Bqg95T2-XZrRXWhH7GT-jMCYzD7i1SIXdczQxtOxiPs,7583
17
+ megfile/smart.py,sha256=Sae2KJzaU0k_qV_Bk0YifOMq8WsV5qQ2pGInDRF546I,36411
18
+ megfile/smart_path.py,sha256=HqCOlDwekqqIyJAll-U9YKmaXjjfCGZD5n5aG80lOKw,7592
19
19
  megfile/stdio.py,sha256=ZwxsnJNJYIT7Iyg5pIw4qiyH8bszG6oAhEJuR-hXGG4,658
20
20
  megfile/stdio_path.py,sha256=cxaDr8rtisTPnN-rjtaEpqQnshwiqwXFUJBM9xWY7Cg,2711
21
- megfile/version.py,sha256=bGSGJb1sKjmDAZKMCver-Yiujxub2hCFXyCS_i7sl5U,25
21
+ megfile/version.py,sha256=JRp6vi1OeDHnCmegsFVWveiyO4FzOzG-JLUcsx9DHSQ,19
22
22
  megfile/lib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
23
23
  megfile/lib/base_prefetch_reader.py,sha256=uxVwYknOjc8hLF7q_T2QKMsBqFcrf411ZsuK25CN1eQ,12848
24
24
  megfile/lib/combine_reader.py,sha256=Kp2wEloOUpTlIU7dve87MBpSzmIM-F9OtpTawAjFkiU,4828
@@ -53,10 +53,10 @@ scripts/benchmark/code/s3fs_read.py,sha256=XiTA-qrYblUs-jQWXSnvNg5Wo722C_g47aMMf
53
53
  scripts/benchmark/code/s3fs_write.py,sha256=gdXKkWXYGjLJlRT_J64pJN85XvRg3bZexcAJQEMXwtw,402
54
54
  scripts/benchmark/code/smart_open_read.py,sha256=SA02jHwS9Y31yFtV9CoJcfND5dR0eA_HsGmGNUrpQls,515
55
55
  scripts/benchmark/code/smart_open_write.py,sha256=jDxFJdY97yNH889jz3pawBoei3yaqy8pEMvC_ymHFtM,537
56
- megfile-4.1.0.post2.dist-info/LICENSE,sha256=WNHhf_5RCaeuKWyq_K39vmp9F28LxKsB4SpomwSZ2L0,11357
57
- megfile-4.1.0.post2.dist-info/LICENSE.pyre,sha256=9lf5nT-5ZH25JijpYAequ0bl8E8z5JmZB1qrjiUMp84,1080
58
- megfile-4.1.0.post2.dist-info/METADATA,sha256=_oXpfxMbIgcf75-z8J-aZ2j-BcnxjmJkWax7Ruj1ctM,9579
59
- megfile-4.1.0.post2.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
60
- megfile-4.1.0.post2.dist-info/entry_points.txt,sha256=M6ZWSSv5_5_QtIpZafy3vq7WuOJ_5dSGQQnEZbByt2Q,49
61
- megfile-4.1.0.post2.dist-info/top_level.txt,sha256=fVg49lk5B9L7jyfWUXWxb0DDSuw5pbr0OU62Tvx8J8M,44
62
- megfile-4.1.0.post2.dist-info/RECORD,,
56
+ megfile-4.1.1.dist-info/LICENSE,sha256=WNHhf_5RCaeuKWyq_K39vmp9F28LxKsB4SpomwSZ2L0,11357
57
+ megfile-4.1.1.dist-info/LICENSE.pyre,sha256=9lf5nT-5ZH25JijpYAequ0bl8E8z5JmZB1qrjiUMp84,1080
58
+ megfile-4.1.1.dist-info/METADATA,sha256=FvGLIQyfs2_dp0um9bzkW3A6mDDVlVjFi9GMs0a_DMc,9573
59
+ megfile-4.1.1.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
60
+ megfile-4.1.1.dist-info/entry_points.txt,sha256=M6ZWSSv5_5_QtIpZafy3vq7WuOJ_5dSGQQnEZbByt2Q,49
61
+ megfile-4.1.1.dist-info/top_level.txt,sha256=fVg49lk5B9L7jyfWUXWxb0DDSuw5pbr0OU62Tvx8J8M,44
62
+ megfile-4.1.1.dist-info/RECORD,,