megfile 4.2.4__py3-none-any.whl → 5.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
megfile/sftp_path.py CHANGED
@@ -1,4 +1,5 @@
1
1
  import atexit
2
+ import base64
2
3
  import hashlib
3
4
  import io
4
5
  import os
@@ -29,6 +30,11 @@ _logger = get_logger(__name__)
29
30
  __all__ = [
30
31
  "SftpPath",
31
32
  "is_sftp",
33
+ "sftp_add_host_key",
34
+ "sftp_concat",
35
+ "sftp_copy",
36
+ "sftp_download",
37
+ "sftp_upload",
32
38
  ]
33
39
 
34
40
  SFTP_USERNAME = "SFTP_USERNAME"
@@ -64,7 +70,8 @@ def get_private_key():
64
70
  private_key_path = os.getenv(SFTP_PRIVATE_KEY_PATH)
65
71
  if not os.path.exists(private_key_path):
66
72
  raise FileNotFoundError(
67
- f"Private key file not exist: '{SFTP_PRIVATE_KEY_PATH}'"
73
+ "Private key file not exist, "
74
+ f"path:{private_key_path}, env:'{SFTP_PRIVATE_KEY_PATH}'"
68
75
  )
69
76
  return key_with_types[key_type].from_private_key_file(
70
77
  private_key_path, password=os.getenv(SFTP_PRIVATE_KEY_PASSWORD)
@@ -335,6 +342,236 @@ def _sftp_scan_pairs(
335
342
  yield src_file_path, dst_file_path
336
343
 
337
344
 
345
+ def _check_input(input_str: str, fingerprint: str, times: int = 0) -> bool:
346
+ answers = input_str.strip()
347
+ if answers.lower() in ("yes", "y") or answers == fingerprint:
348
+ return True
349
+ elif answers.lower() in ("no", "n"):
350
+ return False
351
+ elif times >= 10:
352
+ _logger.warning("Retried more than 10 times, give up")
353
+ return False
354
+ else:
355
+ input_str = input("Please type 'yes', 'no' or the fingerprint: ")
356
+ return _check_input(input_str, fingerprint, times=times + 1)
357
+
358
+
359
+ def _prompt_add_to_known_hosts(hostname, key) -> bool:
360
+ fingerprint = hashlib.sha256(key.asbytes()).digest()
361
+ fingerprint = f"SHA256:{base64.b64encode(fingerprint).decode('utf-8')}"
362
+ answers = input(f"""The authenticity of host '{hostname}' can't be established.
363
+ {key.get_name().upper()} key fingerprint is {fingerprint}.
364
+ This key is not known by any other names.
365
+ Are you sure you want to continue connecting (yes/no/[fingerprint])? """)
366
+ return _check_input(answers, fingerprint)
367
+
368
+
369
def sftp_add_host_key(
    hostname: str,
    port: int = 22,
    prompt: bool = False,
    host_key_path: Optional[str] = None,
):
    """Add a host key to known_hosts.

    Does nothing when ``hostname`` already has an entry in the known_hosts
    file. Otherwise the server key is fetched over a short-lived transport
    and appended to the file.

    :param hostname: hostname
    :param port: port, default is 22
    :param prompt: If True, requires user input of 'yes' or 'no' to decide whether to
        add this host key
    :param host_key_path: path of known_hosts, default is ~/.ssh/known_hosts
    """
    if not host_key_path:
        host_key_path = os.path.expanduser("~/.ssh/known_hosts")

    if not os.path.exists(host_key_path):
        # Create an empty known_hosts file (and its directory) with
        # owner-only permissions.
        dirname = os.path.dirname(host_key_path)
        if dirname and dirname != ".":
            os.makedirs(dirname, exist_ok=True, mode=0o700)
        with open(host_key_path, "w"):
            pass
        os.chmod(host_key_path, 0o600)

    host_key = paramiko.hostkeys.HostKeys(host_key_path)
    # NOTE(review): the lookup uses the bare hostname even for non-default
    # ports — confirm this matches how the SFTP client checks host keys.
    if host_key.lookup(hostname):
        return

    transport = paramiko.Transport((hostname, port))
    try:
        transport.connect()
        key = transport.get_remote_server_key()
    finally:
        # Always release the transport, including when negotiation fails.
        transport.close()

    if prompt and not _prompt_add_to_known_hosts(hostname, key):
        return

    host_key.add(hostname, key.get_name(), key)
    host_key.save(host_key_path)
415
+
416
+
417
def sftp_concat(src_paths: List[PathLike], dst_path: PathLike) -> None:
    """Concatenate sftp files to one file.

    Builds a ``cat <sources...> > <destination>`` command from the real paths
    of the given sftp paths and runs it through the destination path's
    ``_exec_command``.

    :param src_paths: Given source paths
    :param dst_path: Given destination path
    :raises OSError: If the command finishes with a non-zero return code
    """
    target = SftpPath(dst_path)

    remote_sources = [SftpPath(src)._real_path for src in src_paths]
    remote_target = SftpPath(dst_path)._real_path

    outcome = target._exec_command(["cat", *remote_sources, ">", remote_target])
    if outcome.returncode == 0:
        return

    _logger.error(outcome.stderr)
    raise OSError(f"Failed to concat {src_paths} to {dst_path}")
433
+
434
+
435
def sftp_copy(
    src_path: PathLike,
    dst_path: PathLike,
    callback: Optional[Callable[[int], None]] = None,
    followlinks: bool = False,
    overwrite: bool = True,
):
    """
    Copy a single sftp file by delegating to ``SftpPath(src_path).copy``.

    :param src_path: Given path
    :param dst_path: The destination path to copy the file to.
    :param callback: An optional callback function that takes an integer parameter
        and is called periodically during the copy operation to report the number
        of bytes copied.
    :param followlinks: Whether to follow symbolic links when copying directories.
    :param overwrite: Forwarded unchanged to ``SftpPath.copy``; presumably controls
        whether an existing destination is replaced.
    :raises IsADirectoryError: If the source is a directory.
    :raises OSError: If there is an error copying the file.
    :returns: Whatever ``SftpPath.copy`` returns.
    """
    source = SftpPath(src_path)
    return source.copy(dst_path, callback, followlinks, overwrite)
455
+
456
+
457
def sftp_download(
    src_url: PathLike,
    dst_url: PathLike,
    callback: Optional[Callable[[int], None]] = None,
    followlinks: bool = False,
    overwrite: bool = True,
):
    """
    Fetch one file from an sftp server into the local filesystem.

    After the transfer, the source's access/modification times and mode are
    applied to the local file.

    :param src_url: source sftp path
    :param dst_url: target fs path
    :param callback: Called periodically during copy, and the input parameter is
        the data size (in bytes) of copy since the last call
    :param followlinks: False if regard symlink as file, else True
    :param overwrite: whether or not overwrite file when exists, default is True
    :raises OSError: If dst_url is not an fs path or src_url is not an sftp path
    :raises IsADirectoryError: If the source is a directory or dst_url ends
        with "/"
    """
    from megfile.fs_path import FSPath, is_fs

    if not is_fs(dst_url):
        raise OSError(f"dst_url is not fs path: {dst_url}")
    if not (is_sftp(src_url) or isinstance(src_url, SftpPath)):
        raise OSError(f"src_url is not sftp path: {src_url}")

    local_target = FSPath(dst_url)
    if local_target.exists() if not overwrite else False:
        return

    remote_source: SftpPath = (
        src_url if isinstance(src_url, SftpPath) else SftpPath(src_url)
    )

    if followlinks and remote_source.is_symlink():
        remote_source = remote_source.readlink()
    if remote_source.is_dir():
        raise IsADirectoryError("Is a directory: %r" % src_url)
    if str(dst_url).endswith("/"):
        raise IsADirectoryError("Is a directory: %r" % dst_url)

    local_target.parent.makedirs(exist_ok=True)

    progress_hook = None
    if callback:
        reported_so_far = 0

        # The client reports cumulative totals; the caller's callback expects
        # the increment since the previous report.
        def progress_hook(bytes_transferred: int, _total_bytes: int):
            nonlocal reported_so_far
            increment = bytes_transferred - reported_so_far
            callback(increment)  # pyre-ignore[29]
            reported_so_far = bytes_transferred

    remote_source._client.get(
        remote_source._real_path,
        local_target.path_without_protocol,
        callback=progress_hook,
    )

    remote_stat = remote_source.stat()
    local_target.utime(remote_stat.st_atime, remote_stat.st_mtime)
    local_target.chmod(remote_stat.st_mode)
515
+
516
+
517
def sftp_upload(
    src_url: PathLike,
    dst_url: PathLike,
    callback: Optional[Callable[[int], None]] = None,
    followlinks: bool = False,
    overwrite: bool = True,
):
    """
    Uploads a file from local filesystem to sftp server.

    After the transfer, the source's access/modification times and mode are
    applied to the remote file.

    :param src_url: source fs path
    :param dst_url: target sftp path
    :param callback: Called periodically during copy, and the input parameter is
        the data size (in bytes) of copy since the last call
    :param followlinks: If True and src_url is a symlink, upload the file the
        link points to
    :param overwrite: whether or not overwrite file when exists, default is True
    :raises OSError: If src_url is not an fs path or dst_url is not an sftp path
    :raises IsADirectoryError: If the source is a directory or dst_url ends
        with "/"
    """
    from megfile.fs_path import FSPath, is_fs

    if not is_fs(src_url):
        raise OSError(f"src_url is not fs path: {src_url}")
    if not is_sftp(dst_url) and not isinstance(dst_url, SftpPath):
        raise OSError(f"dst_url is not sftp path: {dst_url}")

    if followlinks and os.path.islink(src_url):
        # os.readlink may return a target relative to the link's own
        # directory; resolve it there instead of against the current working
        # directory. An absolute target is returned unchanged by join().
        src_url = os.path.join(os.path.dirname(src_url), os.readlink(src_url))
    if os.path.isdir(src_url):
        raise IsADirectoryError("Is a directory: %r" % src_url)
    if str(dst_url).endswith("/"):
        raise IsADirectoryError("Is a directory: %r" % dst_url)

    src_path = FSPath(src_url)
    if isinstance(dst_url, SftpPath):
        dst_path: SftpPath = dst_url
    else:
        dst_path: SftpPath = SftpPath(dst_url)
    if not overwrite and dst_path.exists():
        return

    dst_path.parent.makedirs(exist_ok=True)

    sftp_callback = None
    if callback:
        bytes_transferred_before = 0

        # The client reports cumulative totals; the caller's callback expects
        # the increment since the previous report.
        def sftp_callback(bytes_transferred: int, _total_bytes: int):
            nonlocal bytes_transferred_before
            callback(bytes_transferred - bytes_transferred_before)  # pyre-ignore[29]
            bytes_transferred_before = bytes_transferred

    dst_path._client.put(
        src_path.path_without_protocol, dst_path._real_path, callback=sftp_callback
    )

    src_stat = src_path.stat()
    dst_path.utime(src_stat.st_atime, src_stat.st_mtime)
    dst_path.chmod(src_stat.st_mode)
573
+
574
+
338
575
  @SmartPath.register
339
576
  class SftpPath(URIPath):
340
577
  """sftp protocol
@@ -1133,7 +1370,7 @@ class SftpPath(URIPath):
1133
1370
 
1134
1371
  src_stat = self.stat()
1135
1372
  dst_path.utime(src_stat.st_atime, src_stat.st_mtime)
1136
- dst_path._client.chmod(dst_path._real_path, src_stat.st_mode)
1373
+ dst_path.chmod(src_stat.st_mode)
1137
1374
 
1138
1375
  def sync(
1139
1376
  self,
megfile/smart.py CHANGED
@@ -16,14 +16,13 @@ from typing import (
16
16
  from tqdm import tqdm
17
17
 
18
18
  from megfile.errors import S3UnknownError
19
- from megfile.fs import (
19
+ from megfile.fs_path import (
20
20
  fs_copy,
21
- fs_listdir,
22
- fs_scandir,
23
21
  is_fs,
24
22
  )
25
23
  from megfile.interfaces import (
26
24
  Access,
25
+ ContextIterator,
27
26
  FileCacher,
28
27
  FileEntry,
29
28
  NullCacher,
@@ -34,7 +33,7 @@ from megfile.lib.combine_reader import CombineReader
34
33
  from megfile.lib.compare import get_sync_type, is_same_file
35
34
  from megfile.lib.compat import fspath
36
35
  from megfile.lib.glob import globlize, ungloblize
37
- from megfile.s3 import (
36
+ from megfile.s3_path import (
38
37
  is_s3,
39
38
  s3_concat,
40
39
  s3_copy,
@@ -43,7 +42,7 @@ from megfile.s3 import (
43
42
  s3_open,
44
43
  s3_upload,
45
44
  )
46
- from megfile.sftp import sftp_concat, sftp_copy, sftp_download, sftp_upload
45
+ from megfile.sftp_path import sftp_concat, sftp_copy, sftp_download, sftp_upload
47
46
  from megfile.smart_path import SmartPath, get_traditional_path
48
47
  from megfile.utils import combine, copyfileobj, generate_cache_path
49
48
 
@@ -172,7 +171,7 @@ def smart_listdir(path: Optional[PathLike] = None) -> List[str]:
172
171
  :raises: FileNotFoundError, NotADirectoryError
173
172
  """
174
173
  if path is None:
175
- return fs_listdir()
174
+ return sorted(os.listdir(path))
176
175
  return SmartPath(path).listdir()
177
176
 
178
177
 
@@ -185,7 +184,28 @@ def smart_scandir(path: Optional[PathLike] = None) -> Iterator[FileEntry]:
185
184
  :raises: FileNotFoundError, NotADirectoryError
186
185
  """
187
186
  if path is None:
188
- return fs_scandir()
187
+
188
+ def create_generator():
189
+ from stat import S_ISDIR as stat_isdir
190
+ from stat import S_ISLNK as stat_islnk
191
+
192
+ with os.scandir(None) as entries:
193
+ for entry in entries:
194
+ stat = entry.stat()
195
+ yield FileEntry(
196
+ entry.name,
197
+ entry.path,
198
+ StatResult(
199
+ size=stat.st_size,
200
+ ctime=stat.st_ctime,
201
+ mtime=stat.st_mtime,
202
+ isdir=stat_isdir(stat.st_mode),
203
+ islnk=stat_islnk(stat.st_mode),
204
+ extra=stat,
205
+ ),
206
+ )
207
+
208
+ return ContextIterator(create_generator())
189
209
  return SmartPath(path).scandir()
190
210
 
191
211
 
@@ -359,8 +379,8 @@ def smart_copy(
359
379
  if smart_islink(src_path) and is_s3(dst_path) and not followlinks:
360
380
  return
361
381
 
362
- src_protocol, _ = SmartPath._extract_protocol(src_path)
363
- dst_protocol, _ = SmartPath._extract_protocol(dst_path)
382
+ src_protocol = SmartPath._extract_protocol(src_path)
383
+ dst_protocol = SmartPath._extract_protocol(dst_path)
364
384
 
365
385
  copy_func = _get_copy_func(src_protocol, dst_protocol)
366
386
 
@@ -406,8 +426,8 @@ def _smart_sync_single_file(items: dict):
406
426
  # this function is equal to smart_copy
407
427
  dst_abs_file_path = dst_root_path
408
428
 
409
- src_protocol, _ = SmartPath._extract_protocol(src_file_path)
410
- dst_protocol, _ = SmartPath._extract_protocol(dst_abs_file_path)
429
+ src_protocol = SmartPath._extract_protocol(src_file_path)
430
+ dst_protocol = SmartPath._extract_protocol(dst_abs_file_path)
411
431
  should_sync = True
412
432
  try:
413
433
  if not force:
@@ -617,8 +637,8 @@ def smart_rename(
617
637
  """
618
638
  if smart_isdir(src_path):
619
639
  raise IsADirectoryError("%r is a directory" % src_path)
620
- src_protocol, _ = SmartPath._extract_protocol(src_path)
621
- dst_protocol, _ = SmartPath._extract_protocol(dst_path)
640
+ src_protocol = SmartPath._extract_protocol(src_path)
641
+ dst_protocol = SmartPath._extract_protocol(dst_path)
622
642
  if src_protocol == dst_protocol:
623
643
  SmartPath(src_path).rename(dst_path, overwrite=overwrite)
624
644
  return
@@ -634,8 +654,8 @@ def smart_move(src_path: PathLike, dst_path: PathLike, overwrite: bool = True) -
634
654
  :param dst_path: Given destination path
635
655
  :param overwrite: whether or not overwrite file when exists
636
656
  """
637
- src_protocol, _ = SmartPath._extract_protocol(src_path)
638
- dst_protocol, _ = SmartPath._extract_protocol(dst_path)
657
+ src_protocol = SmartPath._extract_protocol(src_path)
658
+ dst_protocol = SmartPath._extract_protocol(dst_path)
639
659
  if src_protocol == dst_protocol:
640
660
  SmartPath(src_path).rename(dst_path, overwrite=overwrite)
641
661
  return
@@ -826,7 +846,7 @@ def _group_glob(globstr: PathLike) -> List[str]:
826
846
  expanded_glob = ungloblize(globstr)
827
847
 
828
848
  for single_glob in expanded_glob:
829
- protocol, _ = SmartPath._extract_protocol(single_glob)
849
+ protocol = SmartPath._extract_protocol(single_glob)
830
850
  glob_dict[protocol].append(single_glob)
831
851
 
832
852
  group_glob_list = []
@@ -1011,7 +1031,7 @@ def smart_load_content(
1011
1031
  def smart_save_content(path: PathLike, content: bytes) -> None:
1012
1032
  """Save bytes content to specified path
1013
1033
 
1014
- param path: Path to save content
1034
+ :param path: Path to save content
1015
1035
  """
1016
1036
  with smart_open(path, "wb") as fd:
1017
1037
  fd.write(content)
@@ -1021,7 +1041,7 @@ def smart_load_text(path: PathLike) -> str:
1021
1041
  """
1022
1042
  Read content from path
1023
1043
 
1024
- param path: Path to be read
1044
+ :param path: Path to be read
1025
1045
  """
1026
1046
  with smart_open(path) as fd:
1027
1047
  return fd.read() # pytype: disable=bad-return-type
@@ -1030,16 +1050,27 @@ def smart_load_text(path: PathLike) -> str:
1030
1050
  def smart_save_text(path: PathLike, text: str) -> None:
1031
1051
  """Save text to specified path
1032
1052
 
1033
- param path: Path to save text
1053
+ :param path: Path to save text
1034
1054
  """
1035
1055
  with smart_open(path, "w") as fd:
1036
1056
  fd.write(text)
1037
1057
 
1038
1058
 
1039
1059
  class SmartCacher(FileCacher):
1060
+ """Smart cache files in local filesystem"""
1061
+
1040
1062
  cache_path = None
1041
1063
 
1042
1064
  def __init__(self, path: str, cache_path: Optional[str] = None, mode: str = "r"):
1065
+ """
1066
+ :param path: Path to cache
1067
+ :type path: str
1068
+ :param cache_path: Path to cache file, defaults to None, will use ``/tmp``
1069
+ :type cache_path: Optional[str], optional
1070
+ :param mode: Mode to open cache file, defaults to "r"
1071
+ :type mode: str, optional
1072
+ :raises ValueError: If mode is not one of "r", "w", "a"
1073
+ """
1043
1074
  if mode not in ("r", "w", "a"):
1044
1075
  raise ValueError("unacceptable mode: %r" % mode)
1045
1076
  if cache_path is None:
@@ -1059,12 +1090,22 @@ class SmartCacher(FileCacher):
1059
1090
  os.unlink(self.cache_path)
1060
1091
 
1061
1092
 
1062
- def smart_cache(path, cacher=SmartCacher, **options):
1093
+ def smart_cache(path, cacher=SmartCacher, **options) -> FileCacher:
1063
1094
  """Return a path to Posixpath Interface
1064
1095
 
1065
- param path: Path to cache
1066
- param s3_cacher: Cacher for s3 path
1067
- param options: Optional arguments for s3_cacher
1096
+ Examples: ::
1097
+
1098
+ >>> import subprocess
1099
+ >>> from megfile import smart_cache
1100
+ >>> with smart_cache(
1101
+ ... 's3://mybucket/myfile.mp4',
1102
+ ... mode='r',
1103
+ ... ) as cache_path:
1104
+ ... subprocess.run(['ffprobe', cache_path])
1105
+
1106
+ :param path: Path to cache
1107
+ :param s3_cacher: Cacher for s3 path
1108
+ :param options: Optional arguments for s3_cacher
1068
1109
  """
1069
1110
  if not is_fs(path):
1070
1111
  return cacher(path, **options)
@@ -1074,7 +1115,7 @@ def smart_cache(path, cacher=SmartCacher, **options):
1074
1115
  def smart_touch(path: PathLike):
1075
1116
  """Create a new file on path
1076
1117
 
1077
- param path: Path to create file
1118
+ :param path: Path to create file
1078
1119
  """
1079
1120
  with smart_open(path, "w"):
1080
1121
  pass
@@ -1083,9 +1124,9 @@ def smart_touch(path: PathLike):
1083
1124
  def smart_getmd5(path: PathLike, recalculate: bool = False, followlinks: bool = False):
1084
1125
  """Get md5 value of file
1085
1126
 
1086
- param path: File path
1087
- param recalculate: calculate md5 in real-time or not return s3 etag when path is s3
1088
- param followlinks: If is True, calculate md5 for real file
1127
+ :param path: File path
1128
+ :param recalculate: calculate md5 in real-time or not return s3 etag when path is s3
1129
+ :param followlinks: If is True, calculate md5 for real file
1089
1130
  """
1090
1131
  return SmartPath(path).md5(recalculate=recalculate, followlinks=followlinks)
1091
1132
 
@@ -1110,9 +1151,9 @@ def smart_concat(src_paths: List[PathLike], dst_path: PathLike) -> None:
1110
1151
  if not src_paths:
1111
1152
  return
1112
1153
 
1113
- dst_protocol, _ = SmartPath._extract_protocol(dst_path)
1154
+ dst_protocol = SmartPath._extract_protocol(dst_path)
1114
1155
  for src_path in src_paths:
1115
- src_protocol, _ = SmartPath._extract_protocol(src_path)
1156
+ src_protocol = SmartPath._extract_protocol(src_path)
1116
1157
  if src_protocol != dst_protocol:
1117
1158
  concat_func = _default_concat_func
1118
1159
  break