megfile 3.1.6__py3-none-any.whl → 4.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. megfile/cli.py +12 -7
  2. megfile/config.py +34 -44
  3. megfile/fs.py +169 -11
  4. megfile/fs_path.py +183 -259
  5. megfile/hdfs.py +106 -5
  6. megfile/hdfs_path.py +34 -90
  7. megfile/http.py +50 -1
  8. megfile/http_path.py +27 -65
  9. megfile/interfaces.py +1 -8
  10. megfile/lib/base_prefetch_reader.py +62 -78
  11. megfile/lib/combine_reader.py +5 -0
  12. megfile/lib/glob.py +3 -6
  13. megfile/lib/hdfs_prefetch_reader.py +7 -7
  14. megfile/lib/http_prefetch_reader.py +6 -6
  15. megfile/lib/s3_buffered_writer.py +67 -64
  16. megfile/lib/s3_cached_handler.py +1 -2
  17. megfile/lib/s3_limited_seekable_writer.py +3 -7
  18. megfile/lib/s3_memory_handler.py +1 -2
  19. megfile/lib/s3_pipe_handler.py +1 -2
  20. megfile/lib/s3_prefetch_reader.py +15 -20
  21. megfile/lib/s3_share_cache_reader.py +8 -5
  22. megfile/pathlike.py +397 -401
  23. megfile/s3.py +118 -17
  24. megfile/s3_path.py +150 -224
  25. megfile/sftp.py +300 -10
  26. megfile/sftp_path.py +46 -322
  27. megfile/smart.py +33 -27
  28. megfile/smart_path.py +9 -14
  29. megfile/stdio.py +1 -1
  30. megfile/stdio_path.py +2 -2
  31. megfile/utils/__init__.py +11 -4
  32. megfile/version.py +1 -1
  33. {megfile-3.1.6.dist-info → megfile-4.0.0.dist-info}/METADATA +7 -7
  34. megfile-4.0.0.dist-info/RECORD +52 -0
  35. {megfile-3.1.6.dist-info → megfile-4.0.0.dist-info}/WHEEL +1 -1
  36. {megfile-3.1.6.dist-info → megfile-4.0.0.dist-info}/top_level.txt +0 -2
  37. docs/conf.py +0 -65
  38. megfile-3.1.6.dist-info/RECORD +0 -55
  39. scripts/convert_results_to_sarif.py +0 -91
  40. scripts/generate_file.py +0 -344
  41. {megfile-3.1.6.dist-info → megfile-4.0.0.dist-info}/LICENSE +0 -0
  42. {megfile-3.1.6.dist-info → megfile-4.0.0.dist-info}/LICENSE.pyre +0 -0
  43. {megfile-3.1.6.dist-info → megfile-4.0.0.dist-info}/entry_points.txt +0 -0
megfile/sftp_path.py CHANGED
@@ -6,22 +6,21 @@ import os
6
6
  import random
7
7
  import shlex
8
8
  import socket
9
- import subprocess
9
+ import subprocess # nosec B404
10
10
  from functools import cached_property
11
11
  from logging import getLogger as get_logger
12
12
  from stat import S_ISDIR, S_ISLNK, S_ISREG
13
- from typing import IO, BinaryIO, Callable, Iterator, List, Optional, Tuple, Union
13
+ from typing import IO, BinaryIO, Callable, Iterator, List, Optional, Tuple, Type, Union
14
14
  from urllib.parse import urlsplit, urlunsplit
15
15
 
16
16
  import paramiko
17
17
 
18
- from megfile.config import SFTP_MAX_RETRY_TIMES
18
+ from megfile.config import SFTP_HOST_KEY_POLICY, SFTP_MAX_RETRY_TIMES
19
19
  from megfile.errors import SameFileError, _create_missing_ok_generator, patch_method
20
20
  from megfile.interfaces import ContextIterator, FileEntry, PathLike, StatResult
21
21
  from megfile.lib.compare import is_same_file
22
22
  from megfile.lib.compat import fspath
23
23
  from megfile.lib.glob import FSFunc, iglob
24
- from megfile.lib.joinpath import uri_join
25
24
  from megfile.pathlike import URIPath
26
25
  from megfile.smart_path import SmartPath
27
26
  from megfile.utils import calculate_md5, thread_local
@@ -31,23 +30,13 @@ _logger = get_logger(__name__)
31
30
  __all__ = [
32
31
  "SftpPath",
33
32
  "is_sftp",
34
- "sftp_readlink",
35
- "sftp_glob",
36
- "sftp_iglob",
37
- "sftp_glob_stat",
38
- "sftp_resolve",
39
- "sftp_download",
40
- "sftp_upload",
41
- "sftp_path_join",
42
- "sftp_concat",
43
- "sftp_lstat",
44
33
  ]
45
34
 
46
35
  SFTP_USERNAME = "SFTP_USERNAME"
47
- SFTP_PASSWORD = "SFTP_PASSWORD"
36
+ SFTP_PASSWORD = "SFTP_PASSWORD" # nosec B105
48
37
  SFTP_PRIVATE_KEY_PATH = "SFTP_PRIVATE_KEY_PATH"
49
38
  SFTP_PRIVATE_KEY_TYPE = "SFTP_PRIVATE_KEY_TYPE"
50
- SFTP_PRIVATE_KEY_PASSWORD = "SFTP_PRIVATE_KEY_PASSWORD"
39
+ SFTP_PRIVATE_KEY_PASSWORD = "SFTP_PRIVATE_KEY_PASSWORD" # nosec B105
51
40
  SFTP_MAX_UNAUTH_CONN = "SFTP_MAX_UNAUTH_CONN"
52
41
  MAX_RETRIES = SFTP_MAX_RETRY_TIMES
53
42
  DEFAULT_SSH_CONNECT_TIMEOUT = 5
@@ -120,10 +109,11 @@ def _patch_sftp_client_request(
120
109
  port: Optional[int] = None,
121
110
  username: Optional[str] = None,
122
111
  password: Optional[str] = None,
112
+ default_policy: Type[paramiko.MissingHostKeyPolicy] = paramiko.RejectPolicy,
123
113
  ):
124
114
  def retry_callback(error, *args, **kwargs):
125
115
  client.close()
126
- ssh_client = get_ssh_client(hostname, port, username, password)
116
+ ssh_client = get_ssh_client(hostname, port, username, password, default_policy)
127
117
  ssh_client.close()
128
118
  atexit.unregister(ssh_client.close)
129
119
  ssh_key = f"ssh_client:{hostname},{port},{username},{password}"
@@ -134,7 +124,11 @@ def _patch_sftp_client_request(
134
124
  del thread_local[sftp_key]
135
125
 
136
126
  new_sftp_client = get_sftp_client(
137
- hostname=hostname, port=port, username=username, password=password
127
+ hostname=hostname,
128
+ port=port,
129
+ username=username,
130
+ password=password,
131
+ default_policy=default_policy,
138
132
  )
139
133
  client.sock = new_sftp_client.sock
140
134
 
@@ -152,17 +146,24 @@ def _get_sftp_client(
152
146
  port: Optional[int] = None,
153
147
  username: Optional[str] = None,
154
148
  password: Optional[str] = None,
149
+ default_policy: Type[paramiko.MissingHostKeyPolicy] = paramiko.RejectPolicy,
155
150
  ) -> paramiko.SFTPClient:
156
151
  """Get sftp client
157
152
 
158
153
  :returns: sftp client
159
154
  """
160
155
  session = get_ssh_session(
161
- hostname=hostname, port=port, username=username, password=password
156
+ hostname=hostname,
157
+ port=port,
158
+ username=username,
159
+ password=password,
160
+ default_policy=default_policy,
162
161
  )
163
162
  session.invoke_subsystem("sftp")
164
163
  sftp_client = paramiko.SFTPClient(session)
165
- _patch_sftp_client_request(sftp_client, hostname, port, username, password)
164
+ _patch_sftp_client_request(
165
+ sftp_client, hostname, port, username, password, default_policy
166
+ )
166
167
  return sftp_client
167
168
 
168
169
 
@@ -171,6 +172,7 @@ def get_sftp_client(
171
172
  port: Optional[int] = None,
172
173
  username: Optional[str] = None,
173
174
  password: Optional[str] = None,
175
+ default_policy: Type[paramiko.MissingHostKeyPolicy] = paramiko.RejectPolicy,
174
176
  ) -> paramiko.SFTPClient:
175
177
  """Get sftp client
176
178
 
@@ -183,6 +185,7 @@ def get_sftp_client(
183
185
  port,
184
186
  username,
185
187
  password,
188
+ default_policy,
186
189
  )
187
190
 
188
191
 
@@ -191,19 +194,27 @@ def _get_ssh_client(
191
194
  port: Optional[int] = None,
192
195
  username: Optional[str] = None,
193
196
  password: Optional[str] = None,
197
+ default_policy: Type[paramiko.MissingHostKeyPolicy] = paramiko.RejectPolicy,
194
198
  ) -> paramiko.SSHClient:
195
199
  hostname, port, username, password, private_key = provide_connect_info(
196
200
  hostname=hostname, port=port, username=username, password=password
197
201
  )
198
202
 
203
+ policies = {
204
+ "auto": paramiko.AutoAddPolicy,
205
+ "reject": paramiko.RejectPolicy,
206
+ "warning": paramiko.WarningPolicy,
207
+ }
208
+ policy = policies.get(SFTP_HOST_KEY_POLICY, default_policy)() # pyre-ignore[29]
209
+
199
210
  ssh_client = paramiko.SSHClient()
200
- ssh_client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
211
+ ssh_client.set_missing_host_key_policy(policy)
201
212
  max_unauth_connections = int(os.getenv(SFTP_MAX_UNAUTH_CONN, 10))
202
213
  try:
203
214
  fd = os.open(
204
215
  os.path.join(
205
- "/tmp",
206
- f"megfile-sftp-{hostname}-{random.randint(1, max_unauth_connections)}",
216
+ "/tmp", # nosec B108
217
+ f"megfile-sftp-{hostname}-{random.randint(1, max_unauth_connections)}", # nosec B311
207
218
  ),
208
219
  os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
209
220
  )
@@ -237,6 +248,7 @@ def get_ssh_client(
237
248
  port: Optional[int] = None,
238
249
  username: Optional[str] = None,
239
250
  password: Optional[str] = None,
251
+ default_policy: Type[paramiko.MissingHostKeyPolicy] = paramiko.RejectPolicy,
240
252
  ) -> paramiko.SSHClient:
241
253
  return thread_local(
242
254
  f"ssh_client:{hostname},{port},{username},{password}",
@@ -245,6 +257,7 @@ def get_ssh_client(
245
257
  port,
246
258
  username,
247
259
  password,
260
+ default_policy,
248
261
  )
249
262
 
250
263
 
@@ -253,9 +266,10 @@ def get_ssh_session(
253
266
  port: Optional[int] = None,
254
267
  username: Optional[str] = None,
255
268
  password: Optional[str] = None,
269
+ default_policy: Type[paramiko.MissingHostKeyPolicy] = paramiko.RejectPolicy,
256
270
  ) -> paramiko.Channel:
257
271
  def retry_callback(error, *args, **kwargs):
258
- ssh_client = get_ssh_client(hostname, port, username, password)
272
+ ssh_client = get_ssh_client(hostname, port, username, password, default_policy)
259
273
  ssh_client.close()
260
274
  atexit.unregister(ssh_client.close)
261
275
  ssh_key = f"ssh_client:{hostname},{port},{username},{password}"
@@ -270,7 +284,7 @@ def get_ssh_session(
270
284
  max_retries=MAX_RETRIES,
271
285
  should_retry=sftp_should_retry,
272
286
  retry_callback=retry_callback,
273
- )(hostname, port, username, password)
287
+ )(hostname, port, username, password, default_policy)
274
288
 
275
289
 
276
290
  def _open_session(
@@ -278,8 +292,9 @@ def _open_session(
278
292
  port: Optional[int] = None,
279
293
  username: Optional[str] = None,
280
294
  password: Optional[str] = None,
295
+ default_policy: Type[paramiko.MissingHostKeyPolicy] = paramiko.RejectPolicy,
281
296
  ) -> paramiko.Channel:
282
- ssh_client = get_ssh_client(hostname, port, username, password)
297
+ ssh_client = get_ssh_client(hostname, port, username, password, default_policy)
283
298
  transport = ssh_client.get_transport()
284
299
  if not transport:
285
300
  raise paramiko.SSHException("Get transport error")
@@ -302,136 +317,11 @@ def is_sftp(path: PathLike) -> bool:
302
317
  return parts.scheme == "sftp"
303
318
 
304
319
 
305
- def sftp_readlink(path: PathLike) -> "str":
306
- """
307
- Return a SftpPath instance representing the path to which the symbolic link points.
308
-
309
- :param path: Given path
310
- :returns: Return a SftpPath instance representing the path to
311
- which the symbolic link points.
312
- """
313
- return SftpPath(path).readlink().path_with_protocol
314
-
315
-
316
- def sftp_glob(
317
- path: PathLike, recursive: bool = True, missing_ok: bool = True
318
- ) -> List[str]:
319
- """Return path list in ascending alphabetical order,
320
- in which path matches glob pattern
321
-
322
- 1. If doesn't match any path, return empty list
323
- Notice: ``glob.glob`` in standard library returns ['a/'] instead of empty list
324
- when pathname is like `a/**`, recursive is True and directory 'a' doesn't exist.
325
- fs_glob behaves like ``glob.glob`` in standard library under such circumstance.
326
- 2. No guarantee that each path in result is different, which means:
327
- Assume there exists a path `/a/b/c/b/d.txt`
328
- use path pattern like `/**/b/**/*.txt` to glob,
329
- the path above will be returned twice
330
- 3. `**` will match any matched file, directory, symlink and '' by default,
331
- when recursive is `True`
332
- 4. fs_glob returns same as glob.glob(pathname, recursive=True)
333
- in ascending alphabetical order.
334
- 5. Hidden files (filename stars with '.') will not be found in the result
335
-
336
- :param path: Given path
337
- :param pattern: Glob the given relative pattern in the directory represented
338
- by this path
339
- :param recursive: If False, `**` will not search directory recursively
340
- :param missing_ok: If False and target path doesn't match any file,
341
- raise FileNotFoundError
342
- :returns: A list contains paths match `pathname`
343
- """
344
- return list(sftp_iglob(path=path, recursive=recursive, missing_ok=missing_ok))
345
-
346
-
347
- def sftp_glob_stat(
348
- path: PathLike, recursive: bool = True, missing_ok: bool = True
349
- ) -> Iterator[FileEntry]:
350
- """Return a list contains tuples of path and file stat, in ascending alphabetical
351
- order, in which path matches glob pattern
352
-
353
- 1. If doesn't match any path, return empty list
354
- Notice: ``glob.glob`` in standard library returns ['a/'] instead of empty list
355
- when pathname is like `a/**`, recursive is True and directory 'a' doesn't exist.
356
- sftp_glob behaves like ``glob.glob`` in standard library under such circumstance.
357
- 2. No guarantee that each path in result is different, which means:
358
- Assume there exists a path `/a/b/c/b/d.txt`
359
- use path pattern like `/**/b/**/*.txt` to glob,
360
- the path above will be returned twice
361
- 3. `**` will match any matched file, directory, symlink and '' by default,
362
- when recursive is `True`
363
- 4. fs_glob returns same as glob.glob(pathname, recursive=True) in
364
- ascending alphabetical order.
365
- 5. Hidden files (filename stars with '.') will not be found in the result
366
-
367
- :param path: Given path
368
- :param pattern: Glob the given relative pattern in the directory represented
369
- by this path
370
- :param recursive: If False, `**` will not search directory recursively
371
- :param missing_ok: If False and target path doesn't match any file,
372
- raise FileNotFoundError
373
- :returns: A list contains tuples of path and file stat,
374
- in which paths match `pathname`
375
- """
376
- for path in sftp_iglob(path=path, recursive=recursive, missing_ok=missing_ok):
377
- path_object = SftpPath(path)
378
- yield FileEntry(
379
- path_object.name, path_object.path_with_protocol, path_object.lstat()
380
- )
381
-
382
-
383
- def sftp_iglob(
384
- path: PathLike, recursive: bool = True, missing_ok: bool = True
385
- ) -> Iterator[str]:
386
- """Return path iterator in ascending alphabetical order,
387
- in which path matches glob pattern
388
-
389
- 1. If doesn't match any path, return empty list
390
- Notice: ``glob.glob`` in standard library returns ['a/'] instead of empty list
391
- when pathname is like `a/**`, recursive is True and directory 'a' doesn't exist.
392
- fs_glob behaves like ``glob.glob`` in standard library under such circumstance.
393
- 2. No guarantee that each path in result is different, which means:
394
- Assume there exists a path `/a/b/c/b/d.txt`
395
- use path pattern like `/**/b/**/*.txt` to glob,
396
- the path above will be returned twice
397
- 3. `**` will match any matched file, directory, symlink and '' by default,
398
- when recursive is `True`
399
- 4. fs_glob returns same as glob.glob(pathname, recursive=True) in
400
- ascending alphabetical order.
401
- 5. Hidden files (filename stars with '.') will not be found in the result
402
-
403
- :param path: Given path
404
- :param pattern: Glob the given relative pattern in the directory represented
405
- by this path
406
- :param recursive: If False, `**` will not search directory recursively
407
- :param missing_ok: If False and target path doesn't match any file,
408
- raise FileNotFoundError
409
- :returns: An iterator contains paths match `pathname`
410
- """
411
-
412
- for path in SftpPath(path).iglob(
413
- pattern="", recursive=recursive, missing_ok=missing_ok
414
- ):
415
- yield path.path_with_protocol
416
-
417
-
418
- def sftp_resolve(path: PathLike, strict=False) -> "str":
419
- """Equal to fs_realpath
420
-
421
- :param path: Given path
422
- :param strict: Ignore this parameter, just for compatibility
423
- :return: Return the canonical path of the specified filename,
424
- eliminating any symbolic links encountered in the path.
425
- :rtype: SftpPath
426
- """
427
- return SftpPath(path).resolve(strict).path_with_protocol
428
-
429
-
430
320
  def _sftp_scan_pairs(
431
321
  src_url: PathLike, dst_url: PathLike
432
322
  ) -> Iterator[Tuple[PathLike, PathLike]]:
433
323
  for src_file_path in SftpPath(src_url).scan():
434
- content_path = src_file_path[len(src_url) :]
324
+ content_path = src_file_path[len(fspath(src_url)) :]
435
325
  if len(content_path) > 0:
436
326
  dst_file_path = SftpPath(dst_url).joinpath(content_path).path_with_protocol
437
327
  else:
@@ -439,175 +329,6 @@ def _sftp_scan_pairs(
439
329
  yield src_file_path, dst_file_path
440
330
 
441
331
 
442
- def sftp_download(
443
- src_url: PathLike,
444
- dst_url: PathLike,
445
- callback: Optional[Callable[[int], None]] = None,
446
- followlinks: bool = False,
447
- overwrite: bool = True,
448
- ):
449
- """
450
- Downloads a file from sftp to local filesystem.
451
-
452
- :param src_url: source sftp path
453
- :param dst_url: target fs path
454
- :param callback: Called periodically during copy, and the input parameter is
455
- the data size (in bytes) of copy since the last call
456
- :param followlinks: False if regard symlink as file, else True
457
- :param overwrite: whether or not overwrite file when exists, default is True
458
- """
459
- from megfile.fs import is_fs
460
- from megfile.fs_path import FSPath
461
-
462
- if not is_fs(dst_url):
463
- raise OSError(f"dst_url is not fs path: {dst_url}")
464
- if not is_sftp(src_url) and not isinstance(src_url, SftpPath):
465
- raise OSError(f"src_url is not sftp path: {src_url}")
466
-
467
- dst_path = FSPath(dst_url)
468
- if not overwrite and dst_path.exists():
469
- return
470
-
471
- if isinstance(src_url, SftpPath):
472
- src_path = src_url
473
- else:
474
- src_path = SftpPath(src_url)
475
-
476
- if followlinks and src_path.is_symlink():
477
- src_path = src_path.readlink()
478
- if src_path.is_dir():
479
- raise IsADirectoryError("Is a directory: %r" % src_url)
480
- if str(dst_url).endswith("/"):
481
- raise IsADirectoryError("Is a directory: %r" % dst_url)
482
-
483
- dst_path.parent.makedirs(exist_ok=True)
484
-
485
- sftp_callback = None
486
- if callback:
487
- bytes_transferred_before = 0
488
-
489
- def sftp_callback(bytes_transferred: int, _total_bytes: int):
490
- nonlocal bytes_transferred_before
491
- callback(bytes_transferred - bytes_transferred_before) # pyre-ignore[29]
492
- bytes_transferred_before = bytes_transferred
493
-
494
- src_path._client.get(
495
- src_path._real_path, dst_path.path_without_protocol, callback=sftp_callback
496
- )
497
-
498
- src_stat = src_path.stat()
499
- dst_path.utime(src_stat.st_atime, src_stat.st_mtime)
500
- dst_path.chmod(src_stat.st_mode)
501
-
502
-
503
- def sftp_upload(
504
- src_url: PathLike,
505
- dst_url: PathLike,
506
- callback: Optional[Callable[[int], None]] = None,
507
- followlinks: bool = False,
508
- overwrite: bool = True,
509
- ):
510
- """
511
- Uploads a file from local filesystem to sftp server.
512
-
513
- :param src_url: source fs path
514
- :param dst_url: target sftp path
515
- :param callback: Called periodically during copy, and the input parameter is
516
- the data size (in bytes) of copy since the last call
517
- :param overwrite: whether or not overwrite file when exists, default is True
518
- """
519
- from megfile.fs import is_fs
520
- from megfile.fs_path import FSPath
521
-
522
- if not is_fs(src_url):
523
- raise OSError(f"src_url is not fs path: {src_url}")
524
- if not is_sftp(dst_url) and not isinstance(dst_url, SftpPath):
525
- raise OSError(f"dst_url is not sftp path: {dst_url}")
526
-
527
- if followlinks and os.path.islink(src_url):
528
- src_url = os.readlink(src_url)
529
- if os.path.isdir(src_url):
530
- raise IsADirectoryError("Is a directory: %r" % src_url)
531
- if str(dst_url).endswith("/"):
532
- raise IsADirectoryError("Is a directory: %r" % dst_url)
533
-
534
- src_path = FSPath(src_url)
535
- if isinstance(dst_url, SftpPath):
536
- dst_path = dst_url
537
- else:
538
- dst_path = SftpPath(dst_url)
539
- if not overwrite and dst_path.exists():
540
- return
541
-
542
- dst_path.parent.makedirs(exist_ok=True)
543
-
544
- sftp_callback = None
545
- if callback:
546
- bytes_transferred_before = 0
547
-
548
- def sftp_callback(bytes_transferred: int, _total_bytes: int):
549
- nonlocal bytes_transferred_before
550
- callback(bytes_transferred - bytes_transferred_before) # pyre-ignore[29]
551
- bytes_transferred_before = bytes_transferred
552
-
553
- dst_path._client.put(
554
- src_path.path_without_protocol, dst_path._real_path, callback=sftp_callback
555
- )
556
-
557
- src_stat = src_path.stat()
558
- dst_path.utime(src_stat.st_atime, src_stat.st_mtime)
559
- dst_path.chmod(src_stat.st_mode)
560
-
561
-
562
- def sftp_path_join(path: PathLike, *other_paths: PathLike) -> str:
563
- """
564
- Concat 2 or more path to a complete path
565
-
566
- :param path: Given path
567
- :param other_paths: Paths to be concatenated
568
- :returns: Concatenated complete path
569
-
570
- .. note ::
571
-
572
- The difference between this function and ``os.path.join`` is that this function
573
- ignores left side slash (which indicates absolute path) in ``other_paths``
574
- and will directly concat.
575
-
576
- e.g. os.path.join('/path', 'to', '/file') => '/file',
577
- but sftp_path_join('/path', 'to', '/file') => '/path/to/file'
578
- """
579
- return uri_join(fspath(path), *map(fspath, other_paths))
580
-
581
-
582
- def sftp_concat(src_paths: List[PathLike], dst_path: PathLike) -> None:
583
- """Concatenate sftp files to one file.
584
-
585
- :param src_paths: Given source paths
586
- :param dst_path: Given destination path
587
- """
588
- dst_path_obj = SftpPath(dst_path)
589
-
590
- def get_real_path(path: PathLike) -> str:
591
- return SftpPath(path)._real_path
592
-
593
- command = ["cat", *map(get_real_path, src_paths), ">", get_real_path(dst_path)]
594
- exec_result = dst_path_obj._exec_command(command)
595
- if exec_result.returncode != 0:
596
- _logger.error(exec_result.stderr)
597
- raise OSError(f"Failed to concat {src_paths} to {dst_path}")
598
-
599
-
600
- def sftp_lstat(path: PathLike) -> StatResult:
601
- """
602
- Get StatResult of file on sftp, including file size and mtime,
603
- referring to fs_getsize and fs_getmtime
604
-
605
- :param path: Given path
606
- :returns: StatResult
607
- """
608
- return SftpPath(path).lstat()
609
-
610
-
611
332
  @SmartPath.register
612
333
  class SftpPath(URIPath):
613
334
  """sftp protocol
@@ -620,6 +341,7 @@ class SftpPath(URIPath):
620
341
  """
621
342
 
622
343
  protocol = "sftp"
344
+ default_policy = paramiko.RejectPolicy
623
345
 
624
346
  def __init__(self, path: "PathLike", *other_paths: "PathLike"):
625
347
  super().__init__(path, *other_paths)
@@ -652,6 +374,7 @@ class SftpPath(URIPath):
652
374
  port=self._urlsplit_parts.port,
653
375
  username=self._urlsplit_parts.username,
654
376
  password=self._urlsplit_parts.password,
377
+ default_policy=self.default_policy,
655
378
  )
656
379
 
657
380
  def _generate_path_object(self, sftp_local_path: str, resolve: bool = False):
@@ -1334,11 +1057,12 @@ class SftpPath(URIPath):
1334
1057
  port=self._urlsplit_parts.port,
1335
1058
  username=self._urlsplit_parts.username,
1336
1059
  password=self._urlsplit_parts.password,
1060
+ default_policy=self.default_policy,
1337
1061
  ) as chan:
1338
1062
  chan.settimeout(timeout)
1339
1063
  if environment:
1340
1064
  chan.update_environment(environment)
1341
- chan.exec_command(" ".join([shlex.quote(arg) for arg in command]))
1065
+ chan.exec_command(" ".join([shlex.quote(arg) for arg in command])) # nosec B601
1342
1066
  stdout = (
1343
1067
  chan.makefile("r", bufsize).read().decode(errors="backslashreplace")
1344
1068
  )
megfile/smart.py CHANGED
@@ -397,7 +397,9 @@ def smart_copy(
397
397
  def _smart_sync_single_file(items: dict):
398
398
  src_root_path = items["src_root_path"]
399
399
  dst_root_path = items["dst_root_path"]
400
- src_file_path = items["src_file_path"]
400
+ src_file_entry = items["src_file_entry"]
401
+ src_file_path = src_file_entry.path
402
+ src_file_stat = src_file_entry.stat
401
403
  callback = items["callback"]
402
404
  followlinks = items["followlinks"]
403
405
  callback_after_copy_file = items["callback_after_copy_file"]
@@ -417,17 +419,17 @@ def _smart_sync_single_file(items: dict):
417
419
  dst_protocol, _ = SmartPath._extract_protocol(dst_abs_file_path)
418
420
  should_sync = True
419
421
  try:
420
- if force:
421
- pass
422
- elif not overwrite and smart_exists(dst_abs_file_path):
423
- should_sync = False
424
- elif smart_exists(dst_abs_file_path) and is_same_file(
425
- smart_stat(src_file_path, follow_symlinks=followlinks),
426
- smart_stat(dst_abs_file_path, follow_symlinks=followlinks),
427
- get_sync_type(src_protocol, dst_protocol),
428
- ):
429
- should_sync = False
430
- except NotImplementedError:
422
+ if not force:
423
+ dst_file_stat = smart_stat(dst_abs_file_path, follow_symlinks=followlinks)
424
+ if not overwrite:
425
+ should_sync = False
426
+ elif is_same_file(
427
+ src_file_stat,
428
+ dst_file_stat,
429
+ get_sync_type(src_protocol, dst_protocol),
430
+ ):
431
+ should_sync = False
432
+ except (NotImplementedError, FileNotFoundError):
431
433
  pass
432
434
 
433
435
  if should_sync:
@@ -513,15 +515,16 @@ def smart_sync(
513
515
  src_path, dst_path = get_traditional_path(src_path), get_traditional_path(dst_path)
514
516
  if not src_file_stats:
515
517
  src_file_stats = smart_scan_stat(src_path, followlinks=followlinks)
518
+ if not smart_exists(dst_path):
519
+ force = True
516
520
 
517
521
  def create_generator():
518
522
  for src_file_entry in src_file_stats:
519
523
  if src_file_entry.name:
520
- src_file_path = src_file_entry.path
521
524
  yield dict(
522
525
  src_root_path=src_path,
523
526
  dst_root_path=dst_path,
524
- src_file_path=src_file_path,
527
+ src_file_entry=src_file_entry,
525
528
  callback=callback,
526
529
  followlinks=followlinks,
527
530
  callback_after_copy_file=callback_after_copy_file,
@@ -671,9 +674,10 @@ def smart_makedirs(path: PathLike, exist_ok: bool = False) -> None:
671
674
  def smart_open(
672
675
  path: PathLike,
673
676
  mode: str = "r",
674
- s3_open_func: Callable[[str, str], BinaryIO] = s3_open,
675
677
  encoding: Optional[str] = None,
676
678
  errors: Optional[str] = None,
679
+ *,
680
+ s3_open_func: Callable[[str, str], BinaryIO] = s3_open,
677
681
  **options,
678
682
  ) -> IO:
679
683
  r"""
@@ -685,16 +689,6 @@ def smart_open(
685
689
  this function create directories automatically, instead of
686
690
  raising FileNotFoundError
687
691
 
688
- Currently, supported protocols are:
689
-
690
- 1. s3: "s3://<bucket>/<key>"
691
-
692
- 2. http(s): http(s) url
693
-
694
- 3. stdio: "stdio://-"
695
-
696
- 4. FS file: Besides above mentioned protocols, other path are considered fs path
697
-
698
692
  Here are a few examples: ::
699
693
 
700
694
  >>> import cv2
@@ -708,12 +702,24 @@ def smart_open(
708
702
 
709
703
  :param path: Given path
710
704
  :param mode: Mode to open file, supports r'[rwa][tb]?\+?'
711
- :param s3_open_func: Function used to open s3_url. Require the function includes 2
712
- necessary parameters, file path and mode
713
705
  :param encoding: encoding is the name of the encoding used to decode or encode
714
706
  the file. This should only be used in text mode.
715
707
  :param errors: errors is an optional string that specifies how encoding and decoding
716
708
  errors are to be handled—this cannot be used in binary mode.
709
+ :param buffering: buffering is an optional integer used to
710
+ set the buffering policy. Only be used when support.
711
+ :param followlinks: follow symbolic link, default `False`. Only be used when support
712
+ :param s3_open_func: Function used to open s3_url. Require the function includes
713
+ 2 necessary parameters, file path and mode. only be used in s3 path.
714
+ :param max_workers: Max download / upload thread number, `None` by default,
715
+ will use global thread pool with 8 threads. Only be used in s3, http, hdfs.
716
+ :param max_buffer_size: Max cached buffer size in memory, 128MB by default.
717
+ Set to `0` will disable cache. Only be used in s3, http, hdfs.
718
+ :param block_forward: How many blocks of data cached from offset position, only for
719
+ read mode. Only be used in s3, http, hdfs.
720
+ :param block_size: Size of single block. Each block will be uploaded by single
721
+ thread. Only be used in s3, http, hdfs.
722
+
717
723
  :returns: File-Like object
718
724
  :raises: FileNotFoundError, IsADirectoryError, ValueError
719
725
  """