megfile 4.2.3__py3-none-any.whl → 4.2.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
megfile/sftp2_path.py ADDED
@@ -0,0 +1,1090 @@
1
+ import getpass
2
+ import hashlib
3
+ import io
4
+ import os
5
+ import shlex
6
+ import socket
7
+ import subprocess
8
+ from functools import cached_property
9
+ from logging import getLogger as get_logger
10
+ from stat import S_ISDIR, S_ISLNK, S_ISREG
11
+ from typing import IO, BinaryIO, Callable, Iterator, List, Optional, Tuple, Union
12
+ from urllib.parse import urlsplit, urlunsplit
13
+
14
+ import ssh2.session # type: ignore
15
+ import ssh2.sftp # type: ignore
16
+ from ssh2.exceptions import SFTPProtocolError # type: ignore
17
+ from ssh2.sftp_handle import SFTPAttributes # type: ignore
18
+
19
+ from megfile.config import SFTP_MAX_RETRY_TIMES
20
+ from megfile.errors import SameFileError, _create_missing_ok_generator
21
+ from megfile.interfaces import ContextIterator, FileEntry, PathLike, StatResult
22
+ from megfile.lib.compare import is_same_file
23
+ from megfile.lib.compat import fspath
24
+ from megfile.lib.glob import FSFunc, iglob
25
+ from megfile.pathlike import URIPath
26
+ from megfile.smart_path import SmartPath
27
+ from megfile.utils import calculate_md5, copyfileobj, thread_local
28
+
29
# Module-level logger, named after this module.
_logger = get_logger(__name__)

# Public names of this module.
__all__ = [
    "Sftp2Path",
    "is_sftp2",
]

# Names of the environment variables consulted for connection settings.
# User name fallback, read in provide_connect_info().
SFTP2_USERNAME = "SFTP2_USERNAME"
# Password fallback, read in provide_connect_info().
SFTP2_PASSWORD = "SFTP2_PASSWORD"
# Path of the private key file, read in get_private_key().
SFTP2_PRIVATE_KEY_PATH = "SFTP2_PRIVATE_KEY_PATH"
# NOTE(review): not referenced in the visible part of this file.
SFTP2_PRIVATE_KEY_TYPE = "SFTP2_PRIVATE_KEY_TYPE"
# Passphrase of the private key, read in get_private_key().
SFTP2_PRIVATE_KEY_PASSWORD = "SFTP2_PRIVATE_KEY_PASSWORD"
# NOTE(review): not referenced in the visible part of this file.
SFTP2_MAX_UNAUTH_CONN = "SFTP2_MAX_UNAUTH_CONN"
# Retry budget, shared with the generic SFTP configuration value.
MAX_RETRIES = SFTP_MAX_RETRY_TIMES
# Timeout applied to the TCP socket in _get_ssh2_session().
DEFAULT_SSH_CONNECT_TIMEOUT = 5
# NOTE(review): not referenced in the visible part of this file.
DEFAULT_SSH_KEEPALIVE_INTERVAL = 15

# SFTP2-specific buffer sizes and chunk sizes
SFTP2_BUFFER_SIZE = 1 * 2**20  # 1MB buffer for file operations
48
+
49
+
50
def _make_stat(stat) -> StatResult:
    """Build a ``StatResult`` from an ssh2-python attributes object.

    :param stat: Attributes object exposing ``filesize``, ``mtime`` and
        ``permissions``; a falsy value yields an all-zero result
    :returns: StatResult whose ``extra`` field keeps the original object
    """
    if stat:
        # ssh2-python names these differently than paramiko does
        # ('permissions' rather than 'st_mode').
        size = getattr(stat, "filesize", 0)
        mtime = getattr(stat, "mtime", 0.0)
        mode = getattr(stat, "permissions", 0)
    else:
        size, mtime, mode = 0, 0.0, 0

    return StatResult(
        size=size,
        mtime=mtime,
        isdir=S_ISDIR(mode),
        islnk=S_ISLNK(mode),
        extra=stat,
    )
65
+
66
+
67
def get_private_key():
    """Look up the private key configured through environment variables.

    :returns: ``(key_path, passphrase)`` when ``SFTP2_PRIVATE_KEY_PATH`` is
        set (the passphrase is ``""`` when none is configured), else ``None``
    :raises FileNotFoundError: If the configured key file does not exist
    """
    key_path = os.getenv(SFTP2_PRIVATE_KEY_PATH)
    if not key_path:
        return None
    if not os.path.exists(key_path):
        raise FileNotFoundError(f"Private key file not exist: '{key_path}'")
    passphrase = os.getenv(SFTP2_PRIVATE_KEY_PASSWORD)
    return key_path, passphrase or ""
78
+
79
+
80
def provide_connect_info(
    hostname: str,
    port: Optional[int] = None,
    username: Optional[str] = None,
    password: Optional[str] = None,
):
    """Fill in missing connection settings from defaults and the environment.

    :param hostname: Host to connect to, returned unchanged
    :param port: Port number; 22 is used when falsy
    :param username: User name; falls back to ``SFTP2_USERNAME`` and then to
        the current system user
    :param password: Password; falls back to ``SFTP2_PASSWORD``
    :returns: ``(hostname, port, username, password, private_key)``
    """
    port = port or 22
    # Without an explicit or configured name, use the current system user.
    username = username or os.getenv(SFTP2_USERNAME) or getpass.getuser()
    password = password or os.getenv(SFTP2_PASSWORD)
    private_key = get_private_key()
    return hostname, port, username, password, private_key
98
+
99
+
100
def sftp2_should_retry(error: Exception) -> bool:
    """Tell whether *error* is a transient failure worth retrying.

    :param error: Exception raised by an SFTP operation
    :returns: True for connection errors, socket timeouts, and ``OSError``
        whose message mentions a closed socket or an unassignable address
    """
    if isinstance(error, (ConnectionError, socket.timeout)):
        return True
    if not isinstance(error, OSError):
        return False
    message = str(error)
    retryable_fragments = ("Socket is closed", "Cannot assign requested address")
    return any(fragment in message for fragment in retryable_fragments)
109
+
110
+
111
def _authenticate_ssh2_session(session, username, password, private_key) -> bool:
    """Try each authentication method on *session*; return True on success.

    Order: configured private key, password, SSH agent, then the default key
    files under ``~/.ssh``. Failures of one method are logged at debug level
    and the next method is tried.
    """
    if not username:
        # Every method below needs a user name.
        return False

    # 1. A configured private key takes precedence.
    if private_key:
        try:
            key_path, passphrase = private_key
            result = session.userauth_publickey_fromfile(
                username,
                key_path,
                passphrase=passphrase,
            )
            if result == 0:  # 0 indicates success in ssh2-python
                _logger.debug(f"Authentication successed with key: {key_path}")
                return True
        except Exception as e:
            _logger.debug(f"Private key authentication failed: {type(e).__name__}: {e}")

    # 2. Password authentication, when a password is available.
    if password:
        try:
            result = session.userauth_password(username, password)
            if result == 0:
                _logger.debug("Authentication successed with password")
                return True
        except Exception as e:
            _logger.debug(f"Password authentication failed: {type(e).__name__}: {e}")

    # 3. SSH agent (ssh2-python exposes agent_init() and agent_auth()).
    try:
        session.agent_init()
        session.agent_auth(username)
        _logger.debug("Successfully authenticated with SSH agent")
        return True
    except Exception as e:
        _logger.debug(f"SSH agent authentication failed: {type(e).__name__}: {e}")

    # 4. Default key files (~/.ssh/id_rsa, ~/.ssh/id_dsa, ...).
    default_key_paths = [
        os.path.expanduser("~/.ssh/id_rsa"),
        os.path.expanduser("~/.ssh/id_dsa"),
        os.path.expanduser("~/.ssh/id_ecdsa"),
        os.path.expanduser("~/.ssh/id_ed25519"),
    ]
    for key_path in default_key_paths:
        if not os.path.exists(key_path):
            continue
        try:
            result = session.userauth_publickey_fromfile(
                username,
                key_path,  # private key file path
            )
            if result == 0:
                _logger.debug(f"Successfully authenticated with key: {key_path}")
                return True
        except Exception as e:
            _logger.debug(f"Public key authentication with {key_path} failed: {e}")

    return False


def _get_ssh2_session(
    hostname: str,
    port: Optional[int] = None,
    username: Optional[str] = None,
    password: Optional[str] = None,
) -> ssh2.session.Session:
    """Open a TCP connection and return an authenticated SSH2 session.

    :param hostname: Host to connect to
    :param port: Port number, completed by ``provide_connect_info``
    :param username: User name, completed by ``provide_connect_info``
    :param password: Password, completed by ``provide_connect_info``
    :returns: Authenticated ``ssh2.session.Session``
    :raises ValueError: If no authentication method succeeds
    :raises OSError: If the TCP connection cannot be established
    """
    hostname, port, username, password, private_key = provide_connect_info(
        hostname=hostname, port=port, username=username, password=password
    )

    # create_connection() resolves both IPv4 and IPv6 addresses and closes
    # its own socket when connecting fails.
    sock = socket.create_connection(
        (hostname, port), timeout=DEFAULT_SSH_CONNECT_TIMEOUT
    )
    try:
        session = ssh2.session.Session()
        session.handshake(sock)

        if not _authenticate_ssh2_session(session, username, password, private_key):
            raise ValueError(
                f"Authentication failed for {username}@{hostname}. "
                "Please check your SSH configuration, SSH agent, or provide "
                "explicit credentials."
            )
    except BaseException:
        # Do not leak the socket when the handshake or authentication fails.
        sock.close()
        raise

    return session
206
+
207
+
208
def get_ssh2_session(
    hostname: str,
    port: Optional[int] = None,
    username: Optional[str] = None,
    password: Optional[str] = None,
) -> ssh2.session.Session:
    """Return the SSH2 session cached for these connection settings.

    The lookup goes through ``thread_local`` with a key built from all four
    arguments; ``_get_ssh2_session`` is passed as the factory.

    :returns: SSH2 session for the given host, port and credentials
    """
    cache_key = f"ssh2_session:{hostname},{port},{username},{password}"
    return thread_local(
        cache_key, _get_ssh2_session, hostname, port, username, password
    )
223
+
224
+
225
def _get_sftp2_client(
    hostname: str,
    port: Optional[int] = None,
    username: Optional[str] = None,
    password: Optional[str] = None,
) -> ssh2.sftp.SFTP:
    """Initialise an SFTP client on the cached SSH2 session.

    :returns: Result of ``sftp_init()`` on the session for these settings
    """
    return get_ssh2_session(hostname, port, username, password).sftp_init()
235
+
236
+
237
def get_sftp2_client(
    hostname: str,
    port: Optional[int] = None,
    username: Optional[str] = None,
    password: Optional[str] = None,
) -> ssh2.sftp.SFTP:
    """Return the SFTP client cached for these connection settings.

    The lookup goes through ``thread_local`` with a key built from all four
    arguments; ``_get_sftp2_client`` is passed as the factory.

    :returns: SFTP client for the given host, port and credentials
    """
    cache_key = f"sftp2_client:{hostname},{port},{username},{password}"
    return thread_local(
        cache_key, _get_sftp2_client, hostname, port, username, password
    )
252
+
253
+
254
def is_sftp2(path: PathLike) -> bool:
    """Test whether a path uses the sftp2 protocol

    :param path: Path to be tested
    :returns: True if the URL scheme of the path is ``sftp2``, else False
    """
    return urlsplit(fspath(path)).scheme == "sftp2"
263
+
264
+
265
def _sftp2_scan_pairs(
    src_url: PathLike, dst_url: PathLike
) -> Iterator[Tuple[PathLike, PathLike]]:
    """Pair every file found under *src_url* with its destination path.

    The part of each scanned path that follows *src_url* is appended to
    *dst_url*; when nothing follows, *dst_url* itself is the destination.

    :returns: Iterator of ``(source_path, destination_path)`` tuples
    """
    for source_file in Sftp2Path(src_url).scan():
        relative = source_file[len(fspath(src_url)) :]
        if relative:
            target = Sftp2Path(dst_url).joinpath(relative).path_with_protocol
        else:
            target = dst_url
        yield source_file, target
275
+
276
+
277
class Sftp2RawFile(io.RawIOBase):
    """Raw, unbuffered file object wrapping an ssh2-python SFTP handle.

    Intended to sit underneath ``io.BufferedReader`` / ``io.BufferedWriter``,
    which only need ``readinto`` / ``write`` plus the capability queries.
    """

    def __init__(self, sftp_handle, path: str, mode: str = "r"):
        """
        :param sftp_handle: Open handle providing ``read``/``write``/``close``
        :param path: Path of the file, also exposed as ``name``
        :param mode: Mode string the file was opened with
        """
        self.sftp_handle = sftp_handle
        self.path = path
        self.mode = mode
        self.name = path
        self._closed = False

    def _check_open(self) -> None:
        """Raise ``ValueError`` when the file has already been closed."""
        if self._closed:
            raise ValueError("I/O operation on closed file")

    def readable(self) -> bool:
        return "r" in self.mode

    def writable(self) -> bool:
        return "w" in self.mode or "a" in self.mode or "x" in self.mode

    def seekable(self) -> bool:
        return True

    @property
    def closed(self) -> bool:
        return self._closed

    def readinto(self, buffer) -> int:
        """Read into a pre-allocated buffer. Required by BufferedReader.

        :returns: Number of bytes stored in *buffer*; 0 at end of file
        """
        self._check_open()

        # ssh2-python returns (bytes_read, data)
        bytes_read, chunk = self.sftp_handle.read(len(buffer))
        if bytes_read > 0:
            buffer[:bytes_read] = chunk
            return bytes_read
        return 0

    def read(self, size: int = -1) -> bytes:
        """Read up to *size* bytes; read until EOF when *size* is None or negative.

        ``read(0)`` returns ``b""`` without touching the remote handle.
        """
        self._check_open()

        if size is None or size < 0:
            # Read everything in SFTP2_BUFFER_SIZE chunks through readinto.
            chunks = []
            buffer = bytearray(SFTP2_BUFFER_SIZE)
            while True:
                n = self.readinto(buffer)
                if n == 0:
                    break
                chunks.append(bytes(buffer[:n]))
            return b"".join(chunks)

        if size == 0:
            return b""

        buffer = bytearray(size)
        n = self.readinto(buffer)
        return bytes(buffer[:n])

    def write(self, data: bytes) -> int:
        """Write *data* to the handle and return the number of bytes written."""
        self._check_open()
        _, bytes_written = self.sftp_handle.write(bytes(data))
        return bytes_written

    def close(self):
        """Close the underlying handle; later calls are no-ops."""
        if self._closed:
            return
        try:
            self.sftp_handle.close()
        finally:
            # Mark as closed even if the remote close failed, so the handle
            # is not closed a second time.
            self._closed = True

    def flush(self):
        """Flush the file. This is a no-op for SFTP files."""
        pass

    def tell(self) -> int:
        """Return current position. Uses SFTP handle tell methods."""
        self._check_open()

        if hasattr(self.sftp_handle, "tell64"):
            return self.sftp_handle.tell64()
        raise OSError("tell not supported for this SFTP implementation")

    def _file_size(self) -> int:
        """Return the file size reported by the handle, for SEEK_END."""
        fstat = getattr(self.sftp_handle, "fstat", None)
        size = getattr(fstat(), "filesize", None) if fstat is not None else None
        if size is None:
            raise OSError("SEEK_END not supported for SFTP files")
        return size

    def seek(self, offset: int, whence: int = 0) -> int:
        """Seek to position. Uses SFTP handle seek methods.

        :param offset: Offset relative to the position selected by *whence*
        :param whence: 0 (start), 1 (current position) or 2 (end of file)
        :returns: New absolute position
        :raises OSError: On an invalid *whence*, a negative target position,
            or when the handle lacks the required operation
        """
        self._check_open()

        if not hasattr(self.sftp_handle, "seek64"):
            raise OSError("seek not supported for this SFTP implementation")

        if whence == io.SEEK_SET:
            target_pos = offset
        elif whence == io.SEEK_CUR:
            target_pos = self.tell() + offset
        elif whence == io.SEEK_END:
            target_pos = self._file_size() + offset
        else:
            raise OSError(f"invalid whence ({whence}, should be 0, 1, or 2)")

        if target_pos < 0:
            raise OSError("negative seek position")

        self.sftp_handle.seek64(target_pos)
        return target_pos

    def fileno(self) -> int:
        """Return file descriptor. Not supported for SFTP; always -1."""
        return -1

    def isatty(self) -> bool:
        """Return whether this is a tty. Always False for SFTP files."""
        return False

    def truncate(self, size: Optional[int] = None) -> int:
        """Truncate file. Not supported for SFTP."""
        raise OSError("truncate not supported for SFTP files")

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()
408
+
409
+
410
+ @SmartPath.register
411
+ class Sftp2Path(URIPath):
412
+ """sftp2 protocol
413
+
414
+ uri format:
415
+ - absolute path
416
+ - sftp2://[username[:password]@]hostname[:port]//file_path
417
+ - relative path
418
+ - sftp2://[username[:password]@]hostname[:port]/file_path
419
+ """
420
+
421
+ protocol = "sftp2"
422
+
423
def __init__(self, path: "PathLike", *other_paths: "PathLike"):
    """Parse the URL and derive the remote path used for SFTP calls.

    A URL path starting with ``//`` loses those two characters; any other
    path loses all leading slashes. The result always gets a single leading
    ``/``, so both URI forms map to an absolute remote path.
    """
    super().__init__(path, *other_paths)
    split_result = urlsplit(self.path)
    self._urlsplit_parts = split_result
    # Default to absolute path for ssh2, whichever URI form was used.
    self._root_dir = "/"

    url_path = split_result.path
    if url_path.startswith("//"):
        remote_path = url_path[2:]
    else:
        remote_path = url_path.lstrip("/")
    if not remote_path.startswith("/"):
        remote_path = f"/{remote_path}"
    self._real_path = remote_path
439
+
440
@cached_property
def parts(self) -> Tuple[str, ...]:
    """A tuple giving access to the path's various components

    The first element is the URL up to and including the root marker
    (``//`` or ``/``); the remaining elements are the path segments.
    """
    url_path = self._urlsplit_parts.path
    root_marker = "//" if url_path.startswith("//") else "/"
    components = [urlunsplit(self._urlsplit_parts._replace(path=root_marker))]
    remainder = url_path.lstrip("/")
    if remainder:
        components.extend(remainder.split("/"))
    return tuple(components)
452
+
453
@property
def _client(self):
    """SFTP client for the host and credentials embedded in this URL."""
    url = self._urlsplit_parts
    return get_sftp2_client(
        hostname=url.hostname,
        port=url.port,
        username=url.username,
        password=url.password,
    )
461
+
462
@property
def _session(self):
    """SSH session for the host and credentials embedded in this URL.

    Used for executing server-side commands.
    """
    url = self._urlsplit_parts
    return get_ssh2_session(
        hostname=url.hostname,
        port=url.port,
        username=url.username,
        password=url.password,
    )
471
+
472
def _exec_command(self, command: List[str]) -> subprocess.CompletedProcess:
    """Execute a command on the remote server via SSH

    :param command: Argument list; it is shell-quoted with ``shlex.join``
    :returns: ``subprocess.CompletedProcess`` with the exit status and the
        decoded stdout / stderr (undecodable bytes are replaced)
    """
    channel = self._session.open_session()
    stdout = io.BytesIO()
    stderr = io.BytesIO()

    try:
        channel.execute(shlex.join(command))

        while True:
            size, data = channel.read()
            if size > 0:
                stdout.write(data)

            size, data = channel.read_stderr()
            if size > 0:
                stderr.write(data)

            if channel.eof():
                break
    finally:
        # Always release the channel, also when executing or reading fails.
        channel.close()

    # Read the exit status only after closing: the remote side may send it
    # after EOF, so asking earlier can yield 0 for a failed command.
    exit_code = channel.get_exit_status()

    return subprocess.CompletedProcess(
        args=command,
        returncode=exit_code,
        stdout=stdout.getvalue().decode("utf-8", errors="replace"),
        stderr=stderr.getvalue().decode("utf-8", errors="replace"),
    )
513
+
514
def _generate_path_object(self, sftp_local_path: str, resolve: bool = False):
    """Build a path object on the same host for a remote filesystem path.

    :param sftp_local_path: Path on the remote filesystem
    :param resolve: Force the absolute ``//`` URL form
    :returns: Path object created through ``from_path``
    """
    if not resolve and self._root_dir != "/":
        relative = os.path.relpath(sftp_local_path, start=self._root_dir)
        url_path = "/" if relative == "." else relative
    else:
        url_path = f"//{sftp_local_path.lstrip('/')}"
    return self.from_path(urlunsplit(self._urlsplit_parts._replace(path=url_path)))
523
+
524
def exists(self, followlinks: bool = False) -> bool:
    """
    Test if the path exists

    :param followlinks: False if regard symlink as file, else True
    :returns: True if ``stat`` succeeds, False if it raises FileNotFoundError
    """
    try:
        self.stat(follow_symlinks=followlinks)
    except FileNotFoundError:
        return False
    return True
536
+
537
def getmtime(self, follow_symlinks: bool = False) -> float:
    """Get last-modified time of the file on the given path

    :param follow_symlinks: Passed through to ``stat``
    :returns: The ``mtime`` field of the stat result
    """
    stat_result = self.stat(follow_symlinks=follow_symlinks)
    return stat_result.mtime
540
+
541
def getsize(self, follow_symlinks: bool = False) -> int:
    """Get file size on the given file path (in bytes)

    :param follow_symlinks: Passed through to ``stat``
    :returns: The ``size`` field of the stat result
    """
    stat_result = self.stat(follow_symlinks=follow_symlinks)
    return stat_result.size
544
+
545
def glob(
    self, pattern, recursive: bool = True, missing_ok: bool = True
) -> List["Sftp2Path"]:
    """Return path list in ascending alphabetical order

    Collects everything produced by ``iglob`` with the same arguments.
    """
    matches = self.iglob(pattern=pattern, recursive=recursive, missing_ok=missing_ok)
    return [*matches]
552
+
553
def glob_stat(
    self, pattern, recursive: bool = True, missing_ok: bool = True
) -> Iterator[FileEntry]:
    """Yield a ``FileEntry`` (name, path, lstat result) for every glob match"""
    matches = self.iglob(pattern=pattern, recursive=recursive, missing_ok=missing_ok)
    for match in matches:
        yield FileEntry(match.name, match.path, match.lstat())
561
+
562
def iglob(
    self, pattern, recursive: bool = True, missing_ok: bool = True
) -> Iterator["Sftp2Path"]:
    """Return path iterator in ascending alphabetical order

    :param pattern: Glob pattern joined onto this path; when falsy, this
        path itself is used as the pattern
    :param recursive: Passed through to the generic ``iglob`` helper
    :param missing_ok: Passed to ``_create_missing_ok_generator`` together
        with the FileNotFoundError to use when nothing matches
    """
    target = self.joinpath(pattern) if pattern else self
    glob_path = target.path_with_protocol

    def list_directory(dirname: str) -> Iterator[Tuple[str, bool]]:
        # Sorted (name, is_dir) pairs of one remote directory.
        listing = [
            (entry.name, entry.is_dir())
            for entry in self.from_path(dirname).scandir()
        ]
        listing.sort()
        yield from listing

    def path_exists(path: PathLike, followlinks: bool = False):
        return self.from_path(path).exists(followlinks=followlinks)

    def path_is_dir(path: PathLike, followlinks: bool = False):
        return self.from_path(path).is_dir(followlinks=followlinks)

    fs = FSFunc(path_exists, path_is_dir, list_directory)
    matches = _create_missing_ok_generator(
        iglob(fspath(glob_path), recursive=recursive, fs=fs),
        missing_ok,
        FileNotFoundError(f"No match any file: {glob_path!r}"),
    )
    for real_path in matches:
        yield self.from_path(real_path)
590
+
591
def is_dir(self, followlinks: bool = False) -> bool:
    """Test if a path is directory

    :param followlinks: Passed to ``stat`` as ``follow_symlinks``
    :returns: ``is_dir()`` of the stat result, or False if the path is missing
    """
    try:
        stat_result = self.stat(follow_symlinks=followlinks)
    except FileNotFoundError:
        return False
    return stat_result.is_dir()
598
+
599
def is_file(self, followlinks: bool = False) -> bool:
    """Test if a path is file

    When the stat result exposes ``st_mode`` the regular-file bit decides;
    otherwise anything that is not a directory counts as a file.

    :param followlinks: Passed to ``stat`` as ``follow_symlinks``
    :returns: False if the path is missing
    """
    try:
        stat_result = self.stat(follow_symlinks=followlinks)
    except FileNotFoundError:
        return False
    if hasattr(stat_result, "st_mode"):
        return S_ISREG(stat_result.st_mode)
    return not stat_result.is_dir()
608
+
609
def listdir(self) -> List[str]:
    """Get the sorted names of all entries under the given sftp2 path"""
    with self.scandir() as entries:
        names = [entry.name for entry in entries]
    names.sort()
    return names
613
+
614
def iterdir(self) -> Iterator["Sftp2Path"]:
    """Yield a path object for every entry under the given sftp2 path"""
    with self.scandir() as entries:
        yield from (self.joinpath(entry.name) for entry in entries)
619
+
620
def load(self) -> BinaryIO:
    """Read all content on specified path and write into memory

    :returns: ``io.BytesIO`` holding the whole file content
    """
    with self.open(mode="rb") as reader:
        content = reader.read()
    return io.BytesIO(content)
625
+
626
def mkdir(self, mode=0o777, parents: bool = False, exist_ok: bool = False):
    """Make a directory on sftp2

    :param mode: Permission bits passed to the SFTP ``mkdir`` call
    :param parents: Create missing ancestor directories first
    :param exist_ok: Return silently when the path already exists
    :raises FileExistsError: If the path exists and *exist_ok* is False
    """
    if self.exists():
        if not exist_ok:
            raise FileExistsError(f"File exists: '{self.path_with_protocol}'")
        return

    if parents:
        # Collect ancestors up to the first existing one, then create them
        # from the outermost inwards.
        missing_ancestors = []
        for ancestor in self.parents:
            if ancestor.exists():
                break
            missing_ancestors.append(ancestor)
        for ancestor in reversed(missing_ancestors):
            ancestor.mkdir(mode=mode, parents=False, exist_ok=True)

    try:
        self._client.mkdir(self._real_path, mode)
    except (OSError, SFTPProtocolError):
        # The client reports failures as SFTPProtocolError (see stat());
        # tolerate the error only when the directory exists by now, e.g.
        # because another process created it in the meantime.
        if not self.exists():
            raise
647
+
648
def realpath(self) -> str:
    """Return the real path of given path, as a URL string with protocol"""
    resolved = self.resolve()
    return resolved.path_with_protocol
651
+
652
def _is_same_backend(self, other: "Sftp2Path") -> bool:
    """Tell whether *other* uses the same host, user, password and port."""
    mine, theirs = self._urlsplit_parts, other._urlsplit_parts
    # Compared lazily and in this order, stopping at the first mismatch.
    return all(
        getattr(mine, field) == getattr(theirs, field)
        for field in ("hostname", "username", "password", "port")
    )
659
+
660
def _is_same_protocol(self, path):
    """Tell whether *path* is an sftp2 path (delegates to ``is_sftp2``)."""
    return is_sftp2(path)
662
+
663
def rename(self, dst_path: PathLike, overwrite: bool = True) -> "Sftp2Path":
    """Rename file on sftp2

    On the same backend the move is a server-side rename (or a sync followed
    by removal when *overwrite* is False). Across backends, directories are
    moved entry by entry and files are copied and then unlinked.

    :param dst_path: Destination path; must be an sftp2 path
    :param overwrite: Whether existing destination files may be replaced
    :returns: Path object of the destination
    :raises OSError: If *dst_path* is not an sftp2 path
    """
    if not self._is_same_protocol(dst_path):
        raise OSError(f"Not a {self.protocol} path: {dst_path!r}")

    dst_path = self.from_path(str(dst_path).rstrip("/"))
    src_stat = self.stat()

    if self._is_same_backend(dst_path):
        if overwrite:
            dst_path.remove(missing_ok=True)
            self._client.rename(self._real_path, dst_path._real_path)
        else:
            self.sync(dst_path, overwrite=overwrite)
            self.remove(missing_ok=True)
    elif self.is_dir():
        for file_entry in self.scandir():
            # Pass overwrite down so nested files honour the caller's choice.
            self.from_path(file_entry.path).rename(
                dst_path.joinpath(file_entry.name), overwrite=overwrite
            )
        self._client.rmdir(self._real_path)
    else:
        if overwrite or not dst_path.exists():
            with self.open("rb") as fsrc:
                with dst_path.open("wb") as fdst:
                    copyfileobj(fsrc, fdst)
        self.unlink()

    # NOTE(review): st_atime / st_mode come from StatResult, which is defined
    # outside this file; confirm they reflect the ssh2 attributes
    # ('atime' / 'permissions') kept in the result's ``extra`` field.
    dst_path.utime(src_stat.st_atime, src_stat.st_mtime)
    dst_path.chmod(src_stat.st_mode)
    return dst_path
695
+
696
def replace(self, dst_path: PathLike, overwrite: bool = True) -> "Sftp2Path":
    """Move file on sftp2; an alias for ``rename`` with the same arguments"""
    moved = self.rename(dst_path=dst_path, overwrite=overwrite)
    return moved
699
+
700
def remove(self, missing_ok: bool = False) -> None:
    """Remove the file or directory on sftp2

    Directories are emptied recursively before being removed.

    :param missing_ok: Return silently when the path does not exist
    """
    if missing_ok and not self.exists():
        return
    if not self.is_dir():
        self._client.unlink(self._real_path)
        return
    for child in self.scandir():
        self.from_path(child.path).remove(missing_ok=missing_ok)
    self._client.rmdir(self._real_path)
710
+
711
def scan(self, missing_ok: bool = True, followlinks: bool = False) -> Iterator[str]:
    """Iteratively traverse only files in given directory

    :returns: Iterator over the ``path`` of every entry from ``scan_stat``
    """
    entries = self.scan_stat(missing_ok=missing_ok, followlinks=followlinks)
    yield from (entry.path for entry in entries)
716
+
717
def scan_stat(
    self, missing_ok: bool = True, followlinks: bool = False
) -> Iterator[FileEntry]:
    """Iteratively traverse only files in given directory

    :param missing_ok: Passed to ``_create_missing_ok_generator`` together
        with the FileNotFoundError to use when nothing is found
    :param followlinks: Passed to ``stat`` as ``follow_symlinks``
    :returns: Iterator of ``FileEntry`` objects for the files found
    """

    def create_generator() -> Iterator[FileEntry]:
        try:
            stat = self.stat(follow_symlinks=followlinks)
        except FileNotFoundError:
            return
        if not stat.is_dir():
            # Reuse the stat fetched above instead of asking the server again.
            yield FileEntry(self.name, self.path_with_protocol, stat)
            return

        for name in self.listdir():
            current_path = self.joinpath(name)
            if current_path.is_dir():
                yield from current_path.scan_stat(
                    missing_ok=missing_ok, followlinks=followlinks
                )
            else:
                yield FileEntry(
                    current_path.name,
                    current_path.path_with_protocol,
                    current_path.stat(follow_symlinks=followlinks),
                )

    return _create_missing_ok_generator(
        create_generator(),
        missing_ok,
        FileNotFoundError(f"No match any file in: {self.path_with_protocol!r}"),
    )
753
+
754
def scandir(self) -> ContextIterator:
    """Get all content of given file path

    A symlink is replaced by the path returned from ``readlink`` before
    listing.

    :returns: ``ContextIterator`` over ``FileEntry`` objects, skipping the
        ``.`` and ``..`` entries
    :raises NotADirectoryError: If the path cannot be stat-ed or is neither
        a directory nor a symlink
    """
    try:
        link_stat = self.stat(follow_symlinks=False)
    except Exception:
        raise NotADirectoryError(f"Not a directory: '{self.path_with_protocol}'")

    if link_stat.is_symlink():
        target_path = self.readlink()._real_path
    elif link_stat.is_dir():
        target_path = self._real_path
    else:
        raise NotADirectoryError(f"Not a directory: '{self.path_with_protocol}'")

    def create_generator():
        # Use opendir and readdir from ssh2-python
        handle = self._client.opendir(target_path)
        try:
            listing = handle.readdir()
            records = list(listing) if listing else []
            for _name_len, raw_name, attributes in records:
                name = raw_name.decode("utf-8")
                if name in (".", ".."):
                    continue
                yield FileEntry(
                    name,
                    self.joinpath(name).path_with_protocol,
                    _make_stat(attributes),
                )
        finally:
            handle.close()

    return ContextIterator(create_generator())
793
+
794
def stat(self, follow_symlinks=True) -> StatResult:
    """Get StatResult of file on sftp2

    :param follow_symlinks: Use the client's ``stat`` when True, ``lstat``
        when False
    :raises FileNotFoundError: If the client raises ``SFTPProtocolError``
    """
    try:
        client = self._client
        query = client.stat if follow_symlinks else client.lstat
        return _make_stat(query(self._real_path))
    except SFTPProtocolError as e:  # pytype: disable=mro-error
        raise FileNotFoundError(
            f"No such file or directory: {self.path_with_protocol!r}"
        ) from e
806
+
807
def lstat(self) -> StatResult:
    """Get StatResult without following symlinks"""
    link_stat = self.stat(follow_symlinks=False)
    return link_stat
810
+
811
def unlink(self, missing_ok: bool = False) -> None:
    """Remove the file on sftp2

    :param missing_ok: Return silently when the path does not exist
    """
    already_gone = missing_ok and not self.exists()
    if not already_gone:
        self._client.unlink(self._real_path)
816
+
817
def walk(
    self, followlinks: bool = False
) -> Iterator[Tuple[str, List[str], List[str]]]:
    """Generate the file names in a directory tree by walking the tree top-down

    Nothing is yielded when the path does not exist or is a file.

    :param followlinks: Passed to the initial ``exists`` / ``is_file`` checks
    :returns: Iterator of ``(root, dirs, files)`` where *root* is a URL with
        protocol and both name lists are sorted
    """
    # Remote paths always use "/", whatever the client's operating system.
    import posixpath

    if not self.exists(followlinks=followlinks):
        return

    if self.is_file(followlinks=followlinks):
        return

    pending = [self._real_path]
    while pending:
        root = pending.pop()
        dirs, files = [], []

        root_path = self._generate_path_object(root)
        with root_path.scandir() as entries:
            for entry in entries:
                if entry.is_dir():
                    dirs.append(entry.name)
                elif entry.is_file():
                    files.append(entry.name)

        dirs = sorted(dirs)
        files = sorted(files)

        yield root_path.path_with_protocol, dirs, files

        # Push in reverse so directories are visited in ascending order.
        pending.extend(posixpath.join(root, directory) for directory in reversed(dirs))
849
+
850
def resolve(self, strict=False) -> "Sftp2Path":
    """Return the canonical path

    The remote path is passed through the client's ``realpath``; *strict*
    is accepted but not used by this implementation.
    """
    canonical = self._client.realpath(self._real_path)
    return self._generate_path_object(canonical, resolve=True)
854
+
855
def md5(self, recalculate: bool = False, followlinks: bool = False):
    """Calculate the md5 value of the file

    For a directory, the digest is computed over the md5 hex strings of its
    entries, taken in ``listdir`` order.

    :param recalculate: Passed on to the entries of a directory
    :param followlinks: Passed on to the entries of a directory
    :returns: Hex digest string
    """
    if not self.is_dir():
        with self.open("rb") as src:
            return calculate_md5(src)

    digest = hashlib.md5()
    for file_name in self.listdir():
        child = self.joinpath(file_name)
        child_md5 = child.md5(recalculate=recalculate, followlinks=followlinks)
        digest.update(child_md5.encode())
    return digest.hexdigest()
870
+
871
def symlink(self, dst_path: PathLike) -> None:
    """Create a symbolic link named *dst_path* that points to this path

    :param dst_path: Location of the new link
    :raises FileExistsError: If *dst_path* already exists (links not followed)
    """
    link_path = self.from_path(dst_path)
    if link_path.exists(followlinks=False):
        raise FileExistsError(f"File exists: '{link_path.path_with_protocol}'")
    return self._client.symlink(self._real_path, link_path._real_path)
877
+
878
def readlink(self) -> "Sftp2Path":
    """Return a Sftp2Path instance representing the path to which the
    symbolic link points

    The target is obtained from the client's ``realpath``.

    :raises FileNotFoundError: If the path does not exist
    :raises OSError: If the path is not a symlink or the lookup fails
    """
    location = self.path_with_protocol
    missing_message = f"No such file or directory: '{location}'"
    not_symlink_message = f"Not a symlink: {location!r}"

    if not self.exists():
        raise FileNotFoundError(missing_message)
    if not self.is_symlink():
        raise OSError(not_symlink_message)
    try:
        target = self._client.realpath(self._real_path)
        if not target:
            raise OSError(not_symlink_message)
        if target.startswith("/"):
            return self._generate_path_object(target)
        # A relative target is interpreted against the link's directory.
        return self.parent.joinpath(target)
    except FileNotFoundError:
        raise FileNotFoundError(missing_message)
    except Exception:
        raise OSError(not_symlink_message)
900
+
901
def is_symlink(self) -> bool:
    """Test whether a path is a symbolic link

    :returns: ``is_symlink()`` of the lstat result, or False if the path is
        missing
    """
    try:
        link_stat = self.lstat()
    except FileNotFoundError:
        return False
    return link_stat.is_symlink()
907
+
908
def cwd(self) -> "Sftp2Path":
    """Return current working directory

    Obtained by asking the client for the real path of ``"."``.
    """
    working_dir = self._client.realpath(".")
    return self._generate_path_object(working_dir)
912
+
913
def save(self, file_object: BinaryIO):
    """Write the opened binary stream to path

    :param file_object: Stream that is read completely and written in one go
    """
    with self.open(mode="wb") as writer:
        payload = file_object.read()
        writer.write(payload)
917
+
918
def open(
    self,
    mode: str = "r",
    *,
    buffering=-1,
    encoding: Optional[str] = None,
    errors: Optional[str] = None,
    **kwargs,
) -> IO:
    """Open a file on the path

    :param mode: Must contain one of ``r``, ``w``, ``a`` or ``x``; add ``b``
        for binary access, otherwise a text wrapper is returned
    :param buffering: Accepted but not used; the buffer size is
        ``SFTP2_BUFFER_SIZE``
    :param encoding: Encoding for text mode
    :param errors: Error handling scheme for text mode
    :returns: Buffered binary file object, or a ``TextIOWrapper`` around it
    :raises ValueError: If *mode* names no supported access type
    :raises IsADirectoryError: When writing to a directory
    :raises FileExistsError: When ``x`` is requested and the file exists
    :raises FileNotFoundError: When reading a missing file
    """
    reading = "r" in mode
    writing = "w" in mode or "x" in mode or "a" in mode
    if not (reading or writing):
        # Reject bad modes before touching the server, so no directory is
        # created and no remote handle is opened for nothing.
        raise ValueError(f"Invalid mode: {mode}")

    if writing:
        if self.is_dir():
            raise IsADirectoryError(f"Is a directory: {self.path_with_protocol!r}")
        if "x" in mode and self.exists():
            raise FileExistsError(f"File exists: {self.path_with_protocol!r}")
        self.parent.mkdir(parents=True, exist_ok=True)
    elif not self.exists():
        raise FileNotFoundError(f"No such file: {self.path_with_protocol!r}")

    # Convert mode for ssh2-python
    ssh2_mode = 0
    if reading:
        ssh2_mode |= ssh2.sftp.LIBSSH2_FXF_READ
    if "w" in mode:
        ssh2_mode |= (
            ssh2.sftp.LIBSSH2_FXF_WRITE
            | ssh2.sftp.LIBSSH2_FXF_CREAT
            | ssh2.sftp.LIBSSH2_FXF_TRUNC
        )
    if "a" in mode:
        ssh2_mode |= (
            ssh2.sftp.LIBSSH2_FXF_WRITE
            | ssh2.sftp.LIBSSH2_FXF_CREAT
            | ssh2.sftp.LIBSSH2_FXF_APPEND
        )
    if "x" in mode:
        # Exclusive creation: the open fails if the file appeared meanwhile.
        ssh2_mode |= (
            ssh2.sftp.LIBSSH2_FXF_WRITE
            | ssh2.sftp.LIBSSH2_FXF_CREAT
            | ssh2.sftp.LIBSSH2_FXF_EXCL
        )

    sftp_handle = self._client.open(self._real_path, ssh2_mode, 0o644)
    raw_file = Sftp2RawFile(sftp_handle, self.path, mode)

    # A mode containing "r" gets a reader, everything else a writer.
    buffered_class = io.BufferedReader if reading else io.BufferedWriter
    try:
        fileobj = buffered_class(raw_file, buffer_size=SFTP2_BUFFER_SIZE)
        if "b" not in mode:
            fileobj = io.TextIOWrapper(fileobj, encoding=encoding, errors=errors)
    except BaseException:
        # Do not leak the remote handle when wrapping fails.
        raw_file.close()
        raise

    return fileobj
977
+
978
def chmod(self, mode: int, *, follow_symlinks: bool = True):
    """Change the permission bits of the file at this path.

    :param mode: New permission bits; coerced with ``int``.
    :param follow_symlinks: Not read by this implementation.
    :returns: Whatever the client's ``setstat`` call returns.
    """
    # NOTE(review): follow_symlinks is accepted but never consulted here.
    # NOTE(review): only ``permissions`` is assigned; presumably the server
    # honours it without an explicit attribute flags mask - TODO confirm.
    attrs = SFTPAttributes()
    attrs.permissions = int(mode)
    result = self._client.setstat(self._real_path, attrs)
    return result
983
+
984
def absolute(self) -> "Sftp2Path":
    """Return the absolute form of this path.

    This is a thin alias: the work is delegated entirely to ``resolve()``.
    """
    resolved = self.resolve()
    return resolved
987
+
988
def rmdir(self):
    """Remove this directory, which must contain no entries.

    :returns: Whatever the client's ``rmdir`` call returns.
    :raises OSError: If ``listdir()`` reports at least one entry.
    """
    entries = self.listdir()
    if len(entries) == 0:
        return self._client.rmdir(self._real_path)
    raise OSError(f"Directory not empty: '{self.path_with_protocol}'")
993
+
994
def copy(
    self,
    dst_path: PathLike,
    callback: Optional[Callable[[int], None]] = None,
    followlinks: bool = False,
    overwrite: bool = True,
):
    """Copy the file to the given destination path

    :param dst_path: Destination path; must use the same protocol as this
        path and must not end with ``/``.
    :param callback: Optional progress hook. On the same-backend route it is
        called once with the source size after the copy; otherwise it is
        handed to ``copyfileobj``.
    :param followlinks: When true and this path is a symlink, the copy is
        performed from the path returned by ``readlink()``.
    :param overwrite: When false, an existing destination is left untouched.
    :raises OSError: If ``dst_path`` has a different protocol, or the remote
        ``cp`` command exits with a non-zero return code.
    :raises IsADirectoryError: If ``dst_path`` ends with ``/`` or this path
        is a directory.
    :raises SameFileError: If source and destination resolve to the same
        real path on the same backend.
    """
    if followlinks and self.is_symlink():
        # Forward ``overwrite`` so overwrite=False is still honoured when the
        # copy is re-dispatched to the link target. ``followlinks`` is left
        # at its default, as before, so a cyclic link cannot recurse forever.
        return self.readlink().copy(
            dst_path=dst_path, callback=callback, overwrite=overwrite
        )

    if not self._is_same_protocol(dst_path):
        raise OSError(f"Not a {self.protocol} path: {dst_path!r}")
    if str(dst_path).endswith("/"):
        raise IsADirectoryError(f"Is a directory: {dst_path!r}")

    if self.is_dir():
        raise IsADirectoryError(f"Is a directory: {self.path_with_protocol!r}")

    if not overwrite and self.from_path(dst_path).exists():
        return

    # Make sure the destination's parent directory exists before copying.
    self.from_path(os.path.dirname(fspath(dst_path))).makedirs(exist_ok=True)
    dst_path = self.from_path(dst_path)

    if self._is_same_backend(dst_path):
        if self._real_path == dst_path._real_path:
            raise SameFileError(
                f"'{self.path}' and '{dst_path.path}' are the same file"
            )
        # Same server - use server-side copy command for efficiency
        exec_result = self._exec_command(
            [
                "cp",
                self._real_path,
                dst_path._real_path,
            ]
        )

        if exec_result.returncode != 0:
            _logger.error(exec_result.stderr)
            raise OSError(
                f"Failed to copy file, returncode: {exec_result.returncode}, "
                f"{exec_result.stderr}"
            )

        if callback:
            # The remote command reports no progress, so report the whole
            # size in a single call once it has finished.
            callback(self.stat(follow_symlinks=followlinks).size)

    else:
        # Fallback to traditional SFTP copy (download then upload)
        with self.open("rb") as fsrc:
            with dst_path.open("wb") as fdst:
                copyfileobj(fsrc, fdst, callback)

    # Carry the source's timestamps and mode bits over to the destination.
    src_stat = self.stat()
    dst_path.utime(src_stat.st_atime, src_stat.st_mtime)
    dst_path.chmod(src_stat.st_mode)
1052
+
1053
def sync(
    self,
    dst_path: PathLike,
    followlinks: bool = False,
    force: bool = False,
    overwrite: bool = True,
):
    """Synchronise this file or directory tree to ``dst_path``.

    Every (source, destination) pair yielded by ``_sftp2_scan_pairs`` is
    copied unless one of the skip rules below applies.

    :param dst_path: Destination root; must use the same protocol.
    :param followlinks: Forwarded to each per-file ``copy`` call.
    :param force: When true, copy every pair without any skip check.
    :param overwrite: When false, pairs whose destination exists are skipped.
    :raises OSError: If ``dst_path`` has a different protocol.
    """
    if not self._is_same_protocol(dst_path):
        raise OSError(f"Not a {self.protocol} path: {dst_path!r}")

    pairs = _sftp2_scan_pairs(self.path_with_protocol, dst_path)
    for src_file_path, dst_file_path in pairs:
        target = self.from_path(dst_file_path)
        source = self.from_path(src_file_path)

        if not force:
            # Existing destination and overwriting disabled: leave it alone.
            if not overwrite and target.exists():
                continue
            # Destination already matches the source: nothing to transfer.
            if target.exists() and is_same_file(
                source.stat(), target.stat(), "copy"
            ):
                continue

        source.copy(
            dst_file_path,
            followlinks=followlinks,
            overwrite=True,
        )
1084
+
1085
def utime(self, atime: Union[float, int], mtime: Union[float, int]) -> None:
    """Set the access and modification times of the file.

    :param atime: New access time; truncated with ``int``.
    :param mtime: New modification time; truncated with ``int``.
    """
    # NOTE(review): only ``atime``/``mtime`` are assigned; presumably the
    # server honours them without an explicit attribute flags mask - TODO
    # confirm.
    attrs = SFTPAttributes()
    attrs.atime, attrs.mtime = int(atime), int(mtime)
    self._client.setstat(self._real_path, attrs)