megfile 3.1.6.post1__py3-none-any.whl → 4.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- megfile/cli.py +12 -7
- megfile/config.py +27 -39
- megfile/fs.py +169 -11
- megfile/fs_path.py +183 -259
- megfile/hdfs.py +106 -5
- megfile/hdfs_path.py +34 -90
- megfile/http.py +50 -1
- megfile/http_path.py +27 -65
- megfile/interfaces.py +1 -8
- megfile/lib/base_prefetch_reader.py +62 -78
- megfile/lib/combine_reader.py +5 -0
- megfile/lib/glob.py +3 -6
- megfile/lib/hdfs_prefetch_reader.py +7 -7
- megfile/lib/http_prefetch_reader.py +6 -6
- megfile/lib/s3_buffered_writer.py +67 -64
- megfile/lib/s3_cached_handler.py +1 -2
- megfile/lib/s3_limited_seekable_writer.py +3 -7
- megfile/lib/s3_memory_handler.py +1 -2
- megfile/lib/s3_pipe_handler.py +1 -2
- megfile/lib/s3_prefetch_reader.py +10 -19
- megfile/lib/s3_share_cache_reader.py +8 -5
- megfile/pathlike.py +397 -401
- megfile/s3.py +118 -17
- megfile/s3_path.py +126 -209
- megfile/sftp.py +300 -10
- megfile/sftp_path.py +46 -322
- megfile/smart.py +33 -27
- megfile/smart_path.py +9 -14
- megfile/stdio.py +1 -1
- megfile/stdio_path.py +2 -2
- megfile/utils/__init__.py +3 -4
- megfile/version.py +1 -1
- {megfile-3.1.6.post1.dist-info → megfile-4.0.0.dist-info}/METADATA +7 -7
- megfile-4.0.0.dist-info/RECORD +52 -0
- {megfile-3.1.6.post1.dist-info → megfile-4.0.0.dist-info}/WHEEL +1 -1
- {megfile-3.1.6.post1.dist-info → megfile-4.0.0.dist-info}/top_level.txt +0 -2
- docs/conf.py +0 -65
- megfile-3.1.6.post1.dist-info/RECORD +0 -55
- scripts/convert_results_to_sarif.py +0 -91
- scripts/generate_file.py +0 -344
- {megfile-3.1.6.post1.dist-info → megfile-4.0.0.dist-info}/LICENSE +0 -0
- {megfile-3.1.6.post1.dist-info → megfile-4.0.0.dist-info}/LICENSE.pyre +0 -0
- {megfile-3.1.6.post1.dist-info → megfile-4.0.0.dist-info}/entry_points.txt +0 -0
megfile/sftp.py
CHANGED
|
@@ -1,21 +1,17 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from logging import getLogger as get_logger
|
|
1
3
|
from typing import IO, BinaryIO, Callable, Iterator, List, Optional, Tuple
|
|
2
4
|
|
|
3
5
|
from megfile.interfaces import FileEntry, PathLike, StatResult
|
|
6
|
+
from megfile.lib.compat import fspath
|
|
7
|
+
from megfile.lib.joinpath import uri_join
|
|
4
8
|
from megfile.sftp_path import (
|
|
5
9
|
SftpPath,
|
|
6
10
|
is_sftp,
|
|
7
|
-
sftp_concat,
|
|
8
|
-
sftp_download,
|
|
9
|
-
sftp_glob,
|
|
10
|
-
sftp_glob_stat,
|
|
11
|
-
sftp_iglob,
|
|
12
|
-
sftp_lstat,
|
|
13
|
-
sftp_path_join,
|
|
14
|
-
sftp_readlink,
|
|
15
|
-
sftp_resolve,
|
|
16
|
-
sftp_upload,
|
|
17
11
|
)
|
|
18
12
|
|
|
13
|
+
_logger = get_logger(__name__)
|
|
14
|
+
|
|
19
15
|
__all__ = [
|
|
20
16
|
"is_sftp",
|
|
21
17
|
"sftp_readlink",
|
|
@@ -59,6 +55,300 @@ __all__ = [
|
|
|
59
55
|
]
|
|
60
56
|
|
|
61
57
|
|
|
58
|
+
def sftp_readlink(path: PathLike) -> "str":
    """
    Return the path to which the symbolic link points, as a string.

    :param path: Given path
    :returns: The ``path_with_protocol`` of the path object returned by
        ``SftpPath(path).readlink()``
    """
    link_target = SftpPath(path).readlink()
    return link_target.path_with_protocol
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def sftp_glob(
    path: PathLike, recursive: bool = True, missing_ok: bool = True
) -> List[str]:
    """Return path list in ascending alphabetical order,
    in which path matches glob pattern

    This is the eager form of ``sftp_iglob``: the iterator is drained into a
    list, keeping the order in which the paths were produced.

    1. If nothing matches, an empty list is returned
       (see ``missing_ok`` for the raising variant)
    2. No guarantee that each path in result is different, which means:
       assume there exists a path `/a/b/c/b/d.txt`,
       the path pattern `/**/b/**/*.txt` returns that path twice
    3. `**` will match any matched file, directory, symlink and '' by default,
       when recursive is `True`
    4. Hidden files (filename starts with '.') will not be found in the result

    :param path: Given path, used as the glob pattern
    :param recursive: If False, `**` will not search directory recursively
    :param missing_ok: If False and target path doesn't match any file,
        raise FileNotFoundError
    :returns: A list contains paths match `path`
    """
    matches = sftp_iglob(path=path, recursive=recursive, missing_ok=missing_ok)
    return [*matches]
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def sftp_glob_stat(
    path: PathLike, recursive: bool = True, missing_ok: bool = True
) -> Iterator[FileEntry]:
    """Yield a FileEntry for every path that matches the glob pattern

    Paths are taken from ``sftp_iglob`` in the order it produces them; for each
    one a FileEntry is built from the path's name, its ``path_with_protocol``
    and the result of ``lstat()``.

    1. If nothing matches, nothing is yielded
       (see ``missing_ok`` for the raising variant)
    2. No guarantee that each path in result is different, which means:
       assume there exists a path `/a/b/c/b/d.txt`,
       the path pattern `/**/b/**/*.txt` returns that path twice
    3. `**` will match any matched file, directory, symlink and '' by default,
       when recursive is `True`
    4. Hidden files (filename starts with '.') will not be found in the result

    :param path: Given path, used as the glob pattern
    :param recursive: If False, `**` will not search directory recursively
    :param missing_ok: If False and target path doesn't match any file,
        raise FileNotFoundError
    :returns: An iterator of FileEntry (name, path, stat),
        in which paths match `path`
    """
    matches = sftp_iglob(path=path, recursive=recursive, missing_ok=missing_ok)
    for matched in matches:
        entry_path = SftpPath(matched)
        entry_name = entry_path.name
        entry_url = entry_path.path_with_protocol
        yield FileEntry(entry_name, entry_url, entry_path.lstat())
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def sftp_iglob(
    path: PathLike, recursive: bool = True, missing_ok: bool = True
) -> Iterator[str]:
    """Return path iterator in ascending alphabetical order,
    in which path matches glob pattern

    The work is delegated to ``SftpPath(path).iglob`` with an empty relative
    pattern; each resulting path object is converted to its
    ``path_with_protocol`` string.

    1. If nothing matches, the iterator is empty
       (see ``missing_ok`` for the raising variant)
    2. No guarantee that each path in result is different, which means:
       assume there exists a path `/a/b/c/b/d.txt`,
       the path pattern `/**/b/**/*.txt` returns that path twice
    3. `**` will match any matched file, directory, symlink and '' by default,
       when recursive is `True`
    4. Hidden files (filename starts with '.') will not be found in the result

    :param path: Given path, used as the glob pattern
    :param recursive: If False, `**` will not search directory recursively
    :param missing_ok: If False and target path doesn't match any file,
        raise FileNotFoundError
    :returns: An iterator contains paths match `path`
    """
    matched_paths = SftpPath(path).iglob(
        pattern="", recursive=recursive, missing_ok=missing_ok
    )
    yield from (matched.path_with_protocol for matched in matched_paths)
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
def sftp_resolve(path: PathLike, strict=False) -> "str":
    """Equal to fs_realpath

    :param path: Given path
    :param strict: Forwarded to ``SftpPath.resolve``; documented upstream as
        ignored and kept only for compatibility
    :return: Return the canonical path of the specified filename,
        eliminating any symbolic links encountered in the path.
    :rtype: str
    """
    resolved = SftpPath(path).resolve(strict)
    return resolved.path_with_protocol
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
def sftp_download(
    src_url: PathLike,
    dst_url: PathLike,
    callback: Optional[Callable[[int], None]] = None,
    followlinks: bool = False,
    overwrite: bool = True,
):
    """
    Downloads a file from sftp to local filesystem.

    Once the transfer is done, the source's access/modification times and
    mode are applied to the destination file.

    :param src_url: source sftp path
    :param dst_url: target fs path
    :param callback: Called periodically during copy, and the input parameter is
        the data size (in bytes) of copy since the last call
    :param followlinks: False if regard symlink as file, else True
    :param overwrite: whether or not overwrite file when exists, default is True
    :raises OSError: if dst_url is not a fs path or src_url is not a sftp path
    :raises IsADirectoryError: if the source is a directory,
        or dst_url ends with "/"
    """
    from megfile.fs import is_fs
    from megfile.fs_path import FSPath

    if not is_fs(dst_url):
        raise OSError(f"dst_url is not fs path: {dst_url}")
    src_is_path_object = isinstance(src_url, SftpPath)
    if not (is_sftp(src_url) or src_is_path_object):
        raise OSError(f"src_url is not sftp path: {src_url}")

    local_file = FSPath(dst_url)
    # Existence is only probed when overwriting is disallowed.
    if not overwrite and local_file.exists():
        return

    remote_file: SftpPath = src_url if src_is_path_object else SftpPath(src_url)

    if followlinks and remote_file.is_symlink():
        remote_file = remote_file.readlink()
    if remote_file.is_dir():
        raise IsADirectoryError("Is a directory: %r" % src_url)
    if str(dst_url).endswith("/"):
        raise IsADirectoryError("Is a directory: %r" % dst_url)

    local_file.parent.makedirs(exist_ok=True)

    progress_hook = None
    if callback:
        reported_so_far = 0

        # The client reports a cumulative byte count, while ``callback``
        # expects the increment since the previous call.
        def progress_hook(bytes_transferred: int, _total_bytes: int):
            nonlocal reported_so_far
            callback(bytes_transferred - reported_so_far)  # pyre-ignore[29]
            reported_so_far = bytes_transferred

    remote_file._client.get(
        remote_file._real_path,
        local_file.path_without_protocol,
        callback=progress_hook,
    )

    remote_stat = remote_file.stat()
    local_file.utime(remote_stat.st_atime, remote_stat.st_mtime)
    local_file.chmod(remote_stat.st_mode)
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
def sftp_upload(
    src_url: PathLike,
    dst_url: PathLike,
    callback: Optional[Callable[[int], None]] = None,
    followlinks: bool = False,
    overwrite: bool = True,
):
    """
    Uploads a file from local filesystem to sftp server.

    Once the transfer is done, the source's access/modification times and
    mode are applied to the destination file.

    :param src_url: source fs path
    :param dst_url: target sftp path
    :param callback: Called periodically during copy, and the input parameter is
        the data size (in bytes) of copy since the last call
    :param followlinks: If True and src_url is a symbolic link, upload the
        link's target (one level of the link is read) instead of src_url
    :param overwrite: whether or not overwrite file when exists, default is True
    :raises OSError: if src_url is not a fs path or dst_url is not a sftp path
    :raises IsADirectoryError: if the source is a directory,
        or dst_url ends with "/"
    """
    from megfile.fs import is_fs
    from megfile.fs_path import FSPath

    if not is_fs(src_url):
        raise OSError(f"src_url is not fs path: {src_url}")
    if not is_sftp(dst_url) and not isinstance(dst_url, SftpPath):
        raise OSError(f"dst_url is not sftp path: {dst_url}")

    if followlinks and os.path.islink(src_url):
        # os.readlink may return a target relative to the directory that
        # contains the link; anchor it there so it is not resolved against
        # the current working directory. An absolute target is returned
        # unchanged by os.path.join.
        src_url = os.path.join(os.path.dirname(src_url), os.readlink(src_url))
    if os.path.isdir(src_url):
        raise IsADirectoryError("Is a directory: %r" % src_url)
    if str(dst_url).endswith("/"):
        raise IsADirectoryError("Is a directory: %r" % dst_url)

    src_path = FSPath(src_url)
    if isinstance(dst_url, SftpPath):
        dst_path: SftpPath = dst_url
    else:
        dst_path: SftpPath = SftpPath(dst_url)
    # Existence is only probed when overwriting is disallowed.
    if not overwrite and dst_path.exists():
        return

    dst_path.parent.makedirs(exist_ok=True)

    sftp_callback = None
    if callback:
        bytes_transferred_before = 0

        # The client reports a cumulative byte count, while ``callback``
        # expects the increment since the previous call.
        def sftp_callback(bytes_transferred: int, _total_bytes: int):
            nonlocal bytes_transferred_before
            callback(bytes_transferred - bytes_transferred_before)  # pyre-ignore[29]
            bytes_transferred_before = bytes_transferred

    dst_path._client.put(
        src_path.path_without_protocol, dst_path._real_path, callback=sftp_callback
    )

    src_stat = src_path.stat()
    dst_path.utime(src_stat.st_atime, src_stat.st_mtime)
    dst_path.chmod(src_stat.st_mode)
|
|
301
|
+
|
|
302
|
+
|
|
303
|
+
def sftp_path_join(path: PathLike, *other_paths: PathLike) -> str:
    """
    Concat 2 or more path to a complete path

    Every argument is converted with ``fspath`` and the resulting strings are
    handed to ``uri_join``.

    :param path: Given path
    :param other_paths: Paths to be concatenated
    :returns: Concatenated complete path

    .. note ::

        The difference between this function and ``os.path.join`` is that this function
        ignores left side slash (which indicates absolute path) in ``other_paths``
        and will directly concat.

        e.g. os.path.join('/path', 'to', '/file') => '/file',
        but sftp_path_join('/path', 'to', '/file') => '/path/to/file'
    """
    segments = [fspath(segment) for segment in (path, *other_paths)]
    return uri_join(*segments)
|
|
321
|
+
|
|
322
|
+
|
|
323
|
+
def sftp_concat(src_paths: List[PathLike], dst_path: PathLike) -> None:
    """Concatenate sftp files to one file.

    A ``cat <sources...> > <destination>`` argument list is built from the
    real paths of the inputs and executed through the destination path's
    ``_exec_command``.

    :param src_paths: Given source paths
    :param dst_path: Given destination path
    :raises OSError: if the command finishes with a non-zero return code
        (its stderr is logged first)
    """
    target = SftpPath(dst_path)

    source_real_paths = [SftpPath(source)._real_path for source in src_paths]
    # NOTE(review): ">" is passed as a plain list element; whether it acts as a
    # shell redirection depends on how _exec_command assembles the command
    # line -- confirm against its implementation.
    command = ["cat", *source_real_paths, ">", SftpPath(dst_path)._real_path]
    result = target._exec_command(command)
    if result.returncode == 0:
        return
    _logger.error(result.stderr)
    raise OSError(f"Failed to concat {src_paths} to {dst_path}")
|
|
339
|
+
|
|
340
|
+
|
|
341
|
+
def sftp_lstat(path: PathLike) -> StatResult:
    """
    Get StatResult of file on sftp, including file size and mtime,
    referring to fs_getsize and fs_getmtime

    Delegates to ``SftpPath(path).lstat()``.

    :param path: Given path
    :returns: StatResult
    """
    target = SftpPath(path)
    return target.lstat()
|
|
350
|
+
|
|
351
|
+
|
|
62
352
|
def sftp_exists(path: PathLike, followlinks: bool = False) -> bool:
|
|
63
353
|
"""
|
|
64
354
|
Test if the path exists
|