megfile 3.1.6.post1__py3-none-any.whl → 4.0.0.post1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. megfile/cli.py +12 -7
  2. megfile/config.py +27 -39
  3. megfile/fs.py +169 -12
  4. megfile/fs_path.py +183 -260
  5. megfile/hdfs.py +106 -5
  6. megfile/hdfs_path.py +34 -90
  7. megfile/http.py +50 -1
  8. megfile/http_path.py +27 -65
  9. megfile/interfaces.py +1 -8
  10. megfile/lib/base_prefetch_reader.py +62 -78
  11. megfile/lib/combine_reader.py +5 -0
  12. megfile/lib/glob.py +3 -6
  13. megfile/lib/hdfs_prefetch_reader.py +7 -7
  14. megfile/lib/http_prefetch_reader.py +6 -6
  15. megfile/lib/s3_buffered_writer.py +71 -65
  16. megfile/lib/s3_cached_handler.py +1 -2
  17. megfile/lib/s3_limited_seekable_writer.py +3 -7
  18. megfile/lib/s3_memory_handler.py +1 -2
  19. megfile/lib/s3_pipe_handler.py +1 -2
  20. megfile/lib/s3_prefetch_reader.py +10 -19
  21. megfile/lib/s3_share_cache_reader.py +8 -5
  22. megfile/pathlike.py +397 -401
  23. megfile/s3.py +118 -17
  24. megfile/s3_path.py +126 -209
  25. megfile/sftp.py +300 -10
  26. megfile/sftp_path.py +46 -322
  27. megfile/smart.py +33 -27
  28. megfile/smart_path.py +9 -14
  29. megfile/stdio.py +1 -1
  30. megfile/stdio_path.py +2 -2
  31. megfile/utils/__init__.py +3 -4
  32. megfile/version.py +1 -1
  33. {megfile-3.1.6.post1.dist-info → megfile-4.0.0.post1.dist-info}/METADATA +7 -7
  34. megfile-4.0.0.post1.dist-info/RECORD +52 -0
  35. {megfile-3.1.6.post1.dist-info → megfile-4.0.0.post1.dist-info}/WHEEL +1 -1
  36. {megfile-3.1.6.post1.dist-info → megfile-4.0.0.post1.dist-info}/top_level.txt +0 -2
  37. docs/conf.py +0 -65
  38. megfile-3.1.6.post1.dist-info/RECORD +0 -55
  39. scripts/convert_results_to_sarif.py +0 -91
  40. scripts/generate_file.py +0 -344
  41. {megfile-3.1.6.post1.dist-info → megfile-4.0.0.post1.dist-info}/LICENSE +0 -0
  42. {megfile-3.1.6.post1.dist-info → megfile-4.0.0.post1.dist-info}/LICENSE.pyre +0 -0
  43. {megfile-3.1.6.post1.dist-info → megfile-4.0.0.post1.dist-info}/entry_points.txt +0 -0
megfile/sftp.py CHANGED
@@ -1,21 +1,17 @@
1
+ import os
2
+ from logging import getLogger as get_logger
1
3
  from typing import IO, BinaryIO, Callable, Iterator, List, Optional, Tuple
2
4
 
3
5
  from megfile.interfaces import FileEntry, PathLike, StatResult
6
+ from megfile.lib.compat import fspath
7
+ from megfile.lib.joinpath import uri_join
4
8
  from megfile.sftp_path import (
5
9
  SftpPath,
6
10
  is_sftp,
7
- sftp_concat,
8
- sftp_download,
9
- sftp_glob,
10
- sftp_glob_stat,
11
- sftp_iglob,
12
- sftp_lstat,
13
- sftp_path_join,
14
- sftp_readlink,
15
- sftp_resolve,
16
- sftp_upload,
17
11
  )
18
12
 
13
+ _logger = get_logger(__name__)
14
+
19
15
  __all__ = [
20
16
  "is_sftp",
21
17
  "sftp_readlink",
@@ -59,6 +55,300 @@ __all__ = [
59
55
  ]
60
56
 
61
57
 
58
def sftp_readlink(path: PathLike) -> "str":
    """
    Read the target of a symbolic link on sftp.

    :param path: Given path
    :returns: The ``path_with_protocol`` string of the path to which the
        symbolic link points
    """
    link_target = SftpPath(path).readlink()
    return link_target.path_with_protocol
67
+
68
+
69
def sftp_glob(
    path: PathLike, recursive: bool = True, missing_ok: bool = True
) -> List[str]:
    """Return the list of paths matching the glob pattern in ``path``.

    This is the eager form of ``sftp_iglob``: the iterator is fully consumed
    and its items are returned as a list, so matching rules and ordering are
    those of ``sftp_iglob``.

    Notes carried over from the glob contract:

    1. If nothing matches, an empty list is returned (subject to ``missing_ok``).
    2. Results are not guaranteed to be unique: with a path `/a/b/c/b/d.txt`
       and a pattern like `/**/b/**/*.txt`, the path may be returned twice.
    3. `**` matches any file, directory, symlink and '' when ``recursive``
       is `True`.
    4. Hidden files (filename starts with '.') will not be found in the result.

    :param path: Given path, which may contain a glob pattern
    :param recursive: If False, `**` will not search directory recursively
    :param missing_ok: If False and target path doesn't match any file,
        raise FileNotFoundError
    :returns: A list contains paths match `path`
    """
    matched_paths = sftp_iglob(
        path=path, recursive=recursive, missing_ok=missing_ok
    )
    return [*matched_paths]
98
+
99
+
100
def sftp_glob_stat(
    path: PathLike, recursive: bool = True, missing_ok: bool = True
) -> Iterator[FileEntry]:
    """Yield a ``FileEntry`` for every path matching the glob pattern in ``path``.

    Matching is delegated to ``sftp_iglob``; each matched path is wrapped in a
    ``SftpPath`` and reported as ``FileEntry(name, path_with_protocol, lstat())``.
    Because ``lstat`` is used, a symlink is described by its own stat rather
    than by the stat of its target.

    Notes carried over from the glob contract:

    1. If nothing matches, nothing is yielded (subject to ``missing_ok``).
    2. Results are not guaranteed to be unique: with a path `/a/b/c/b/d.txt`
       and a pattern like `/**/b/**/*.txt`, the path may be yielded twice.
    3. `**` matches any file, directory, symlink and '' when ``recursive``
       is `True`.
    4. Hidden files (filename starts with '.') will not be found in the result.

    :param path: Given path, which may contain a glob pattern
    :param recursive: If False, `**` will not search directory recursively
    :param missing_ok: If False and target path doesn't match any file,
        raise FileNotFoundError
    :returns: An iterator of file entries whose paths match `path`
    """
    matched_paths = sftp_iglob(
        path=path, recursive=recursive, missing_ok=missing_ok
    )
    # map() is lazy, so each SftpPath is still built one entry at a time.
    for matched in map(SftpPath, matched_paths):
        yield FileEntry(matched.name, matched.path_with_protocol, matched.lstat())
134
+
135
+
136
def sftp_iglob(
    path: PathLike, recursive: bool = True, missing_ok: bool = True
) -> Iterator[str]:
    """Lazily yield the paths matching the glob pattern in ``path``.

    The whole of ``path`` is used as the pattern: it is wrapped in a
    ``SftpPath`` and globbed with an empty relative pattern. Every match is
    yielded as its ``path_with_protocol`` string.

    Notes carried over from the glob contract:

    1. If nothing matches, nothing is yielded (subject to ``missing_ok``).
    2. Results are not guaranteed to be unique: with a path `/a/b/c/b/d.txt`
       and a pattern like `/**/b/**/*.txt`, the path may be yielded twice.
    3. `**` matches any file, directory, symlink and '' when ``recursive``
       is `True`.
    4. Hidden files (filename starts with '.') will not be found in the result.

    :param path: Given path, which may contain a glob pattern
    :param recursive: If False, `**` will not search directory recursively
    :param missing_ok: If False and target path doesn't match any file,
        raise FileNotFoundError
    :returns: An iterator contains paths match `path`
    """
    matches = SftpPath(path).iglob(
        pattern="", recursive=recursive, missing_ok=missing_ok
    )
    for match in matches:
        yield match.path_with_protocol
169
+
170
+
171
def sftp_resolve(path: PathLike, strict=False) -> "str":
    """Return the canonical form of ``path`` (equal to fs_realpath).

    :param path: Given path
    :param strict: Passed through to ``SftpPath.resolve``; historically
        documented as ignored and kept only for compatibility
    :returns: The ``path_with_protocol`` string of the canonical path of the
        specified filename, eliminating any symbolic links encountered in
        the path.
    """
    resolved = SftpPath(path).resolve(strict)
    return resolved.path_with_protocol
181
+
182
+
183
def sftp_download(
    src_url: PathLike,
    dst_url: PathLike,
    callback: Optional[Callable[[int], None]] = None,
    followlinks: bool = False,
    overwrite: bool = True,
):
    """
    Download a single file from sftp to the local filesystem.

    Once the transfer is done, the access/modification times and the mode of
    the source file are applied to the local copy.

    :param src_url: source sftp path
    :param dst_url: target fs path
    :param callback: Called periodically during copy, and the input parameter is
        the data size (in bytes) of copy since the last call
    :param followlinks: False if regard symlink as file, else True
    :param overwrite: whether or not overwrite file when exists, default is True
    :raises OSError: if ``dst_url`` is not a fs path or ``src_url`` is not
        a sftp path
    :raises IsADirectoryError: if the source is a directory or ``dst_url``
        ends with "/"
    """
    from megfile.fs import is_fs
    from megfile.fs_path import FSPath

    if not is_fs(dst_url):
        raise OSError(f"dst_url is not fs path: {dst_url}")
    if not (is_sftp(src_url) or isinstance(src_url, SftpPath)):
        raise OSError(f"src_url is not sftp path: {src_url}")

    local_path = FSPath(dst_url)
    if not overwrite and local_path.exists():
        # An existing destination is left untouched when overwrite is disabled.
        return

    # Reuse a ready-made SftpPath instead of wrapping it again.
    remote_path: SftpPath = (
        src_url if isinstance(src_url, SftpPath) else SftpPath(src_url)
    )

    if followlinks and remote_path.is_symlink():
        remote_path = remote_path.readlink()
    if remote_path.is_dir():
        raise IsADirectoryError("Is a directory: %r" % src_url)
    if str(dst_url).endswith("/"):
        raise IsADirectoryError("Is a directory: %r" % dst_url)

    local_path.parent.makedirs(exist_ok=True)

    transferred_so_far = 0

    def report_progress(bytes_transferred: int, _total_bytes: int):
        # The client reports a cumulative byte count, while ``callback``
        # expects the increment since the previous report.
        nonlocal transferred_so_far
        callback(bytes_transferred - transferred_so_far)  # pyre-ignore[29]
        transferred_so_far = bytes_transferred

    remote_path._client.get(
        remote_path._real_path,
        local_path.path_without_protocol,
        callback=report_progress if callback else None,
    )

    # Mirror timestamps and permission bits of the source onto the local file.
    remote_stat = remote_path.stat()
    local_path.utime(remote_stat.st_atime, remote_stat.st_mtime)
    local_path.chmod(remote_stat.st_mode)
242
+
243
+
244
def sftp_upload(
    src_url: PathLike,
    dst_url: PathLike,
    callback: Optional[Callable[[int], None]] = None,
    followlinks: bool = False,
    overwrite: bool = True,
):
    """
    Uploads a file from local filesystem to sftp server.

    Once the transfer is done, the access/modification times and the mode of
    the local file are applied to the remote copy.

    :param src_url: source fs path
    :param dst_url: target sftp path
    :param callback: Called periodically during copy, and the input parameter is
        the data size (in bytes) of copy since the last call
    :param followlinks: If True and ``src_url`` is a symbolic link, upload the
        file the link points to (one level of indirection is resolved)
    :param overwrite: whether or not overwrite file when exists, default is True
    :raises OSError: if ``src_url`` is not a fs path or ``dst_url`` is not
        a sftp path
    :raises IsADirectoryError: if the source is a directory or ``dst_url``
        ends with "/"
    """
    from megfile.fs import is_fs
    from megfile.fs_path import FSPath

    if not is_fs(src_url):
        raise OSError(f"src_url is not fs path: {src_url}")
    if not is_sftp(dst_url) and not isinstance(dst_url, SftpPath):
        raise OSError(f"dst_url is not sftp path: {dst_url}")

    if followlinks and os.path.islink(src_url):
        # os.readlink may return a target relative to the directory that
        # contains the link; anchor it there rather than at the current
        # working directory. os.path.join keeps an absolute target unchanged.
        link_target = os.readlink(src_url)
        src_url = os.path.join(os.path.dirname(src_url), link_target)
    if os.path.isdir(src_url):
        raise IsADirectoryError("Is a directory: %r" % src_url)
    if str(dst_url).endswith("/"):
        raise IsADirectoryError("Is a directory: %r" % dst_url)

    src_path = FSPath(src_url)
    # Reuse a ready-made SftpPath instead of wrapping it again.
    if isinstance(dst_url, SftpPath):
        dst_path: SftpPath = dst_url
    else:
        dst_path: SftpPath = SftpPath(dst_url)
    if not overwrite and dst_path.exists():
        # An existing destination is left untouched when overwrite is disabled.
        return

    dst_path.parent.makedirs(exist_ok=True)

    sftp_callback = None
    if callback:
        bytes_transferred_before = 0

        def sftp_callback(bytes_transferred: int, _total_bytes: int):
            # The client reports a cumulative byte count, while ``callback``
            # expects the increment since the previous report.
            nonlocal bytes_transferred_before
            callback(bytes_transferred - bytes_transferred_before)  # pyre-ignore[29]
            bytes_transferred_before = bytes_transferred

    dst_path._client.put(
        src_path.path_without_protocol, dst_path._real_path, callback=sftp_callback
    )

    # Mirror timestamps and permission bits of the local file onto the remote.
    src_stat = src_path.stat()
    dst_path.utime(src_stat.st_atime, src_stat.st_mtime)
    dst_path.chmod(src_stat.st_mode)
301
+
302
+
303
def sftp_path_join(path: PathLike, *other_paths: PathLike) -> str:
    """
    Concat 2 or more path to a complete path

    Every component is converted with ``fspath`` and the results are handed
    to ``uri_join``.

    :param path: Given path
    :param other_paths: Paths to be concatenated
    :returns: Concatenated complete path

    .. note ::

        The difference between this function and ``os.path.join`` is that this
        function ignores left side slash (which indicates absolute path) in
        ``other_paths`` and will directly concat.

        e.g. os.path.join('/path', 'to', '/file') => '/file',
        but sftp_path_join('/path', 'to', '/file') => '/path/to/file'
    """
    components = [fspath(part) for part in (path, *other_paths)]
    return uri_join(*components)
321
+
322
+
323
def sftp_concat(src_paths: List[PathLike], dst_path: PathLike) -> None:
    """Concatenate sftp files to one file.

    A ``cat <sources...> > <destination>`` command is assembled from the
    ``_real_path`` of every path and run through the destination path's
    ``_exec_command``.

    :param src_paths: Given source paths
    :param dst_path: Given destination path
    :raises OSError: if the command finishes with a non-zero return code;
        its stderr is logged before raising
    """
    dst_path_obj = SftpPath(dst_path)

    # NOTE(review): the redirect only works if ``_exec_command`` passes ">"
    # to a shell unquoted - confirm against its implementation.
    source_real_paths = [SftpPath(src)._real_path for src in src_paths]
    command = ["cat", *source_real_paths, ">", SftpPath(dst_path)._real_path]

    exec_result = dst_path_obj._exec_command(command)
    if exec_result.returncode == 0:
        return

    _logger.error(exec_result.stderr)
    raise OSError(f"Failed to concat {src_paths} to {dst_path}")
339
+
340
+
341
def sftp_lstat(path: PathLike) -> StatResult:
    """
    Get StatResult of file on sftp, including file size and mtime,
    referring to fs_getsize and fs_getmtime

    The call is forwarded to ``SftpPath.lstat``.

    :param path: Given path
    :returns: StatResult
    """
    sftp_path = SftpPath(path)
    return sftp_path.lstat()
350
+
351
+
62
352
  def sftp_exists(path: PathLike, followlinks: bool = False) -> bool:
63
353
  """
64
354
  Test if the path exists