megfile-3.1.1-py3-none-any.whl → megfile-3.1.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. docs/conf.py +2 -4
  2. megfile/__init__.py +394 -203
  3. megfile/cli.py +258 -238
  4. megfile/config.py +25 -21
  5. megfile/errors.py +124 -114
  6. megfile/fs.py +174 -140
  7. megfile/fs_path.py +462 -354
  8. megfile/hdfs.py +133 -101
  9. megfile/hdfs_path.py +290 -236
  10. megfile/http.py +15 -14
  11. megfile/http_path.py +111 -107
  12. megfile/interfaces.py +70 -65
  13. megfile/lib/base_prefetch_reader.py +84 -65
  14. megfile/lib/combine_reader.py +12 -12
  15. megfile/lib/compare.py +17 -13
  16. megfile/lib/compat.py +1 -5
  17. megfile/lib/fnmatch.py +29 -30
  18. megfile/lib/glob.py +46 -54
  19. megfile/lib/hdfs_prefetch_reader.py +40 -25
  20. megfile/lib/hdfs_tools.py +1 -3
  21. megfile/lib/http_prefetch_reader.py +69 -46
  22. megfile/lib/joinpath.py +5 -5
  23. megfile/lib/lazy_handler.py +7 -3
  24. megfile/lib/s3_buffered_writer.py +58 -51
  25. megfile/lib/s3_cached_handler.py +13 -14
  26. megfile/lib/s3_limited_seekable_writer.py +37 -28
  27. megfile/lib/s3_memory_handler.py +34 -30
  28. megfile/lib/s3_pipe_handler.py +24 -25
  29. megfile/lib/s3_prefetch_reader.py +71 -52
  30. megfile/lib/s3_share_cache_reader.py +37 -24
  31. megfile/lib/shadow_handler.py +7 -3
  32. megfile/lib/stdio_handler.py +9 -8
  33. megfile/lib/url.py +3 -3
  34. megfile/pathlike.py +259 -228
  35. megfile/s3.py +220 -153
  36. megfile/s3_path.py +977 -802
  37. megfile/sftp.py +190 -156
  38. megfile/sftp_path.py +540 -450
  39. megfile/smart.py +397 -330
  40. megfile/smart_path.py +100 -105
  41. megfile/stdio.py +10 -9
  42. megfile/stdio_path.py +32 -35
  43. megfile/utils/__init__.py +73 -54
  44. megfile/utils/mutex.py +11 -14
  45. megfile/version.py +1 -1
  46. {megfile-3.1.1.dist-info → megfile-3.1.2.dist-info}/METADATA +5 -8
  47. megfile-3.1.2.dist-info/RECORD +55 -0
  48. {megfile-3.1.1.dist-info → megfile-3.1.2.dist-info}/WHEEL +1 -1
  49. scripts/convert_results_to_sarif.py +45 -78
  50. scripts/generate_file.py +140 -64
  51. megfile-3.1.1.dist-info/RECORD +0 -55
  52. {megfile-3.1.1.dist-info → megfile-3.1.2.dist-info}/LICENSE +0 -0
  53. {megfile-3.1.1.dist-info → megfile-3.1.2.dist-info}/LICENSE.pyre +0 -0
  54. {megfile-3.1.1.dist-info → megfile-3.1.2.dist-info}/entry_points.txt +0 -0
  55. {megfile-3.1.1.dist-info → megfile-3.1.2.dist-info}/top_level.txt +0 -0
megfile/fs.py CHANGED
@@ -1,218 +1,241 @@
1
1
  from typing import BinaryIO, Callable, Iterator, List, Optional, Tuple
2
2
 
3
- from megfile.fs_path import FSPath, StatResult, _make_stat, fs_cwd, fs_glob, fs_glob_stat, fs_home, fs_iglob, fs_lstat, fs_makedirs, fs_move, fs_path_join, fs_readlink, fs_rename, fs_resolve, is_fs
3
+ from megfile.fs_path import (
4
+ FSPath,
5
+ _make_stat,
6
+ fs_cwd,
7
+ fs_glob,
8
+ fs_glob_stat,
9
+ fs_home,
10
+ fs_iglob,
11
+ fs_lstat,
12
+ fs_makedirs,
13
+ fs_move,
14
+ fs_path_join,
15
+ fs_readlink,
16
+ fs_rename,
17
+ fs_resolve,
18
+ is_fs,
19
+ )
4
20
  from megfile.interfaces import Access, FileEntry, PathLike, StatResult
5
21
 
6
22
  __all__ = [
7
- 'is_fs',
8
- 'StatResult',
9
- 'fs_path_join',
10
- '_make_stat',
11
- 'fs_readlink',
12
- 'fs_cwd',
13
- 'fs_home',
14
- 'fs_iglob',
15
- 'fs_glob',
16
- 'fs_glob_stat',
17
- 'fs_rename',
18
- 'fs_resolve',
19
- 'fs_move',
20
- 'fs_makedirs',
21
- 'fs_lstat',
22
- 'fs_isabs',
23
- 'fs_abspath',
24
- 'fs_access',
25
- 'fs_exists',
26
- 'fs_getmtime',
27
- 'fs_getsize',
28
- 'fs_expanduser',
29
- 'fs_isdir',
30
- 'fs_isfile',
31
- 'fs_listdir',
32
- 'fs_load_from',
33
- 'fs_realpath',
34
- 'fs_relpath',
35
- 'fs_remove',
36
- 'fs_scan',
37
- 'fs_scan_stat',
38
- 'fs_scandir',
39
- 'fs_stat',
40
- 'fs_unlink',
41
- 'fs_walk',
42
- 'fs_getmd5',
43
- 'fs_copy',
44
- 'fs_sync',
45
- 'fs_symlink',
46
- 'fs_islink',
47
- 'fs_ismount',
48
- 'fs_save_as',
23
+ "is_fs",
24
+ "fs_path_join",
25
+ "_make_stat",
26
+ "fs_readlink",
27
+ "fs_cwd",
28
+ "fs_home",
29
+ "fs_iglob",
30
+ "fs_glob",
31
+ "fs_glob_stat",
32
+ "fs_rename",
33
+ "fs_resolve",
34
+ "fs_move",
35
+ "fs_makedirs",
36
+ "fs_lstat",
37
+ "fs_isabs",
38
+ "fs_abspath",
39
+ "fs_access",
40
+ "fs_exists",
41
+ "fs_getmtime",
42
+ "fs_getsize",
43
+ "fs_expanduser",
44
+ "fs_isdir",
45
+ "fs_isfile",
46
+ "fs_listdir",
47
+ "fs_load_from",
48
+ "fs_realpath",
49
+ "fs_relpath",
50
+ "fs_remove",
51
+ "fs_scan",
52
+ "fs_scan_stat",
53
+ "fs_scandir",
54
+ "fs_stat",
55
+ "fs_unlink",
56
+ "fs_walk",
57
+ "fs_getmd5",
58
+ "fs_copy",
59
+ "fs_sync",
60
+ "fs_symlink",
61
+ "fs_islink",
62
+ "fs_ismount",
63
+ "fs_save_as",
49
64
  ]
50
65
 
51
66
 
52
67
  def fs_isabs(path: PathLike) -> bool:
53
- '''Test whether a path is absolute
68
+ """Test whether a path is absolute
54
69
 
55
70
  :param path: Given path
56
71
  :returns: True if a path is absolute, else False
57
- '''
72
+ """
58
73
  return FSPath(path).is_absolute()
59
74
 
60
75
 
61
76
  def fs_abspath(path: PathLike) -> str:
62
- '''Return the absolute path of given path
77
+ """Return the absolute path of given path
63
78
 
64
79
  :param path: Given path
65
80
  :returns: Absolute path of given path
66
- '''
81
+ """
67
82
  return FSPath(path).abspath()
68
83
 
69
84
 
70
85
  def fs_access(path: PathLike, mode: Access = Access.READ) -> bool:
71
- '''
86
+ """
72
87
  Test if path has access permission described by mode
73
88
  Using ``os.access``
74
89
 
75
90
  :param path: Given path
76
91
  :param mode: access mode
77
92
  :returns: Access: Enum, the read/write access that path has.
78
- '''
93
+ """
79
94
  return FSPath(path).access(mode)
80
95
 
81
96
 
82
97
  def fs_exists(path: PathLike, followlinks: bool = False) -> bool:
83
- '''
98
+ """
84
99
  Test if the path exists
85
100
 
86
101
  .. note::
87
102
 
88
- The difference between this function and ``os.path.exists`` is that this function regard symlink as file.
103
+ The difference between this function and ``os.path.exists`` is that
104
+ this function regard symlink as file.
89
105
  In other words, this function is equal to ``os.path.lexists``
90
106
 
91
107
  :param path: Given path
92
108
  :param followlinks: False if regard symlink as file, else True
93
109
  :returns: True if the path exists, else False
94
110
 
95
- '''
111
+ """
96
112
  return FSPath(path).exists(followlinks)
97
113
 
98
114
 
99
115
  def fs_getmtime(path: PathLike, follow_symlinks: bool = False) -> float:
100
- '''
116
+ """
101
117
  Get last-modified time of the file on the given path (in Unix timestamp format).
102
- If the path is an existent directory, return the latest modified time of all file in it.
118
+ If the path is an existent directory,
119
+ return the latest modified time of all file in it.
103
120
 
104
121
  :param path: Given path
105
122
  :returns: last-modified time
106
- '''
123
+ """
107
124
  return FSPath(path).getmtime(follow_symlinks)
108
125
 
109
126
 
110
127
  def fs_getsize(path: PathLike, follow_symlinks: bool = False) -> int:
111
- '''
128
+ """
112
129
  Get file size on the given file path (in bytes).
113
- If the path in a directory, return the sum of all file size in it, including file in subdirectories (if exist).
114
- The result excludes the size of directory itself. In other words, return 0 Byte on an empty directory path.
130
+ If the path in a directory, return the sum of all file size in it,
131
+ including file in subdirectories (if exist).
132
+ The result excludes the size of directory itself.
133
+ In other words, return 0 Byte on an empty directory path.
115
134
 
116
135
  :param path: Given path
117
136
  :returns: File size
118
137
 
119
- '''
138
+ """
120
139
  return FSPath(path).getsize(follow_symlinks)
121
140
 
122
141
 
123
142
  def fs_expanduser(path: PathLike):
124
- '''Expand ~ and ~user constructions. If user or $HOME is unknown,
125
- do nothing.
126
- '''
143
+ """Expand ~ and ~user constructions. If user or $HOME is unknown,
144
+ do nothing.
145
+ """
127
146
  return FSPath(path).expanduser()
128
147
 
129
148
 
130
149
  def fs_isdir(path: PathLike, followlinks: bool = False) -> bool:
131
- '''
150
+ """
132
151
  Test if a path is directory
133
152
 
134
153
  .. note::
135
154
 
136
- The difference between this function and ``os.path.isdir`` is that this function regard symlink as file
155
+ The difference between this function and ``os.path.isdir`` is that
156
+ this function regard symlink as file
137
157
 
138
158
  :param path: Given path
139
159
  :param followlinks: False if regard symlink as file, else True
140
160
  :returns: True if the path is a directory, else False
141
161
 
142
- '''
162
+ """
143
163
  return FSPath(path).is_dir(followlinks)
144
164
 
145
165
 
146
166
  def fs_isfile(path: PathLike, followlinks: bool = False) -> bool:
147
- '''
167
+ """
148
168
  Test if a path is file
149
169
 
150
170
  .. note::
151
171
 
152
- The difference between this function and ``os.path.isfile`` is that this function regard symlink as file
172
+ The difference between this function and ``os.path.isfile`` is that
173
+ this function regard symlink as file
153
174
 
154
175
  :param path: Given path
155
176
  :param followlinks: False if regard symlink as file, else True
156
177
  :returns: True if the path is a file, else False
157
178
 
158
- '''
179
+ """
159
180
  return FSPath(path).is_file(followlinks)
160
181
 
161
182
 
162
183
  def fs_listdir(path: PathLike) -> List[str]:
163
- '''
164
- Get all contents of given fs path. The result is in ascending alphabetical order.
184
+ """
185
+ Get all contents of given fs path.
186
+ The result is in ascending alphabetical order.
165
187
 
166
188
  :param path: Given path
167
189
  :returns: All contents have in the path in ascending alphabetical order
168
- '''
190
+ """
169
191
  return FSPath(path).listdir()
170
192
 
171
193
 
172
194
  def fs_load_from(path: PathLike) -> BinaryIO:
173
- '''Read all content on specified path and write into memory
195
+ """Read all content on specified path and write into memory
174
196
 
175
197
  User should close the BinaryIO manually
176
198
 
177
199
  :param path: Given path
178
200
  :returns: Binary stream
179
- '''
201
+ """
180
202
  return FSPath(path).load()
181
203
 
182
204
 
183
205
  def fs_realpath(path: PathLike) -> str:
184
- '''Return the real path of given path
206
+ """Return the real path of given path
185
207
 
186
208
  :param path: Given path
187
209
  :returns: Real path of given path
188
- '''
210
+ """
189
211
  return FSPath(path).realpath()
190
212
 
191
213
 
192
214
  def fs_relpath(path: PathLike, start: Optional[str] = None) -> str:
193
- '''Return the relative path of given path
215
+ """Return the relative path of given path
194
216
 
195
217
  :param path: Given path
196
218
  :param start: Given start directory
197
219
  :returns: Relative path from start
198
- '''
220
+ """
199
221
  return FSPath(path).relpath(start)
200
222
 
201
223
 
202
224
  def fs_remove(path: PathLike, missing_ok: bool = False) -> None:
203
- '''
225
+ """
204
226
  Remove the file or directory on fs
205
227
 
206
228
  :param path: Given path
207
- :param missing_ok: if False and target file/directory not exists, raise FileNotFoundError
208
- '''
229
+ :param missing_ok: if False and target file/directory not exists,
230
+ raise FileNotFoundError
231
+ """
209
232
  return FSPath(path).remove(missing_ok)
210
233
 
211
234
 
212
- def fs_scan(path: PathLike,
213
- missing_ok: bool = True,
214
- followlinks: bool = False) -> Iterator[str]:
215
- '''
235
+ def fs_scan(
236
+ path: PathLike, missing_ok: bool = True, followlinks: bool = False
237
+ ) -> Iterator[str]:
238
+ """
216
239
  Iteratively traverse only files in given directory, in alphabetical order.
217
240
  Every iteration on generator yields a path string.
218
241
 
@@ -221,86 +244,92 @@ def fs_scan(path: PathLike,
221
244
  If path is a bucket path, return all file paths in the bucket
222
245
 
223
246
  :param path: Given path
224
- :param missing_ok: If False and there's no file in the directory, raise FileNotFoundError
247
+ :param missing_ok: If False and there's no file in the directory,
248
+ raise FileNotFoundError
225
249
  :returns: A file path generator
226
- '''
250
+ """
227
251
  return FSPath(path).scan(missing_ok, followlinks)
228
252
 
229
253
 
230
254
  def fs_scan_stat(
231
- path: PathLike,
232
- missing_ok: bool = True,
233
- followlinks: bool = False) -> Iterator[FileEntry]:
234
- '''
255
+ path: PathLike, missing_ok: bool = True, followlinks: bool = False
256
+ ) -> Iterator[FileEntry]:
257
+ """
235
258
  Iteratively traverse only files in given directory, in alphabetical order.
236
259
  Every iteration on generator yields a tuple of path string and file stat
237
260
 
238
261
  :param path: Given path
239
- :param missing_ok: If False and there's no file in the directory, raise FileNotFoundError
262
+ :param missing_ok: If False and there's no file in the directory,
263
+ raise FileNotFoundError
240
264
  :returns: A file path generator
241
- '''
265
+ """
242
266
  return FSPath(path).scan_stat(missing_ok, followlinks)
243
267
 
244
268
 
245
269
  def fs_scandir(path: PathLike) -> Iterator[FileEntry]:
246
- '''
270
+ """
247
271
  Get all content of given file path.
248
272
 
249
273
  :param path: Given path
250
274
  :returns: An iterator contains all contents have prefix path
251
- '''
275
+ """
252
276
  return FSPath(path).scandir()
253
277
 
254
278
 
255
279
  def fs_stat(path: PathLike, follow_symlinks=True) -> StatResult:
256
- '''
257
- Get StatResult of file on fs, including file size and mtime, referring to fs_getsize and fs_getmtime
280
+ """
281
+ Get StatResult of file on fs, including file size and mtime,
282
+ referring to fs_getsize and fs_getmtime
258
283
 
259
284
  :param path: Given path
260
285
  :returns: StatResult
261
- '''
286
+ """
262
287
  return FSPath(path).stat(follow_symlinks)
263
288
 
264
289
 
265
290
  def fs_unlink(path: PathLike, missing_ok: bool = False) -> None:
266
- '''
291
+ """
267
292
  Remove the file on fs
268
293
 
269
294
  :param path: Given path
270
295
  :param missing_ok: if False and target file not exists, raise FileNotFoundError
271
- '''
296
+ """
272
297
  return FSPath(path).unlink(missing_ok)
273
298
 
274
299
 
275
300
  def fs_walk(
276
- path: PathLike,
277
- followlinks: bool = False
301
+ path: PathLike, followlinks: bool = False
278
302
  ) -> Iterator[Tuple[str, List[str], List[str]]]:
279
- '''
303
+ """
280
304
  Generate the file names in a directory tree by walking the tree top-down.
281
305
  For each directory in the tree rooted at directory path (including path itself),
282
306
  it yields a 3-tuple (root, dirs, files).
283
307
 
284
- root: a string of current path
285
- dirs: name list of subdirectories (excluding '.' and '..' if they exist) in 'root'. The list is sorted by ascending alphabetical order
286
- files: name list of non-directory files (link is regarded as file) in 'root'. The list is sorted by ascending alphabetical order
308
+ - root: a string of current path
309
+ - dirs: name list of subdirectories (excluding '.' and '..' if they exist)
310
+ in 'root'. The list is sorted by ascending alphabetical order
311
+ - files: name list of non-directory files (link is regarded as file) in 'root'.
312
+ The list is sorted by ascending alphabetical order
287
313
 
288
- If path not exists, or path is a file (link is regarded as file), return an empty generator
314
+ If path not exists, or path is a file (link is regarded as file),
315
+ return an empty generator
289
316
 
290
317
  .. note::
291
318
 
292
- Be aware that setting ``followlinks`` to True can lead to infinite recursion if a link points to a parent directory of itself. fs_walk() does not keep track of the directories it visited already.
319
+ Be aware that setting ``followlinks`` to True can lead to infinite recursion
320
+ if a link points to a parent directory of itself. fs_walk() does not keep
321
+ track of the directories it visited already.
293
322
 
294
323
  :param path: Given path
295
324
  :param followlinks: False if regard symlink as file, else True
325
+
296
326
  :returns: A 3-tuple generator
297
- '''
327
+ """
298
328
  return FSPath(path).walk(followlinks)
299
329
 
300
330
 
301
- def fs_getmd5(
302
- path: PathLike, recalculate: bool = False, followlinks: bool = True):
303
- '''
331
+ def fs_getmd5(path: PathLike, recalculate: bool = False, followlinks: bool = True):
332
+ """
304
333
  Calculate the md5 value of the file
305
334
 
306
335
  :param path: Given path
@@ -308,18 +337,20 @@ def fs_getmd5(
308
337
  :param followlinks: Ignore this parameter, just for compatibility
309
338
 
310
339
  returns: md5 of file
311
- '''
340
+ """
312
341
  return FSPath(path).md5(recalculate, followlinks)
313
342
 
314
343
 
315
344
  def fs_copy(
316
- src_path: PathLike,
317
- dst_path: PathLike,
318
- callback: Optional[Callable[[int], None]] = None,
319
- followlinks: bool = False,
320
- overwrite: bool = True):
321
- ''' File copy on file system
322
- Copy content (excluding meta date) of file on `src_path` to `dst_path`. `dst_path` must be a complete file name
345
+ src_path: PathLike,
346
+ dst_path: PathLike,
347
+ callback: Optional[Callable[[int], None]] = None,
348
+ followlinks: bool = False,
349
+ overwrite: bool = True,
350
+ ):
351
+ """File copy on file system
352
+ Copy content (excluding meta date) of file on `src_path` to `dst_path`.
353
+ `dst_path` must be a complete file name
323
354
 
324
355
  .. note ::
325
356
 
@@ -327,72 +358,75 @@ def fs_copy(
327
358
 
328
359
  1. If parent directory of dst_path doesn't exist, create it
329
360
 
330
- 2. Allow callback function, None by default. callback: Optional[Callable[[int], None]],
331
-
332
- the int data is means the size (in bytes) of the written data that is passed periodically
361
+ 2. Allow callback function, None by default.
362
+ callback: Optional[Callable[[int], None]], the int data is means
363
+ the size (in bytes) of the written data that is passed periodically
333
364
 
334
365
  3. This function is thread-unsafe
335
366
 
336
367
  :param src_path: Given path
337
368
  :param dst_path: Target file path
338
- :param callback: Called periodically during copy, and the input parameter is the data size (in bytes) of copy since the last call
369
+ :param callback: Called periodically during copy, and the input parameter is
370
+ the data size (in bytes) of copy since the last call
339
371
  :param followlinks: False if regard symlink as file, else True
340
372
  :param overwrite: whether or not overwrite file when exists, default is True
341
- '''
373
+ """
342
374
  return FSPath(src_path).copy(dst_path, callback, followlinks, overwrite)
343
375
 
344
376
 
345
377
  def fs_sync(
346
- src_path: PathLike,
347
- dst_path: PathLike,
348
- followlinks: bool = False,
349
- force: bool = False,
350
- overwrite: bool = True) -> None:
351
- '''Force write of everything to disk.
378
+ src_path: PathLike,
379
+ dst_path: PathLike,
380
+ followlinks: bool = False,
381
+ force: bool = False,
382
+ overwrite: bool = True,
383
+ ) -> None:
384
+ """Force write of everything to disk.
352
385
 
353
386
  :param src_path: Given path
354
387
  :param dst_path: Target file path
355
388
  :param followlinks: False if regard symlink as file, else True
356
- :param force: Sync file forcible, do not ignore same files, priority is higher than 'overwrite', default is False
389
+ :param force: Sync file forcible, do not ignore same files,
390
+ priority is higher than 'overwrite', default is False
357
391
  :param overwrite: whether or not overwrite file when exists, default is True
358
- '''
392
+ """
359
393
  return FSPath(src_path).sync(dst_path, followlinks, force, overwrite)
360
394
 
361
395
 
362
396
  def fs_symlink(src_path: PathLike, dst_path: PathLike) -> None:
363
- '''
397
+ """
364
398
  Create a symbolic link pointing to src_path named dst_path.
365
399
 
366
400
  :param src_path: Given path
367
401
  :param dst_path: Destination path
368
- '''
402
+ """
369
403
  return FSPath(src_path).symlink(dst_path)
370
404
 
371
405
 
372
406
  def fs_islink(path: PathLike) -> bool:
373
- '''Test whether a path is a symbolic link
407
+ """Test whether a path is a symbolic link
374
408
 
375
409
  :param path: Given path
376
410
  :return: If path is a symbolic link return True, else False
377
411
  :rtype: bool
378
- '''
412
+ """
379
413
  return FSPath(path).is_symlink()
380
414
 
381
415
 
382
416
  def fs_ismount(path: PathLike) -> bool:
383
- '''Test whether a path is a mount point
417
+ """Test whether a path is a mount point
384
418
 
385
419
  :param path: Given path
386
420
  :returns: True if a path is a mount point, else False
387
- '''
421
+ """
388
422
  return FSPath(path).is_mount()
389
423
 
390
424
 
391
425
  def fs_save_as(file_object: BinaryIO, path: PathLike):
392
- '''Write the opened binary stream to path
426
+ """Write the opened binary stream to path
393
427
  If parent directory of path doesn't exist, it will be created.
394
428
 
395
429
  :param path: Given path
396
430
  :param file_object: stream to be read
397
- '''
431
+ """
398
432
  return FSPath(path).save(file_object)