megfile 3.0.6.post1__py3-none-any.whl → 3.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. docs/conf.py +71 -0
  2. megfile/cli.py +16 -16
  3. megfile/config.py +37 -6
  4. megfile/errors.py +26 -20
  5. megfile/fs.py +13 -8
  6. megfile/fs_path.py +69 -49
  7. megfile/hdfs.py +13 -8
  8. megfile/hdfs_path.py +49 -41
  9. megfile/http.py +1 -1
  10. megfile/http_path.py +35 -28
  11. megfile/interfaces.py +119 -48
  12. megfile/lib/base_prefetch_reader.py +9 -8
  13. megfile/lib/combine_reader.py +7 -7
  14. megfile/lib/fnmatch.py +2 -2
  15. megfile/lib/glob.py +3 -3
  16. megfile/lib/hdfs_prefetch_reader.py +2 -1
  17. megfile/lib/http_prefetch_reader.py +3 -2
  18. megfile/lib/lazy_handler.py +6 -5
  19. megfile/lib/s3_buffered_writer.py +8 -7
  20. megfile/lib/s3_cached_handler.py +3 -4
  21. megfile/lib/s3_limited_seekable_writer.py +5 -3
  22. megfile/lib/s3_memory_handler.py +10 -6
  23. megfile/lib/s3_pipe_handler.py +1 -1
  24. megfile/lib/s3_prefetch_reader.py +7 -5
  25. megfile/lib/s3_share_cache_reader.py +2 -2
  26. megfile/lib/shadow_handler.py +5 -5
  27. megfile/lib/stdio_handler.py +3 -3
  28. megfile/pathlike.py +156 -170
  29. megfile/s3.py +19 -13
  30. megfile/s3_path.py +98 -83
  31. megfile/sftp.py +25 -16
  32. megfile/sftp_path.py +109 -94
  33. megfile/smart.py +38 -28
  34. megfile/smart_path.py +6 -6
  35. megfile/stdio.py +3 -3
  36. megfile/stdio_path.py +5 -5
  37. megfile/utils/__init__.py +8 -27
  38. megfile/version.py +1 -1
  39. {megfile-3.0.6.post1.dist-info → megfile-3.1.0.dist-info}/METADATA +4 -5
  40. megfile-3.1.0.dist-info/RECORD +55 -0
  41. {megfile-3.0.6.post1.dist-info → megfile-3.1.0.dist-info}/WHEEL +1 -1
  42. megfile-3.1.0.dist-info/entry_points.txt +2 -0
  43. megfile-3.1.0.dist-info/top_level.txt +7 -0
  44. scripts/convert_results_to_sarif.py +124 -0
  45. scripts/generate_file.py +268 -0
  46. megfile-3.0.6.post1.dist-info/RECORD +0 -52
  47. megfile-3.0.6.post1.dist-info/entry_points.txt +0 -2
  48. megfile-3.0.6.post1.dist-info/top_level.txt +0 -1
  49. {megfile-3.0.6.post1.dist-info → megfile-3.1.0.dist-info}/LICENSE +0 -0
  50. {megfile-3.0.6.post1.dist-info → megfile-3.1.0.dist-info}/LICENSE.pyre +0 -0
docs/conf.py ADDED
@@ -0,0 +1,71 @@
1
+ # Configuration file for the Sphinx documentation builder.
2
+ #
3
+ # This file only contains a selection of the most common options. For a full
4
+ # list see the documentation:
5
+ # http://www.sphinx-doc.org/en/master/config
6
+
7
+ # -- Path setup --------------------------------------------------------------
8
+
9
+ # If extensions (or modules to document with autodoc) are in another directory,
10
+ # add these directories to sys.path here. If the directory is relative to the
11
+ # documentation root, use os.path.abspath to make it absolute, like shown here.
12
+ #
13
+ import os
14
+ import sys
15
+
16
+ sys.path.insert(0, os.path.abspath('../megfile'))
17
+
18
+ # -- Project information -----------------------------------------------------
19
+
20
+ project = 'megfile'
21
+ copyright = '2019, r-eng'
22
+ author = 'r-eng'
23
+
24
+ # -- General configuration ---------------------------------------------------
25
+
26
+ # Add any Sphinx extension module names here, as strings. They can be
27
+ # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
28
+ # ones.
29
+ extensions = [
30
+ 'sphinx.ext.autodoc',
31
+ 'sphinx.ext.viewcode',
32
+ 'sphinx.ext.todo',
33
+ 'm2r2',
34
+ 'sphinx_rtd_theme',
35
+ 'sphinx_click',
36
+ 'sphinxcontrib.jquery',
37
+ ]
38
+
39
+ # Add any paths that contain templates here, relative to this directory.
40
+ templates_path = ['_templates']
41
+
42
+ # The language for content autogenerated by Sphinx. Refer to documentation
43
+ # for a list of supported languages.
44
+ #
45
+ # This is also used if you do content translation via gettext catalogs.
46
+ # Usually you set "language" from the command line for these cases.
47
+ language = 'en'
48
+
49
+ # List of patterns, relative to source directory, that match files and
50
+ # directories to ignore when looking for source files.
51
+ # This pattern also affects html_static_path and html_extra_path.
52
+ exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
53
+
54
+ # -- Options for HTML output -------------------------------------------------
55
+
56
+ # The theme to use for HTML and HTML Help pages. See the documentation for
57
+ # a list of builtin themes.
58
+ #
59
+ html_theme = "sphinx_rtd_theme"
60
+
61
+ # Add any paths that contain custom static files (such as style sheets) here,
62
+ # relative to this directory. They are copied after the builtin static files,
63
+ # so a file named "default.css" will overwrite the builtin "default.css".
64
+ html_static_path = ['_static']
65
+
66
+ # -- Extension configuration -------------------------------------------------
67
+
68
+ # -- Options for todo extension ----------------------------------------------
69
+
70
+ # If true, `todo` and `todoList` produce output, else they produce nothing.
71
+ todo_include_todos = True
megfile/cli.py CHANGED
@@ -14,7 +14,7 @@ from megfile.config import DEFAULT_BLOCK_SIZE
14
14
  from megfile.hdfs_path import DEFAULT_HDFS_TIMEOUT
15
15
  from megfile.interfaces import FileEntry
16
16
  from megfile.lib.glob import get_non_glob_dir, has_magic
17
- from megfile.smart import _smart_sync_single_file, smart_copy, smart_exists, smart_getmd5, smart_getmtime, smart_getsize, smart_glob_stat, smart_isdir, smart_isfile, smart_makedirs, smart_move, smart_open, smart_path_join, smart_readlink, smart_realpath, smart_remove, smart_rename, smart_scan_stat, smart_scandir, smart_stat, smart_sync, smart_sync_with_progress, smart_touch, smart_unlink
17
+ from megfile.smart import _smart_sync_single_file, smart_copy, smart_exists, smart_getmd5, smart_getmtime, smart_getsize, smart_glob_stat, smart_isdir, smart_isfile, smart_makedirs, smart_move, smart_open, smart_path_join, smart_readlink, smart_remove, smart_rename, smart_scan_stat, smart_scandir, smart_stat, smart_sync, smart_sync_with_progress, smart_touch, smart_unlink
18
18
  from megfile.smart_path import SmartPath
19
19
  from megfile.utils import get_human_size
20
20
  from megfile.version import VERSION
@@ -176,12 +176,12 @@ def ll(path: str, recursive: bool):
176
176
  @click.option('-g', '--progress-bar', is_flag=True, help='Show progress bar.')
177
177
  @click.option('--skip', is_flag=True, help='Skip existed files.')
178
178
  def cp(
179
- src_path: str,
180
- dst_path: str,
181
- recursive: bool,
182
- no_target_directory: bool,
183
- progress_bar: bool,
184
- skip: bool,
179
+ src_path: str,
180
+ dst_path: str,
181
+ recursive: bool,
182
+ no_target_directory: bool,
183
+ progress_bar: bool,
184
+ skip: bool,
185
185
  ):
186
186
  if not no_target_directory and (dst_path.endswith('/') or
187
187
  smart_isdir(dst_path)):
@@ -243,12 +243,12 @@ def cp(
243
243
  @click.option('-g', '--progress-bar', is_flag=True, help='Show progress bar.')
244
244
  @click.option('--skip', is_flag=True, help='Skip existed files.')
245
245
  def mv(
246
- src_path: str,
247
- dst_path: str,
248
- recursive: bool,
249
- no_target_directory: bool,
250
- progress_bar: bool,
251
- skip: bool,
246
+ src_path: str,
247
+ dst_path: str,
248
+ recursive: bool,
249
+ no_target_directory: bool,
250
+ progress_bar: bool,
251
+ skip: bool,
252
252
  ):
253
253
  if not no_target_directory and (dst_path.endswith('/') or
254
254
  smart_isdir(dst_path)):
@@ -321,7 +321,7 @@ def rm(path: str, recursive: bool):
321
321
  '-f',
322
322
  '--force',
323
323
  is_flag=True,
324
- help='Copy files forcely, ignore same files.')
324
+ help='Copy files forcible, ignore same files.')
325
325
  @click.option('-q', '--quiet', is_flag=True, help='Not show any progress log.')
326
326
  @click.option('--skip', is_flag=True, help='Skip existed files.')
327
327
  def sync(
@@ -419,8 +419,8 @@ def touch(path: str):
419
419
  @cli.command(short_help='Concatenate any files and send them to stdout.')
420
420
  @click.argument('path')
421
421
  def cat(path: str):
422
- with smart_open(path, 'rb') as file:
423
- shutil.copyfileobj(file, sys.stdout.buffer)
422
+ with smart_open(path, 'rb') as f:
423
+ shutil.copyfileobj(f, sys.stdout.buffer) # pytype: disable=wrong-arg-types
424
424
 
425
425
 
426
426
  @cli.command(
megfile/config.py CHANGED
@@ -1,12 +1,43 @@
1
1
  import os
2
+ from logging import getLogger
3
+
4
+ _logger = getLogger(__name__)
5
+
6
+ DEFAULT_BLOCK_SIZE = int(os.getenv('MEGFILE_BLOCK_SIZE') or 8 * 2**20)
7
+
8
+ if os.getenv('MEGFILE_MAX_BUFFER_SIZE'):
9
+ DEFAULT_MAX_BUFFER_SIZE = int(os.environ['MEGFILE_MAX_BUFFER_SIZE'])
10
+ if DEFAULT_MAX_BUFFER_SIZE < DEFAULT_BLOCK_SIZE:
11
+ DEFAULT_MAX_BUFFER_SIZE = DEFAULT_BLOCK_SIZE
12
+ _logger.warning(
13
+ "Env 'MEGFILE_MAX_BUFFER_SIZE' is smaller than block size, will not use buffer."
14
+ )
15
+ DEFAULT_BLOCK_CAPACITY = DEFAULT_MAX_BUFFER_SIZE // DEFAULT_BLOCK_SIZE
16
+ if os.getenv('MEGFILE_BLOCK_CAPACITY'):
17
+ _logger.warning(
18
+ "Env 'MEGFILE_MAX_BUFFER_SIZE' and 'MEGFILE_BLOCK_CAPACITY' are both set, 'MEGFILE_BLOCK_CAPACITY' will be ignored."
19
+ )
20
+ elif os.getenv('MEGFILE_BLOCK_CAPACITY'):
21
+ DEFAULT_BLOCK_CAPACITY = int(os.environ['MEGFILE_BLOCK_CAPACITY'])
22
+ DEFAULT_MAX_BUFFER_SIZE = DEFAULT_BLOCK_SIZE * DEFAULT_BLOCK_CAPACITY
23
+ else:
24
+ DEFAULT_MAX_BUFFER_SIZE = 128 * 2**20
25
+ DEFAULT_BLOCK_CAPACITY = 16
26
+
27
+ DEFAULT_MIN_BLOCK_SIZE = int(
28
+ os.getenv('MEGFILE_MIN_BLOCK_SIZE') or DEFAULT_BLOCK_SIZE)
29
+
30
+ if os.getenv('MEGFILE_MAX_BLOCK_SIZE'):
31
+ DEFAULT_MAX_BLOCK_SIZE = int(os.environ['MEGFILE_MAX_BLOCK_SIZE'])
32
+ if DEFAULT_MAX_BLOCK_SIZE < DEFAULT_BLOCK_SIZE:
33
+ DEFAULT_MAX_BLOCK_SIZE = DEFAULT_BLOCK_SIZE
34
+ _logger.warning(
35
+ "Env 'MEGFILE_MAX_BLOCK_SIZE' is smaller than block size, will be ignored."
36
+ )
37
+ else:
38
+ DEFAULT_MAX_BLOCK_SIZE = max(128 * 2**20, DEFAULT_BLOCK_SIZE)
2
39
 
3
- DEFAULT_BLOCK_SIZE = int(os.getenv('MEGFILE_BLOCK_SIZE') or 8 * 2**20) # 8MB
4
- DEFAULT_MAX_BLOCK_SIZE = int(
5
- os.getenv('MEGFILE_MAX_BLOCK_SIZE') or DEFAULT_BLOCK_SIZE * 16) # 128MB
6
- DEFAULT_MAX_BUFFER_SIZE = int(
7
- os.getenv('MEGFILE_MAX_BUFFER_SIZE') or DEFAULT_BLOCK_SIZE * 16) # 128MB
8
40
  GLOBAL_MAX_WORKERS = int(os.getenv('MEGFILE_MAX_WORKERS') or 32)
9
- DEFAULT_BLOCK_CAPACITY = int(os.getenv('MEGFILE_BLOCK_CAPACITY') or 16)
10
41
  DEFAULT_MAX_RETRY_TIMES = int(os.getenv('MEGFILE_MAX_RETRY_TIMES') or 10)
11
42
 
12
43
  # for logging the size of file had read or wrote
megfile/errors.py CHANGED
@@ -1,3 +1,4 @@
1
+ # pyre-ignore-all-errors[16]
1
2
  import time
2
3
  from contextlib import contextmanager
3
4
  from functools import wraps
@@ -78,16 +79,16 @@ def full_error_message(error):
78
79
 
79
80
 
80
81
  def client_error_code(error: ClientError) -> str:
81
- error_data = error.response.get('Error', {}) # pytype: disable=attribute-error
82
+ error_data = error.response.get('Error', {})
82
83
  return error_data.get('Code') or error_data.get('code', 'Unknown')
83
84
 
84
85
 
85
86
  def client_error_message(error: ClientError) -> str:
86
- return error.response.get('Error', {}).get('Message', 'Unknown') # pytype: disable=attribute-error
87
+ return error.response.get('Error', {}).get('Message', 'Unknown')
87
88
 
88
89
 
89
90
  def param_validation_error_report(error: ParamValidationError) -> str:
90
- return error.kwargs.get('report', 'Unknown') # pytype: disable=attribute-error
91
+ return error.kwargs.get('report', 'Unknown')
91
92
 
92
93
 
93
94
  s3_retry_exceptions = [
@@ -106,12 +107,13 @@ s3_retry_exceptions = [
106
107
  ]
107
108
  if hasattr(botocore.exceptions,
108
109
  'ResponseStreamingError'): # backport botocore==1.23.24
109
- s3_retry_exceptions.append(botocore.exceptions.ResponseStreamingError)
110
- s3_retry_exceptions = tuple(s3_retry_exceptions)
110
+ s3_retry_exceptions.append(
111
+ botocore.exceptions.ResponseStreamingError) # pyre-ignore[6]
112
+ s3_retry_exceptions = tuple(s3_retry_exceptions) # pyre-ignore[9]
111
113
 
112
114
 
113
115
  def s3_should_retry(error: Exception) -> bool:
114
- if isinstance(error, s3_retry_exceptions):
116
+ if isinstance(error, s3_retry_exceptions): # pyre-ignore[6]
115
117
  return True
116
118
  if isinstance(error, botocore.exceptions.ClientError):
117
119
  return client_error_code(error) in (
@@ -170,19 +172,21 @@ def _create_missing_ok_generator(generator, missing_ok: bool, error: Exception):
170
172
  yield from generator
171
173
  return
172
174
 
173
- zero_elum = True
175
+ zero_elem = True
174
176
  for item in generator:
175
- zero_elum = False
177
+ zero_elem = False
176
178
  yield item
177
179
 
178
- if zero_elum:
180
+ if zero_elem:
179
181
  raise error
180
182
 
181
183
 
182
184
  class UnknownError(Exception):
183
185
 
184
186
  def __init__(
185
- self, error: Exception, path: PathLike,
187
+ self,
188
+ error: Exception,
189
+ path: PathLike,
186
190
  extra: Optional[str] = None):
187
191
  message = 'Unknown error encountered: %r, error: %s' % (
188
192
  path, full_error_message(error))
@@ -317,7 +321,7 @@ class ProtocolNotFoundError(Exception):
317
321
  pass
318
322
 
319
323
 
320
- def translate_fs_error(fs_error: Exception, fs_path: PathLike):
324
+ def translate_fs_error(fs_error: Exception, fs_path: PathLike) -> Exception:
321
325
  if isinstance(fs_error, OSError):
322
326
  if fs_error.filename is None:
323
327
  fs_error.filename = fs_path
@@ -369,8 +373,7 @@ def translate_s3_error(s3_error: Exception, s3_url: PathLike) -> Exception:
369
373
  return S3UnknownError(s3_error, s3_url)
370
374
 
371
375
 
372
- def translate_http_error(
373
- http_error: Optional[Exception], http_url: str) -> Exception:
376
+ def translate_http_error(http_error: Exception, http_url: str) -> Exception:
374
377
  '''Generate exception according to http_error and status_code
375
378
 
376
379
  .. note ::
@@ -405,20 +408,23 @@ def s3_error_code_should_retry(error: str) -> bool:
405
408
  return False
406
409
 
407
410
 
408
- def translate_hdfs_error(hdfs_error: Exception, hdfs_path: PathLike):
411
+ def translate_hdfs_error(
412
+ hdfs_error: Exception, hdfs_path: PathLike) -> Exception:
409
413
  from megfile.lib.hdfs_tools import hdfs_api
410
414
 
415
+ # pytype: disable=attribute-error
411
416
  if hdfs_api and isinstance(hdfs_error, hdfs_api.HdfsError):
412
- if hdfs_error.message and 'Path is not a file' in hdfs_error.message: # pytype: disable=attribute-error
417
+ if hdfs_error.message and 'Path is not a file' in hdfs_error.message:
413
418
  return IsADirectoryError('Is a directory: %r' % hdfs_path)
414
- elif hdfs_error.message and 'Path is not a directory' in hdfs_error.message: # pytype: disable=attribute-error
419
+ elif hdfs_error.message and 'Path is not a directory' in hdfs_error.message:
415
420
  return NotADirectoryError('Not a directory: %r' % hdfs_path)
416
- elif hdfs_error.status_code in (401, 403): # pytype: disable=attribute-error
421
+ elif hdfs_error.status_code in (401, 403):
417
422
  return PermissionError('Permission denied: %r' % hdfs_path)
418
- elif hdfs_error.status_code == 400: # pytype: disable=attribute-error
419
- return ValueError(f'{hdfs_error.message}, path: {hdfs_path}') # pytype: disable=attribute-error
420
- elif hdfs_error.status_code == 404: # pytype: disable=attribute-error
423
+ elif hdfs_error.status_code == 400:
424
+ return ValueError(f'{hdfs_error.message}, path: {hdfs_path}')
425
+ elif hdfs_error.status_code == 404:
421
426
  return FileNotFoundError(f'No match file: {hdfs_path}')
427
+ # pytype: enable=attribute-error
422
428
  return hdfs_error
423
429
 
424
430
 
megfile/fs.py CHANGED
@@ -161,10 +161,10 @@ def fs_isfile(path: PathLike, followlinks: bool = False) -> bool:
161
161
 
162
162
  def fs_listdir(path: PathLike) -> List[str]:
163
163
  '''
164
- Get all contents of given fs path. The result is in acsending alphabetical order.
164
+ Get all contents of given fs path. The result is in ascending alphabetical order.
165
165
 
166
166
  :param path: Given path
167
- :returns: All contents have in the path in acsending alphabetical order
167
+ :returns: All contents have in the path in ascending alphabetical order
168
168
  '''
169
169
  return FSPath(path).listdir()
170
170
 
@@ -209,7 +209,8 @@ def fs_remove(path: PathLike, missing_ok: bool = False) -> None:
209
209
  return FSPath(path).remove(missing_ok)
210
210
 
211
211
 
212
- def fs_scan(path: PathLike, missing_ok: bool = True,
212
+ def fs_scan(path: PathLike,
213
+ missing_ok: bool = True,
213
214
  followlinks: bool = False) -> Iterator[str]:
214
215
  '''
215
216
  Iteratively traverse only files in given directory, in alphabetical order.
@@ -227,7 +228,8 @@ def fs_scan(path: PathLike, missing_ok: bool = True,
227
228
 
228
229
 
229
230
  def fs_scan_stat(
230
- path: PathLike, missing_ok: bool = True,
231
+ path: PathLike,
232
+ missing_ok: bool = True,
231
233
  followlinks: bool = False) -> Iterator[FileEntry]:
232
234
  '''
233
235
  Iteratively traverse only files in given directory, in alphabetical order.
@@ -270,8 +272,10 @@ def fs_unlink(path: PathLike, missing_ok: bool = False) -> None:
270
272
  return FSPath(path).unlink(missing_ok)
271
273
 
272
274
 
273
- def fs_walk(path: PathLike, followlinks: bool = False
274
- ) -> Iterator[Tuple[str, List[str], List[str]]]:
275
+ def fs_walk(
276
+ path: PathLike,
277
+ followlinks: bool = False
278
+ ) -> Iterator[Tuple[str, List[str], List[str]]]:
275
279
  '''
276
280
  Generate the file names in a directory tree by walking the tree top-down.
277
281
  For each directory in the tree rooted at directory path (including path itself),
@@ -302,6 +306,7 @@ def fs_getmd5(
302
306
  :param path: Given path
303
307
  :param recalculate: Ignore this parameter, just for compatibility
304
308
  :param followlinks: Ignore this parameter, just for compatibility
309
+
305
310
  returns: md5 of file
306
311
  '''
307
312
  return FSPath(path).md5(recalculate, followlinks)
@@ -348,7 +353,7 @@ def fs_sync(
348
353
  :param src_path: Given path
349
354
  :param dst_path: Target file path
350
355
  :param followlinks: False if regard symlink as file, else True
351
- :param force: Sync file forcely, do not ignore same files, priority is higher than 'overwrite', default is False
356
+ :param force: Sync file forcible, do not ignore same files, priority is higher than 'overwrite', default is False
352
357
  :param overwrite: whether or not overwrite file when exists, default is True
353
358
  '''
354
359
  return FSPath(src_path).sync(dst_path, followlinks, force, overwrite)
@@ -359,7 +364,7 @@ def fs_symlink(src_path: PathLike, dst_path: PathLike) -> None:
359
364
  Create a symbolic link pointing to src_path named dst_path.
360
365
 
361
366
  :param src_path: Given path
362
- :param dst_path: Desination path
367
+ :param dst_path: Destination path
363
368
  '''
364
369
  return FSPath(src_path).symlink(dst_path)
365
370