megfile 3.0.6.post1__py3-none-any.whl → 3.1.0.post1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. docs/conf.py +67 -0
  2. megfile/cli.py +16 -16
  3. megfile/config.py +37 -6
  4. megfile/errors.py +26 -20
  5. megfile/fs.py +13 -8
  6. megfile/fs_path.py +69 -49
  7. megfile/hdfs.py +13 -8
  8. megfile/hdfs_path.py +49 -41
  9. megfile/http.py +1 -1
  10. megfile/http_path.py +35 -28
  11. megfile/interfaces.py +119 -48
  12. megfile/lib/base_prefetch_reader.py +9 -8
  13. megfile/lib/combine_reader.py +7 -7
  14. megfile/lib/fnmatch.py +2 -2
  15. megfile/lib/glob.py +3 -3
  16. megfile/lib/hdfs_prefetch_reader.py +2 -1
  17. megfile/lib/http_prefetch_reader.py +3 -2
  18. megfile/lib/lazy_handler.py +6 -5
  19. megfile/lib/s3_buffered_writer.py +8 -7
  20. megfile/lib/s3_cached_handler.py +3 -4
  21. megfile/lib/s3_limited_seekable_writer.py +5 -3
  22. megfile/lib/s3_memory_handler.py +10 -6
  23. megfile/lib/s3_pipe_handler.py +1 -1
  24. megfile/lib/s3_prefetch_reader.py +7 -5
  25. megfile/lib/s3_share_cache_reader.py +2 -2
  26. megfile/lib/shadow_handler.py +5 -5
  27. megfile/lib/stdio_handler.py +3 -3
  28. megfile/pathlike.py +156 -170
  29. megfile/s3.py +19 -13
  30. megfile/s3_path.py +98 -83
  31. megfile/sftp.py +25 -16
  32. megfile/sftp_path.py +109 -94
  33. megfile/smart.py +38 -28
  34. megfile/smart_path.py +6 -6
  35. megfile/stdio.py +3 -3
  36. megfile/stdio_path.py +5 -5
  37. megfile/utils/__init__.py +8 -27
  38. megfile/version.py +1 -1
  39. {megfile-3.0.6.post1.dist-info → megfile-3.1.0.post1.dist-info}/METADATA +4 -5
  40. megfile-3.1.0.post1.dist-info/RECORD +55 -0
  41. {megfile-3.0.6.post1.dist-info → megfile-3.1.0.post1.dist-info}/WHEEL +1 -1
  42. megfile-3.1.0.post1.dist-info/top_level.txt +7 -0
  43. scripts/convert_results_to_sarif.py +124 -0
  44. scripts/generate_file.py +268 -0
  45. megfile-3.0.6.post1.dist-info/RECORD +0 -52
  46. megfile-3.0.6.post1.dist-info/top_level.txt +0 -1
  47. {megfile-3.0.6.post1.dist-info → megfile-3.1.0.post1.dist-info}/LICENSE +0 -0
  48. {megfile-3.0.6.post1.dist-info → megfile-3.1.0.post1.dist-info}/LICENSE.pyre +0 -0
  49. {megfile-3.0.6.post1.dist-info → megfile-3.1.0.post1.dist-info}/entry_points.txt +0 -0
@@ -1,6 +1,5 @@
1
1
  import os
2
- from io import UnsupportedOperation
3
- from typing import Iterable, List, Optional
2
+ from typing import Optional
4
3
 
5
4
  from megfile.errors import translate_fs_error, translate_s3_error
6
5
  from megfile.lib.s3_memory_handler import S3MemoryHandler
@@ -36,13 +35,13 @@ class S3CachedHandler(S3MemoryHandler):
36
35
  self._download_fileobj()
37
36
 
38
37
  if remove_cache_when_open:
39
- os.unlink(self._cache_path)
38
+ os.unlink(self._cache_path) # pyre-ignore[6]
40
39
 
41
40
  def fileno(self) -> int:
42
41
  # allow numpy.array to create a memmaped ndarray
43
42
  return self._fileobj.fileno()
44
43
 
45
44
  def _translate_error(self, error: Exception):
46
- error = translate_fs_error(error, self._cache_path)
45
+ error = translate_fs_error(error, self._cache_path) # pyre-ignore[6]
47
46
  error = translate_s3_error(error, self.name)
48
47
  return error
@@ -3,7 +3,7 @@ from io import BytesIO
3
3
  from logging import getLogger as get_logger
4
4
  from typing import Optional
5
5
 
6
- from megfile.config import DEFAULT_BLOCK_SIZE, DEFAULT_MAX_BLOCK_SIZE, DEFAULT_MAX_BUFFER_SIZE
6
+ from megfile.config import DEFAULT_MAX_BLOCK_SIZE, DEFAULT_MAX_BUFFER_SIZE, DEFAULT_MIN_BLOCK_SIZE
7
7
  from megfile.errors import raise_s3_error
8
8
  from megfile.interfaces import Seekable
9
9
  from megfile.lib.s3_buffered_writer import S3BufferedWriter
@@ -11,7 +11,7 @@ from megfile.lib.s3_buffered_writer import S3BufferedWriter
11
11
  _logger = get_logger(__name__)
12
12
 
13
13
 
14
- class S3LimitedSeekableWriter(Seekable, S3BufferedWriter):
14
+ class S3LimitedSeekableWriter(S3BufferedWriter, Seekable):
15
15
  ''' For file format like msgpack and mp4, it's a pain that you need to write
16
16
  header before writing the data. So it's kind of hard to make streaming write
17
17
  to unseekable file system like s3. In this case, we will try to keep the first
@@ -25,7 +25,7 @@ class S3LimitedSeekableWriter(Seekable, S3BufferedWriter):
25
25
  key: str,
26
26
  *,
27
27
  s3_client,
28
- block_size: int = DEFAULT_BLOCK_SIZE,
28
+ block_size: int = DEFAULT_MIN_BLOCK_SIZE,
29
29
  head_block_size: Optional[int] = None,
30
30
  tail_block_size: Optional[int] = None,
31
31
  max_block_size: int = DEFAULT_MAX_BLOCK_SIZE,
@@ -69,6 +69,8 @@ class S3LimitedSeekableWriter(Seekable, S3BufferedWriter):
69
69
  target_offset = self._offset + offset
70
70
  elif whence == os.SEEK_END:
71
71
  target_offset = self._content_size + offset
72
+ else:
73
+ raise OSError('Unsupported whence value: %d' % whence)
72
74
 
73
75
  if target_offset < self._head_block_size:
74
76
  self._head_buffer.seek(target_offset)
@@ -2,11 +2,11 @@ import os
2
2
  from io import BytesIO, UnsupportedOperation
3
3
  from typing import Iterable, List, Optional
4
4
 
5
- from megfile.errors import S3ConfigError, UnknownError, raise_s3_error, translate_fs_error, translate_s3_error
5
+ from megfile.errors import S3ConfigError, UnknownError, raise_s3_error, translate_s3_error
6
6
  from megfile.interfaces import Readable, Seekable, Writable
7
7
 
8
8
 
9
- class S3MemoryHandler(Readable, Seekable, Writable):
9
+ class S3MemoryHandler(Readable[bytes], Seekable, Writable[bytes]):
10
10
 
11
11
  def __init__(
12
12
  self,
@@ -41,8 +41,8 @@ class S3MemoryHandler(Readable, Seekable, Writable):
41
41
  def tell(self) -> int:
42
42
  return self._fileobj.tell()
43
43
 
44
- def seek(self, cookie: int, whence: int = os.SEEK_SET) -> int:
45
- return self._fileobj.seek(cookie, whence)
44
+ def seek(self, offset: int, whence: int = os.SEEK_SET) -> int:
45
+ return self._fileobj.seek(offset, whence)
46
46
 
47
47
  def readable(self) -> bool:
48
48
  return self._mode[0] == 'r' or self._mode[-1] == '+'
@@ -55,12 +55,16 @@ class S3MemoryHandler(Readable, Seekable, Writable):
55
55
  def readline(self, size: Optional[int] = None) -> bytes:
56
56
  if not self.readable():
57
57
  raise UnsupportedOperation('not readable')
58
+ if size is None:
59
+ size = -1
58
60
  return self._fileobj.readline(size)
59
61
 
60
- def readlines(self) -> List[bytes]:
62
+ def readlines(self, hint: Optional[int] = None) -> List[bytes]:
61
63
  if not self.readable():
62
64
  raise UnsupportedOperation('not readable')
63
- return self._fileobj.readlines()
65
+ if hint is None:
66
+ hint = -1
67
+ return self._fileobj.readlines(hint)
64
68
 
65
69
  def writable(self) -> bool:
66
70
  return self._mode[0] == 'w' or \
@@ -24,7 +24,7 @@ def _close_s3_pipes(): # pragma: no cover
24
24
  try_close_pipe(w)
25
25
 
26
26
 
27
- class S3PipeHandler(Readable, Writable):
27
+ class S3PipeHandler(Readable[bytes], Writable[bytes]):
28
28
 
29
29
  def __init__(
30
30
  self,
@@ -1,4 +1,3 @@
1
- import os
2
1
  from concurrent.futures import Future
3
2
  from io import BytesIO
4
3
  from typing import Optional
@@ -42,6 +41,8 @@ class S3PrefetchReader(BasePrefetchReader):
42
41
  self._key = key
43
42
  self._client = s3_client
44
43
  self._profile_name = profile_name
44
+ self._content_etag = None
45
+ self._content_info = None
45
46
 
46
47
  super().__init__(
47
48
  block_size=block_size,
@@ -58,7 +59,7 @@ class S3PrefetchReader(BasePrefetchReader):
58
59
  first_index_response['ContentRange'].split('/')[-1])
59
60
  except S3InvalidRangeError:
60
61
  # usually when read a empty file
61
- # TODO: use minio test empty file: https://hub.docker.com/r/minio/minio
62
+ # can use minio test empty file: https://hub.docker.com/r/minio/minio
62
63
  first_index_response = self._fetch_response()
63
64
  content_size = int(first_index_response['ContentLength'])
64
65
 
@@ -76,7 +77,8 @@ class S3PrefetchReader(BasePrefetchReader):
76
77
  self._bucket, self._key)
77
78
 
78
79
  def _fetch_response(
79
- self, start: Optional[int] = None,
80
+ self,
81
+ start: Optional[int] = None,
80
82
  end: Optional[int] = None) -> dict:
81
83
 
82
84
  def fetch_response() -> dict:
@@ -103,9 +105,9 @@ class S3PrefetchReader(BasePrefetchReader):
103
105
  index + 1) * self._block_size - 1
104
106
  response = self._fetch_response(start=start, end=end)
105
107
  etag = response.get('ETag', None)
106
- if etag is not None and etag != self._content_etag: # pytype: disable=attribute-error
108
+ if etag is not None and etag != self._content_etag:
107
109
  raise S3FileChangedError(
108
110
  'File changed: %r, etag before: %s, after: %s' %
109
- (self.name, self._content_info, response)) # pytype: disable=attribute-error
111
+ (self.name, self._content_info, response))
110
112
 
111
113
  return response['Body']
@@ -44,14 +44,14 @@ class S3ShareCacheReader(S3PrefetchReader):
44
44
  profile_name=profile_name,
45
45
  )
46
46
 
47
- def _get_futures(self):
47
+ def _get_futures(self) -> 'ShareCacheFutureManager':
48
48
  futures = thread_local(
49
49
  'S3ShareCacheReader.' + self._cache_key, ShareCacheFutureManager)
50
50
  futures.register(self.name)
51
51
  return futures
52
52
 
53
53
  def _seek_buffer(self, index: int, offset: int = 0):
54
- # The corresponding block is probably not downloaded when seeked to a new position
54
+ # The corresponding block is probably not downloaded when sought to a new position
55
55
  # So record the offset first, set it when it is accessed
56
56
  self._cached_offset = offset
57
57
  self._block_index = index
@@ -58,18 +58,18 @@ class ShadowHandler(Readable, Seekable, Writable, BaseShadowHandler):
58
58
  def readable(self) -> bool:
59
59
  return is_readable(self._file_object)
60
60
 
61
- def read(self, size: Optional[int] = None) -> AnyStr: # pytype: disable=signature-mismatch
61
+ def read(self, size: Optional[int] = None) -> AnyStr: # pyre-ignore[34]
62
62
  with self._ensure_offset():
63
- return self._file_object.read(size)
63
+ return self._file_object.read(size) # pyre-ignore[6]
64
64
 
65
- def readline(self, size: Optional[int] = None) -> AnyStr: # pytype: disable=signature-mismatch
65
+ def readline(self, size: Optional[int] = None) -> AnyStr: # pyre-ignore[34]
66
66
  with self._ensure_offset():
67
- return self._file_object.readline(size)
67
+ return self._file_object.readline(size) # pyre-ignore[6]
68
68
 
69
69
  def writable(self) -> bool:
70
70
  return is_writable(self._file_object)
71
71
 
72
- def write(self, data: AnyStr): # pytype: disable=signature-mismatch
72
+ def write(self, data: AnyStr):
73
73
  with self._ensure_offset():
74
74
  return self._file_object.write(data)
75
75
 
@@ -40,10 +40,10 @@ class STDReader(STDHandler, Readable):
40
40
  def name(self) -> str:
41
41
  return "stdin"
42
42
 
43
- def read(self, size: Optional[int] = None) -> AnyStr: # pytype: disable=signature-mismatch
43
+ def read(self, size: Optional[int] = None) -> AnyStr: # pyre-ignore[34]
44
44
  return self._handler.read(size)
45
45
 
46
- def readline(self, size: Optional[int] = None) -> AnyStr: # pytype: disable=signature-mismatch
46
+ def readline(self, size: Optional[int] = None) -> AnyStr: # pyre-ignore[34]
47
47
  return self._handler.readline()
48
48
 
49
49
 
@@ -79,5 +79,5 @@ class STDWriter(STDHandler, Writable):
79
79
  def name(self) -> str:
80
80
  return self._name
81
81
 
82
- def write(self, data: AnyStr) -> int: # pytype: disable=signature-mismatch
82
+ def write(self, data: AnyStr) -> int:
83
83
  return self._handler.write(data)