megfile-3.1.1-py3-none-any.whl → megfile-3.1.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55):
  1. docs/conf.py +2 -4
  2. megfile/__init__.py +394 -203
  3. megfile/cli.py +258 -238
  4. megfile/config.py +25 -21
  5. megfile/errors.py +124 -114
  6. megfile/fs.py +174 -140
  7. megfile/fs_path.py +462 -354
  8. megfile/hdfs.py +133 -101
  9. megfile/hdfs_path.py +290 -236
  10. megfile/http.py +15 -14
  11. megfile/http_path.py +111 -107
  12. megfile/interfaces.py +70 -65
  13. megfile/lib/base_prefetch_reader.py +84 -65
  14. megfile/lib/combine_reader.py +12 -12
  15. megfile/lib/compare.py +17 -13
  16. megfile/lib/compat.py +1 -5
  17. megfile/lib/fnmatch.py +29 -30
  18. megfile/lib/glob.py +46 -54
  19. megfile/lib/hdfs_prefetch_reader.py +40 -25
  20. megfile/lib/hdfs_tools.py +1 -3
  21. megfile/lib/http_prefetch_reader.py +69 -46
  22. megfile/lib/joinpath.py +5 -5
  23. megfile/lib/lazy_handler.py +7 -3
  24. megfile/lib/s3_buffered_writer.py +58 -51
  25. megfile/lib/s3_cached_handler.py +13 -14
  26. megfile/lib/s3_limited_seekable_writer.py +37 -28
  27. megfile/lib/s3_memory_handler.py +34 -30
  28. megfile/lib/s3_pipe_handler.py +24 -25
  29. megfile/lib/s3_prefetch_reader.py +71 -52
  30. megfile/lib/s3_share_cache_reader.py +37 -24
  31. megfile/lib/shadow_handler.py +7 -3
  32. megfile/lib/stdio_handler.py +9 -8
  33. megfile/lib/url.py +3 -3
  34. megfile/pathlike.py +259 -228
  35. megfile/s3.py +220 -153
  36. megfile/s3_path.py +977 -802
  37. megfile/sftp.py +190 -156
  38. megfile/sftp_path.py +540 -450
  39. megfile/smart.py +397 -330
  40. megfile/smart_path.py +100 -105
  41. megfile/stdio.py +10 -9
  42. megfile/stdio_path.py +32 -35
  43. megfile/utils/__init__.py +73 -54
  44. megfile/utils/mutex.py +11 -14
  45. megfile/version.py +1 -1
  46. {megfile-3.1.1.dist-info → megfile-3.1.2.dist-info}/METADATA +5 -8
  47. megfile-3.1.2.dist-info/RECORD +55 -0
  48. {megfile-3.1.1.dist-info → megfile-3.1.2.dist-info}/WHEEL +1 -1
  49. scripts/convert_results_to_sarif.py +45 -78
  50. scripts/generate_file.py +140 -64
  51. megfile-3.1.1.dist-info/RECORD +0 -55
  52. {megfile-3.1.1.dist-info → megfile-3.1.2.dist-info}/LICENSE +0 -0
  53. {megfile-3.1.1.dist-info → megfile-3.1.2.dist-info}/LICENSE.pyre +0 -0
  54. {megfile-3.1.1.dist-info → megfile-3.1.2.dist-info}/entry_points.txt +0 -0
  55. {megfile-3.1.1.dist-info → megfile-3.1.2.dist-info}/top_level.txt +0 -0
megfile/smart_path.py CHANGED
@@ -9,7 +9,6 @@ from .interfaces import BasePath, BaseURIPath, PathLike
9
9
 
10
10
 
11
11
  def _bind_function(name):
12
-
13
12
  def smart_method(self, *args, **kwargs):
14
13
  return getattr(self.pathlike, name)(*args, **kwargs)
15
14
 
@@ -19,7 +18,6 @@ def _bind_function(name):
19
18
 
20
19
 
21
20
  def _bind_property(name):
22
-
23
21
  @property
24
22
  def smart_property(self):
25
23
  return getattr(self.pathlike, name)
@@ -41,8 +39,7 @@ class SmartPath(BasePath):
41
39
  self.pathlike = pathlike
42
40
 
43
41
  @staticmethod
44
- def _extract_protocol(
45
- path: Union[PathLike, int]) -> Tuple[str, Union[str, int]]:
42
+ def _extract_protocol(path: Union[PathLike, int]) -> Tuple[str, Union[str, int]]:
46
43
  if isinstance(path, int):
47
44
  protocol = "file"
48
45
  path_without_protocol = path
@@ -52,25 +49,23 @@ class SmartPath(BasePath):
52
49
  protocol = "file"
53
50
  path_without_protocol = path
54
51
  else:
55
- path_without_protocol = path[len(protocol) + 3:]
52
+ path_without_protocol = path[len(protocol) + 3 :]
56
53
  elif isinstance(path, (BaseURIPath, SmartPath)):
57
54
  protocol = path.protocol
58
55
  path_without_protocol = str(path)
59
56
  elif isinstance(path, (PurePath, BasePath)):
60
- protocol, path_without_protocol = SmartPath._extract_protocol(
61
- fspath(path))
57
+ protocol, path_without_protocol = SmartPath._extract_protocol(fspath(path))
62
58
  else:
63
- raise ProtocolNotFoundError('protocol not found: %r' % path)
59
+ raise ProtocolNotFoundError("protocol not found: %r" % path)
64
60
  return protocol, path_without_protocol
65
61
 
66
62
  @classmethod
67
63
  def _create_pathlike(cls, path: Union[PathLike, int]) -> BaseURIPath:
68
64
  protocol, _ = cls._extract_protocol(path)
69
- if protocol.startswith('s3+'):
70
- protocol = 's3'
65
+ if protocol.startswith("s3+"):
66
+ protocol = "s3"
71
67
  if protocol not in cls._registered_protocols:
72
- raise ProtocolNotFoundError(
73
- 'protocol %r not found: %r' % (protocol, path))
68
+ raise ProtocolNotFoundError("protocol %r not found: %r" % (protocol, path))
74
69
  path_class = cls._registered_protocols[protocol]
75
70
  return path_class(path)
76
71
 
@@ -78,47 +73,47 @@ class SmartPath(BasePath):
78
73
  def register(cls, path_class, override_ok: bool = False):
79
74
  protocol = path_class.protocol
80
75
  if protocol in cls._registered_protocols and not override_ok:
81
- raise ProtocolExistsError('protocol already exists: %r' % protocol)
76
+ raise ProtocolExistsError("protocol already exists: %r" % protocol)
82
77
  cls._registered_protocols[protocol] = path_class
83
78
  return path_class
84
79
 
85
- symlink = _bind_function('symlink')
86
- symlink_to = _bind_function('symlink_to')
87
- hardlink_to = _bind_function('hardlink_to')
88
- readlink = _bind_function('readlink')
89
- is_dir = _bind_function('is_dir')
90
- is_file = _bind_function('is_file')
91
- is_symlink = _bind_function('is_symlink')
92
- access = _bind_function('access')
93
- exists = _bind_function('exists')
94
- listdir = _bind_function('listdir')
95
- scandir = _bind_function('scandir')
96
- getsize = _bind_function('getsize')
97
- getmtime = _bind_function('getmtime')
98
- stat = _bind_function('stat')
99
- lstat = _bind_function('lstat')
100
- remove = _bind_function('remove')
101
- rename = _bind_function('rename')
102
- replace = _bind_function('replace')
103
- unlink = _bind_function('unlink')
104
- mkdir = _bind_function('mkdir')
105
- open = _bind_function('open')
106
- touch = _bind_function('touch')
107
- walk = _bind_function('walk')
108
- scan = _bind_function('scan')
109
- scan_stat = _bind_function('scan_stat')
110
- glob = _bind_function('glob')
111
- iglob = _bind_function('iglob')
112
- glob_stat = _bind_function('glob_stat')
113
- load = _bind_function('load')
114
- save = _bind_function('save')
115
- joinpath = _bind_function('joinpath')
116
- abspath = _bind_function('abspath')
117
- realpath = _bind_function('realpath')
118
- relpath = _bind_function('relpath')
119
- is_absolute = _bind_function('is_absolute')
120
- is_mount = _bind_function('is_mount')
121
- md5 = _bind_function('md5')
80
+ symlink = _bind_function("symlink")
81
+ symlink_to = _bind_function("symlink_to")
82
+ hardlink_to = _bind_function("hardlink_to")
83
+ readlink = _bind_function("readlink")
84
+ is_dir = _bind_function("is_dir")
85
+ is_file = _bind_function("is_file")
86
+ is_symlink = _bind_function("is_symlink")
87
+ access = _bind_function("access")
88
+ exists = _bind_function("exists")
89
+ listdir = _bind_function("listdir")
90
+ scandir = _bind_function("scandir")
91
+ getsize = _bind_function("getsize")
92
+ getmtime = _bind_function("getmtime")
93
+ stat = _bind_function("stat")
94
+ lstat = _bind_function("lstat")
95
+ remove = _bind_function("remove")
96
+ rename = _bind_function("rename")
97
+ replace = _bind_function("replace")
98
+ unlink = _bind_function("unlink")
99
+ mkdir = _bind_function("mkdir")
100
+ open = _bind_function("open")
101
+ touch = _bind_function("touch")
102
+ walk = _bind_function("walk")
103
+ scan = _bind_function("scan")
104
+ scan_stat = _bind_function("scan_stat")
105
+ glob = _bind_function("glob")
106
+ iglob = _bind_function("iglob")
107
+ glob_stat = _bind_function("glob_stat")
108
+ load = _bind_function("load")
109
+ save = _bind_function("save")
110
+ joinpath = _bind_function("joinpath")
111
+ abspath = _bind_function("abspath")
112
+ realpath = _bind_function("realpath")
113
+ relpath = _bind_function("relpath")
114
+ is_absolute = _bind_function("is_absolute")
115
+ is_mount = _bind_function("is_mount")
116
+ md5 = _bind_function("md5")
122
117
 
123
118
  @property
124
119
  def protocol(self) -> str:
@@ -128,61 +123,61 @@ class SmartPath(BasePath):
128
123
  def from_uri(cls, path: str):
129
124
  return cls(path)
130
125
 
131
- as_uri = _bind_function('as_uri')
132
- as_posix = _bind_function('as_posix')
133
- __lt__ = _bind_function('__lt__')
134
- __le__ = _bind_function('__le__')
135
- __gt__ = _bind_function('__gt__')
136
- __ge__ = _bind_function('__ge__')
137
- __fspath__ = _bind_function('__fspath__')
138
- __truediv__ = _bind_function('__truediv__')
139
-
140
- joinpath = _bind_function('joinpath')
141
- is_reserved = _bind_function('is_reserved')
142
- match = _bind_function('match')
143
- relative_to = _bind_function('relative_to')
144
- with_name = _bind_function('with_name')
145
- with_suffix = _bind_function('with_suffix')
146
- with_stem = _bind_function('with_stem')
147
- is_absolute = _bind_function('is_absolute')
148
- is_mount = _bind_function('is_mount')
149
- abspath = _bind_function('abspath')
150
- realpath = _bind_function('realpath')
151
- relpath = _bind_function('relpath')
152
- iterdir = _bind_function('iterdir')
153
- cwd = _bind_function('cwd')
154
- home = _bind_function('home')
155
- expanduser = _bind_function('expanduser')
156
- resolve = _bind_function('resolve')
157
- chmod = _bind_function('chmod')
158
- lchmod = _bind_function('lchmod')
159
- group = _bind_function('group')
160
- is_socket = _bind_function('is_socket')
161
- is_fifo = _bind_function('is_fifo')
162
- is_block_device = _bind_function('is_block_device')
163
- is_char_device = _bind_function('is_char_device')
164
- owner = _bind_function('owner')
165
- absolute = _bind_function('absolute')
166
- rmdir = _bind_function('rmdir')
167
- is_relative_to = _bind_function('is_relative_to')
168
- read_bytes = _bind_function('read_bytes')
169
- read_text = _bind_function('read_text')
170
- rglob = _bind_function('rglob')
171
- samefile = _bind_function('samefile')
172
- write_bytes = _bind_function('write_bytes')
173
- write_text = _bind_function('write_text')
174
- utime = _bind_function('utime')
175
-
176
- drive = _bind_property('drive')
177
- root = _bind_property('root')
178
- anchor = _bind_property('anchor')
179
- parts = _bind_property('parts')
180
- parents = _bind_property('parents')
181
- parent = _bind_property('parent')
182
- name = _bind_property('name')
183
- suffix = _bind_property('suffix')
184
- suffixes = _bind_property('suffixes')
185
- stem = _bind_property('stem')
126
+ as_uri = _bind_function("as_uri")
127
+ as_posix = _bind_function("as_posix")
128
+ __lt__ = _bind_function("__lt__")
129
+ __le__ = _bind_function("__le__")
130
+ __gt__ = _bind_function("__gt__")
131
+ __ge__ = _bind_function("__ge__")
132
+ __fspath__ = _bind_function("__fspath__")
133
+ __truediv__ = _bind_function("__truediv__")
134
+
135
+ joinpath = _bind_function("joinpath")
136
+ is_reserved = _bind_function("is_reserved")
137
+ match = _bind_function("match")
138
+ relative_to = _bind_function("relative_to")
139
+ with_name = _bind_function("with_name")
140
+ with_suffix = _bind_function("with_suffix")
141
+ with_stem = _bind_function("with_stem")
142
+ is_absolute = _bind_function("is_absolute")
143
+ is_mount = _bind_function("is_mount")
144
+ abspath = _bind_function("abspath")
145
+ realpath = _bind_function("realpath")
146
+ relpath = _bind_function("relpath")
147
+ iterdir = _bind_function("iterdir")
148
+ cwd = _bind_function("cwd")
149
+ home = _bind_function("home")
150
+ expanduser = _bind_function("expanduser")
151
+ resolve = _bind_function("resolve")
152
+ chmod = _bind_function("chmod")
153
+ lchmod = _bind_function("lchmod")
154
+ group = _bind_function("group")
155
+ is_socket = _bind_function("is_socket")
156
+ is_fifo = _bind_function("is_fifo")
157
+ is_block_device = _bind_function("is_block_device")
158
+ is_char_device = _bind_function("is_char_device")
159
+ owner = _bind_function("owner")
160
+ absolute = _bind_function("absolute")
161
+ rmdir = _bind_function("rmdir")
162
+ is_relative_to = _bind_function("is_relative_to")
163
+ read_bytes = _bind_function("read_bytes")
164
+ read_text = _bind_function("read_text")
165
+ rglob = _bind_function("rglob")
166
+ samefile = _bind_function("samefile")
167
+ write_bytes = _bind_function("write_bytes")
168
+ write_text = _bind_function("write_text")
169
+ utime = _bind_function("utime")
170
+
171
+ drive = _bind_property("drive")
172
+ root = _bind_property("root")
173
+ anchor = _bind_property("anchor")
174
+ parts = _bind_property("parts")
175
+ parents = _bind_property("parents")
176
+ parent = _bind_property("parent")
177
+ name = _bind_property("name")
178
+ suffix = _bind_property("suffix")
179
+ suffixes = _bind_property("suffixes")
180
+ stem = _bind_property("stem")
186
181
 
187
182
 
188
183
  def get_traditional_path(path: PathLike) -> str:
megfile/stdio.py CHANGED
@@ -4,18 +4,19 @@ from megfile.interfaces import PathLike
4
4
  from megfile.stdio_path import StdioPath, is_stdio
5
5
 
6
6
  __all__ = [
7
- 'is_stdio',
8
- 'stdio_open',
7
+ "is_stdio",
8
+ "stdio_open",
9
9
  ]
10
10
 
11
11
 
12
12
  def stdio_open(
13
- path: PathLike,
14
- mode: str = 'rb',
15
- encoding: Optional[str] = None,
16
- errors: Optional[str] = None,
17
- **kwargs) -> IO:
18
- '''Used to read or write stdio
13
+ path: PathLike,
14
+ mode: str = "rb",
15
+ encoding: Optional[str] = None,
16
+ errors: Optional[str] = None,
17
+ **kwargs,
18
+ ) -> IO:
19
+ """Used to read or write stdio
19
20
 
20
21
  .. note ::
21
22
 
@@ -24,5 +25,5 @@ def stdio_open(
24
25
  :param path: Given path
25
26
  :param mode: Only supports 'rb' and 'wb' now
26
27
  :return: STDReader, STDWriter
27
- '''
28
+ """
28
29
  return StdioPath(path).open(mode, encoding, errors) # pyre-ignore[6]
megfile/stdio_path.py CHANGED
@@ -8,14 +8,11 @@ from megfile.lib.url import get_url_scheme
8
8
  from megfile.smart_path import SmartPath
9
9
  from megfile.utils import get_binary_mode
10
10
 
11
- __all__ = [
12
- "StdioPath",
13
- "is_stdio",
14
- ]
11
+ __all__ = ["StdioPath", "is_stdio"]
15
12
 
16
13
 
17
14
  def is_stdio(path: PathLike) -> bool:
18
- '''stdio scheme definition: stdio://-
15
+ """stdio scheme definition: stdio://-
19
16
 
20
17
  .. note ::
21
18
 
@@ -23,23 +20,22 @@ def is_stdio(path: PathLike) -> bool:
23
20
 
24
21
  :param path: Path to be tested
25
22
  :returns: True of a path is stdio url, else False
26
- '''
23
+ """
27
24
 
28
25
  path = fspath(path)
29
- if not isinstance(path, str) or not path.startswith('stdio://'):
26
+ if not isinstance(path, str) or not path.startswith("stdio://"):
30
27
  return False
31
28
 
32
29
  scheme = get_url_scheme(path)
33
- return scheme == 'stdio'
30
+ return scheme == "stdio"
34
31
 
35
32
 
36
33
  @SmartPath.register
37
34
  class StdioPath(BaseURIPath):
38
-
39
35
  protocol = "stdio"
40
36
 
41
- def _open(self, mode: str = 'rb') -> Union[STDReader, STDWriter]:
42
- '''Used to read or write stdio
37
+ def _open(self, mode: str = "rb") -> Union[STDReader, STDWriter]:
38
+ """Used to read or write stdio
43
39
 
44
40
  .. note ::
45
41
 
@@ -48,37 +44,39 @@ class StdioPath(BaseURIPath):
48
44
  :param path: stdio path, stdio://- or stdio://0 stdio://1 stdio://2
49
45
  :param mode: Only supports 'rb' and 'wb' now
50
46
  :return: STDReader, STDWriter
51
- '''
47
+ """
52
48
 
53
- if mode not in ('rb', 'wb', 'rt', 'wt', 'r', 'w'):
54
- raise ValueError('unacceptable mode: %r' % mode)
49
+ if mode not in ("rb", "wb", "rt", "wt", "r", "w"):
50
+ raise ValueError("unacceptable mode: %r" % mode)
55
51
 
56
52
  mode = get_binary_mode(mode)
57
53
 
58
- if self.path_with_protocol not in ('stdio://-', 'stdio://0',
59
- 'stdio://1', 'stdio://2'):
60
- raise ValueError('unacceptable path: %r' % self.path_with_protocol)
54
+ if self.path_with_protocol not in (
55
+ "stdio://-",
56
+ "stdio://0",
57
+ "stdio://1",
58
+ "stdio://2",
59
+ ):
60
+ raise ValueError("unacceptable path: %r" % self.path_with_protocol)
61
61
 
62
- if self.path_with_protocol in ('stdio://1',
63
- 'stdio://2') and 'r' in mode:
64
- raise ValueError(
65
- 'cannot open for reading: %r' % self.path_with_protocol)
62
+ if self.path_with_protocol in ("stdio://1", "stdio://2") and "r" in mode:
63
+ raise ValueError("cannot open for reading: %r" % self.path_with_protocol)
66
64
 
67
- if self.path_with_protocol == 'stdio://0' and 'w' in mode:
68
- raise ValueError(
69
- 'cannot open for writing: %r' % self.path_with_protocol)
65
+ if self.path_with_protocol == "stdio://0" and "w" in mode:
66
+ raise ValueError("cannot open for writing: %r" % self.path_with_protocol)
70
67
 
71
- if 'r' in mode:
68
+ if "r" in mode:
72
69
  return STDReader(mode)
73
70
  return STDWriter(self.path_with_protocol, mode)
74
71
 
75
72
  def open(
76
- self,
77
- mode: str = 'rb',
78
- encoding: Optional[str] = None,
79
- errors: Optional[str] = None,
80
- **kwargs) -> IO:
81
- '''Used to read or write stdio
73
+ self,
74
+ mode: str = "rb",
75
+ encoding: Optional[str] = None,
76
+ errors: Optional[str] = None,
77
+ **kwargs,
78
+ ) -> IO:
79
+ """Used to read or write stdio
82
80
 
83
81
  .. note ::
84
82
 
@@ -86,12 +84,11 @@ class StdioPath(BaseURIPath):
86
84
 
87
85
  :param mode: Only supports 'rb' and 'wb' now
88
86
  :return: STDReader, STDWriter
89
- '''
87
+ """
90
88
  fileobj = self._open(mode)
91
89
 
92
- if 'b' not in mode:
93
- fileobj = io.TextIOWrapper(
94
- fileobj, encoding=encoding, errors=errors)
90
+ if "b" not in mode:
91
+ fileobj = io.TextIOWrapper(fileobj, encoding=encoding, errors=errors)
95
92
  fileobj.mode = mode # pyre-ignore[41]
96
93
 
97
94
  return fileobj