megfile 3.1.0.post2__py3-none-any.whl → 3.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. docs/conf.py +2 -4
  2. megfile/__init__.py +394 -203
  3. megfile/cli.py +258 -238
  4. megfile/config.py +25 -21
  5. megfile/errors.py +124 -114
  6. megfile/fs.py +174 -140
  7. megfile/fs_path.py +462 -354
  8. megfile/hdfs.py +133 -101
  9. megfile/hdfs_path.py +290 -236
  10. megfile/http.py +15 -14
  11. megfile/http_path.py +111 -107
  12. megfile/interfaces.py +70 -65
  13. megfile/lib/base_prefetch_reader.py +94 -69
  14. megfile/lib/combine_reader.py +13 -12
  15. megfile/lib/compare.py +17 -13
  16. megfile/lib/compat.py +1 -5
  17. megfile/lib/fnmatch.py +29 -30
  18. megfile/lib/glob.py +54 -55
  19. megfile/lib/hdfs_prefetch_reader.py +40 -25
  20. megfile/lib/hdfs_tools.py +1 -3
  21. megfile/lib/http_prefetch_reader.py +69 -46
  22. megfile/lib/joinpath.py +5 -5
  23. megfile/lib/lazy_handler.py +7 -3
  24. megfile/lib/s3_buffered_writer.py +61 -52
  25. megfile/lib/s3_cached_handler.py +14 -13
  26. megfile/lib/s3_limited_seekable_writer.py +38 -28
  27. megfile/lib/s3_memory_handler.py +35 -29
  28. megfile/lib/s3_pipe_handler.py +25 -24
  29. megfile/lib/s3_prefetch_reader.py +71 -52
  30. megfile/lib/s3_share_cache_reader.py +37 -24
  31. megfile/lib/shadow_handler.py +8 -3
  32. megfile/lib/stdio_handler.py +9 -8
  33. megfile/lib/url.py +3 -3
  34. megfile/pathlike.py +259 -228
  35. megfile/s3.py +220 -153
  36. megfile/s3_path.py +977 -802
  37. megfile/sftp.py +190 -156
  38. megfile/sftp_path.py +540 -450
  39. megfile/smart.py +397 -330
  40. megfile/smart_path.py +100 -105
  41. megfile/stdio.py +10 -9
  42. megfile/stdio_path.py +32 -35
  43. megfile/utils/__init__.py +75 -54
  44. megfile/utils/mutex.py +11 -14
  45. megfile/version.py +1 -1
  46. {megfile-3.1.0.post2.dist-info → megfile-3.1.2.dist-info}/METADATA +5 -8
  47. megfile-3.1.2.dist-info/RECORD +55 -0
  48. {megfile-3.1.0.post2.dist-info → megfile-3.1.2.dist-info}/WHEEL +1 -1
  49. scripts/convert_results_to_sarif.py +45 -78
  50. scripts/generate_file.py +140 -64
  51. megfile-3.1.0.post2.dist-info/RECORD +0 -55
  52. {megfile-3.1.0.post2.dist-info → megfile-3.1.2.dist-info}/LICENSE +0 -0
  53. {megfile-3.1.0.post2.dist-info → megfile-3.1.2.dist-info}/LICENSE.pyre +0 -0
  54. {megfile-3.1.0.post2.dist-info → megfile-3.1.2.dist-info}/entry_points.txt +0 -0
  55. {megfile-3.1.0.post2.dist-info → megfile-3.1.2.dist-info}/top_level.txt +0 -0
megfile/lib/combine_reader.py CHANGED
@@ -5,11 +5,10 @@ from typing import IO, AnyStr, List, Optional, Union
 from megfile.interfaces import Readable, Seekable
 from megfile.utils import get_content_size, get_mode, get_name, is_readable
 
-NEWLINE = ord('\n')
+NEWLINE = ord("\n")
 
 
 class CombineReader(Readable, Seekable):
-
     def __init__(self, file_objects: List[IO], name: str):
         self._file_objects = file_objects
         self._blocks_sizes = []
@@ -19,14 +18,15 @@ class CombineReader(Readable, Seekable):
         self._mode = None
         for file_object in self._file_objects:
             if not is_readable(file_object):
-                raise IOError('not readable: %r' % get_name(file_object))
+                raise IOError("not readable: %r" % get_name(file_object))
             mode = get_mode(file_object)
             if self._mode is None:
                 self._mode = mode
             if self._mode != mode:
                 raise IOError(
-                    'inconsistent mode: %r, expected: %r, got: %r' %
-                    (get_name(file_object), self._mode, mode))
+                    "inconsistent mode: %r, expected: %r, got: %r"
+                    % (get_name(file_object), self._mode, mode)
+                )
             self._blocks_sizes.append(self._content_size)
             self._content_size += get_content_size(file_object)
         self._blocks_sizes.append(self._content_size)
@@ -36,7 +36,7 @@ class CombineReader(Readable, Seekable):
         for index, size in enumerate(self._blocks_sizes):
             if self._offset < size:
                 return index - 1, self._offset - self._blocks_sizes[index - 1]
-        raise IOError('offset out of range: %d' % self._offset)
+        raise IOError("offset out of range: %d" % self._offset)
 
     @property
     def name(self) -> str:
@@ -50,12 +50,12 @@ class CombineReader(Readable, Seekable):
         return self._offset
 
     def _empty_bytes(self) -> AnyStr:  # pyre-ignore[34]
-        if 'b' in self._mode:
-            return b''  # pyre-ignore[7]
-        return ''  # pyre-ignore[7]
+        if "b" in self._mode:
+            return b""  # pyre-ignore[7]
+        return ""  # pyre-ignore[7]
 
     def _empty_buffer(self) -> Union[BytesIO, StringIO]:
-        if 'b' in self._mode:
+        if "b" in self._mode:
             return BytesIO()
         return StringIO()
 
@@ -99,6 +99,7 @@ class CombineReader(Readable, Seekable):
         return buffer.getvalue()  # pyre-ignore[7]
 
     def seek(self, offset: int, whence: int = os.SEEK_SET) -> int:
+        offset = int(offset)  # user maybe put offset with 'numpy.uint64' type
         if whence == os.SEEK_SET:
             target_offset = offset
         elif whence == os.SEEK_CUR:
@@ -106,10 +107,10 @@ class CombineReader(Readable, Seekable):
         elif whence == os.SEEK_END:
             target_offset = self._content_size + offset
         else:
-            raise ValueError('invalid whence: %r' % whence)
+            raise ValueError("invalid whence: %r" % whence)
 
         if target_offset < 0:
-            raise ValueError('negative seek value %r' % target_offset)
+            raise ValueError("negative seek value %r" % target_offset)
 
         self._offset = target_offset
         return self._offset
megfile/lib/compare.py CHANGED
@@ -5,17 +5,19 @@ from megfile.pathlike import StatResult
 
 
 def get_sync_type(src_protocol, dst_protocol):
-    if src_protocol == 's3' and dst_protocol != 's3':
-        return 'download'
-    elif src_protocol != 's3' and dst_protocol == 's3':
-        return 'upload'
+    if src_protocol == "s3" and dst_protocol != "s3":
+        return "download"
+    elif src_protocol != "s3" and dst_protocol == "s3":
+        return "upload"
     else:
-        return 'copy'
+        return "copy"
 
 
 def compare_time(
-        src_stat: Union[StatResult, stat_result],
-        dest_stat: Union[StatResult, stat_result], sync_type: str):
+    src_stat: Union[StatResult, stat_result],
+    dest_stat: Union[StatResult, stat_result],
+    sync_type: str,
+):
     """
     :returns: True if the file does not need updating based on time of
         last modification and type of operation.
@@ -35,7 +37,6 @@
         # at the source location.
         return False
     elif sync_type == "download":
-
         if delta <= 0:
             return True
         else:
@@ -45,14 +46,17 @@
 
 
 def is_same_file(
-        src_stat: Union[StatResult, stat_result],
-        dest_stat: Union[StatResult, stat_result], sync_type: str):
+    src_stat: Union[StatResult, stat_result],
+    dest_stat: Union[StatResult, stat_result],
+    sync_type: str,
+):
     """
-    Determines whether or not the source and destination files should be synced based on a comparison of their size and last modified time.
+    Determines whether or not the source and destination files should be synced based on
+    a comparison of their size and last modified time.
 
-    :param src_stat: A Union[StatResult, stat_result] object representing the source file to be compared.
+    :param src_stat: A object representing the source file to be compared.
     :type src_stat: Union[StatResult, stat_result]
-    :param dest_stat: A Union[StatResult, stat_result] object representing the destination file to be compared.
+    :param dest_stat: A object representing the destination file to be compared.
     :type dest_stat: Union[StatResult, stat_result]
 
     :return: A boolean value indicating whether or not the files should be synced.
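For orientation, a small sketch of how these helpers are meant to be called; the local file names are hypothetical:

import os

from megfile.lib.compare import get_sync_type, is_same_file

print(get_sync_type("fs", "s3"))  # "upload": destination is s3, source is not

# is_same_file accepts both os.stat_result and megfile's StatResult;
# it returns True when size and mtime say no sync is needed:
src_stat = os.stat("src.txt")
dest_stat = os.stat("dst.txt")
print(is_same_file(src_stat, dest_stat, "copy"))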
megfile/lib/compat.py CHANGED
@@ -1,11 +1,7 @@
 import os
 from os import PathLike
 
-__all__ = [
-    'PathLike',
-    'fspath',
-    'copytree',
-]
+__all__ = ["PathLike", "fspath"]
 
 
 def fspath(path) -> str:
megfile/lib/fnmatch.py CHANGED
@@ -9,6 +9,7 @@ expression. They cache the compiled regular expressions for speed.
 The function translate(PATTERN) returns a regular expression
 corresponding to PATTERN. (It does not compile it.)
 """
+
 """Compared with the standard library, syntax '{seq1,seq2}' is supported"""
 
 import functools
@@ -67,7 +68,7 @@ def filter(names: List[str], pat: str) -> List[str]:
 
 
 def _compat(res: str) -> str:
-    return r'(?s:%s)\Z' % res
+    return r"(?s:%s)\Z" % res
 
 
 def translate(pat: str) -> str:
@@ -77,58 +78,56 @@ def translate(pat: str) -> str:
     """
 
     i, n = 0, len(pat)
-    res = ''
+    res = ""
     while i < n:
         c = pat[i]
         i = i + 1
-        if c == '*':
+        if c == "*":
             j = i
-            while j < n and pat[j] == '*':
+            while j < n and pat[j] == "*":
                 j = j + 1
             if j > i:
-                if (j < n and pat[j] == '/') and \
-                        (i <= 1 or pat[i - 2] == '/'):
+                if (j < n and pat[j] == "/") and (i <= 1 or pat[i - 2] == "/"):
                     # hit /**/ instead of /seq**/
                     j = j + 1
-                    res = res + r'(.*/)?'
+                    res = res + r"(.*/)?"
                 else:
-                    res = res + r'.*'
+                    res = res + r".*"
             else:
-                res = res + r'[^/]*'
+                res = res + r"[^/]*"
             i = j
-        elif c == '?':
-            res = res + r'.'
-        elif c == '[':
+        elif c == "?":
+            res = res + r"."
+        elif c == "[":
             j = i
-            if j < n and pat[j] == '!':
+            if j < n and pat[j] == "!":
                 j = j + 1
-            if j < n and pat[j] == ']':
+            if j < n and pat[j] == "]":
                 j = j + 1
-            while j < n and pat[j] != ']':
+            while j < n and pat[j] != "]":
                 j = j + 1
             if j >= n:
-                res = res + r'\['
+                res = res + r"\["
             else:
-                stuff = pat[i:j].replace('\\', r'\\')
+                stuff = pat[i:j].replace("\\", r"\\")
                 i = j + 1
-                if stuff[0] == '!':
-                    stuff = r'^' + stuff[1:]
-                elif stuff[0] == '^':
-                    stuff = '\\' + stuff
-                res = r'%s[%s]' % (res, stuff)
-        elif c == '{':
+                if stuff[0] == "!":
+                    stuff = r"^" + stuff[1:]
+                elif stuff[0] == "^":
+                    stuff = "\\" + stuff
+                res = r"%s[%s]" % (res, stuff)
+        elif c == "{":
             j = i
-            if j < n and pat[j] == '}':
+            if j < n and pat[j] == "}":
                 j = j + 1
-            while j < n and pat[j] != '}':
+            while j < n and pat[j] != "}":
                 j = j + 1
             if j >= n:
-                res = res + r'\{'
+                res = res + r"\{"
             else:
-                stuff = pat[i:j].replace('\\', r'\\')
-                stuff = r'|'.join(
-                    map(re.escape, stuff.split(',')))  # pyre-ignore[6]
-                res = r'%s(%s)' % (res, stuff)
+                stuff = pat[i:j].replace("\\", r"\\")
+                stuff = r"|".join(map(re.escape, stuff.split(",")))  # pyre-ignore[6]
+                res = r"%s(%s)" % (res, stuff)
             i = j + 1
         else:
             res = res + re.escape(c)
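As the module docstring notes, this `translate` extends the standard library's fnmatch with `{seq1,seq2}` alternation. A quick check, with the expected regex inferred from the translation rules above:

import re

from megfile.lib.fnmatch import translate

regex = translate("*.{jpg,png}")
print(regex)  # expected: (?s:[^/]*\.(jpg|png))\Z

for name in ("a.jpg", "b.png", "c.gif"):
    print(name, bool(re.match(regex, name)))  # True, True, False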
megfile/lib/glob.py CHANGED
@@ -1,4 +1,5 @@
 """Filename globbing utility."""
+
 """remove once py35 is dead"""
 
 import os
@@ -10,16 +11,16 @@ from typing import Iterator, List, Tuple
 from megfile.lib import fnmatch
 
 # Python 3.5+ Compatible
-'''
+"""
 class FSFunc(NamedTuple):
     exists: Callable[[str], bool]
     isdir: Callable[[str], bool]
     scandir: Callable[[str], Iterator[Tuple[str, bool]]]  # name, isdir
 
 in Python 3.6+
-'''
+"""
 
-FSFunc = NamedTuple('FSFunc', ['exists', 'isdir', 'scandir'])
+FSFunc = NamedTuple("FSFunc", ["exists", "isdir", "scandir"])
 
 
 def _exists(path: str) -> bool:
@@ -39,10 +40,8 @@ DEFAULT_FILESYSTEM_FUNC = FSFunc(_exists, _isdir, _scandir)
 
 
 def glob(
-        pathname: str,
-        *,
-        recursive: bool = False,
-        fs: FSFunc = DEFAULT_FILESYSTEM_FUNC) -> List[str]:
+    pathname: str, *, recursive: bool = False, fs: FSFunc = DEFAULT_FILESYSTEM_FUNC
+) -> List[str]:
     """Return a list of paths matching a pathname pattern.
 
     The pattern may contain simple shell-style wildcards a la
@@ -57,10 +56,8 @@
 
 
 def iglob(
-        pathname: str,
-        *,
-        recursive: bool = False,
-        fs: FSFunc = DEFAULT_FILESYSTEM_FUNC) -> Iterator[str]:
+    pathname: str, *, recursive: bool = False, fs: FSFunc = DEFAULT_FILESYSTEM_FUNC
+) -> Iterator[str]:
     """Return an iterator which yields the paths matching a pathname pattern.
 
     The pattern may contain simple shell-style wildcards a la
@@ -74,21 +71,24 @@
     it = _iglob(pathname, recursive, False, fs)
     if recursive and _isrecursive(pathname):
         s = next(it)  # skip empty string
-        assert not s
+        if s:
+            # TODO: replace AssertionError with OSError in 4.0.0
+            raise AssertionError("iglob with recursive=True error")
     return it
 
 
-def _iglob(pathname: str, recursive: bool, dironly: bool,
-           fs: FSFunc) -> Iterator[str]:
-    if '://' in pathname:
-        protocol, path_without_protocol = pathname.split('://', 1)
+def _iglob(pathname: str, recursive: bool, dironly: bool, fs: FSFunc) -> Iterator[str]:
+    if "://" in pathname:
+        protocol, path_without_protocol = pathname.split("://", 1)
     else:
         protocol, path_without_protocol = "", pathname
     dirname, basename = os.path.split(path_without_protocol)
     if protocol:
         dirname = "://".join([protocol, dirname])
     if not has_magic(pathname):
-        assert not dironly
+        if dironly:
+            # TODO: replace AssertionError with OSError in 4.0.0
+            raise AssertionError("can't use dironly with non-magic patterns in _iglob")
     if basename:
         if fs.exists(pathname):
             yield pathname
@@ -148,9 +148,10 @@ def _glob0(dirname: str, basename: str, dironly: bool, fs: FSFunc) -> List[str]:
 
 # This helper function recursively yields relative pathnames inside a literal
 # directory.
-def _glob2(dirname: str, pattern: str, dironly: bool,
-           fs: FSFunc) -> Iterator[str]:
-    assert _isrecursive(pattern)
+def _glob2(dirname: str, pattern: str, dironly: bool, fs: FSFunc) -> Iterator[str]:
+    if not _isrecursive(pattern):
+        # TODO: replace AssertionError with OSError in 4.0.0
+        raise AssertionError("error call '_glob2' with non-glob pattern")
     yield pattern[:0]
     yield from _rlistdir(dirname, dironly, fs)
 
@@ -187,10 +188,10 @@ def _rlistdir(dirname: str, dironly: bool, fs: FSFunc) -> Iterator[str]:
                 yield os.path.join(x, y)
 
 
-magic_check = re.compile(r'([*?[{])')
-magic_decheck = re.compile(r'\[(.)\]')
-brace_check = re.compile(r'(\{.*\})')
-unbrace_check = re.compile(r'([*?[])')
+magic_check = re.compile(r"([*?[{])")
+magic_decheck = re.compile(r"\[(.)\]")
+brace_check = re.compile(r"(\{.*\})")
+unbrace_check = re.compile(r"([*?[])")
 
 
 def has_magic(s: str) -> bool:
@@ -204,46 +205,44 @@ def has_magic_ignore_brace(s: str) -> bool:
 
 
 def _ishidden(path: str) -> bool:
-    return path[0] == '.'
+    return path[0] == "."
 
 
 def _isrecursive(pattern: str) -> bool:
-    return pattern == '**'
+    return pattern == "**"
 
 
 def escape(pathname):
-    """Escape all special characters.
-    """
+    """Escape all special characters."""
     # Escaping is done by wrapping any of "*?[" between square brackets.
     # Metacharacters do not work in the drive part and shouldn't be escaped.
     drive, pathname = os.path.splitdrive(pathname)
-    pathname = magic_check.sub(r'[\1]', pathname)
+    pathname = magic_check.sub(r"[\1]", pathname)
     return drive + pathname
 
 
 def unescape(pathname):
-    """Unescape all special characters.
-    """
+    """Unescape all special characters."""
     drive, pathname = os.path.splitdrive(pathname)
-    pathname = magic_decheck.sub(r'\1', pathname)
+    pathname = magic_decheck.sub(r"\1", pathname)
     return drive + pathname
 
 
-def _find_suffix(path_list: List[str], prefix: str,
-                 split_sign: str) -> List[str]:
+def _find_suffix(path_list: List[str], prefix: str, split_sign: str) -> List[str]:
     suffix = []
     temp_path_list = []
     for path_index in range(0, len(path_list)):
-        temp_path_list.append(
-            path_list[path_index][len(prefix):].split(split_sign))
+        temp_path_list.append(path_list[path_index][len(prefix) :].split(split_sign))
     i = 0
     while True:
         i = i - 1
         if len(temp_path_list[0]) <= abs(i):
             return suffix
         for path_index in range(1, len(path_list)):
-            if len(temp_path_list[path_index]) <= abs(
-                    i) or temp_path_list[path_index][i] != temp_path_list[0][i]:
+            if (
+                len(temp_path_list[path_index]) <= abs(i)
+                or temp_path_list[path_index][i] != temp_path_list[0][i]
+            ):
                 return suffix
             else:
                 suffix.insert(0, temp_path_list[0][i])
@@ -253,8 +252,8 @@ def globlize(path_list: List[str]) -> str:
     path_list = sorted(path_list)
     if path_list[0] == path_list[-1]:
         return path_list[0]
-    first_path = path_list[0].split('/')
-    last_path = path_list[-1].split('/')
+    first_path = path_list[0].split("/")
+    last_path = path_list[-1].split("/")
     prefix = []
 
     for i in range(0, min(len(first_path), len(last_path))):
@@ -265,46 +264,46 @@
     if len(prefix) == 0:
         prefix = ""
     else:
-        prefix = '/'.join(prefix) + '/'
-    suffix = _find_suffix(path_list, prefix, '/')
+        prefix = "/".join(prefix) + "/"
+    suffix = _find_suffix(path_list, prefix, "/")
 
     if len(suffix) == 0:
-        suffix = _find_suffix(path_list, prefix, '.')
+        suffix = _find_suffix(path_list, prefix, ".")
         if len(suffix) == 0:
             suffix = ""
         else:
-            suffix = '.' + '.'.join(suffix)
+            suffix = "." + ".".join(suffix)
     else:
-        suffix = '/' + '/'.join(suffix)
+        suffix = "/" + "/".join(suffix)
 
     path = []
     for i in path_list:
-        if i[len(prefix):len(i) - len(suffix)] not in path:
-            path.append(unescape(i[len(prefix):len(i) - len(suffix)]))
-    return prefix + "{" + ','.join(path) + "}" + suffix
+        if i[len(prefix) : len(i) - len(suffix)] not in path:
+            path.append(unescape(i[len(prefix) : len(i) - len(suffix)]))
+    return prefix + "{" + ",".join(path) + "}" + suffix
 
 
 def ungloblize(glob: str) -> List[str]:
     path_list = [glob]
     while True:
         temp_path = path_list[0]
-        begin = temp_path.find('{')
-        end = temp_path.find('}', begin)
+        begin = temp_path.find("{")
+        end = temp_path.find("}", begin)
         if end == -1:
             break
         path_list.pop(0)
-        subpath_list = temp_path[begin + 1:end].split(',')
+        subpath_list = temp_path[begin + 1 : end].split(",")
         for subpath in subpath_list:
-            path = temp_path[:begin] + escape(subpath) + temp_path[end + 1:]
+            path = temp_path[:begin] + escape(subpath) + temp_path[end + 1 :]
             path_list.append(path)
     return path_list
 
 
 def get_non_glob_dir(glob: str):
     root_dir = []
-    if glob.startswith('/'):
-        root_dir.append('/')
-    for name in glob.split('/'):
+    if glob.startswith("/"):
+        root_dir.append("/")
+    for name in glob.split("/"):
         if has_magic(name):
             break
         root_dir.append(name)
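The brace helpers round-trip as follows; the expected values are traced from the code above, and the paths themselves are made up:

from megfile.lib.glob import globlize, ungloblize

paths = ["data/1.jpg", "data/2.jpg"]
pattern = globlize(paths)  # common prefix/suffix folded into one brace group
print(pattern)             # data/{1,2}.jpg

# ungloblize expands every {...} group back into concrete paths:
print(ungloblize(pattern))  # ['data/1.jpg', 'data/2.jpg']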
megfile/lib/hdfs_prefetch_reader.py CHANGED
@@ -1,28 +1,38 @@
 from io import BytesIO
 from typing import Optional
 
-from megfile.config import DEFAULT_BLOCK_CAPACITY, DEFAULT_BLOCK_SIZE, HDFS_MAX_RETRY_TIMES
+from megfile.config import (
+    DEFAULT_BLOCK_CAPACITY,
+    DEFAULT_BLOCK_SIZE,
+    HDFS_MAX_RETRY_TIMES,
+)
 from megfile.errors import raise_hdfs_error
 from megfile.lib.base_prefetch_reader import BasePrefetchReader
 
 
 class HdfsPrefetchReader(BasePrefetchReader):
-    '''
-    Reader to fast read the hdfs content. This will divide the file content into equal parts of block_size size, and will use LRU to cache at most block_capacity blocks in memory.
-    open(), seek() and read() will trigger prefetch read. The prefetch will cached block_forward blocks of data from offset position (the position after reading if the called function is read).
-    '''
+    """
+    Reader to fast read the hdfs content. This will divide the file content into equal
+    parts of block_size size, and will use LRU to cache at most block_capacity blocks
+    in memory.
+
+    open(), seek() and read() will trigger prefetch read. The prefetch will cached
+    block_forward blocks of data from offset position (the position after reading
+    if the called function is read).
+    """
 
     def __init__(
-            self,
-            hdfs_path: str,
-            *,
-            client,
-            block_size: int = DEFAULT_BLOCK_SIZE,
-            block_capacity: int = DEFAULT_BLOCK_CAPACITY,
-            block_forward: Optional[int] = None,
-            max_retries: int = HDFS_MAX_RETRY_TIMES,
-            max_workers: Optional[int] = None,
-            profile_name: Optional[str] = None):
+        self,
+        hdfs_path: str,
+        *,
+        client,
+        block_size: int = DEFAULT_BLOCK_SIZE,
+        block_capacity: int = DEFAULT_BLOCK_CAPACITY,
+        block_forward: Optional[int] = None,
+        max_retries: int = HDFS_MAX_RETRY_TIMES,
+        max_workers: Optional[int] = None,
+        profile_name: Optional[str] = None,
+    ):
         self._path = hdfs_path
         self._client = client
         self._profile_name = profile_name
@@ -32,22 +42,27 @@ class HdfsPrefetchReader(BasePrefetchReader):
             block_capacity=block_capacity,
             block_forward=block_forward,
             max_retries=max_retries,
-            max_workers=max_workers)
+            max_workers=max_workers,
+        )
 
     def _get_content_size(self):
         with raise_hdfs_error(self._path):
-            return self._client.status(self._path)['length']
+            return self._client.status(self._path)["length"]
 
     @property
     def name(self) -> str:
-        return 'hdfs%s://%s' % (
-            f"+{self._profile_name}" if self._profile_name else "", self._path)
+        return "hdfs%s://%s" % (
+            f"+{self._profile_name}" if self._profile_name else "",
+            self._path,
+        )
 
     def _fetch_response(
-            self,
-            start: Optional[int] = None,
-            end: Optional[int] = None) -> dict:
+        self, start: Optional[int] = None, end: Optional[int] = None
+    ) -> dict:
         with raise_hdfs_error(self.name):
-            with self._client.read(self._path, offset=start or 0, length=end -
-                                   start if start and end else None) as f:
-                return {'Body': BytesIO(f.read())}
+            with self._client.read(
+                self._path,
+                offset=start or 0,
+                length=end - start if start and end else None,
+            ) as f:
+                return {"Body": BytesIO(f.read())}
megfile/lib/hdfs_tools.py CHANGED
@@ -3,9 +3,7 @@ try:
 except ImportError:  # pragma: no cover
     hdfs_api = None
 
-__all__ = [
-    'hdfs_api',
-]
+__all__ = ["hdfs_api"]
 
 if hdfs_api:
     _to_error = hdfs_api.client._to_error