megfile 3.1.1__py3-none-any.whl → 3.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. docs/conf.py +2 -4
  2. megfile/__init__.py +394 -203
  3. megfile/cli.py +258 -238
  4. megfile/config.py +25 -21
  5. megfile/errors.py +124 -114
  6. megfile/fs.py +174 -140
  7. megfile/fs_path.py +462 -354
  8. megfile/hdfs.py +133 -101
  9. megfile/hdfs_path.py +290 -236
  10. megfile/http.py +15 -14
  11. megfile/http_path.py +111 -107
  12. megfile/interfaces.py +70 -65
  13. megfile/lib/base_prefetch_reader.py +84 -65
  14. megfile/lib/combine_reader.py +12 -12
  15. megfile/lib/compare.py +17 -13
  16. megfile/lib/compat.py +1 -5
  17. megfile/lib/fnmatch.py +29 -30
  18. megfile/lib/glob.py +46 -54
  19. megfile/lib/hdfs_prefetch_reader.py +40 -25
  20. megfile/lib/hdfs_tools.py +1 -3
  21. megfile/lib/http_prefetch_reader.py +69 -46
  22. megfile/lib/joinpath.py +5 -5
  23. megfile/lib/lazy_handler.py +7 -3
  24. megfile/lib/s3_buffered_writer.py +58 -51
  25. megfile/lib/s3_cached_handler.py +13 -14
  26. megfile/lib/s3_limited_seekable_writer.py +37 -28
  27. megfile/lib/s3_memory_handler.py +34 -30
  28. megfile/lib/s3_pipe_handler.py +24 -25
  29. megfile/lib/s3_prefetch_reader.py +71 -52
  30. megfile/lib/s3_share_cache_reader.py +37 -24
  31. megfile/lib/shadow_handler.py +7 -3
  32. megfile/lib/stdio_handler.py +9 -8
  33. megfile/lib/url.py +3 -3
  34. megfile/pathlike.py +259 -228
  35. megfile/s3.py +220 -153
  36. megfile/s3_path.py +977 -802
  37. megfile/sftp.py +190 -156
  38. megfile/sftp_path.py +540 -450
  39. megfile/smart.py +397 -330
  40. megfile/smart_path.py +100 -105
  41. megfile/stdio.py +10 -9
  42. megfile/stdio_path.py +32 -35
  43. megfile/utils/__init__.py +73 -54
  44. megfile/utils/mutex.py +11 -14
  45. megfile/version.py +1 -1
  46. {megfile-3.1.1.dist-info → megfile-3.1.2.dist-info}/METADATA +5 -8
  47. megfile-3.1.2.dist-info/RECORD +55 -0
  48. {megfile-3.1.1.dist-info → megfile-3.1.2.dist-info}/WHEEL +1 -1
  49. scripts/convert_results_to_sarif.py +45 -78
  50. scripts/generate_file.py +140 -64
  51. megfile-3.1.1.dist-info/RECORD +0 -55
  52. {megfile-3.1.1.dist-info → megfile-3.1.2.dist-info}/LICENSE +0 -0
  53. {megfile-3.1.1.dist-info → megfile-3.1.2.dist-info}/LICENSE.pyre +0 -0
  54. {megfile-3.1.1.dist-info → megfile-3.1.2.dist-info}/entry_points.txt +0 -0
  55. {megfile-3.1.1.dist-info → megfile-3.1.2.dist-info}/top_level.txt +0 -0
megfile/lib/combine_reader.py CHANGED
@@ -5,11 +5,10 @@ from typing import IO, AnyStr, List, Optional, Union
 from megfile.interfaces import Readable, Seekable
 from megfile.utils import get_content_size, get_mode, get_name, is_readable
 
-NEWLINE = ord('\n')
+NEWLINE = ord("\n")
 
 
 class CombineReader(Readable, Seekable):
-
     def __init__(self, file_objects: List[IO], name: str):
         self._file_objects = file_objects
         self._blocks_sizes = []
@@ -19,14 +18,15 @@ class CombineReader(Readable, Seekable):
         self._mode = None
         for file_object in self._file_objects:
             if not is_readable(file_object):
-                raise IOError('not readable: %r' % get_name(file_object))
+                raise IOError("not readable: %r" % get_name(file_object))
             mode = get_mode(file_object)
             if self._mode is None:
                 self._mode = mode
             if self._mode != mode:
                 raise IOError(
-                    'inconsistent mode: %r, expected: %r, got: %r' %
-                    (get_name(file_object), self._mode, mode))
+                    "inconsistent mode: %r, expected: %r, got: %r"
+                    % (get_name(file_object), self._mode, mode)
+                )
             self._blocks_sizes.append(self._content_size)
             self._content_size += get_content_size(file_object)
         self._blocks_sizes.append(self._content_size)
@@ -36,7 +36,7 @@ class CombineReader(Readable, Seekable):
         for index, size in enumerate(self._blocks_sizes):
             if self._offset < size:
                 return index - 1, self._offset - self._blocks_sizes[index - 1]
-        raise IOError('offset out of range: %d' % self._offset)
+        raise IOError("offset out of range: %d" % self._offset)
 
     @property
     def name(self) -> str:
@@ -50,12 +50,12 @@ class CombineReader(Readable, Seekable):
         return self._offset
 
     def _empty_bytes(self) -> AnyStr:  # pyre-ignore[34]
-        if 'b' in self._mode:
-            return b''  # pyre-ignore[7]
-        return ''  # pyre-ignore[7]
+        if "b" in self._mode:
+            return b""  # pyre-ignore[7]
+        return ""  # pyre-ignore[7]
 
     def _empty_buffer(self) -> Union[BytesIO, StringIO]:
-        if 'b' in self._mode:
+        if "b" in self._mode:
             return BytesIO()
         return StringIO()
 
@@ -107,10 +107,10 @@ class CombineReader(Readable, Seekable):
         elif whence == os.SEEK_END:
             target_offset = self._content_size + offset
         else:
-            raise ValueError('invalid whence: %r' % whence)
+            raise ValueError("invalid whence: %r" % whence)
 
         if target_offset < 0:
-            raise ValueError('negative seek value %r' % target_offset)
+            raise ValueError("negative seek value %r" % target_offset)
 
         self._offset = target_offset
         return self._offset
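
For orientation (the hunks above only change quoting and layout): CombineReader concatenates several readable file objects into one seekable stream, and the cumulative _blocks_sizes table is what maps a global offset to a (block index, offset-within-block) pair. A minimal standalone sketch of that mapping, mirroring the lookup logic visible in the diff (the helper name locate is ours; inside the class this sits behind self._offset and self._blocks_sizes):

    from typing import List, Tuple

    def locate(blocks_sizes: List[int], offset: int) -> Tuple[int, int]:
        # blocks_sizes is cumulative: [0, size0, size0 + size1, ...]
        for index, size in enumerate(blocks_sizes):
            if offset < size:
                return index - 1, offset - blocks_sizes[index - 1]
        raise IOError("offset out of range: %d" % offset)

    # Two files of 10 and 5 bytes give the cumulative table [0, 10, 15];
    # global byte 12 is byte 2 of the second file.
    assert locate([0, 10, 15], 12) == (1, 2)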
megfile/lib/compare.py CHANGED
@@ -5,17 +5,19 @@ from megfile.pathlike import StatResult
 
 
 def get_sync_type(src_protocol, dst_protocol):
-    if src_protocol == 's3' and dst_protocol != 's3':
-        return 'download'
-    elif src_protocol != 's3' and dst_protocol == 's3':
-        return 'upload'
+    if src_protocol == "s3" and dst_protocol != "s3":
+        return "download"
+    elif src_protocol != "s3" and dst_protocol == "s3":
+        return "upload"
     else:
-        return 'copy'
+        return "copy"
 
 
 def compare_time(
-        src_stat: Union[StatResult, stat_result],
-        dest_stat: Union[StatResult, stat_result], sync_type: str):
+    src_stat: Union[StatResult, stat_result],
+    dest_stat: Union[StatResult, stat_result],
+    sync_type: str,
+):
     """
     :returns: True if the file does not need updating based on time of
         last modification and type of operation.
@@ -35,7 +37,6 @@ def compare_time(
         # at the source location.
         return False
     elif sync_type == "download":
-
         if delta <= 0:
             return True
         else:
@@ -45,14 +46,17 @@
 
 
 def is_same_file(
-        src_stat: Union[StatResult, stat_result],
-        dest_stat: Union[StatResult, stat_result], sync_type: str):
+    src_stat: Union[StatResult, stat_result],
+    dest_stat: Union[StatResult, stat_result],
+    sync_type: str,
+):
     """
-    Determines whether or not the source and destination files should be synced based on a comparison of their size and last modified time.
+    Determines whether or not the source and destination files should be synced based on
+    a comparison of their size and last modified time.
 
-    :param src_stat: A Union[StatResult, stat_result] object representing the source file to be compared.
+    :param src_stat: A object representing the source file to be compared.
     :type src_stat: Union[StatResult, stat_result]
-    :param dest_stat: A Union[StatResult, stat_result] object representing the destination file to be compared.
+    :param dest_stat: A object representing the destination file to be compared.
     :type dest_stat: Union[StatResult, stat_result]
 
    :return: A boolean value indicating whether or not the files should be synced.
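
As the hunks show, get_sync_type classifies a transfer purely by whether each protocol string equals "s3". A quick illustration (protocol names other than "s3" are arbitrary here; assumes megfile is importable):

    from megfile.lib.compare import get_sync_type

    assert get_sync_type("s3", "file") == "download"  # from s3 to anything else
    assert get_sync_type("file", "s3") == "upload"    # from anything else to s3
    assert get_sync_type("s3", "s3") == "copy"        # same protocol on both sides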
megfile/lib/compat.py CHANGED
@@ -1,11 +1,7 @@
 import os
 from os import PathLike
 
-__all__ = [
-    'PathLike',
-    'fspath',
-    'copytree',
-]
+__all__ = ["PathLike", "fspath"]
 
 
 def fspath(path) -> str:
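
Besides the quote-style change, note that 'copytree' is dropped from __all__ here. The remaining fspath helper (signature shown above) coerces a path-like object to str; a hedged usage sketch, assuming it mirrors os.fspath() for str and PathLike inputs while always returning str:

    from pathlib import PurePosixPath
    from megfile.lib.compat import fspath

    # Assumption: behaves like os.fspath() for these inputs.
    assert fspath("a/b.txt") == "a/b.txt"
    assert fspath(PurePosixPath("a/b.txt")) == "a/b.txt"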
megfile/lib/fnmatch.py CHANGED
@@ -9,6 +9,7 @@ expression. They cache the compiled regular expressions for speed.
 The function translate(PATTERN) returns a regular expression
 corresponding to PATTERN. (It does not compile it.)
 """
+
 """Compared with the standard library, syntax '{seq1,seq2}' is supported"""
 
 import functools
@@ -67,7 +68,7 @@ def filter(names: List[str], pat: str) -> List[str]:
 
 
 def _compat(res: str) -> str:
-    return r'(?s:%s)\Z' % res
+    return r"(?s:%s)\Z" % res
 
 
 def translate(pat: str) -> str:
@@ -77,58 +78,56 @@ def translate(pat: str) -> str:
     """
 
     i, n = 0, len(pat)
-    res = ''
+    res = ""
     while i < n:
         c = pat[i]
         i = i + 1
-        if c == '*':
+        if c == "*":
             j = i
-            while j < n and pat[j] == '*':
+            while j < n and pat[j] == "*":
                 j = j + 1
             if j > i:
-                if (j < n and pat[j] == '/') and \
-                        (i <= 1 or pat[i - 2] == '/'):
+                if (j < n and pat[j] == "/") and (i <= 1 or pat[i - 2] == "/"):
                     # hit /**/ instead of /seq**/
                     j = j + 1
-                    res = res + r'(.*/)?'
+                    res = res + r"(.*/)?"
                 else:
-                    res = res + r'.*'
+                    res = res + r".*"
             else:
-                res = res + r'[^/]*'
+                res = res + r"[^/]*"
             i = j
-        elif c == '?':
-            res = res + r'.'
-        elif c == '[':
+        elif c == "?":
+            res = res + r"."
+        elif c == "[":
             j = i
-            if j < n and pat[j] == '!':
+            if j < n and pat[j] == "!":
                 j = j + 1
-            if j < n and pat[j] == ']':
+            if j < n and pat[j] == "]":
                 j = j + 1
-            while j < n and pat[j] != ']':
+            while j < n and pat[j] != "]":
                 j = j + 1
             if j >= n:
-                res = res + r'\['
+                res = res + r"\["
             else:
-                stuff = pat[i:j].replace('\\', r'\\')
+                stuff = pat[i:j].replace("\\", r"\\")
                 i = j + 1
-                if stuff[0] == '!':
-                    stuff = r'^' + stuff[1:]
-                elif stuff[0] == '^':
-                    stuff = '\\' + stuff
-                res = r'%s[%s]' % (res, stuff)
-        elif c == '{':
+                if stuff[0] == "!":
+                    stuff = r"^" + stuff[1:]
+                elif stuff[0] == "^":
+                    stuff = "\\" + stuff
+                res = r"%s[%s]" % (res, stuff)
+        elif c == "{":
             j = i
-            if j < n and pat[j] == '}':
+            if j < n and pat[j] == "}":
                 j = j + 1
-            while j < n and pat[j] != '}':
+            while j < n and pat[j] != "}":
                 j = j + 1
             if j >= n:
-                res = res + r'\{'
+                res = res + r"\{"
             else:
-                stuff = pat[i:j].replace('\\', r'\\')
-                stuff = r'|'.join(
-                    map(re.escape, stuff.split(',')))  # pyre-ignore[6]
-                res = r'%s(%s)' % (res, stuff)
+                stuff = pat[i:j].replace("\\", r"\\")
+                stuff = r"|".join(map(re.escape, stuff.split(",")))  # pyre-ignore[6]
+                res = r"%s(%s)" % (res, stuff)
             i = j + 1
         else:
             res = res + re.escape(c)
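
This fnmatch variant extends the stdlib syntax with '{seq1,seq2}' alternation, translating it to a regex group (seq1|seq2), while '*' compiles to [^/]* (it cannot cross a path separator) and a bare /**/ becomes (.*/)?. A small illustration using the module's filter() (signature visible in the hunk header above); the sample names are invented, and we assume filter() preserves input order like its stdlib counterpart:

    from megfile.lib.fnmatch import filter as fnmatch_filter

    names = ["data/train/a.txt", "data/test/b.txt", "data/val/c.txt"]

    # '{train,test}' expands to (train|test); '*' stays within one path
    # segment, so 'data/val/c.txt' does not match.
    assert fnmatch_filter(names, "data/{train,test}/*.txt") == [
        "data/train/a.txt",
        "data/test/b.txt",
    ]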
megfile/lib/glob.py CHANGED
@@ -1,4 +1,5 @@
 """Filename globbing utility."""
+
 """remove once py35 is dead"""
 
 import os
@@ -10,16 +11,16 @@ from typing import Iterator, List, Tuple
 from megfile.lib import fnmatch
 
 # Python 3.5+ Compatible
-'''
+"""
 class FSFunc(NamedTuple):
     exists: Callable[[str], bool]
     isdir: Callable[[str], bool]
     scandir: Callable[[str], Iterator[Tuple[str, bool]]]  # name, isdir
 
 in Python 3.6+
-'''
+"""
 
-FSFunc = NamedTuple('FSFunc', ['exists', 'isdir', 'scandir'])
+FSFunc = NamedTuple("FSFunc", ["exists", "isdir", "scandir"])
 
 
 def _exists(path: str) -> bool:
@@ -39,10 +40,8 @@ DEFAULT_FILESYSTEM_FUNC = FSFunc(_exists, _isdir, _scandir)
 
 
 def glob(
-        pathname: str,
-        *,
-        recursive: bool = False,
-        fs: FSFunc = DEFAULT_FILESYSTEM_FUNC) -> List[str]:
+    pathname: str, *, recursive: bool = False, fs: FSFunc = DEFAULT_FILESYSTEM_FUNC
+) -> List[str]:
     """Return a list of paths matching a pathname pattern.
 
     The pattern may contain simple shell-style wildcards a la
@@ -57,10 +56,8 @@
 
 
 def iglob(
-        pathname: str,
-        *,
-        recursive: bool = False,
-        fs: FSFunc = DEFAULT_FILESYSTEM_FUNC) -> Iterator[str]:
+    pathname: str, *, recursive: bool = False, fs: FSFunc = DEFAULT_FILESYSTEM_FUNC
+) -> Iterator[str]:
     """Return an iterator which yields the paths matching a pathname pattern.
 
     The pattern may contain simple shell-style wildcards a la
@@ -80,10 +77,9 @@
     return it
 
 
-def _iglob(pathname: str, recursive: bool, dironly: bool,
-           fs: FSFunc) -> Iterator[str]:
-    if '://' in pathname:
-        protocol, path_without_protocol = pathname.split('://', 1)
+def _iglob(pathname: str, recursive: bool, dironly: bool, fs: FSFunc) -> Iterator[str]:
+    if "://" in pathname:
+        protocol, path_without_protocol = pathname.split("://", 1)
     else:
         protocol, path_without_protocol = "", pathname
     dirname, basename = os.path.split(path_without_protocol)
@@ -92,8 +88,7 @@ def _iglob(pathname: str, recursive: bool, dironly: bool,
     if not has_magic(pathname):
         if dironly:
             # TODO: replace AssertionError with OSError in 4.0.0
-            raise AssertionError(
-                "can't use dironly with non-magic patterns in _iglob")
+            raise AssertionError("can't use dironly with non-magic patterns in _iglob")
         if basename:
             if fs.exists(pathname):
                 yield pathname
@@ -153,8 +148,7 @@ def _glob0(dirname: str, basename: str, dironly: bool, fs: FSFunc) -> List[str]:
 
 # This helper function recursively yields relative pathnames inside a literal
 # directory.
-def _glob2(dirname: str, pattern: str, dironly: bool,
-           fs: FSFunc) -> Iterator[str]:
+def _glob2(dirname: str, pattern: str, dironly: bool, fs: FSFunc) -> Iterator[str]:
     if not _isrecursive(pattern):
         # TODO: replace AssertionError with OSError in 4.0.0
         raise AssertionError("error call '_glob2' with non-glob pattern")
@@ -194,10 +188,10 @@ def _rlistdir(dirname: str, dironly: bool, fs: FSFunc) -> Iterator[str]:
             yield os.path.join(x, y)
 
 
-magic_check = re.compile(r'([*?[{])')
-magic_decheck = re.compile(r'\[(.)\]')
-brace_check = re.compile(r'(\{.*\})')
-unbrace_check = re.compile(r'([*?[])')
+magic_check = re.compile(r"([*?[{])")
+magic_decheck = re.compile(r"\[(.)\]")
+brace_check = re.compile(r"(\{.*\})")
+unbrace_check = re.compile(r"([*?[])")
 
 
 def has_magic(s: str) -> bool:
@@ -211,46 +205,44 @@ def has_magic_ignore_brace(s: str) -> bool:
 
 
 def _ishidden(path: str) -> bool:
-    return path[0] == '.'
+    return path[0] == "."
 
 
 def _isrecursive(pattern: str) -> bool:
-    return pattern == '**'
+    return pattern == "**"
 
 
 def escape(pathname):
-    """Escape all special characters.
-    """
+    """Escape all special characters."""
     # Escaping is done by wrapping any of "*?[" between square brackets.
     # Metacharacters do not work in the drive part and shouldn't be escaped.
     drive, pathname = os.path.splitdrive(pathname)
-    pathname = magic_check.sub(r'[\1]', pathname)
+    pathname = magic_check.sub(r"[\1]", pathname)
     return drive + pathname
 
 
 def unescape(pathname):
-    """Unescape all special characters.
-    """
+    """Unescape all special characters."""
     drive, pathname = os.path.splitdrive(pathname)
-    pathname = magic_decheck.sub(r'\1', pathname)
+    pathname = magic_decheck.sub(r"\1", pathname)
    return drive + pathname
 
 
-def _find_suffix(path_list: List[str], prefix: str,
-                 split_sign: str) -> List[str]:
+def _find_suffix(path_list: List[str], prefix: str, split_sign: str) -> List[str]:
     suffix = []
     temp_path_list = []
     for path_index in range(0, len(path_list)):
-        temp_path_list.append(
-            path_list[path_index][len(prefix):].split(split_sign))
+        temp_path_list.append(path_list[path_index][len(prefix) :].split(split_sign))
     i = 0
     while True:
         i = i - 1
         if len(temp_path_list[0]) <= abs(i):
             return suffix
         for path_index in range(1, len(path_list)):
-            if len(temp_path_list[path_index]) <= abs(
-                    i) or temp_path_list[path_index][i] != temp_path_list[0][i]:
+            if (
+                len(temp_path_list[path_index]) <= abs(i)
+                or temp_path_list[path_index][i] != temp_path_list[0][i]
+            ):
                 return suffix
             else:
                 suffix.insert(0, temp_path_list[0][i])
@@ -260,8 +252,8 @@ def globlize(path_list: List[str]) -> str:
     path_list = sorted(path_list)
     if path_list[0] == path_list[-1]:
         return path_list[0]
-    first_path = path_list[0].split('/')
-    last_path = path_list[-1].split('/')
+    first_path = path_list[0].split("/")
+    last_path = path_list[-1].split("/")
     prefix = []
 
     for i in range(0, min(len(first_path), len(last_path))):
@@ -272,46 +264,46 @@ def globlize(path_list: List[str]) -> str:
     if len(prefix) == 0:
         prefix = ""
     else:
-        prefix = '/'.join(prefix) + '/'
-    suffix = _find_suffix(path_list, prefix, '/')
+        prefix = "/".join(prefix) + "/"
+    suffix = _find_suffix(path_list, prefix, "/")
 
     if len(suffix) == 0:
-        suffix = _find_suffix(path_list, prefix, '.')
+        suffix = _find_suffix(path_list, prefix, ".")
         if len(suffix) == 0:
             suffix = ""
         else:
-            suffix = '.' + '.'.join(suffix)
+            suffix = "." + ".".join(suffix)
     else:
-        suffix = '/' + '/'.join(suffix)
+        suffix = "/" + "/".join(suffix)
 
     path = []
     for i in path_list:
-        if i[len(prefix):len(i) - len(suffix)] not in path:
-            path.append(unescape(i[len(prefix):len(i) - len(suffix)]))
-    return prefix + "{" + ','.join(path) + "}" + suffix
+        if i[len(prefix) : len(i) - len(suffix)] not in path:
+            path.append(unescape(i[len(prefix) : len(i) - len(suffix)]))
+    return prefix + "{" + ",".join(path) + "}" + suffix
 
 
 def ungloblize(glob: str) -> List[str]:
     path_list = [glob]
     while True:
         temp_path = path_list[0]
-        begin = temp_path.find('{')
-        end = temp_path.find('}', begin)
+        begin = temp_path.find("{")
+        end = temp_path.find("}", begin)
         if end == -1:
             break
         path_list.pop(0)
-        subpath_list = temp_path[begin + 1:end].split(',')
+        subpath_list = temp_path[begin + 1 : end].split(",")
         for subpath in subpath_list:
-            path = temp_path[:begin] + escape(subpath) + temp_path[end + 1:]
+            path = temp_path[:begin] + escape(subpath) + temp_path[end + 1 :]
             path_list.append(path)
     return path_list
 
 
 def get_non_glob_dir(glob: str):
     root_dir = []
-    if glob.startswith('/'):
-        root_dir.append('/')
-    for name in glob.split('/'):
+    if glob.startswith("/"):
+        root_dir.append("/")
+    for name in glob.split("/"):
         if has_magic(name):
             break
         root_dir.append(name)
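
globlize and ungloblize (bodies above) convert between a list of concrete paths and a single brace pattern by factoring out the shared prefix and suffix. A round trip with made-up paths, assuming megfile is importable:

    from megfile.lib.glob import globlize, ungloblize

    # The common prefix 'logs/' and common suffix '.txt' are factored out.
    assert globlize(["logs/a.txt", "logs/b.txt"]) == "logs/{a,b}.txt"

    # Each '{...}' group is expanded back into concrete paths.
    assert ungloblize("logs/{a,b}.txt") == ["logs/a.txt", "logs/b.txt"]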
megfile/lib/hdfs_prefetch_reader.py CHANGED
@@ -1,28 +1,38 @@
 from io import BytesIO
 from typing import Optional
 
-from megfile.config import DEFAULT_BLOCK_CAPACITY, DEFAULT_BLOCK_SIZE, HDFS_MAX_RETRY_TIMES
+from megfile.config import (
+    DEFAULT_BLOCK_CAPACITY,
+    DEFAULT_BLOCK_SIZE,
+    HDFS_MAX_RETRY_TIMES,
+)
 from megfile.errors import raise_hdfs_error
 from megfile.lib.base_prefetch_reader import BasePrefetchReader
 
 
 class HdfsPrefetchReader(BasePrefetchReader):
-    '''
-    Reader to fast read the hdfs content. This will divide the file content into equal parts of block_size size, and will use LRU to cache at most block_capacity blocks in memory.
-    open(), seek() and read() will trigger prefetch read. The prefetch will cached block_forward blocks of data from offset position (the position after reading if the called function is read).
-    '''
+    """
+    Reader to fast read the hdfs content. This will divide the file content into equal
+    parts of block_size size, and will use LRU to cache at most block_capacity blocks
+    in memory.
+
+    open(), seek() and read() will trigger prefetch read. The prefetch will cached
+    block_forward blocks of data from offset position (the position after reading
+    if the called function is read).
+    """
 
     def __init__(
-            self,
-            hdfs_path: str,
-            *,
-            client,
-            block_size: int = DEFAULT_BLOCK_SIZE,
-            block_capacity: int = DEFAULT_BLOCK_CAPACITY,
-            block_forward: Optional[int] = None,
-            max_retries: int = HDFS_MAX_RETRY_TIMES,
-            max_workers: Optional[int] = None,
-            profile_name: Optional[str] = None):
+        self,
+        hdfs_path: str,
+        *,
+        client,
+        block_size: int = DEFAULT_BLOCK_SIZE,
+        block_capacity: int = DEFAULT_BLOCK_CAPACITY,
+        block_forward: Optional[int] = None,
+        max_retries: int = HDFS_MAX_RETRY_TIMES,
+        max_workers: Optional[int] = None,
+        profile_name: Optional[str] = None,
+    ):
         self._path = hdfs_path
         self._client = client
         self._profile_name = profile_name
@@ -32,22 +42,27 @@ class HdfsPrefetchReader(BasePrefetchReader):
             block_capacity=block_capacity,
             block_forward=block_forward,
             max_retries=max_retries,
-            max_workers=max_workers)
+            max_workers=max_workers,
+        )
 
     def _get_content_size(self):
         with raise_hdfs_error(self._path):
-            return self._client.status(self._path)['length']
+            return self._client.status(self._path)["length"]
 
     @property
     def name(self) -> str:
-        return 'hdfs%s://%s' % (
-            f"+{self._profile_name}" if self._profile_name else "", self._path)
+        return "hdfs%s://%s" % (
+            f"+{self._profile_name}" if self._profile_name else "",
+            self._path,
+        )
 
     def _fetch_response(
-            self,
-            start: Optional[int] = None,
-            end: Optional[int] = None) -> dict:
+        self, start: Optional[int] = None, end: Optional[int] = None
+    ) -> dict:
         with raise_hdfs_error(self.name):
-            with self._client.read(self._path, offset=start or 0, length=end -
-                                   start if start and end else None) as f:
-                return {'Body': BytesIO(f.read())}
+            with self._client.read(
+                self._path,
+                offset=start or 0,
+                length=end - start if start and end else None,
+            ) as f:
+                return {"Body": BytesIO(f.read())}
megfile/lib/hdfs_tools.py CHANGED
@@ -3,9 +3,7 @@ try:
 except ImportError:  # pragma: no cover
     hdfs_api = None
 
-__all__ = [
-    'hdfs_api',
-]
+__all__ = ["hdfs_api"]
 
 if hdfs_api:
     _to_error = hdfs_api.client._to_error