megfile 2.2.7__py3-none-any.whl → 2.2.8.post1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
megfile/__init__.py CHANGED
@@ -1,5 +1,7 @@
  from megfile.fs import fs_abspath, fs_access, fs_cwd, fs_exists, fs_expanduser, fs_getmd5, fs_getmtime, fs_getsize, fs_glob, fs_glob_stat, fs_home, fs_iglob, fs_isabs, fs_isdir, fs_isfile, fs_islink, fs_ismount, fs_listdir, fs_load_from, fs_lstat, fs_makedirs, fs_move, fs_readlink, fs_realpath, fs_relpath, fs_remove, fs_rename, fs_resolve, fs_save_as, fs_scan, fs_scan_stat, fs_scandir, fs_stat, fs_symlink, fs_sync, fs_unlink, fs_walk, is_fs
  from megfile.fs_path import FSPath
+ from megfile.hdfs import hdfs_exists, hdfs_getmd5, hdfs_getmtime, hdfs_getsize, hdfs_glob, hdfs_glob_stat, hdfs_iglob, hdfs_isdir, hdfs_isfile, hdfs_listdir, hdfs_load_from, hdfs_makedirs, hdfs_move, hdfs_open, hdfs_remove, hdfs_save_as, hdfs_scan, hdfs_scan_stat, hdfs_scandir, hdfs_stat, hdfs_unlink, hdfs_walk, is_hdfs
+ from megfile.hdfs_path import HdfsPath
  from megfile.http import http_exists, http_getmtime, http_getsize, http_open, http_stat, is_http
  from megfile.http_path import HttpPath, HttpsPath
  from megfile.s3 import is_s3, s3_access, s3_buffered_open, s3_cached_open, s3_concat, s3_copy, s3_download, s3_exists, s3_getmd5, s3_getmtime, s3_getsize, s3_glob, s3_glob_stat, s3_hasbucket, s3_iglob, s3_isdir, s3_isfile, s3_listdir, s3_load_content, s3_load_from, s3_lstat, s3_makedirs, s3_memory_open, s3_move, s3_open, s3_path_join, s3_pipe_open, s3_prefetch_open, s3_readlink, s3_remove, s3_rename, s3_save_as, s3_scan, s3_scan_stat, s3_scandir, s3_stat, s3_symlink, s3_sync, s3_unlink, s3_upload, s3_walk
@@ -178,6 +180,29 @@ __all__ = [
  'sftp_copy',
  'sftp_sync',
  'sftp_concat',
+ 'is_hdfs',
+ 'hdfs_exists',
+ 'hdfs_stat',
+ 'hdfs_getmtime',
+ 'hdfs_getsize',
+ 'hdfs_isdir',
+ 'hdfs_isfile',
+ 'hdfs_listdir',
+ 'hdfs_load_from',
+ 'hdfs_move',
+ 'hdfs_remove',
+ 'hdfs_scan',
+ 'hdfs_scan_stat',
+ 'hdfs_scandir',
+ 'hdfs_unlink',
+ 'hdfs_walk',
+ 'hdfs_getmd5',
+ 'hdfs_save_as',
+ 'hdfs_open',
+ 'hdfs_glob',
+ 'hdfs_glob_stat',
+ 'hdfs_iglob',
+ 'hdfs_makedirs',
  'S3Path',
  'FSPath',
  'HttpPath',
@@ -185,4 +210,5 @@ __all__ = [
  'StdioPath',
  'SmartPath',
  'SftpPath',
+ 'HdfsPath',
  ]
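
The new exports make the hdfs helpers importable straight from the package root, alongside the existing fs/s3/http/sftp families. A minimal usage sketch, assuming a reachable HDFS and a configured profile; the hdfs:// path below is a placeholder, not part of this diff:

    from megfile import hdfs_exists, hdfs_listdir, hdfs_open

    # 'hdfs://mydir/a.txt' is a hypothetical path, resolved against the configured hdfs profile
    if hdfs_exists('hdfs://mydir/a.txt'):
        with hdfs_open('hdfs://mydir/a.txt', 'r') as f:
            print(f.read())
    print(hdfs_listdir('hdfs://mydir'))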
megfile/cli.py CHANGED
@@ -1,3 +1,4 @@
+ import configparser
  import logging
  import os
  import shutil
@@ -9,9 +10,10 @@ from functools import partial
  import click
  from tqdm import tqdm

+ from megfile.hdfs_path import DEFAULT_HDFS_TIMEOUT
  from megfile.interfaces import FileEntry
  from megfile.lib.glob import get_non_glob_dir, has_magic
- from megfile.smart import _smart_sync_single_file, smart_copy, smart_getmd5, smart_getmtime, smart_getsize, smart_glob_stat, smart_isdir, smart_isfile, smart_makedirs, smart_move, smart_open, smart_path_join, smart_remove, smart_rename, smart_scan_stat, smart_scandir, smart_stat, smart_sync, smart_sync_with_progress, smart_touch, smart_unlink
+ from megfile.smart import _smart_sync_single_file, smart_copy, smart_exists, smart_getmd5, smart_getmtime, smart_getsize, smart_glob_stat, smart_isdir, smart_isfile, smart_makedirs, smart_move, smart_open, smart_path_join, smart_remove, smart_rename, smart_scan_stat, smart_scandir, smart_stat, smart_sync, smart_sync_with_progress, smart_touch, smart_unlink
  from megfile.smart_path import SmartPath
  from megfile.utils import get_human_size
  from megfile.version import VERSION
@@ -286,7 +288,7 @@ def sync(
  src_root_path = get_non_glob_dir(src_path)

  def scan_func(path):
- for glob_file_entry in smart_glob_stat(path):
+ for glob_file_entry in smart_glob_stat(path, missing_ok=False):
  if glob_file_entry.is_file():
  yield glob_file_entry
  else:
@@ -295,7 +297,8 @@ def sync(
  yield file_entry
  else:
  src_root_path = src_path
- scan_func = partial(smart_scan_stat, followlinks=True)
+ scan_func = partial(
+ smart_scan_stat, followlinks=True, missing_ok=False)

  if progress_bar and not quiet:
  print('building progress bar', end='\r')
@@ -493,7 +496,8 @@ def config():
  '--path',
  type=str,
  default='~/.aws/credentials',
- help='s3 config file')
+ help='s3 config file, default is $HOME/.aws/credentials',
+ )
  @click.option(
  '-n', '--profile-name', type=str, default='default', help='s3 config file')
  @click.argument('aws_access_key_id')
@@ -504,6 +508,8 @@ def config():
  def s3(
  path, profile_name, aws_access_key_id, aws_secret_access_key,
  endpoint_url, addressing_style, no_cover):
+ path = os.path.expanduser(path)
+
  config_dict = {
  'name': profile_name,
  'aws_access_key_id': aws_access_key_id,
@@ -533,6 +539,7 @@ def s3(
  s3['addressing_style'])
  return content

+ os.makedirs(os.path.dirname(path), exist_ok=True) # make sure dirpath exists
  if not os.path.exists(path): #If this file doesn't exist.
  content_str = dumps(config_dict)
  with open(path, 'w') as fp:
@@ -556,7 +563,7 @@ def s3(
  # Given profile_name has been used.
  if cur_name == profile_name:
  if no_cover: # default True(cover the same-name config).
- raise NameError(f'{profile_name} has been used.')
+ raise NameError(f'profile-name has been used: {profile_name}')
  used = True
  sections[i] = dumps(config_dict)
  continue
@@ -570,6 +577,51 @@ def s3(
  click.echo(f'Your oss config has been saved into {path}')


+ @config.command(short_help='Update the config file for hdfs')
+ @click.argument('url')
+ @click.option(
+ '-p',
+ '--path',
+ default='~/.hdfscli.cfg',
+ help='hdfs config file, default is $HOME/.hdfscli.cfg',
+ )
+ @click.option('-n', '--profile-name', default='default', help='hdfs config profile name')
+ @click.option('-u', '--user', help='user name')
+ @click.option('-r', '--root', help="hdfs path's root dir")
+ @click.option('-t', '--token', help="token for requesting hdfs server")
+ @click.option(
+ '-o',
+ '--timeout',
+ help=f"request hdfs server timeout, default {DEFAULT_HDFS_TIMEOUT}")
+ @click.option('--no-cover', is_flag=True, help='Not cover the same-name config')
+ def hdfs(url, path, profile_name, user, root, token, timeout, no_cover):
+ path = os.path.expanduser(path)
+ current_config = {
+ 'url': url,
+ 'user': user,
+ 'root': root,
+ 'token': token,
+ 'timeout': timeout,
+ }
+ profile_name = f"{profile_name}.alias"
+ config = configparser.ConfigParser()
+ if os.path.exists(path):
+ config.read(path)
+ if 'global' not in config.sections():
+ config['global'] = {'default.alias': 'default'}
+ if profile_name in config.sections():
+ if no_cover:
+ raise NameError(f'profile-name has been used: {profile_name[:-6]}')
+ else:
+ config[profile_name] = {}
+ for key, value in current_config.items():
+ if value:
+ config[profile_name][key] = value
+ with open(path, 'w') as fp:
+ config.write(fp)
+ click.echo(f'Your hdfs config has been saved into {path}')
+
+
  if __name__ == '__main__':
  # Usage: python -m megfile.cli
  safe_cli() # pragma: no cover
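
The new `config hdfs` subcommand writes an hdfscli-style INI file with configparser: a `global` section naming the default alias plus one `<profile>.alias` section per profile, with only the options actually supplied. A hypothetical run (host, user and paths are illustrative), using the `python -m megfile.cli` entry point documented in this file:

    $ python -m megfile.cli config hdfs http://127.0.0.1:50070 --user hadoop --root /
    Your hdfs config has been saved into /home/hadoop/.hdfscli.cfg

    $ cat ~/.hdfscli.cfg
    [global]
    default.alias = default

    [default.alias]
    url = http://127.0.0.1:50070
    user = hadoop
    root = /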
megfile/errors.py CHANGED
@@ -384,3 +384,28 @@ def s3_error_code_should_retry(error: str) -> bool:
  if error in ['InternalError', 'ServiceUnavailable', 'SlowDown']:
  return True
  return False
+
+
+ def translate_hdfs_error(hdfs_error: Exception, hdfs_path: PathLike):
+ from megfile.lib.hdfs_tools import hdfs_api
+
+ if hdfs_api and isinstance(hdfs_error, hdfs_api.HdfsError):
+ if hdfs_error.message and 'Path is not a file' in hdfs_error.message: # pytype: disable=attribute-error
+ return IsADirectoryError('Is a directory: %r' % hdfs_path)
+ elif hdfs_error.message and 'Path is not a directory' in hdfs_error.message: # pytype: disable=attribute-error
+ return NotADirectoryError('Not a directory: %r' % hdfs_path)
+ elif hdfs_error.status_code in (401, 403): # pytype: disable=attribute-error
+ return PermissionError('Permission denied: %r' % hdfs_path)
+ elif hdfs_error.status_code == 400: # pytype: disable=attribute-error
+ return ValueError(f'{hdfs_error.message}, path: {hdfs_path}') # pytype: disable=attribute-error
+ elif hdfs_error.status_code == 404: # pytype: disable=attribute-error
+ return FileNotFoundError(f'No match file: {hdfs_path}')
+ return hdfs_error
+
+
+ @contextmanager
+ def raise_hdfs_error(hdfs_path: PathLike):
+ try:
+ yield
+ except Exception as error:
+ raise translate_hdfs_error(error, hdfs_path)
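
translate_hdfs_error maps hdfs client errors onto builtin exceptions (404 to FileNotFoundError, 401/403 to PermissionError, 400 to ValueError, and the not-a-file / not-a-directory messages to IsADirectoryError / NotADirectoryError), and raise_hdfs_error is the context-manager form meant to wrap hdfs calls. A rough sketch of the intended call pattern; the client object and the stat_on_hdfs helper are hypothetical:

    from megfile.errors import raise_hdfs_error

    def stat_on_hdfs(client, hdfs_path):
        # any hdfs_api.HdfsError raised inside the block is re-raised as a builtin exception
        with raise_hdfs_error(hdfs_path):
            return client.status(hdfs_path)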
megfile/fs.py CHANGED
@@ -1,6 +1,6 @@
  from typing import BinaryIO, Callable, Iterator, List, Optional, Tuple

- from megfile.fs_path import FSPath, StatResult, _make_stat, fs_cwd, fs_glob, fs_glob_stat, fs_home, fs_iglob, fs_makedirs, fs_move, fs_path_join, fs_readlink, fs_rename, fs_resolve, is_fs
+ from megfile.fs_path import FSPath, StatResult, _make_stat, fs_cwd, fs_glob, fs_glob_stat, fs_home, fs_iglob, fs_lstat, fs_makedirs, fs_move, fs_path_join, fs_readlink, fs_rename, fs_resolve, is_fs
  from megfile.interfaces import Access, FileEntry, PathLike, StatResult

  __all__ = [
@@ -18,6 +18,7 @@ __all__ = [
  'fs_resolve',
  'fs_move',
  'fs_makedirs',
+ 'fs_lstat',
  'fs_isabs',
  'fs_abspath',
  'fs_access',
@@ -36,7 +37,6 @@ __all__ = [
  'fs_scan_stat',
  'fs_scandir',
  'fs_stat',
- 'fs_lstat',
  'fs_unlink',
  'fs_walk',
  'fs_getmd5',
@@ -260,16 +260,6 @@ def fs_stat(path: PathLike, follow_symlinks=True) -> StatResult:
  return FSPath(path).stat(follow_symlinks)


- def fs_lstat(path: PathLike) -> StatResult:
- '''
- Like Path.stat() but, if the path points to a symbolic link, return the symbolic link’s information rather than its target’s.
-
- :param path: Given path
- :returns: StatResult
- '''
- return FSPath(path).lstat()
-
-
  def fs_unlink(path: PathLike, missing_ok: bool = False) -> None:
  '''
  Remove the file on fs
megfile/fs_path.py CHANGED
@@ -37,6 +37,7 @@ __all__ = [
  'fs_resolve',
  'fs_move',
  'fs_makedirs',
+ 'fs_lstat',
  ]


@@ -200,6 +201,16 @@ def fs_makedirs(path: PathLike, exist_ok: bool = False):
  return FSPath(path).mkdir(parents=True, exist_ok=exist_ok)


+ def fs_lstat(path: PathLike) -> StatResult:
+ '''
+ Like Path.stat() but, if the path points to a symbolic link, return the symbolic link’s information rather than its target’s.
+
+ :param path: Given path
+ :returns: StatResult
+ '''
+ return FSPath(path).lstat()
+
+
  @SmartPath.register
  class FSPath(URIPath):
  """file protocol
@@ -601,14 +612,6 @@ class FSPath(URIPath):
  mtime = stat.st_mtime
  return result._replace(size=size, ctime=ctime, mtime=mtime)

- def lstat(self) -> StatResult:
- '''
- Like Path.stat() but, if the path points to a symbolic link, return the symbolic link’s information rather than its target’s.
-
- :returns: StatResult
- '''
- return self.stat(follow_symlinks=False)
-
  def unlink(self, missing_ok: bool = False) -> None:
  '''
  Remove the file on fs
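
Between fs.py and fs_path.py this is only a relocation: fs_lstat now lives in megfile.fs_path and is re-exported by megfile.fs, while FSPath drops its thin lstat wrapper. For callers the behaviour should be unchanged; a small illustrative check with placeholder local paths:

    import os
    from megfile.fs import fs_lstat, fs_stat

    # hypothetical local paths, for illustration only
    with open('/tmp/megfile_target', 'w') as f:
        f.write('data')
    os.symlink('/tmp/megfile_target', '/tmp/megfile_link')

    print(fs_lstat('/tmp/megfile_link').size)  # stat of the link itself
    print(fs_stat('/tmp/megfile_link').size)   # stat of the target (follows the link)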
megfile/hdfs.py ADDED
@@ -0,0 +1,269 @@
+ from typing import IO, AnyStr, BinaryIO, Iterator, List, Optional, Tuple
+
+ from megfile.hdfs_path import HdfsPath, hdfs_glob, hdfs_glob_stat, hdfs_iglob, hdfs_makedirs, is_hdfs
+ from megfile.interfaces import FileEntry, PathLike, StatResult
+
+ __all__ = [
+ 'is_hdfs',
+ 'hdfs_glob',
+ 'hdfs_glob_stat',
+ 'hdfs_iglob',
+ 'hdfs_makedirs',
+ 'hdfs_exists',
+ 'hdfs_stat',
+ 'hdfs_getmtime',
+ 'hdfs_getsize',
+ 'hdfs_isdir',
+ 'hdfs_isfile',
+ 'hdfs_listdir',
+ 'hdfs_load_from',
+ 'hdfs_move',
+ 'hdfs_remove',
+ 'hdfs_scan',
+ 'hdfs_scan_stat',
+ 'hdfs_scandir',
+ 'hdfs_unlink',
+ 'hdfs_walk',
+ 'hdfs_getmd5',
+ 'hdfs_save_as',
+ 'hdfs_open',
+ ]
+
+
+ def hdfs_exists(path: PathLike, followlinks: bool = False) -> bool:
+ '''
+ Test if path exists
+
+ If the bucket of the path is not permitted to be read, return False
+
+ :param path: Given path
+ :returns: True if path exists, else False
+ '''
+ return HdfsPath(path).exists(followlinks)
+
+
+ def hdfs_stat(path: PathLike, follow_symlinks=True) -> StatResult:
+ '''
+ Get StatResult of path file, including file size and mtime, referring to hdfs_getsize and hdfs_getmtime
+
+ If path is not an existent path, which means hdfs_exists(path) returns False, then raise FileNotFoundError
+ If attempting to get the StatResult of the complete hdfs, such as hdfs_dir_url == 'hdfs://', raise BucketNotFoundError
+
+ :param path: Given path
+ :returns: StatResult
+ :raises: FileNotFoundError
+ '''
+ return HdfsPath(path).stat(follow_symlinks)
+
+
+ def hdfs_getmtime(path: PathLike, follow_symlinks: bool = False) -> float:
+ '''
+ Get last-modified time of the file on the given path (in Unix timestamp format).
+ If the path is an existent directory, return the latest modified time of all files in it. The mtime of an empty directory is 1970-01-01 00:00:00
+
+ If path is not an existent path, which means hdfs_exists(path) returns False, then raise FileNotFoundError
+
+ :param path: Given path
+ :returns: Last-modified time
+ :raises: FileNotFoundError
+ '''
+ return HdfsPath(path).getmtime(follow_symlinks)
+
+
+ def hdfs_getsize(path: PathLike, follow_symlinks: bool = False) -> int:
+ '''
+ Get file size on the given path (in bytes).
+ If the path is a directory, return the sum of all file sizes in it, including files in subdirectories (if they exist).
+ The result excludes the size of the directory itself; in other words, return 0 bytes for an empty directory path.
+
+ If path is not an existent path, which means hdfs_exists(path) returns False, then raise FileNotFoundError
+
+ :param path: Given path
+ :returns: File size
+ :raises: FileNotFoundError
+ '''
+ return HdfsPath(path).getsize(follow_symlinks)
+
+
+ def hdfs_isdir(path: PathLike, followlinks: bool = False) -> bool:
+ '''
+ Test if an hdfs url is a directory
+ Specific procedures are as follows:
+ If there exists a suffix such that ``os.path.join(path, suffix)`` is a file
+ If the url is an empty bucket or hdfs://
+
+ :param path: Given path
+ :param followlinks: whether followlinks is True or False, the result is the same, because hdfs symlinks do not support directories.
+ :returns: True if path is an hdfs directory, else False
+ '''
+ return HdfsPath(path).is_dir(followlinks)
+
+
+ def hdfs_isfile(path: PathLike, followlinks: bool = False) -> bool:
+ '''
+ Test if a path is a file
+
+ :param path: Given path
+ :returns: True if path is an hdfs file, else False
+ '''
+ return HdfsPath(path).is_file(followlinks)
+
+
+ def hdfs_listdir(path: PathLike, followlinks: bool = False) -> List[str]:
+ '''
+ Get all contents of the given path.
+
+ :param path: Given path
+ :returns: All contents that have the prefix of path.
+ :raises: FileNotFoundError, NotADirectoryError
+ '''
+ return HdfsPath(path).listdir(followlinks)
+
+
+ def hdfs_load_from(path: PathLike, followlinks: bool = False) -> BinaryIO:
+ '''Read all content in binary from the specified path and write it into memory
+
+ User should close the BinaryIO manually
+
+ :param path: Given path
+ :returns: BinaryIO
+ '''
+ return HdfsPath(path).load(followlinks)
+
+
+ def hdfs_move(src_path: PathLike, dst_path: PathLike) -> None:
+ '''
+ Move file/directory path from src_path to dst_path
+
+ :param src_path: Given path
+ :param dst_path: Given destination path
+ '''
+ return HdfsPath(src_path).move(dst_path)
+
+
+ def hdfs_remove(path: PathLike, missing_ok: bool = False) -> None:
+ '''
+ Remove the file or directory on hdfs; `hdfs://` and `hdfs://bucket` are not permitted to be removed
+
+ :param path: Given path
+ :param missing_ok: if False and the target file/directory does not exist, raise FileNotFoundError
+ :raises: FileNotFoundError, UnsupportedError
+ '''
+ return HdfsPath(path).remove(missing_ok)
+
+
+ def hdfs_scan(
+ path: PathLike, missing_ok: bool = True,
+ followlinks: bool = False) -> Iterator[str]:
+ '''
+ Iteratively traverse only files in the given hdfs directory.
+ Every iteration on the generator yields a path string.
+
+ If path is a file path, yields the file only
+ If path is a non-existent path, return an empty generator
+ If path is a bucket path, return all file paths in the bucket
+ If path is an empty bucket, return an empty generator
+ If path doesn't contain any bucket, i.e. path == 'hdfs://', raise UnsupportedError. scan() on the complete hdfs is not supported in megfile
+
+ :param path: Given path
+ :param missing_ok: If False and there's no file in the directory, raise FileNotFoundError
+ :raises: UnsupportedError
+ :returns: A file path generator
+ '''
+ return HdfsPath(path).scan(missing_ok, followlinks)
+
+
+ def hdfs_scan_stat(
+ path: PathLike, missing_ok: bool = True,
+ followlinks: bool = False) -> Iterator[FileEntry]:
+ '''
+ Iteratively traverse only files in the given directory.
+ Every iteration on the generator yields a tuple of path string and file stat
+
+ :param path: Given path
+ :param missing_ok: If False and there's no file in the directory, raise FileNotFoundError
+ :raises: UnsupportedError
+ :returns: A file entry generator
+ '''
+ return HdfsPath(path).scan_stat(missing_ok, followlinks)
+
+
+ def hdfs_scandir(path: PathLike,
+ followlinks: bool = False) -> Iterator[FileEntry]:
+ '''
+ Get all contents of the given path; the order of the results is not guaranteed.
+
+ :param path: Given path
+ :returns: All contents that have the prefix of path
+ :raises: FileNotFoundError, NotADirectoryError
+ '''
+ return HdfsPath(path).scandir(followlinks)
+
+
+ def hdfs_unlink(path: PathLike, missing_ok: bool = False) -> None:
+ '''
+ Remove the file on hdfs
+
+ :param path: Given path
+ :param missing_ok: if False and the target file does not exist, raise FileNotFoundError
+ :raises: FileNotFoundError, IsADirectoryError
+ '''
+ return HdfsPath(path).unlink(missing_ok)
+
+
+ def hdfs_walk(path: PathLike, followlinks: bool = False
+ ) -> Iterator[Tuple[str, List[str], List[str]]]:
+ '''
+ Iteratively traverse the given hdfs directory in top-down order. In other words, firstly traverse the parent directory, then, if subdirectories exist, traverse the subdirectories.
+ Every iteration on the generator yields a 3-tuple: (root, dirs, files)
+
+ - root: Current hdfs path;
+ - dirs: Name list of subdirectories in the current directory.
+ - files: Name list of files in the current directory.
+
+ If path is a file path, return an empty generator
+ If path is a non-existent path, return an empty generator
+ If path is a bucket path, the bucket will be the top directory and will be returned at the first iteration of the generator
+ If path is an empty bucket, only yield one 3-tuple (note: hdfs doesn't have empty directories)
+ If path doesn't contain any bucket, i.e. path == 'hdfs://', raise UnsupportedError. walk() on the complete hdfs is not supported in megfile
+
+ :param path: Given path
+ :param followlinks: whether followlinks is True or False, the result is the same, because hdfs does not support symlinks.
+ :returns: A 3-tuple generator
+ '''
+ return HdfsPath(path).walk(followlinks)
+
+
+ def hdfs_getmd5(
+ path: PathLike, recalculate: bool = False,
+ followlinks: bool = False) -> str:
+ '''
+ Get checksum of the file or dir.
+
+ :param path: Given path
+ :param recalculate: Ignore this parameter, just for compatibility
+ :param followlinks: Ignore this parameter, just for compatibility
+ :returns: checksum
+ '''
+ return HdfsPath(path).md5(recalculate, followlinks)
+
+
+ def hdfs_save_as(file_object: BinaryIO, path: PathLike):
+ '''Write the opened binary stream to the specified path, but the stream won't be closed
+
+ :param path: Given path
+ :param file_object: Stream to be read
+ '''
+ return HdfsPath(path).save(file_object)
+
+
+ def hdfs_open(
+ path: PathLike,
+ mode: str = 'r',
+ *,
+ buffering: Optional[int] = None,
+ encoding: Optional[str] = None,
+ errors: Optional[str] = None,
+ **kwargs) -> IO[AnyStr]: # pytype: disable=signature-mismatch
+ return HdfsPath(path).open(
+ mode, buffering=buffering, encoding=encoding, errors=errors)
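
megfile/hdfs.py is a thin functional facade over HdfsPath: each hdfs_* function simply delegates to the matching HdfsPath method, mirroring the existing fs and s3 modules. A short sketch of how the wrappers combine, assuming a reachable HDFS and a configured profile; all hdfs:// URLs below are placeholders:

    import io
    from megfile.hdfs import hdfs_getsize, hdfs_makedirs, hdfs_save_as, hdfs_scan, hdfs_walk

    hdfs_makedirs('hdfs://data/reports')
    hdfs_save_as(io.BytesIO(b'hello'), 'hdfs://data/reports/a.txt')

    for file_path in hdfs_scan('hdfs://data/reports'):
        print(file_path, hdfs_getsize(file_path))

    for root, dirs, files in hdfs_walk('hdfs://data'):
        print(root, dirs, files)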