megfile 3.0.6.post1__py3-none-any.whl → 3.1.0.post1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. docs/conf.py +67 -0
  2. megfile/cli.py +16 -16
  3. megfile/config.py +37 -6
  4. megfile/errors.py +26 -20
  5. megfile/fs.py +13 -8
  6. megfile/fs_path.py +69 -49
  7. megfile/hdfs.py +13 -8
  8. megfile/hdfs_path.py +49 -41
  9. megfile/http.py +1 -1
  10. megfile/http_path.py +35 -28
  11. megfile/interfaces.py +119 -48
  12. megfile/lib/base_prefetch_reader.py +9 -8
  13. megfile/lib/combine_reader.py +7 -7
  14. megfile/lib/fnmatch.py +2 -2
  15. megfile/lib/glob.py +3 -3
  16. megfile/lib/hdfs_prefetch_reader.py +2 -1
  17. megfile/lib/http_prefetch_reader.py +3 -2
  18. megfile/lib/lazy_handler.py +6 -5
  19. megfile/lib/s3_buffered_writer.py +8 -7
  20. megfile/lib/s3_cached_handler.py +3 -4
  21. megfile/lib/s3_limited_seekable_writer.py +5 -3
  22. megfile/lib/s3_memory_handler.py +10 -6
  23. megfile/lib/s3_pipe_handler.py +1 -1
  24. megfile/lib/s3_prefetch_reader.py +7 -5
  25. megfile/lib/s3_share_cache_reader.py +2 -2
  26. megfile/lib/shadow_handler.py +5 -5
  27. megfile/lib/stdio_handler.py +3 -3
  28. megfile/pathlike.py +156 -170
  29. megfile/s3.py +19 -13
  30. megfile/s3_path.py +98 -83
  31. megfile/sftp.py +25 -16
  32. megfile/sftp_path.py +109 -94
  33. megfile/smart.py +38 -28
  34. megfile/smart_path.py +6 -6
  35. megfile/stdio.py +3 -3
  36. megfile/stdio_path.py +5 -5
  37. megfile/utils/__init__.py +8 -27
  38. megfile/version.py +1 -1
  39. {megfile-3.0.6.post1.dist-info → megfile-3.1.0.post1.dist-info}/METADATA +4 -5
  40. megfile-3.1.0.post1.dist-info/RECORD +55 -0
  41. {megfile-3.0.6.post1.dist-info → megfile-3.1.0.post1.dist-info}/WHEEL +1 -1
  42. megfile-3.1.0.post1.dist-info/top_level.txt +7 -0
  43. scripts/convert_results_to_sarif.py +124 -0
  44. scripts/generate_file.py +268 -0
  45. megfile-3.0.6.post1.dist-info/RECORD +0 -52
  46. megfile-3.0.6.post1.dist-info/top_level.txt +0 -1
  47. {megfile-3.0.6.post1.dist-info → megfile-3.1.0.post1.dist-info}/LICENSE +0 -0
  48. {megfile-3.0.6.post1.dist-info → megfile-3.1.0.post1.dist-info}/LICENSE.pyre +0 -0
  49. {megfile-3.0.6.post1.dist-info → megfile-3.1.0.post1.dist-info}/entry_points.txt +0 -0
megfile/fs_path.py CHANGED
@@ -3,16 +3,17 @@ import io
3
3
  import os
4
4
  import pathlib
5
5
  import shutil
6
+ from functools import cached_property
6
7
  from stat import S_ISDIR as stat_isdir
7
8
  from stat import S_ISLNK as stat_islnk
8
- from typing import IO, AnyStr, BinaryIO, Callable, Iterator, List, Optional, Tuple, Union
9
+ from typing import IO, BinaryIO, Callable, Iterator, List, Optional, Tuple, Union
9
10
 
10
11
  from megfile.errors import _create_missing_ok_generator
11
12
  from megfile.interfaces import Access, ContextIterator, FileEntry, PathLike, StatResult
12
13
  from megfile.lib.compare import is_same_file
13
14
  from megfile.lib.glob import iglob
14
15
  from megfile.lib.url import get_url_scheme
15
- from megfile.utils import cachedproperty, calculate_md5
16
+ from megfile.utils import calculate_md5
16
17
 
17
18
  from .interfaces import PathLike, URIPath
18
19
  from .lib.compat import fspath
@@ -91,7 +92,8 @@ def fs_home():
91
92
  return os.path.expanduser('~')
92
93
 
93
94
 
94
- def fs_iglob(path: PathLike, recursive: bool = True,
95
+ def fs_iglob(path: PathLike,
96
+ recursive: bool = True,
95
97
  missing_ok: bool = True) -> Iterator[str]:
96
98
  '''Return path iterator in ascending alphabetical order, in which path matches glob pattern
97
99
 
@@ -101,7 +103,7 @@ def fs_iglob(path: PathLike, recursive: bool = True,
101
103
  Assume there exists a path `/a/b/c/b/d.txt`
102
104
  use path pattern like `/**/b/**/*.txt` to glob, the path above will be returned twice
103
105
  3. `**` will match any matched file, directory, symlink and '' by default, when recursive is `True`
104
- 4. fs_glob returns same as glob.glob(pathname, recursive=True) in acsending alphabetical order.
106
+ 4. fs_glob returns same as glob.glob(pathname, recursive=True) in ascending alphabetical order.
105
107
  5. Hidden files (filename starts with '.') will not be found in the result
106
108
 
107
109
  :param recursive: If False, `**` will not search directory recursively
@@ -114,7 +116,8 @@ def fs_iglob(path: PathLike, recursive: bool = True,
114
116
  yield path
115
117
 
116
118
 
117
- def fs_glob(path: PathLike, recursive: bool = True,
119
+ def fs_glob(path: PathLike,
120
+ recursive: bool = True,
118
121
  missing_ok: bool = True) -> List[str]:
119
122
  '''Return path list in ascending alphabetical order, in which path matches glob pattern
120
123
 
@@ -124,7 +127,7 @@ def fs_glob(path: PathLike, recursive: bool = True,
124
127
  Assume there exists a path `/a/b/c/b/d.txt`
125
128
  use path pattern like `/**/b/**/*.txt` to glob, the path above will be returned twice
126
129
  3. `**` will match any matched file, directory, symlink and '' by default, when recursive is `True`
127
- 4. fs_glob returns same as glob.glob(pathname, recursive=True) in acsending alphabetical order.
130
+ 4. fs_glob returns same as glob.glob(pathname, recursive=True) in ascending alphabetical order.
128
131
  5. Hidden files (filename starts with '.') will not be found in the result
129
132
 
130
133
  :param recursive: If False, `**` will not search directory recursively
@@ -135,7 +138,8 @@ def fs_glob(path: PathLike, recursive: bool = True,
135
138
 
136
139
 
137
140
  def fs_glob_stat(
138
- path: PathLike, recursive: bool = True,
141
+ path: PathLike,
142
+ recursive: bool = True,
139
143
  missing_ok: bool = True) -> Iterator[FileEntry]:
140
144
  '''Return a list contains tuples of path and file stat, in ascending alphabetical order, in which path matches glob pattern
141
145
 
@@ -145,7 +149,7 @@ def fs_glob_stat(
145
149
  Assume there exists a path `/a/b/c/b/d.txt`
146
150
  use path pattern like `/**/b/**/*.txt` to glob, the path above will be returned twice
147
151
  3. `**` will match any matched file, directory, symlink and '' by default, when recursive is `True`
148
- 4. fs_glob returns same as glob.glob(pathname, recursive=True) in acsending alphabetical order.
152
+ 4. fs_glob returns same as glob.glob(pathname, recursive=True) in ascending alphabetical order.
149
153
  5. Hidden files (filename starts with '.') will not be found in the result
150
154
 
151
155
  :param recursive: If False, `**` will not search directory recursively
@@ -262,7 +266,7 @@ class FSPath(URIPath):
262
266
 
263
267
  protocol = "file"
264
268
 
265
- def __init__(self, path: Union["PathLike", int], *other_paths: "PathLike"):
269
+ def __init__(self, path: Union[PathLike, int], *other_paths: PathLike):
266
270
  if not isinstance(path, int):
267
271
  if len(other_paths) > 0:
268
272
  path = self.from_path(path).joinpath(*other_paths)
@@ -272,20 +276,20 @@ class FSPath(URIPath):
272
276
  def __fspath__(self) -> str:
273
277
  return os.path.normpath(self.path_without_protocol)
274
278
 
275
- @cachedproperty
279
+ @cached_property
276
280
  def root(self) -> str:
277
281
  return pathlib.Path(self.path_without_protocol).root
278
282
 
279
- @cachedproperty
283
+ @cached_property
280
284
  def anchor(self) -> str:
281
285
  return pathlib.Path(self.path_without_protocol).anchor
282
286
 
283
- @cachedproperty
287
+ @cached_property
284
288
  def drive(self) -> str:
285
289
  return pathlib.Path(self.path_without_protocol).drive
286
290
 
287
291
  @classmethod
288
- def from_uri(cls, path: str) -> "FSPath":
292
+ def from_uri(cls, path: PathLike) -> "FSPath":
289
293
  return cls.from_path(path)
290
294
 
291
295
  @property
@@ -293,9 +297,9 @@ class FSPath(URIPath):
293
297
  if isinstance(self.path, int):
294
298
  return self.path
295
299
  protocol_prefix = self.protocol + "://"
296
- if self.path.startswith(protocol_prefix):
297
- return self.path
298
- return protocol_prefix + self.path
300
+ if self.path.startswith(protocol_prefix): # pyre-ignore[16]
301
+ return self.path # pyre-ignore[7]
302
+ return protocol_prefix + self.path # pyre-ignore[58]
299
303
 
300
304
  def is_absolute(self) -> bool:
301
305
  '''Test whether a path is absolute
@@ -319,14 +323,14 @@ class FSPath(URIPath):
319
323
  :param mode: access mode
320
324
  :returns: Access: Enum, the read/write access that path has.
321
325
  '''
322
- if not isinstance(mode, Access):
323
- raise TypeError(
324
- 'Unsupported mode: {} -- Mode should use one of the enums belonging to: {}'
325
- .format(mode, ', '.join([str(a) for a in Access])))
326
326
  if mode == Access.READ:
327
327
  return os.access(self.path_without_protocol, os.R_OK)
328
- if mode == Access.WRITE:
328
+ elif mode == Access.WRITE:
329
329
  return os.access(self.path_without_protocol, os.W_OK)
330
+ else:
331
+ raise TypeError(
332
+ 'Unsupported mode: {} -- Mode should use one of the enums belonging to: {}'
333
+ .format(mode, ', '.join([str(a) for a in Access])))
330
334
 
331
335
  def exists(self, followlinks: bool = False) -> bool:
332
336
  '''
@@ -365,7 +369,9 @@ class FSPath(URIPath):
365
369
  '''
366
370
  return self.stat(follow_symlinks=follow_symlinks).size
367
371
 
368
- def glob(self, pattern, recursive: bool = True,
372
+ def glob(self,
373
+ pattern,
374
+ recursive: bool = True,
369
375
  missing_ok: bool = True) -> List['FSPath']:
370
376
  '''Return path list in ascending alphabetical order, in which path matches glob pattern
371
377
 
@@ -375,7 +381,7 @@ class FSPath(URIPath):
375
381
  Assume there exists a path `/a/b/c/b/d.txt`
376
382
  use path pattern like `/**/b/**/*.txt` to glob, the path above will be returned twice
377
383
  3. `**` will match any matched file, directory, symlink and '' by default, when recursive is `True`
378
- 4. fs_glob returns same as glob.glob(pathname, recursive=True) in acsending alphabetical order.
384
+ 4. fs_glob returns same as glob.glob(pathname, recursive=True) in ascending alphabetical order.
379
385
  5. Hidden files (filename starts with '.') will not be found in the result
380
386
 
381
387
  :param pattern: Glob the given relative pattern in the directory represented by this path
@@ -388,7 +394,9 @@ class FSPath(URIPath):
388
394
  pattern=pattern, recursive=recursive, missing_ok=missing_ok))
389
395
 
390
396
  def glob_stat(
391
- self, pattern, recursive: bool = True,
397
+ self,
398
+ pattern,
399
+ recursive: bool = True,
392
400
  missing_ok: bool = True) -> Iterator[FileEntry]:
393
401
  '''Return a list contains tuples of path and file stat, in ascending alphabetical order, in which path matches glob pattern
394
402
 
@@ -398,7 +406,7 @@ class FSPath(URIPath):
398
406
  Assume there exists a path `/a/b/c/b/d.txt`
399
407
  use path pattern like `/**/b/**/*.txt` to glob, the path above will be returned twice
400
408
  3. `**` will match any matched file, directory, symlink and '' by default, when recursive is `True`
401
- 4. fs_glob returns same as glob.glob(pathname, recursive=True) in acsending alphabetical order.
409
+ 4. fs_glob returns same as glob.glob(pathname, recursive=True) in ascending alphabetical order.
402
410
  5. Hidden files (filename starts with '.') will not be found in the result
403
411
 
404
412
  :param pattern: Glob the given relative pattern in the directory represented by this path
@@ -409,8 +417,9 @@ class FSPath(URIPath):
409
417
  for path_obj in self.iglob(pattern=pattern, recursive=recursive,
410
418
  missing_ok=missing_ok):
411
419
  yield FileEntry(
412
- path_obj.name, path_obj.path,
413
- _make_stat(os.lstat(path_obj.path)))
420
+ path_obj.name,
421
+ path_obj.path, # pyre-ignore[6]
422
+ _make_stat(os.lstat(path_obj.path))) # pyre-ignore[6]
414
423
 
415
424
  def expanduser(self):
416
425
  '''Expand ~ and ~user constructions. If user or $HOME is unknown,
@@ -418,7 +427,9 @@ class FSPath(URIPath):
418
427
  '''
419
428
  return os.path.expanduser(self.path_without_protocol)
420
429
 
421
- def iglob(self, pattern, recursive: bool = True,
430
+ def iglob(self,
431
+ pattern,
432
+ recursive: bool = True,
422
433
  missing_ok: bool = True) -> Iterator['FSPath']:
423
434
  '''Return path iterator in ascending alphabetical order, in which path matches glob pattern
424
435
 
@@ -428,7 +439,7 @@ class FSPath(URIPath):
428
439
  Assume there exists a path `/a/b/c/b/d.txt`
429
440
  use path pattern like `/**/b/**/*.txt` to glob, the path above will be returned twice
430
441
  3. `**` will match any matched file, directory, symlink and '' by default, when recursive is `True`
431
- 4. fs_glob returns same as glob.glob(pathname, recursive=True) in acsending alphabetical order.
442
+ 4. fs_glob returns same as glob.glob(pathname, recursive=True) in ascending alphabetical order.
432
443
  5. Hidden files (filename starts with '.') will not be found in the result
433
444
 
434
445
  :param pattern: Glob the given relative pattern in the directory represented by this path
@@ -477,20 +488,20 @@ class FSPath(URIPath):
477
488
 
478
489
  def listdir(self) -> List[str]:
479
490
  '''
480
- Get all contents of given fs path. The result is in acsending alphabetical order.
491
+ Get all contents of given fs path. The result is in ascending alphabetical order.
481
492
 
482
- :returns: All contents have in the path in acsending alphabetical order
493
+ :returns: All contents have in the path in ascending alphabetical order
483
494
  '''
484
495
  return sorted(os.listdir(self.path_without_protocol))
485
496
 
486
497
  def iterdir(self) -> Iterator['FSPath']:
487
498
  '''
488
- Get all contents of given fs path. The result is in acsending alphabetical order.
499
+ Get all contents of given fs path. The result is in ascending alphabetical order.
489
500
 
490
- :returns: All contents have in the path in acsending alphabetical order
501
+ :returns: All contents have in the path in ascending alphabetical order
491
502
  '''
492
503
  for path in self.listdir():
493
- yield self.joinpath(path) # type: ignore
504
+ yield self.joinpath(path)
494
505
 
495
506
  def load(self) -> BinaryIO:
496
507
  '''Read all content on specified path and write into memory
@@ -505,14 +516,14 @@ class FSPath(URIPath):
505
516
 
506
517
  def mkdir(self, mode=0o777, parents: bool = False, exist_ok: bool = False):
507
518
  '''
508
- make a directory on fs, including parent directory
509
-
519
+ make a directory on fs, including parent directory.
510
520
  If there exists a file on the path, raise FileExistsError
511
521
 
512
522
  :param mode: If mode is given, it is combined with the process’ umask value to determine the file mode and access flags.
513
523
  :param parents: If parents is true, any missing parents of this path are created as needed;
514
- If parents is false (the default), a missing parent raises FileNotFoundError.
524
+ If parents is false (the default), a missing parent raises FileNotFoundError.
515
525
  :param exist_ok: If False and target directory exists, raise FileExistsError
526
+
516
527
  :raises: FileExistsError
517
528
  '''
518
529
  if exist_ok and self.path_without_protocol == '':
@@ -567,7 +578,8 @@ class FSPath(URIPath):
567
578
  else:
568
579
  os.remove(self.path_without_protocol)
569
580
 
570
- def _scan(self, missing_ok: bool = True,
581
+ def _scan(self,
582
+ missing_ok: bool = True,
571
583
  followlinks: bool = False) -> Iterator[str]:
572
584
  if self.is_file(followlinks=followlinks):
573
585
  path = fspath(self.path_without_protocol)
@@ -577,7 +589,8 @@ class FSPath(URIPath):
577
589
  for filename in files:
578
590
  yield os.path.join(root, filename)
579
591
 
580
- def scan(self, missing_ok: bool = True,
592
+ def scan(self,
593
+ missing_ok: bool = True,
581
594
  followlinks: bool = False) -> Iterator[str]:
582
595
  '''
583
596
  Iteratively traverse only files in given directory, in alphabetical order.
@@ -594,7 +607,8 @@ class FSPath(URIPath):
594
607
  self._scan(followlinks=followlinks), missing_ok,
595
608
  FileNotFoundError('No match any file in: %r' % self.path))
596
609
 
597
- def scan_stat(self, missing_ok: bool = True,
610
+ def scan_stat(self,
611
+ missing_ok: bool = True,
598
612
  followlinks: bool = False) -> Iterator[FileEntry]:
599
613
  '''
600
614
  Iteratively traverse only files in given directory, in alphabetical order.
@@ -668,8 +682,10 @@ class FSPath(URIPath):
668
682
  return
669
683
  os.unlink(self.path_without_protocol)
670
684
 
671
- def walk(self, followlinks: bool = False
672
- ) -> Iterator[Tuple[str, List[str], List[str]]]:
685
+ def walk(
686
+ self,
687
+ followlinks: bool = False
688
+ ) -> Iterator[Tuple[str, List[str], List[str]]]:
673
689
  '''
674
690
  Generate the file names in a directory tree by walking the tree top-down.
675
691
  For each directory in the tree rooted at directory path (including path itself),
@@ -734,6 +750,7 @@ class FSPath(URIPath):
734
750
 
735
751
  :param recalculate: Ignore this parameter, just for compatibility
736
752
  :param followlinks: Ignore this parameter, just for compatibility
753
+
737
754
  returns: md5 of file
738
755
  '''
739
756
  if os.path.isdir(self.path_without_protocol):
@@ -743,7 +760,7 @@ class FSPath(URIPath):
743
760
  recalculate=recalculate, followlinks=followlinks).encode()
744
761
  hash_md5.update(chunk)
745
762
  return hash_md5.hexdigest()
746
- with open(self.path_without_protocol, 'rb') as src: # type: ignore
763
+ with open(self.path_without_protocol, 'rb') as src:
747
764
  md5 = calculate_md5(src)
748
765
  return md5
749
766
 
@@ -754,7 +771,9 @@ class FSPath(URIPath):
754
771
  followlinks: bool = False):
755
772
 
756
773
  shutil.copy2(
757
- self.path_without_protocol, dst_path, follow_symlinks=followlinks)
774
+ self.path_without_protocol,
775
+ fspath(dst_path),
776
+ follow_symlinks=followlinks)
758
777
 
759
778
  # After python3.8, patching `shutil.copyfile` is not a reliable approach, because `shutil.copy2` does not call it in some cases.
760
779
  if callback:
@@ -786,6 +805,7 @@ class FSPath(URIPath):
786
805
  :param followlinks: False if regard symlink as file, else True
787
806
  :param overwrite: whether or not overwrite file when exists, default is True
788
807
  '''
808
+ dst_path = fspath(dst_path)
789
809
  if not overwrite and os.path.exists((dst_path)):
790
810
  return
791
811
 
@@ -811,7 +831,7 @@ class FSPath(URIPath):
811
831
 
812
832
  :param dst_path: Target file path
813
833
  :param followlinks: False if regard symlink as file, else True
814
- :param force: Sync file forcely, do not ignore same files, priority is higher than 'overwrite', default is False
834
+ :param force: Sync file forcibly, do not ignore same files, priority is higher than 'overwrite', default is False
815
835
  :param overwrite: whether or not overwrite file when exists, default is True
816
836
  '''
817
837
  if self.is_dir(followlinks=followlinks):
@@ -841,7 +861,7 @@ class FSPath(URIPath):
841
861
  '''
842
862
  Create a symbolic link pointing to src_path named dst_path.
843
863
 
844
- :param dst_path: Desination path
864
+ :param dst_path: Destination path
845
865
  '''
846
866
  return os.symlink(self.path_without_protocol, dst_path)
847
867
 
@@ -906,7 +926,7 @@ class FSPath(URIPath):
906
926
  errors=None,
907
927
  newline=None,
908
928
  closefd=True,
909
- **kwargs) -> IO[AnyStr]: # pytype: disable=signature-mismatch
929
+ **kwargs) -> IO:
910
930
  if not isinstance(self.path_without_protocol, int) and ('w' in mode or
911
931
  'x' in mode or
912
932
  'a' in mode):
@@ -921,8 +941,8 @@ class FSPath(URIPath):
921
941
  newline=newline,
922
942
  closefd=closefd)
923
943
 
924
- @cachedproperty
925
- def parts(self) -> Tuple[str]:
944
+ @cached_property
945
+ def parts(self) -> Tuple[str, ...]:
926
946
  '''
927
947
  A tuple giving access to the path’s various components
928
948
  '''
megfile/hdfs.py CHANGED
@@ -1,4 +1,4 @@
1
- from typing import IO, AnyStr, BinaryIO, Iterator, List, Optional, Tuple
1
+ from typing import IO, BinaryIO, Iterator, List, Optional, Tuple
2
2
 
3
3
  from megfile.hdfs_path import HdfsPath, hdfs_glob, hdfs_glob_stat, hdfs_iglob, hdfs_makedirs, is_hdfs
4
4
  from megfile.interfaces import FileEntry, PathLike, StatResult
@@ -37,7 +37,7 @@ def hdfs_exists(path: PathLike, followlinks: bool = False) -> bool:
37
37
  If the bucket of path are not permitted to read, return False
38
38
 
39
39
  :param path: Given path
40
- :returns: True if path eixsts, else False
40
+ :returns: True if path exists, else False
41
41
  '''
42
42
  return HdfsPath(path).exists(followlinks)
43
43
 
@@ -154,7 +154,8 @@ def hdfs_remove(path: PathLike, missing_ok: bool = False) -> None:
154
154
 
155
155
 
156
156
  def hdfs_scan(
157
- path: PathLike, missing_ok: bool = True,
157
+ path: PathLike,
158
+ missing_ok: bool = True,
158
159
  followlinks: bool = False) -> Iterator[str]:
159
160
  '''
160
161
  Iteratively traverse only files in given hdfs directory.
@@ -175,7 +176,8 @@ def hdfs_scan(
175
176
 
176
177
 
177
178
  def hdfs_scan_stat(
178
- path: PathLike, missing_ok: bool = True,
179
+ path: PathLike,
180
+ missing_ok: bool = True,
179
181
  followlinks: bool = False) -> Iterator[FileEntry]:
180
182
  '''
181
183
  Iteratively traverse only files in given directory.
@@ -212,8 +214,10 @@ def hdfs_unlink(path: PathLike, missing_ok: bool = False) -> None:
212
214
  return HdfsPath(path).unlink(missing_ok)
213
215
 
214
216
 
215
- def hdfs_walk(path: PathLike, followlinks: bool = False
216
- ) -> Iterator[Tuple[str, List[str], List[str]]]:
217
+ def hdfs_walk(
218
+ path: PathLike,
219
+ followlinks: bool = False
220
+ ) -> Iterator[Tuple[str, List[str], List[str]]]:
217
221
  '''
218
222
  Iteratively traverse the given hdfs directory, in top-bottom order. In other words, firstly traverse parent directory, if subdirectories exist, traverse the subdirectories.
219
223
  Every iteration on generator yields a 3-tuple: (root, dirs, files)
@@ -236,7 +240,8 @@ def hdfs_walk(path: PathLike, followlinks: bool = False
236
240
 
237
241
 
238
242
  def hdfs_getmd5(
239
- path: PathLike, recalculate: bool = False,
243
+ path: PathLike,
244
+ recalculate: bool = False,
240
245
  followlinks: bool = False) -> str:
241
246
  '''
242
247
  Get checksum of the file or dir.
@@ -265,6 +270,6 @@ def hdfs_open(
265
270
  buffering: Optional[int] = None,
266
271
  encoding: Optional[str] = None,
267
272
  errors: Optional[str] = None,
268
- **kwargs) -> IO[AnyStr]: # pytype: disable=signature-mismatch
273
+ **kwargs) -> IO:
269
274
  return HdfsPath(path).open(
270
275
  mode, buffering=buffering, encoding=encoding, errors=errors)
megfile/hdfs_path.py CHANGED
@@ -1,9 +1,10 @@
1
+ # pyre-ignore-all-errors[16]
1
2
  import hashlib
2
3
  import io
3
4
  import os
4
5
  import sys
5
- from functools import lru_cache
6
- from typing import IO, AnyStr, BinaryIO, Iterator, List, Optional, Tuple
6
+ from functools import cached_property, lru_cache
7
+ from typing import IO, BinaryIO, Iterator, List, Optional, Tuple
7
8
 
8
9
  from megfile.errors import _create_missing_ok_generator, raise_hdfs_error
9
10
  from megfile.interfaces import FileEntry, PathLike, StatResult, URIPath
@@ -14,7 +15,7 @@ from megfile.lib.hdfs_tools import hdfs_api
14
15
  from megfile.lib.url import get_url_scheme
15
16
  from megfile.pathlike import PathLike, URIPath
16
17
  from megfile.smart_path import SmartPath
17
- from megfile.utils import _is_pickle, cachedproperty
18
+ from megfile.utils import _is_pickle
18
19
 
19
20
  __all__ = [
20
21
  'HdfsPath',
@@ -35,7 +36,7 @@ MAX_RETRIES = 10
35
36
  DEFAULT_HDFS_TIMEOUT = 10
36
37
 
37
38
 
38
- def is_hdfs(path: PathLike) -> bool: # pytype: disable=invalid-annotation
39
+ def is_hdfs(path: PathLike) -> bool:
39
40
  '''Test if a path is hdfs path
40
41
 
41
42
  :param path: Path to be tested
@@ -55,7 +56,7 @@ def get_hdfs_config(profile_name: Optional[str] = None):
55
56
  }
56
57
  timeout_env = f"{env_profile}{HDFS_TIMEOUT}"
57
58
  if os.getenv(timeout_env):
58
- config['timeout'] = int(os.getenv(timeout_env))
59
+ config['timeout'] = int(os.environ[timeout_env])
59
60
 
60
61
  config_path = os.getenv(HDFS_CONFIG_PATH) or os.path.expanduser(
61
62
  '~/.hdfscli.cfg')
@@ -99,9 +100,9 @@ def get_hdfs_client(profile_name: Optional[str] = None):
99
100
 
100
101
 
101
102
  def hdfs_glob(
102
- path: PathLike,
103
- recursive: bool = True,
104
- missing_ok: bool = True,
103
+ path: PathLike,
104
+ recursive: bool = True,
105
+ missing_ok: bool = True,
105
106
  ) -> List[str]:
106
107
  '''Return hdfs path list in ascending alphabetical order, in which path matches glob pattern
107
108
  Notes: Only glob in bucket. If trying to match bucket with wildcard characters, raise UnsupportedError
@@ -115,7 +116,8 @@ def hdfs_glob(
115
116
 
116
117
 
117
118
  def hdfs_glob_stat(
118
- path: PathLike, recursive: bool = True,
119
+ path: PathLike,
120
+ recursive: bool = True,
119
121
  missing_ok: bool = True) -> Iterator[FileEntry]:
120
122
  '''Return a generator contains tuples of path and file stat, in ascending alphabetical order, in which path matches glob pattern
121
123
  Notes: Only glob in bucket. If trying to match bucket with wildcard characters, raise UnsupportedError
@@ -130,9 +132,9 @@ def hdfs_glob_stat(
130
132
 
131
133
 
132
134
  def hdfs_iglob(
133
- path: PathLike,
134
- recursive: bool = True,
135
- missing_ok: bool = True,
135
+ path: PathLike,
136
+ recursive: bool = True,
137
+ missing_ok: bool = True,
136
138
  ) -> Iterator[str]:
137
139
  '''Return hdfs path iterator in ascending alphabetical order, in which path matches glob pattern
138
140
  Notes: Only glob in bucket. If trying to match bucket with wildcard characters, raise UnsupportedError
@@ -177,7 +179,7 @@ class HdfsPath(URIPath):
177
179
  def _client(self):
178
180
  return get_hdfs_client(profile_name=self._profile_name)
179
181
 
180
- @cachedproperty
182
+ @cached_property
181
183
  def path_with_protocol(self) -> str:
182
184
  '''Return path with protocol, like hdfs://path'''
183
185
  path = self.path
@@ -186,7 +188,7 @@ class HdfsPath(URIPath):
186
188
  return path
187
189
  return protocol_prefix + path.lstrip('/')
188
190
 
189
- @cachedproperty
191
+ @cached_property
190
192
  def path_without_protocol(self) -> str:
191
193
  '''Return path without protocol, example: if path is hdfs://path, return path'''
192
194
  path = self.path
@@ -195,8 +197,8 @@ class HdfsPath(URIPath):
195
197
  path = path[len(protocol_prefix):]
196
198
  return path
197
199
 
198
- @cachedproperty
199
- def parts(self) -> Tuple[str]:
200
+ @cached_property
201
+ def parts(self) -> Tuple[str, ...]:
200
202
  '''A tuple giving access to the path’s various components'''
201
203
  parts = [f"{self._protocol_with_profile}://"]
202
204
  path = self.path_without_protocol
@@ -211,7 +213,7 @@ class HdfsPath(URIPath):
211
213
 
212
214
  If the bucket of path are not permitted to read, return False
213
215
 
214
- :returns: True if path eixsts, else False
216
+ :returns: True if path exists, else False
215
217
  '''
216
218
  return bool(
217
219
  self._client.status(self.path_without_protocol, strict=False))
@@ -262,10 +264,10 @@ class HdfsPath(URIPath):
262
264
  return self.stat(follow_symlinks=follow_symlinks).size
263
265
 
264
266
  def glob(
265
- self,
266
- pattern,
267
- recursive: bool = True,
268
- missing_ok: bool = True,
267
+ self,
268
+ pattern,
269
+ recursive: bool = True,
270
+ missing_ok: bool = True,
269
271
  ) -> List['HdfsPath']:
270
272
  '''Return hdfs path list, in which path matches glob pattern
271
273
  Notes: Only glob in bucket. If trying to match bucket with wildcard characters, raise UnsupportedError
@@ -281,7 +283,9 @@ class HdfsPath(URIPath):
281
283
  pattern=pattern, recursive=recursive, missing_ok=missing_ok))
282
284
 
283
285
  def glob_stat(
284
- self, pattern, recursive: bool = True,
286
+ self,
287
+ pattern,
288
+ recursive: bool = True,
285
289
  missing_ok: bool = True) -> Iterator[FileEntry]:
286
290
  '''Return a generator contains tuples of path and file stat, in which path matches glob pattern
287
291
  Notes: Only glob in bucket. If trying to match bucket with wildcard characters, raise UnsupportedError
@@ -297,10 +301,10 @@ class HdfsPath(URIPath):
297
301
  yield FileEntry(path_obj.name, path_obj.path, path_obj.stat())
298
302
 
299
303
  def iglob(
300
- self,
301
- pattern,
302
- recursive: bool = True,
303
- missing_ok: bool = True,
304
+ self,
305
+ pattern,
306
+ recursive: bool = True,
307
+ missing_ok: bool = True,
304
308
  ) -> Iterator['HdfsPath']:
305
309
  '''Return hdfs path iterator, in which path matches glob pattern
306
310
  Notes: Only glob in bucket. If trying to match bucket with wildcard characters, raise UnsupportedError
@@ -372,7 +376,7 @@ class HdfsPath(URIPath):
372
376
  :raises: FileNotFoundError, NotADirectoryError
373
377
  '''
374
378
  for filename in self.listdir(followlinks=followlinks):
375
- yield self.joinpath(filename) # pytype: disable=bad-return-type
379
+ yield self.joinpath(filename)
376
380
 
377
381
  def load(self, followlinks: bool = False) -> BinaryIO:
378
382
  '''Read all content in binary on specified path and write into memory
@@ -415,7 +419,7 @@ class HdfsPath(URIPath):
415
419
  dst_path = self.from_path(dst_path)
416
420
  if self.is_dir():
417
421
  for filename in self.iterdir():
418
- self.joinpath(filename).rename(dst_path.joinpath(filename)) # pytype: disable=attribute-error
422
+ self.joinpath(filename).rename(dst_path.joinpath(filename))
419
423
  else:
420
424
  if overwrite:
421
425
  dst_path.remove(missing_ok=True)
@@ -449,7 +453,8 @@ class HdfsPath(URIPath):
449
453
  if not missing_ok or not isinstance(e, FileNotFoundError):
450
454
  raise
451
455
 
452
- def scan(self, missing_ok: bool = True,
456
+ def scan(self,
457
+ missing_ok: bool = True,
453
458
  followlinks: bool = False) -> Iterator[str]:
454
459
  '''
455
460
  Iteratively traverse only files in given hdfs directory.
@@ -469,7 +474,8 @@ class HdfsPath(URIPath):
469
474
  followlinks=followlinks):
470
475
  yield file_entry.path
471
476
 
472
- def scan_stat(self, missing_ok: bool = True,
477
+ def scan_stat(self,
478
+ missing_ok: bool = True,
473
479
  followlinks: bool = False) -> Iterator[FileEntry]:
474
480
  '''
475
481
  Iteratively traverse only files in given directory.
@@ -530,8 +536,10 @@ class HdfsPath(URIPath):
530
536
  raise IsADirectoryError('Path is a directory: %r' % self.path)
531
537
  self.remove(missing_ok=missing_ok)
532
538
 
533
- def walk(self, followlinks: bool = False
534
- ) -> Iterator[Tuple[str, List[str], List[str]]]:
539
+ def walk(
540
+ self,
541
+ followlinks: bool = False
542
+ ) -> Iterator[Tuple[str, List[str], List[str]]]:
535
543
  '''
536
544
  Iteratively traverse the given hdfs directory, in top-bottom order. In other words, firstly traverse parent directory, if subdirectories exist, traverse the subdirectories.
537
545
  Every iteration on generator yields a 3-tuple: (root, dirs, files)
@@ -566,7 +574,7 @@ class HdfsPath(URIPath):
566
574
  if self.is_dir(followlinks=followlinks):
567
575
  hash_md5 = hashlib.md5() # nosec
568
576
  for file_name in self.listdir():
569
- chunk = self.joinpath(file_name).md5( # pytype: disable=attribute-error
577
+ chunk = self.joinpath(file_name).md5(
570
578
  recalculate=recalculate).encode()
571
579
  hash_md5.update(chunk)
572
580
  return hash_md5.hexdigest()
@@ -589,7 +597,7 @@ class HdfsPath(URIPath):
589
597
  buffering: Optional[int] = None,
590
598
  encoding: Optional[str] = None,
591
599
  errors: Optional[str] = None,
592
- **kwargs) -> IO[AnyStr]: # pytype: disable=signature-mismatch
600
+ **kwargs) -> IO:
593
601
  if '+' in mode:
594
602
  raise ValueError('unacceptable mode: %r' % mode)
595
603
 
@@ -613,21 +621,21 @@ class HdfsPath(URIPath):
613
621
  client=self._client,
614
622
  profile_name=self._profile_name,
615
623
  **input_kwargs)
616
- if _is_pickle(file_obj): # pytype: disable=wrong-arg-types
617
- file_obj = io.BufferedReader(file_obj) # pytype: disable=wrong-arg-types
624
+ if _is_pickle(file_obj):
625
+ file_obj = io.BufferedReader(file_obj) # type: ignore
618
626
  if 'b' not in mode:
619
627
  file_obj = io.TextIOWrapper(
620
- file_obj, encoding=encoding, errors=errors) # pytype: disable=wrong-arg-types
621
- file_obj.mode = mode
622
- return file_obj # pytype: disable=bad-return-type
628
+ file_obj, encoding=encoding, errors=errors)
629
+ file_obj.mode = mode # pyre-ignore[41]
630
+ return file_obj
623
631
  elif mode in ('w', 'wb'):
624
- return self._client.write( # pytype: disable=bad-return-type
632
+ return self._client.write(
625
633
  self.path_without_protocol,
626
634
  overwrite=True,
627
635
  buffersize=buffering,
628
636
  encoding=encoding)
629
637
  elif mode in ('a', 'ab'):
630
- return self._client.write( # pytype: disable=bad-return-type
638
+ return self._client.write(
631
639
  self.path_without_protocol,
632
640
  append=True,
633
641
  buffersize=buffering,