megfile 3.1.6.post1__py3-none-any.whl → 4.0.0.post1__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. megfile/cli.py +12 -7
  2. megfile/config.py +27 -39
  3. megfile/fs.py +169 -12
  4. megfile/fs_path.py +183 -260
  5. megfile/hdfs.py +106 -5
  6. megfile/hdfs_path.py +34 -90
  7. megfile/http.py +50 -1
  8. megfile/http_path.py +27 -65
  9. megfile/interfaces.py +1 -8
  10. megfile/lib/base_prefetch_reader.py +62 -78
  11. megfile/lib/combine_reader.py +5 -0
  12. megfile/lib/glob.py +3 -6
  13. megfile/lib/hdfs_prefetch_reader.py +7 -7
  14. megfile/lib/http_prefetch_reader.py +6 -6
  15. megfile/lib/s3_buffered_writer.py +71 -65
  16. megfile/lib/s3_cached_handler.py +1 -2
  17. megfile/lib/s3_limited_seekable_writer.py +3 -7
  18. megfile/lib/s3_memory_handler.py +1 -2
  19. megfile/lib/s3_pipe_handler.py +1 -2
  20. megfile/lib/s3_prefetch_reader.py +10 -19
  21. megfile/lib/s3_share_cache_reader.py +8 -5
  22. megfile/pathlike.py +397 -401
  23. megfile/s3.py +118 -17
  24. megfile/s3_path.py +126 -209
  25. megfile/sftp.py +300 -10
  26. megfile/sftp_path.py +46 -322
  27. megfile/smart.py +33 -27
  28. megfile/smart_path.py +9 -14
  29. megfile/stdio.py +1 -1
  30. megfile/stdio_path.py +2 -2
  31. megfile/utils/__init__.py +3 -4
  32. megfile/version.py +1 -1
  33. {megfile-3.1.6.post1.dist-info → megfile-4.0.0.post1.dist-info}/METADATA +7 -7
  34. megfile-4.0.0.post1.dist-info/RECORD +52 -0
  35. {megfile-3.1.6.post1.dist-info → megfile-4.0.0.post1.dist-info}/WHEEL +1 -1
  36. {megfile-3.1.6.post1.dist-info → megfile-4.0.0.post1.dist-info}/top_level.txt +0 -2
  37. docs/conf.py +0 -65
  38. megfile-3.1.6.post1.dist-info/RECORD +0 -55
  39. scripts/convert_results_to_sarif.py +0 -91
  40. scripts/generate_file.py +0 -344
  41. {megfile-3.1.6.post1.dist-info → megfile-4.0.0.post1.dist-info}/LICENSE +0 -0
  42. {megfile-3.1.6.post1.dist-info → megfile-4.0.0.post1.dist-info}/LICENSE.pyre +0 -0
  43. {megfile-3.1.6.post1.dist-info → megfile-4.0.0.post1.dist-info}/entry_points.txt +0 -0
@@ -3,12 +3,8 @@ from io import BytesIO
3
3
  from typing import Optional
4
4
 
5
5
  from megfile.config import (
6
- BACKOFF_FACTOR,
7
- BACKOFF_INITIAL,
8
- DEFAULT_BLOCK_CAPACITY,
9
- DEFAULT_BLOCK_SIZE,
10
- GLOBAL_MAX_WORKERS,
11
- NEWLINE,
6
+ READER_BLOCK_SIZE,
7
+ READER_MAX_BUFFER_SIZE,
12
8
  S3_MAX_RETRY_TIMES,
13
9
  )
14
10
  from megfile.errors import (
@@ -21,12 +17,6 @@ from megfile.errors import (
21
17
  from megfile.lib.base_prefetch_reader import BasePrefetchReader, LRUCacheFutureManager
22
18
 
23
19
  __all__ = [
24
- "DEFAULT_BLOCK_CAPACITY",
25
- "DEFAULT_BLOCK_SIZE",
26
- "GLOBAL_MAX_WORKERS",
27
- "BACKOFF_INITIAL",
28
- "BACKOFF_FACTOR",
29
- "NEWLINE",
30
20
  "S3PrefetchReader",
31
21
  "LRUCacheFutureManager",
32
22
  ]
@@ -37,7 +27,7 @@ class S3PrefetchReader(BasePrefetchReader):
37
27
  Reader to fast read the s3 content.
38
28
 
39
29
  This will divide the file content into equalparts of block_size size,
40
- and will use LRU to cache at most block_capacity blocks in memory.
30
+ and will use LRU to cache at most blocks in max_buffer_size memory.
41
31
 
42
32
  open(), seek() and read() will trigger prefetch read.
43
33
  The prefetch will cached block_forward blocks of data from offset position
@@ -50,8 +40,8 @@ class S3PrefetchReader(BasePrefetchReader):
50
40
  key: str,
51
41
  *,
52
42
  s3_client,
53
- block_size: int = DEFAULT_BLOCK_SIZE,
54
- block_capacity: int = DEFAULT_BLOCK_CAPACITY,
43
+ block_size: int = READER_BLOCK_SIZE,
44
+ max_buffer_size: int = READER_MAX_BUFFER_SIZE,
55
45
  block_forward: Optional[int] = None,
56
46
  max_retries: int = S3_MAX_RETRY_TIMES,
57
47
  max_workers: Optional[int] = None,
@@ -66,7 +56,7 @@ class S3PrefetchReader(BasePrefetchReader):
66
56
 
67
57
  super().__init__(
68
58
  block_size=block_size,
69
- block_capacity=block_capacity,
59
+ max_buffer_size=max_buffer_size,
70
60
  block_forward=block_forward,
71
61
  max_retries=max_retries,
72
62
  max_workers=max_workers,
@@ -87,9 +77,10 @@ class S3PrefetchReader(BasePrefetchReader):
87
77
  first_index_response = self._fetch_response()
88
78
  content_size = int(first_index_response["ContentLength"])
89
79
 
90
- first_future = Future()
91
- first_future.set_result(first_index_response["Body"])
92
- self._insert_futures(index=0, future=first_future)
80
+ if self._block_capacity > 0:
81
+ first_future = Future()
82
+ first_future.set_result(first_index_response["Body"])
83
+ self._insert_futures(index=0, future=first_future)
93
84
  self._content_etag = first_index_response["ETag"]
94
85
  self._content_info = first_index_response
95
86
  return content_size
@@ -4,8 +4,8 @@ from logging import getLogger as get_logger
4
4
  from typing import Optional
5
5
 
6
6
  from megfile.config import (
7
- DEFAULT_BLOCK_CAPACITY,
8
- DEFAULT_BLOCK_SIZE,
7
+ READER_BLOCK_SIZE,
8
+ READER_MAX_BUFFER_SIZE,
9
9
  S3_MAX_RETRY_TIMES,
10
10
  )
11
11
  from megfile.lib.s3_prefetch_reader import LRUCacheFutureManager, S3PrefetchReader
@@ -14,6 +14,9 @@ from megfile.utils import thread_local
14
14
  _logger = get_logger(__name__)
15
15
 
16
16
 
17
+ DEFAULT_BLOCK_CAPACITY = max(READER_MAX_BUFFER_SIZE // READER_BLOCK_SIZE, 1)
18
+
19
+
17
20
  class S3ShareCacheReader(S3PrefetchReader):
18
21
  """
19
22
  Reader to fast read the s3 content.
@@ -32,8 +35,8 @@ class S3ShareCacheReader(S3PrefetchReader):
32
35
  key: str,
33
36
  *,
34
37
  s3_client,
35
- block_size: int = DEFAULT_BLOCK_SIZE,
36
- block_capacity: int = DEFAULT_BLOCK_CAPACITY,
38
+ block_size: int = READER_BLOCK_SIZE,
39
+ max_buffer_size: int = READER_MAX_BUFFER_SIZE,
37
40
  block_forward: Optional[int] = None,
38
41
  max_retries: int = S3_MAX_RETRY_TIMES,
39
42
  cache_key: str = "lru",
@@ -47,7 +50,7 @@ class S3ShareCacheReader(S3PrefetchReader):
47
50
  key,
48
51
  s3_client=s3_client,
49
52
  block_size=block_size,
50
- block_capacity=block_capacity,
53
+ max_buffer_size=max_buffer_size,
51
54
  block_forward=block_forward,
52
55
  max_retries=max_retries,
53
56
  max_workers=max_workers,