megfile 3.1.1__py3-none-any.whl → 3.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. docs/conf.py +2 -4
  2. megfile/__init__.py +394 -203
  3. megfile/cli.py +258 -238
  4. megfile/config.py +25 -21
  5. megfile/errors.py +124 -114
  6. megfile/fs.py +174 -140
  7. megfile/fs_path.py +462 -354
  8. megfile/hdfs.py +133 -101
  9. megfile/hdfs_path.py +290 -236
  10. megfile/http.py +15 -14
  11. megfile/http_path.py +111 -107
  12. megfile/interfaces.py +70 -65
  13. megfile/lib/base_prefetch_reader.py +84 -65
  14. megfile/lib/combine_reader.py +12 -12
  15. megfile/lib/compare.py +17 -13
  16. megfile/lib/compat.py +1 -5
  17. megfile/lib/fnmatch.py +29 -30
  18. megfile/lib/glob.py +46 -54
  19. megfile/lib/hdfs_prefetch_reader.py +40 -25
  20. megfile/lib/hdfs_tools.py +1 -3
  21. megfile/lib/http_prefetch_reader.py +69 -46
  22. megfile/lib/joinpath.py +5 -5
  23. megfile/lib/lazy_handler.py +7 -3
  24. megfile/lib/s3_buffered_writer.py +58 -51
  25. megfile/lib/s3_cached_handler.py +13 -14
  26. megfile/lib/s3_limited_seekable_writer.py +37 -28
  27. megfile/lib/s3_memory_handler.py +34 -30
  28. megfile/lib/s3_pipe_handler.py +24 -25
  29. megfile/lib/s3_prefetch_reader.py +71 -52
  30. megfile/lib/s3_share_cache_reader.py +37 -24
  31. megfile/lib/shadow_handler.py +7 -3
  32. megfile/lib/stdio_handler.py +9 -8
  33. megfile/lib/url.py +3 -3
  34. megfile/pathlike.py +259 -228
  35. megfile/s3.py +220 -153
  36. megfile/s3_path.py +977 -802
  37. megfile/sftp.py +190 -156
  38. megfile/sftp_path.py +540 -450
  39. megfile/smart.py +397 -330
  40. megfile/smart_path.py +100 -105
  41. megfile/stdio.py +10 -9
  42. megfile/stdio_path.py +32 -35
  43. megfile/utils/__init__.py +73 -54
  44. megfile/utils/mutex.py +11 -14
  45. megfile/version.py +1 -1
  46. {megfile-3.1.1.dist-info → megfile-3.1.2.dist-info}/METADATA +5 -8
  47. megfile-3.1.2.dist-info/RECORD +55 -0
  48. {megfile-3.1.1.dist-info → megfile-3.1.2.dist-info}/WHEEL +1 -1
  49. scripts/convert_results_to_sarif.py +45 -78
  50. scripts/generate_file.py +140 -64
  51. megfile-3.1.1.dist-info/RECORD +0 -55
  52. {megfile-3.1.1.dist-info → megfile-3.1.2.dist-info}/LICENSE +0 -0
  53. {megfile-3.1.1.dist-info → megfile-3.1.2.dist-info}/LICENSE.pyre +0 -0
  54. {megfile-3.1.1.dist-info → megfile-3.1.2.dist-info}/entry_points.txt +0 -0
  55. {megfile-3.1.1.dist-info → megfile-3.1.2.dist-info}/top_level.txt +0 -0
megfile/lib/http_prefetch_reader.py CHANGED
@@ -3,8 +3,17 @@ from typing import Optional
 
  import requests
 
- from megfile.config import DEFAULT_BLOCK_CAPACITY, DEFAULT_BLOCK_SIZE, HTTP_MAX_RETRY_TIMES
- from megfile.errors import HttpBodyIncompleteError, UnsupportedError, http_should_retry, patch_method
+ from megfile.config import (
+ DEFAULT_BLOCK_CAPACITY,
+ DEFAULT_BLOCK_SIZE,
+ HTTP_MAX_RETRY_TIMES,
+ )
+ from megfile.errors import (
+ HttpBodyIncompleteError,
+ UnsupportedError,
+ http_should_retry,
+ patch_method,
+ )
  from megfile.lib.base_prefetch_reader import BasePrefetchReader
  from megfile.lib.compat import fspath
  from megfile.pathlike import PathLike
@@ -13,24 +22,29 @@ DEFAULT_TIMEOUT = (60, 60 * 60 * 24)
 
 
  class HttpPrefetchReader(BasePrefetchReader):
- '''
- Reader to fast read the http content, service must support Accept-Ranges.
- This will divide the file content into equal parts of block_size size, and will use LRU to cache at most block_capacity blocks in memory.
- open(), seek() and read() will trigger prefetch read.
- The prefetch will cached block_forward blocks of data from offset position (the position after reading if the called function is read).
- '''
+ """
+ Reader to fast read the http content, service must support Accept-Ranges.
 
- def __init__(
- self,
- url: PathLike,
- *,
- content_size: Optional[int] = None,
- block_size: int = DEFAULT_BLOCK_SIZE,
- block_capacity: int = DEFAULT_BLOCK_CAPACITY,
- block_forward: Optional[int] = None,
- max_retries: int = HTTP_MAX_RETRY_TIMES,
- max_workers: Optional[int] = None):
+ This will divide the file content into equal parts of block_size size, and will use
+ LRU to cache at most block_capacity blocks in memory.
+
+ open(), seek() and read() will trigger prefetch read.
+
+ The prefetch will cached block_forward blocks of data from offset position
+ (the position after reading if the called function is read).
+ """
 
+ def __init__(
+ self,
+ url: PathLike,
+ *,
+ content_size: Optional[int] = None,
+ block_size: int = DEFAULT_BLOCK_SIZE,
+ block_capacity: int = DEFAULT_BLOCK_CAPACITY,
+ block_forward: Optional[int] = None,
+ max_retries: int = HTTP_MAX_RETRY_TIMES,
+ max_workers: Optional[int] = None,
+ ):
  self._url = url
  self._content_size = content_size
 
@@ -39,68 +53,77 @@ class HttpPrefetchReader(BasePrefetchReader):
  block_capacity=block_capacity,
  block_forward=block_forward,
  max_retries=max_retries,
- max_workers=max_workers)
+ max_workers=max_workers,
+ )
 
  def _get_content_size(self) -> int:
  if self._content_size is not None:
  return self._content_size
 
  first_index_response = self._fetch_response()
- if first_index_response['Headers'].get('Accept-Ranges') != 'bytes':
+ if first_index_response["Headers"].get("Accept-Ranges") != "bytes":
  raise UnsupportedError(
- f'Unsupported server, server must support Accept-Ranges: {self._url}',
+ f"Unsupported server, server must support Accept-Ranges: {self._url}",
  path=fspath(self._url),
  )
- return first_index_response['Headers']['Content-Length']
+ return first_index_response["Headers"]["Content-Length"]
 
  @property
  def name(self) -> str:
  return fspath(self._url)
 
  def _fetch_response(
- self,
- start: Optional[int] = None,
- end: Optional[int] = None) -> dict:
-
+ self, start: Optional[int] = None, end: Optional[int] = None
+ ) -> dict:
  def fetch_response() -> dict:
  request_kwargs = {}
- if hasattr(self._url, 'request_kwargs'):
+ if hasattr(self._url, "request_kwargs"):
  request_kwargs = self._url.request_kwargs # pyre-ignore[16]
- timeout = request_kwargs.pop('timeout', DEFAULT_TIMEOUT)
- stream = request_kwargs.pop('stream', True)
+ timeout = request_kwargs.pop("timeout", DEFAULT_TIMEOUT)
+ stream = request_kwargs.pop("stream", True)
 
  if start is None or end is None:
- with requests.get(fspath(self._url), timeout=timeout,
- stream=stream, **request_kwargs) as response:
+ with requests.get(
+ fspath(self._url), timeout=timeout, stream=stream, **request_kwargs
+ ) as response:
  return {
- 'Headers': response.headers,
- 'Cookies': response.cookies,
- 'StatusCode': response.status_code,
+ "Headers": response.headers,
+ "Cookies": response.cookies,
+ "StatusCode": response.status_code,
  }
  else:
  range_end = end
  if self._content_size is not None:
  range_end = min(range_end, self._content_size - 1)
- headers = request_kwargs.pop('headers', {})
+ headers = request_kwargs.pop("headers", {})
  headers["Range"] = f"bytes={start}-{range_end}"
- with requests.get(fspath(self._url), timeout=timeout,
- headers=headers, stream=stream,
- **request_kwargs) as response:
- if len(response.content) != int(
- response.headers['Content-Length']):
+ with requests.get(
+ fspath(self._url),
+ timeout=timeout,
+ headers=headers,
+ stream=stream,
+ **request_kwargs,
+ ) as response:
+ if len(response.content) != int(response.headers["Content-Length"]):
  raise HttpBodyIncompleteError(
- f"The downloaded content is incomplete, expected size: {response.headers['Content-Length']}, actual size: {len(response.content)}",
+ "The downloaded content is incomplete, "
+ "expected size: %s, actual size: %d"
+ % (
+ response.headers["Content-Length"],
+ len(response.content),
+ )
  )
  return {
- 'Body': BytesIO(response.content),
- 'Headers': response.headers,
- 'Cookies': response.cookies,
- 'StatusCode': response.status_code,
+ "Body": BytesIO(response.content),
+ "Headers": response.headers,
+ "Cookies": response.cookies,
+ "StatusCode": response.status_code,
  }
 
  fetch_response = patch_method(
  fetch_response,
  max_retries=self._max_retries,
- should_retry=http_should_retry)
+ should_retry=http_should_retry,
+ )
 
  return fetch_response()
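
The reader above prefetches HTTP content in fixed-size blocks and keeps an LRU cache of them. As a rough usage sketch (the URL is hypothetical; the keyword arguments are the ones visible in the diff above):

    from megfile.lib.http_prefetch_reader import HttpPrefetchReader

    # Hypothetical URL; the server must reply with "Accept-Ranges: bytes".
    reader = HttpPrefetchReader(
        "https://example.com/large-file.bin",
        block_size=8 * 2**20,   # each prefetched block is 8 MB
        block_capacity=16,      # keep at most 16 blocks in the LRU cache
    )
    head = reader.read(1024)    # read() triggers prefetching of the following blocks
    reader.close()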
megfile/lib/joinpath.py CHANGED
@@ -10,22 +10,22 @@ def uri_join(path: str, *other_paths: str) -> str:
  return path
 
  first_path = path
- if first_path.endswith('/'):
+ if first_path.endswith("/"):
  first_path = first_path[:-1]
 
  last_path = other_paths[-1]
- if last_path.startswith('/'):
+ if last_path.startswith("/"):
  last_path = last_path[1:]
 
  middle_paths = []
  for other_path in other_paths[:-1]:
- if other_path.startswith('/'):
+ if other_path.startswith("/"):
  other_path = other_path[1:]
- if other_path.endswith('/'):
+ if other_path.endswith("/"):
  other_path = other_path[:-1]
  middle_paths.append(other_path)
 
- return '/'.join([first_path, *middle_paths, last_path])
+ return "/".join([first_path, *middle_paths, last_path])
 
  # Imp. 2
  # other_paths = (other_path.lstrip('/') for other_path in other_paths)
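
The joinpath.py change above is quote-style only; uri_join keeps its semantics. A small sketch of what the code shown implies (expected results derived from that code, not measured separately):

    from megfile.lib.joinpath import uri_join

    # Exactly one slash is kept at each join point, per the stripping logic above.
    print(uri_join("s3://bucket/", "/dir/", "/key"))  # s3://bucket/dir/key
    print(uri_join("http://host/a", "b", "c/"))       # http://host/a/b/c/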
megfile/lib/lazy_handler.py CHANGED
@@ -7,9 +7,13 @@ from megfile.utils import get_content_size
 
 
  class LazyHandler(Readable, Seekable, Writable):
- ''' Create a File-Like Object, maintaining file pointer, to avoid misunderstanding the position when read / write / seek
- It can be roughly regarded as the copy function of the file handle, but you need to be careful with the write handle, because no matter which copy will modify the data itself
- '''
+ """Create a File-Like Object, maintaining file pointer,
+ to avoid misunderstanding the position when read / write / seek.
+
+ It can be roughly regarded as the copy function of the file handle,
+ but you need to be careful with the write handle,
+ because no matter which copy will modify the data itself.
+ """
 
  def __init__(self, path: str, mode: str, open_func: Callable, **options):
  self._open_func = open_func
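
The LazyHandler docstring above describes wrappers that share one underlying file while tracking their own positions. A conceptual sketch, assuming the constructor and the Seekable interface shown in this diff (the path and open_func are placeholders, not taken from megfile's documentation):

    from megfile.lib.lazy_handler import LazyHandler

    # Two handlers over the same (hypothetical) file keep independent offsets,
    # while writes through either one would modify the same underlying data.
    first = LazyHandler("/tmp/example.bin", "rb", open_func=open)
    second = LazyHandler("/tmp/example.bin", "rb", open_func=open)
    first.read(10)
    print(second.tell())  # expected to still be 0: positions are per-handler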
megfile/lib/s3_buffered_writer.py CHANGED
@@ -5,13 +5,20 @@ from logging import getLogger as get_logger
  from threading import Lock
  from typing import NamedTuple, Optional
 
- from megfile.config import BACKOFF_FACTOR, BACKOFF_INITIAL, DEFAULT_MAX_BLOCK_SIZE, DEFAULT_MAX_BUFFER_SIZE, DEFAULT_MIN_BLOCK_SIZE, GLOBAL_MAX_WORKERS
+ from megfile.config import (
+ BACKOFF_FACTOR,
+ BACKOFF_INITIAL,
+ DEFAULT_MAX_BLOCK_SIZE,
+ DEFAULT_MAX_BUFFER_SIZE,
+ DEFAULT_MIN_BLOCK_SIZE,
+ GLOBAL_MAX_WORKERS,
+ )
  from megfile.errors import raise_s3_error
  from megfile.interfaces import Writable
  from megfile.utils import get_human_size, process_local
 
  _logger = get_logger(__name__)
- '''
+ """
  class PartResult(NamedTuple):
 
  etag: str
@@ -19,35 +26,31 @@ class PartResult(NamedTuple):
  content_size: int
 
  in Python 3.6+
- '''
+ """
 
  _PartResult = NamedTuple(
- 'PartResult', [('etag', str), ('part_number', int), ('content_size', int)])
+ "PartResult", [("etag", str), ("part_number", int), ("content_size", int)]
+ )
 
 
  class PartResult(_PartResult):
-
  def asdict(self):
- return {
- 'PartNumber': self.part_number,
- 'ETag': self.etag,
- }
+ return {"PartNumber": self.part_number, "ETag": self.etag}
 
 
  class S3BufferedWriter(Writable[bytes]):
-
  def __init__(
- self,
- bucket: str,
- key: str,
- *,
- s3_client,
- block_size: int = DEFAULT_MIN_BLOCK_SIZE,
- max_block_size: int = DEFAULT_MAX_BLOCK_SIZE,
- max_buffer_size: int = DEFAULT_MAX_BUFFER_SIZE,
- max_workers: Optional[int] = None,
- profile_name: Optional[str] = None):
-
+ self,
+ bucket: str,
+ key: str,
+ *,
+ s3_client,
+ block_size: int = DEFAULT_MIN_BLOCK_SIZE,
+ max_block_size: int = DEFAULT_MAX_BLOCK_SIZE,
+ max_buffer_size: int = DEFAULT_MAX_BUFFER_SIZE,
+ max_workers: Optional[int] = None,
+ profile_name: Optional[str] = None,
+ ):
  self._bucket = bucket
  self._key = key
  self._client = s3_client
@@ -68,9 +71,10 @@ class S3BufferedWriter(Writable[bytes]):
  self._is_global_executor = False
  if max_workers is None:
  self._executor = process_local(
- 'S3BufferedWriter.executor',
+ "S3BufferedWriter.executor",
  ThreadPoolExecutor,
- max_workers=GLOBAL_MAX_WORKERS)
+ max_workers=GLOBAL_MAX_WORKERS,
+ )
  self._is_global_executor = True
  else:
  self._executor = ThreadPoolExecutor(max_workers=max_workers)
@@ -79,17 +83,19 @@ class S3BufferedWriter(Writable[bytes]):
  self.__upload_id = None
  self.__upload_id_lock = Lock()
 
- _logger.debug('open file: %r, mode: %s' % (self.name, self.mode))
+ _logger.debug("open file: %r, mode: %s" % (self.name, self.mode))
 
  @property
  def name(self) -> str:
- return 's3%s://%s/%s' % (
+ return "s3%s://%s/%s" % (
  f"+{self._profile_name}" if self._profile_name else "",
- self._bucket, self._key)
+ self._bucket,
+ self._key,
+ )
 
  @property
  def mode(self) -> str:
- return 'wb'
+ return "wb"
 
  def tell(self) -> int:
  return self._offset
@@ -102,8 +108,9 @@ class S3BufferedWriter(Writable[bytes]):
  def _content_size(self, value: int):
  if value > self._backoff_size:
  _logger.debug(
- 'writing file: %r, current size: %s' %
- (self.name, get_human_size(value)))
+ "writing file: %r, current size: %s"
+ % (self.name, get_human_size(value))
+ )
  while value > self._backoff_size:
  self._backoff_size *= BACKOFF_FACTOR
  self.__content_size = value
@@ -118,9 +125,8 @@ class S3BufferedWriter(Writable[bytes]):
  if self.__upload_id is None:
  with raise_s3_error(self.name):
  self.__upload_id = self._client.create_multipart_upload(
- Bucket=self._bucket,
- Key=self._key,
- )['UploadId']
+ Bucket=self._bucket, Key=self._key
+ )["UploadId"]
  return self.__upload_id
 
  @property
@@ -128,22 +134,19 @@ class S3BufferedWriter(Writable[bytes]):
  return self._total_buffer_size - sum(
  future.result().content_size
  for future in self._futures.values()
- if future.done())
+ if future.done()
+ )
 
  @property
  def _uploading_futures(self):
- return [
- future for future in self._futures.values() if not future.done()
- ]
+ return [future for future in self._futures.values() if not future.done()]
 
  @property
  def _multipart_upload(self):
  return {
- 'Parts':
- [
- future.result().asdict()
- for _, future in sorted(self._futures.items())
- ],
+ "Parts": [
+ future.result().asdict() for _, future in sorted(self._futures.items())
+ ]
  }
 
  def _upload_buffer(self, part_number, content):
@@ -155,24 +158,29 @@ class S3BufferedWriter(Writable[bytes]):
  UploadId=self._upload_id,
  PartNumber=part_number,
  Body=content,
- )['ETag'], part_number, len(content))
+ )["ETag"],
+ part_number,
+ len(content),
+ )
 
  def _submit_upload_buffer(self, part_number, content):
  self._futures[part_number] = self._executor.submit(
- self._upload_buffer, part_number, content)
+ self._upload_buffer, part_number, content
+ )
  self._total_buffer_size += len(content)
  while self._buffer_size > self._max_buffer_size:
  wait(self._uploading_futures, return_when=FIRST_COMPLETED)
 
  def _submit_upload_content(self, content: bytes):
- # s3 part needs at least 5MB, so we need to divide content into equal-size parts, and give last part more size
+ # s3 part needs at least 5MB,
+ # so we need to divide content into equal-size parts,
+ # and give last part more size.
  # e.g. 257MB can be divided into 2 parts, 128MB and 129MB
  offset = 0
  while len(content) - offset - self._max_block_size > self._block_size:
  self._part_number += 1
  offset_stop = offset + self._max_block_size
- self._submit_upload_buffer(
- self._part_number, content[offset:offset_stop])
+ self._submit_upload_buffer(self._part_number, content[offset:offset_stop])
  offset = offset_stop
  self._part_number += 1
  self._submit_upload_buffer(self._part_number, content[offset:])
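
The comment reformatted above encodes the part-splitting rule for multipart uploads. A self-contained sketch of that arithmetic, with assumed sizes standing in for the megfile.config defaults:

    # Mirrors the loop above: full max-size parts, with the remainder folded
    # into the last part so it never drops below the minimum S3 part size.
    MAX_BLOCK_SIZE = 128 * 2**20  # assumed per-part upper bound
    MIN_BLOCK_SIZE = 8 * 2**20    # assumed lower bound (S3 requires >= 5 MB per part)

    def split_parts(total: int) -> list:
        sizes, offset = [], 0
        while total - offset - MAX_BLOCK_SIZE > MIN_BLOCK_SIZE:
            sizes.append(MAX_BLOCK_SIZE)
            offset += MAX_BLOCK_SIZE
        sizes.append(total - offset)  # the last part absorbs the remainder
        return sizes

    print([s // 2**20 for s in split_parts(257 * 2**20)])  # [128, 129]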
@@ -186,7 +194,7 @@ class S3BufferedWriter(Writable[bytes]):
 
  def write(self, data: bytes) -> int:
  if self.closed:
- raise IOError('file already closed: %r' % self.name)
+ raise IOError("file already closed: %r" % self.name)
 
  result = self._buffer.write(data)
  if self._buffer.tell() >= self._block_size:
@@ -200,14 +208,13 @@ class S3BufferedWriter(Writable[bytes]):
  self._executor.shutdown()
 
  def _close(self):
- _logger.debug('close file: %r' % self.name)
+ _logger.debug("close file: %r" % self.name)
 
  if not self._is_multipart:
  with raise_s3_error(self.name):
  self._client.put_object(
- Bucket=self._bucket,
- Key=self._key,
- Body=self._buffer.getvalue())
+ Bucket=self._bucket, Key=self._key, Body=self._buffer.getvalue()
+ )
  self._shutdown()
  return
 
megfile/lib/s3_cached_handler.py CHANGED
@@ -7,21 +7,20 @@ from megfile.utils import generate_cache_path
 
 
  class S3CachedHandler(S3MemoryHandler):
-
  def __init__(
- self,
- bucket: str,
- key: str,
- mode: str,
- *,
- s3_client,
- cache_path: Optional[str] = None,
- remove_cache_when_open: bool = True,
- profile_name: Optional[str] = None):
-
- if mode not in ('rb', 'wb', 'ab', 'rb+', 'wb+', 'ab+'):
+ self,
+ bucket: str,
+ key: str,
+ mode: str,
+ *,
+ s3_client,
+ cache_path: Optional[str] = None,
+ remove_cache_when_open: bool = True,
+ profile_name: Optional[str] = None,
+ ):
+ if mode not in ("rb", "wb", "ab", "rb+", "wb+", "ab+"):
  # TODO: replace AssertionError with ValueError in 4.0.0
- raise AssertionError('unacceptable mode: %r' % mode)
+ raise AssertionError("unacceptable mode: %r" % mode)
 
  self._bucket = bucket
  self._key = key
@@ -33,7 +32,7 @@ class S3CachedHandler(S3MemoryHandler):
  cache_path = generate_cache_path(self.name)
 
  self._cache_path = cache_path
- self._fileobj = open(self._cache_path, 'wb+')
+ self._fileobj = open(self._cache_path, "wb+")
  self._download_fileobj()
 
  if remove_cache_when_open:
megfile/lib/s3_limited_seekable_writer.py CHANGED
@@ -3,7 +3,11 @@ from io import BytesIO
  from logging import getLogger as get_logger
  from typing import Optional
 
- from megfile.config import DEFAULT_MAX_BLOCK_SIZE, DEFAULT_MAX_BUFFER_SIZE, DEFAULT_MIN_BLOCK_SIZE
+ from megfile.config import (
+ DEFAULT_MAX_BLOCK_SIZE,
+ DEFAULT_MAX_BUFFER_SIZE,
+ DEFAULT_MIN_BLOCK_SIZE,
+ )
  from megfile.errors import raise_s3_error
  from megfile.interfaces import Seekable
  from megfile.lib.s3_buffered_writer import S3BufferedWriter
@@ -12,27 +16,27 @@ _logger = get_logger(__name__)
 
 
  class S3LimitedSeekableWriter(S3BufferedWriter, Seekable):
- ''' For file format like msgpack and mp4, it's a pain that you need to write
+ """For file format like msgpack and mp4, it's a pain that you need to write
  header before writing the data. So it's kind of hard to make streaming write
  to unseekable file system like s3. In this case, we will try to keep the first
  and last parts of data in memory, so we can come back to head again and write
  the header at the last second.
- '''
+ """
 
  def __init__(
- self,
- bucket: str,
- key: str,
- *,
- s3_client,
- block_size: int = DEFAULT_MIN_BLOCK_SIZE,
- head_block_size: Optional[int] = None,
- tail_block_size: Optional[int] = None,
- max_block_size: int = DEFAULT_MAX_BLOCK_SIZE,
- max_buffer_size: int = DEFAULT_MAX_BUFFER_SIZE,
- max_workers: Optional[int] = None,
- profile_name: Optional[str] = None):
-
+ self,
+ bucket: str,
+ key: str,
+ *,
+ s3_client,
+ block_size: int = DEFAULT_MIN_BLOCK_SIZE,
+ head_block_size: Optional[int] = None,
+ tail_block_size: Optional[int] = None,
+ max_block_size: int = DEFAULT_MAX_BLOCK_SIZE,
+ max_buffer_size: int = DEFAULT_MAX_BUFFER_SIZE,
+ max_workers: Optional[int] = None,
+ profile_name: Optional[str] = None,
+ ):
  super().__init__(
  bucket,
  key,
@@ -41,7 +45,8 @@ class S3LimitedSeekableWriter(S3BufferedWriter, Seekable):
  max_block_size=max_block_size,
  max_buffer_size=max_buffer_size,
  max_workers=max_workers,
- profile_name=profile_name)
+ profile_name=profile_name,
+ )
 
  self._head_block_size = head_block_size or block_size
  self._tail_block_size = tail_block_size or block_size
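
The docstring above explains why this writer keeps head and tail blocks in memory: so a header can be rewritten after the payload is known. A hedged usage sketch (bucket, key and sizes are illustrative; boto3 stands in for any S3 client factory):

    import boto3
    from megfile.lib.s3_limited_seekable_writer import S3LimitedSeekableWriter

    client = boto3.client("s3")  # assumes AWS credentials are configured
    writer = S3LimitedSeekableWriter("my-bucket", "out/data.bin", s3_client=client)
    writer.write(b"\x00" * 8)         # placeholder header, kept in the head buffer
    writer.write(b"payload " * 1000)  # body; large data would spill into multipart parts
    writer.seek(0)                    # seeking back is only allowed inside the head block
    writer.write(b"HDR:0008")         # overwrite the 8 placeholder bytes with the real header
    writer.close()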
@@ -61,7 +66,7 @@ class S3LimitedSeekableWriter(S3BufferedWriter, Seekable):
 
  def seek(self, offset: int, whence: int = os.SEEK_SET) -> int:
  if self.closed:
- raise IOError('file already closed: %r' % self.name)
+ raise IOError("file already closed: %r" % self.name)
 
  offset = int(offset) # user maybe put offset with 'numpy.uint64' type
  if whence == os.SEEK_SET:
@@ -71,7 +76,7 @@ class S3LimitedSeekableWriter(S3BufferedWriter, Seekable):
  elif whence == os.SEEK_END:
  target_offset = self._content_size + offset
  else:
- raise OSError('Unsupported whence value: %d' % whence)
+ raise OSError("Unsupported whence value: %d" % whence)
 
  if target_offset < self._head_block_size:
  self._head_buffer.seek(target_offset)
@@ -79,15 +84,16 @@ class S3LimitedSeekableWriter(S3BufferedWriter, Seekable):
  self._buffer.seek(target_offset - self._tail_offset)
  else:
  raise OSError(
- 'Can only seek inside of head, or seek to tail, target offset: %d'
- % target_offset)
+ "Can only seek inside of head, or seek to tail, target offset: %d"
+ % target_offset
+ )
 
  self._offset = target_offset
  return self._offset
 
  def write(self, data: bytes) -> int:
  if self.closed:
- raise IOError('file already closed: %r' % self.name)
+ raise IOError("file already closed: %r" % self.name)
 
  if self._head_size != self._head_block_size: # no tail part yet
  self._write_to_head(data)
@@ -97,8 +103,9 @@ class S3LimitedSeekableWriter(S3BufferedWriter, Seekable):
  self._write_to_tail(data)
  else:
  raise OSError(
- 'Can only write inside of head, or write to tail, current offset: %d'
- % self._offset)
+ "Can only write inside of head, or write to tail, current offset: %d"
+ % self._offset
+ )
  return len(data)
 
  def _write_to_head(self, data: bytes):
@@ -117,8 +124,9 @@ class S3LimitedSeekableWriter(S3BufferedWriter, Seekable):
  def _write_to_head_after_tail_part_created(self, data: bytes):
  if self._offset + len(data) > self._head_block_size:
  raise Exception(
- 'Head part overflow, %d bytes left but try to write %d bytes' %
- (self._head_block_size - self._offset, len(data)))
+ "Head part overflow, %d bytes left but try to write %d bytes"
+ % (self._head_block_size - self._offset, len(data))
+ )
  self._head_buffer.write(data)
  self._offset += len(data)
 
@@ -140,14 +148,15 @@ class S3LimitedSeekableWriter(S3BufferedWriter, Seekable):
  self._submit_upload_content(content[:offset])
 
  def _close(self):
- _logger.debug('close file: %r' % self.name)
+ _logger.debug("close file: %r" % self.name)
 
  if not self._is_multipart:
  with raise_s3_error(self.name):
  self._client.put_object(
  Bucket=self._bucket,
  Key=self._key,
- Body=self._head_buffer.getvalue() + self._buffer.getvalue())
+ Body=self._head_buffer.getvalue() + self._buffer.getvalue(),
+ )
  self._shutdown()
  return