megfile 4.1.4.post1__py3-none-any.whl → 4.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
megfile/errors.py CHANGED
@@ -165,8 +165,7 @@ def patch_method(
165
165
  result = func(*args, **kwargs)
166
166
  if after_callback is not None:
167
167
  result = after_callback(result, *args, **kwargs)
168
- if retries > 1:
169
- _logger.info(f"Error already fixed by retry {retries - 1} times")
168
+ _logger.info(f"Error already fixed by retry {retries - 1} times")
170
169
  return result
171
170
  except Exception as error:
172
171
  if not should_retry(error):
megfile/http_path.py CHANGED
@@ -4,7 +4,7 @@ from functools import partial
4
4
  from io import BufferedReader, BytesIO
5
5
  from logging import getLogger as get_logger
6
6
  from threading import Lock
7
- from typing import Iterable, Iterator, Optional, Tuple, Union
7
+ from typing import Dict, Iterable, Iterator, Optional, Tuple, Union
8
8
 
9
9
  import requests
10
10
  from urllib3 import HTTPResponse
@@ -20,7 +20,7 @@ from megfile.lib.compat import fspath
20
20
  from megfile.lib.http_prefetch_reader import DEFAULT_TIMEOUT, HttpPrefetchReader
21
21
  from megfile.lib.url import get_url_scheme
22
22
  from megfile.smart_path import SmartPath
23
- from megfile.utils import _is_pickle, binary_open
23
+ from megfile.utils import _is_pickle, binary_open, cached_property
24
24
 
25
25
  __all__ = [
26
26
  "HttpPath",
@@ -31,12 +31,25 @@ __all__ = [
31
31
 
32
32
  _logger = get_logger(__name__)
33
33
 
34
+ DEFAULT_REQUEST_KWARGS = {
35
+ "timeout": DEFAULT_TIMEOUT,
36
+ }
37
+
34
38
 
35
39
  def get_http_session(
36
40
  timeout: Optional[Union[int, Tuple[int, int]]] = DEFAULT_TIMEOUT,
37
41
  status_forcelist: Iterable[int] = (500, 502, 503, 504),
42
+ params: Optional[Dict[str, str]] = None,
43
+ headers: Optional[Dict[str, str]] = None,
44
+ cookies: Optional[Dict[str, str]] = None,
45
+ trust_env: bool = True,
46
+ **kwargs,
38
47
  ) -> requests.Session:
39
48
  session = requests.Session()
49
+ session.params.update(params or {}) # type: ignore
50
+ session.headers.update(headers or {})
51
+ session.cookies.update(cookies or {})
52
+ session.trust_env = trust_env
40
53
 
41
54
  def after_callback(response, *args, **kwargs):
42
55
  if response.status_code in status_forcelist:
@@ -52,20 +65,8 @@ def get_http_session(
52
65
  error,
53
66
  method,
54
67
  url,
55
- params=None,
56
68
  data=None,
57
- headers=None,
58
- cookies=None,
59
69
  files=None,
60
- auth=None,
61
- timeout=None,
62
- allow_redirects=True,
63
- proxies=None,
64
- hooks=None,
65
- stream=None,
66
- verify=None,
67
- cert=None,
68
- json=None,
69
70
  **kwargs,
70
71
  ):
71
72
  if data and hasattr(data, "seek"):
@@ -100,7 +101,7 @@ def get_http_session(
100
101
  files[key] = file_info
101
102
 
102
103
  session.request = patch_method(
103
- partial(session.request, timeout=timeout),
104
+ partial(session.request, timeout=timeout, **kwargs),
104
105
  max_retries=HTTP_MAX_RETRY_TIMES,
105
106
  should_retry=http_should_retry,
106
107
  before_callback=before_callback,
@@ -136,7 +137,11 @@ class HttpPath(URIPath):
136
137
 
137
138
  if fspath(path).startswith("https://"):
138
139
  self.protocol = "https"
139
- self.request_kwargs = {}
140
+ self.request_kwargs = deepcopy(DEFAULT_REQUEST_KWARGS)
141
+
142
+ @cached_property
143
+ def session(self):
144
+ return get_http_session(status_forcelist=(), **self.request_kwargs)
140
145
 
141
146
  @binary_open
142
147
  def open(
@@ -174,13 +179,8 @@ class HttpPath(URIPath):
174
179
  raise ValueError("unacceptable mode: %r" % mode)
175
180
 
176
181
  response = None
177
- request_kwargs = deepcopy(self.request_kwargs)
178
- timeout = request_kwargs.pop("timeout", DEFAULT_TIMEOUT)
179
- stream = request_kwargs.pop("stream", True)
180
182
  try:
181
- response = get_http_session(timeout=timeout, status_forcelist=()).get(
182
- self.path_with_protocol, stream=stream, **request_kwargs
183
- )
183
+ response = self.session.get(self.path_with_protocol, stream=True)
184
184
  response.raise_for_status()
185
185
  except Exception as error:
186
186
  if response:
@@ -197,6 +197,7 @@ class HttpPath(URIPath):
197
197
 
198
198
  reader = HttpPrefetchReader(
199
199
  self,
200
+ session=self.session,
200
201
  content_size=content_size,
201
202
  block_size=block_size,
202
203
  max_buffer_size=max_buffer_size,
@@ -225,15 +226,8 @@ class HttpPath(URIPath):
225
226
  :returns: StatResult
226
227
  :raises: HttpPermissionError, HttpFileNotFoundError
227
228
  """
228
-
229
- request_kwargs = deepcopy(self.request_kwargs)
230
- timeout = request_kwargs.pop("timeout", DEFAULT_TIMEOUT)
231
- stream = request_kwargs.pop("stream", True)
232
-
233
229
  try:
234
- with get_http_session(timeout=timeout, status_forcelist=()).get(
235
- self.path_with_protocol, stream=stream, **request_kwargs
236
- ) as response:
230
+ with self.session.get(self.path_with_protocol, stream=True) as response:
237
231
  response.raise_for_status()
238
232
  headers = response.headers
239
233
  except Exception as error:
@@ -254,7 +248,11 @@ class HttpPath(URIPath):
254
248
  last_modified = 0.0
255
249
 
256
250
  return StatResult(
257
- size=size, mtime=last_modified, isdir=False, islnk=False, extra=headers
251
+ size=size,
252
+ mtime=last_modified,
253
+ isdir=False,
254
+ islnk=False,
255
+ extra=headers,
258
256
  )
259
257
 
260
258
  def getsize(self, follow_symlinks: bool = False) -> int:
@@ -281,7 +279,7 @@ class HttpPath(URIPath):
281
279
  """
282
280
  return self.stat().mtime
283
281
 
284
- def exists(self, followlinks: bool = False) -> bool:
282
+ def is_file(self, followlinks: bool = False) -> bool:
285
283
  """Test if http path exists
286
284
 
287
285
  :param followlinks: ignore this parameter, just for compatibility
@@ -289,20 +287,16 @@ class HttpPath(URIPath):
289
287
  :return: return True if exists
290
288
  :rtype: bool
291
289
  """
292
- request_kwargs = deepcopy(self.request_kwargs)
293
- timeout = request_kwargs.pop("timeout", DEFAULT_TIMEOUT)
294
- stream = request_kwargs.pop("stream", True)
295
-
296
290
  try:
297
- with get_http_session(timeout=timeout, status_forcelist=()).get(
298
- self.path_with_protocol, stream=stream, **request_kwargs
299
- ) as response:
291
+ with self.session.get(self.path_with_protocol, stream=True) as response:
300
292
  if response.status_code == 404:
301
293
  return False
302
294
  return True
303
295
  except requests.exceptions.ConnectionError:
304
296
  return False
305
297
 
298
+ exists = is_file
299
+
306
300
 
307
301
  @SmartPath.register
308
302
  class HttpsPath(HttpPath):
megfile/lib/http_prefetch_reader.py CHANGED
@@ -38,6 +38,7 @@ class HttpPrefetchReader(BasePrefetchReader):
38
38
  self,
39
39
  url: PathLike,
40
40
  *,
41
+ session: Optional[requests.Session] = None,
41
42
  content_size: Optional[int] = None,
42
43
  block_size: int = READER_BLOCK_SIZE,
43
44
  max_buffer_size: int = READER_MAX_BUFFER_SIZE,
@@ -47,6 +48,7 @@ class HttpPrefetchReader(BasePrefetchReader):
47
48
  ):
48
49
  self._url = url
49
50
  self._content_size = content_size
51
+ self._session = session or requests.Session()
50
52
 
51
53
  super().__init__(
52
54
  block_size=block_size,
@@ -76,16 +78,8 @@ class HttpPrefetchReader(BasePrefetchReader):
76
78
  self, start: Optional[int] = None, end: Optional[int] = None
77
79
  ) -> dict:
78
80
  def fetch_response() -> dict:
79
- request_kwargs = {}
80
- if hasattr(self._url, "request_kwargs"):
81
- request_kwargs = self._url.request_kwargs # pyre-ignore[16]
82
- timeout = request_kwargs.pop("timeout", DEFAULT_TIMEOUT)
83
- stream = request_kwargs.pop("stream", True)
84
-
85
81
  if start is None or end is None:
86
- with requests.get(
87
- fspath(self._url), timeout=timeout, stream=stream, **request_kwargs
88
- ) as response:
82
+ with self._session.get(fspath(self._url), stream=True) as response:
89
83
  return {
90
84
  "Headers": response.headers,
91
85
  "Cookies": response.cookies,
@@ -95,14 +89,9 @@ class HttpPrefetchReader(BasePrefetchReader):
95
89
  range_end = end
96
90
  if self._content_size is not None:
97
91
  range_end = min(range_end, self._content_size - 1)
98
- headers = request_kwargs.pop("headers", {})
99
- headers["Range"] = f"bytes={start}-{range_end}"
100
- with requests.get(
101
- fspath(self._url),
102
- timeout=timeout,
103
- headers=headers,
104
- stream=stream,
105
- **request_kwargs,
92
+ headers = {"Range": f"bytes={start}-{range_end}"}
93
+ with self._session.get(
94
+ fspath(self._url), headers=headers, stream=True
106
95
  ) as response:
107
96
  if len(response.content) != int(response.headers["Content-Length"]):
108
97
  raise HttpBodyIncompleteError(
megfile/sftp_path.py CHANGED
@@ -1,5 +1,4 @@
1
1
  import atexit
2
- import fcntl
3
2
  import hashlib
4
3
  import io
5
4
  import os
@@ -198,6 +197,8 @@ def _get_ssh_client(
198
197
  password: Optional[str] = None,
199
198
  default_policy: Type[paramiko.MissingHostKeyPolicy] = paramiko.RejectPolicy,
200
199
  ) -> paramiko.SSHClient:
200
+ import fcntl
201
+
201
202
  hostname, port, username, password, private_key = provide_connect_info(
202
203
  hostname=hostname, port=port, username=username, password=password
203
204
  )
megfile/utils/__init__.py CHANGED
@@ -216,7 +216,7 @@ def get_human_size(size_bytes: float) -> str:
216
216
  index = int(math.floor(math.log(size_bytes, 1024)))
217
217
  base = math.pow(1024, index)
218
218
  if base == 1:
219
- size = size_bytes
219
+ size = int(size_bytes)
220
220
  else:
221
221
  size = round(size_bytes / base, 2)
222
222
  return "%s %s" % (size, size_name[index])
megfile/version.py CHANGED
@@ -1 +1 @@
1
- VERSION = "4.1.4.post1"
1
+ VERSION = "4.1.5"
megfile-4.1.4.post1.dist-info/METADATA → megfile-4.1.5.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: megfile
3
- Version: 4.1.4.post1
3
+ Version: 4.1.5
4
4
  Summary: Megvii file operation library
5
5
  Author-email: megvii <megfile@megvii.com>
6
6
  Project-URL: Homepage, https://github.com/megvii-research/megfile
megfile-4.1.4.post1.dist-info/RECORD → megfile-4.1.5.dist-info/RECORD CHANGED
@@ -1,24 +1,24 @@
1
1
  megfile/__init__.py,sha256=7oEfu410CFKzDWZ9RjL5xEJ1gtkJkTfvPrL_7TWdJuY,7366
2
2
  megfile/cli.py,sha256=iwwlwVNu_yXgqnaURPHqGPrx1WeReqN0zi9C4rqK5Ag,29178
3
3
  megfile/config.py,sha256=2MMj5QkhlDJQFZRbCQL2c9iDdeMAVctiaPszRBkg5vM,3988
4
- megfile/errors.py,sha256=A4qX2h1rk6UOOlNy4E81RmI_zuQd5vjh_rlqTfccwag,14589
4
+ megfile/errors.py,sha256=Wu6iTzbMt3lxqmIw7ayGp-cfIgKLfcRX2nTg8uDSufM,14553
5
5
  megfile/fs.py,sha256=KMEqAE35alpcxiy6du5nPFYcaorhUM_kPJMah3q76ng,19160
6
6
  megfile/fs_path.py,sha256=Hozl9LAJ8EMuSWBSZXGj2GNmPZ1sJp9PZs-7hPrLgm8,39341
7
7
  megfile/hdfs.py,sha256=owXr4d3j1frCvlbhmhENcSBnKKDky5cJZzWLOF4ZJMo,13251
8
8
  megfile/hdfs_path.py,sha256=OmUe3vA3Qoxnqtcq0Rs3ygBvzAtqUz3fGo8iP5sWneE,26058
9
9
  megfile/http.py,sha256=1nuGe-JbnwMFyV3s35CJxByED3uoRoS9y8Y8cSGP9Kw,3865
10
- megfile/http_path.py,sha256=yRIk-fNbrsY8rUS5KVOfocS_PS520dX5KOs8lImpLaY,14173
10
+ megfile/http_path.py,sha256=ZGug-bTWq8GqEA-R6zIXQmbcOVYvILxEweTXzHGnBuk,13829
11
11
  megfile/interfaces.py,sha256=p4UvVZpeLx5djd6bqqDaygIx_s-_AxIVj-gudTch4JE,8467
12
12
  megfile/pathlike.py,sha256=3Hnw-fn6RcIe9iPrJt00QdHSA--UfDyxnVBuZ_ymYYQ,31278
13
13
  megfile/s3.py,sha256=abBxnI7RIyn7n7qjGszP1VruYd6Gi9I8QnUOvsHkx1Y,16325
14
14
  megfile/s3_path.py,sha256=S8iulho1btVLLvNU-OtcskkbaAC8yNXnBrFNnF3fwS8,93510
15
15
  megfile/sftp.py,sha256=uBcLQs-j6Q-q-sWAdd-pgi5Qmb_kq7boJM-0sCfcNO0,26540
16
- megfile/sftp_path.py,sha256=Wz4VcQ0pBUuWDGMSxPpPbutrT09mnY6jZNiAqTi5tO4,43840
16
+ megfile/sftp_path.py,sha256=CgirHWmNdXdqyIL9ufmlaMpwFhlkQVZhqmfvjUaj7qU,43845
17
17
  megfile/smart.py,sha256=Sae2KJzaU0k_qV_Bk0YifOMq8WsV5qQ2pGInDRF546I,36411
18
18
  megfile/smart_path.py,sha256=Up_6xNZ2019iSzMn_JAU_1H--z-AP6O7SxdXGdeTG0c,7659
19
19
  megfile/stdio.py,sha256=ZwxsnJNJYIT7Iyg5pIw4qiyH8bszG6oAhEJuR-hXGG4,658
20
20
  megfile/stdio_path.py,sha256=cxaDr8rtisTPnN-rjtaEpqQnshwiqwXFUJBM9xWY7Cg,2711
21
- megfile/version.py,sha256=n7uJo8sxqI4-yPdovTHcDolsukxzF_l9KEtT9E_KR3o,25
21
+ megfile/version.py,sha256=adupe9YdXeuX1_csz0joHWbSaFM0bYND2lRZQYlkboI,19
22
22
  megfile/lib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
23
23
  megfile/lib/base_prefetch_reader.py,sha256=uxVwYknOjc8hLF7q_T2QKMsBqFcrf411ZsuK25CN1eQ,12848
24
24
  megfile/lib/combine_reader.py,sha256=Kp2wEloOUpTlIU7dve87MBpSzmIM-F9OtpTawAjFkiU,4828
@@ -28,7 +28,7 @@ megfile/lib/fnmatch.py,sha256=4MvGzEahMRA-u8Z7mxaD-Yw1idOwBoJJpVywQy29jwY,4162
28
28
  megfile/lib/glob.py,sha256=-N75Phx1c8IpZ2hNIMwsHIx6BxMKVVr7N82cpWhs4lQ,9987
29
29
  megfile/lib/hdfs_prefetch_reader.py,sha256=yCNpcXcTiC2SHKHC-Qp50KQx1ObSLmOgwNUKlG-4ADg,2131
30
30
  megfile/lib/hdfs_tools.py,sha256=4K-OdMYFFSLBGmDzjatioHvuZuUbKVy7ACeJl-l0HLQ,435
31
- megfile/lib/http_prefetch_reader.py,sha256=OjP5pdWK_e1QiFIt2xmflceLnrz3S7B7ePBZFK-OwQE,4558
31
+ megfile/lib/http_prefetch_reader.py,sha256=OQPZ7kWFImqpynjaiTtmadtgtab5fCeQmu51UYHZfgs,4135
32
32
  megfile/lib/joinpath.py,sha256=gaPNtBi8fzd5LZNyZp5zrHzaybcqKJ1xlntGmVNyFEM,929
33
33
  megfile/lib/lazy_handler.py,sha256=bE7RGt1x_xYWMgGAvHr7dwEt52qy-D3z90X3oyCvE6g,1875
34
34
  megfile/lib/s3_buffered_writer.py,sha256=kXvz1bdoaVIxjnEQeg4dxEzAXMYNrk-5uIMww86ty00,7860
@@ -41,12 +41,12 @@ megfile/lib/s3_share_cache_reader.py,sha256=LVWKxHdHo0_zUIW4o8yqNvplqqwezUPeYEt0
41
41
  megfile/lib/shadow_handler.py,sha256=TntewlvIW9ZxCfmqASDQREHoiZ8v42faOe9sovQYQz0,2779
42
42
  megfile/lib/stdio_handler.py,sha256=IDdgENLQlhigEwkLL4zStueVSzdWg7xVcTF_koof_Ek,1987
43
43
  megfile/lib/url.py,sha256=ER32pWy9Q2MAk3TraAaNEBWIqUeBmLuM57ol2cs7-Ks,103
44
- megfile/utils/__init__.py,sha256=xAzmICA0MtAbg-I2yPfeHjA1N4CiMP4sBrC9BgrfZLw,10151
44
+ megfile/utils/__init__.py,sha256=pawmXnCNokWLj338a60b_hK21koYavpEiEohZhsOaGQ,10156
45
45
  megfile/utils/mutex.py,sha256=asb8opGLgK22RiuBJUnfsvB8LnMmodP8KzCVHKmQBWA,2561
46
- megfile-4.1.4.post1.dist-info/licenses/LICENSE,sha256=WNHhf_5RCaeuKWyq_K39vmp9F28LxKsB4SpomwSZ2L0,11357
47
- megfile-4.1.4.post1.dist-info/licenses/LICENSE.pyre,sha256=9lf5nT-5ZH25JijpYAequ0bl8E8z5JmZB1qrjiUMp84,1080
48
- megfile-4.1.4.post1.dist-info/METADATA,sha256=-gzxqJGLKfx6MOtJu4nARQ-wj6e46niHR0rXZQoqPXE,9601
49
- megfile-4.1.4.post1.dist-info/WHEEL,sha256=SmOxYU7pzNKBqASvQJ7DjX3XGUF92lrGhMb3R6_iiqI,91
50
- megfile-4.1.4.post1.dist-info/entry_points.txt,sha256=M6ZWSSv5_5_QtIpZafy3vq7WuOJ_5dSGQQnEZbByt2Q,49
51
- megfile-4.1.4.post1.dist-info/top_level.txt,sha256=i3rMgdU1ZAJekAceojhA-bkm3749PzshtRmLTbeLUPQ,8
52
- megfile-4.1.4.post1.dist-info/RECORD,,
46
+ megfile-4.1.5.dist-info/licenses/LICENSE,sha256=WNHhf_5RCaeuKWyq_K39vmp9F28LxKsB4SpomwSZ2L0,11357
47
+ megfile-4.1.5.dist-info/licenses/LICENSE.pyre,sha256=9lf5nT-5ZH25JijpYAequ0bl8E8z5JmZB1qrjiUMp84,1080
48
+ megfile-4.1.5.dist-info/METADATA,sha256=ZKN5sc5XLpDAZqtkDyhFgjSdhXkrYMhGHmRPVEWmZsE,9595
49
+ megfile-4.1.5.dist-info/WHEEL,sha256=SmOxYU7pzNKBqASvQJ7DjX3XGUF92lrGhMb3R6_iiqI,91
50
+ megfile-4.1.5.dist-info/entry_points.txt,sha256=M6ZWSSv5_5_QtIpZafy3vq7WuOJ_5dSGQQnEZbByt2Q,49
51
+ megfile-4.1.5.dist-info/top_level.txt,sha256=i3rMgdU1ZAJekAceojhA-bkm3749PzshtRmLTbeLUPQ,8
52
+ megfile-4.1.5.dist-info/RECORD,,