megfile 3.1.1__py3-none-any.whl → 3.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- docs/conf.py +2 -4
- megfile/__init__.py +394 -203
- megfile/cli.py +258 -238
- megfile/config.py +25 -21
- megfile/errors.py +126 -114
- megfile/fs.py +174 -140
- megfile/fs_path.py +462 -354
- megfile/hdfs.py +133 -101
- megfile/hdfs_path.py +290 -236
- megfile/http.py +15 -14
- megfile/http_path.py +111 -107
- megfile/interfaces.py +70 -65
- megfile/lib/base_prefetch_reader.py +84 -65
- megfile/lib/combine_reader.py +12 -12
- megfile/lib/compare.py +17 -13
- megfile/lib/compat.py +1 -5
- megfile/lib/fnmatch.py +29 -30
- megfile/lib/glob.py +46 -54
- megfile/lib/hdfs_prefetch_reader.py +40 -25
- megfile/lib/hdfs_tools.py +1 -3
- megfile/lib/http_prefetch_reader.py +69 -46
- megfile/lib/joinpath.py +5 -5
- megfile/lib/lazy_handler.py +7 -3
- megfile/lib/s3_buffered_writer.py +58 -51
- megfile/lib/s3_cached_handler.py +13 -14
- megfile/lib/s3_limited_seekable_writer.py +37 -28
- megfile/lib/s3_memory_handler.py +34 -30
- megfile/lib/s3_pipe_handler.py +24 -25
- megfile/lib/s3_prefetch_reader.py +71 -52
- megfile/lib/s3_share_cache_reader.py +37 -24
- megfile/lib/shadow_handler.py +7 -3
- megfile/lib/stdio_handler.py +9 -8
- megfile/lib/url.py +3 -3
- megfile/pathlike.py +259 -228
- megfile/s3.py +220 -153
- megfile/s3_path.py +977 -802
- megfile/sftp.py +190 -156
- megfile/sftp_path.py +540 -450
- megfile/smart.py +397 -330
- megfile/smart_path.py +100 -105
- megfile/stdio.py +10 -9
- megfile/stdio_path.py +32 -35
- megfile/utils/__init__.py +73 -54
- megfile/utils/mutex.py +11 -14
- megfile/version.py +1 -1
- {megfile-3.1.1.dist-info → megfile-3.1.3.dist-info}/METADATA +5 -8
- megfile-3.1.3.dist-info/RECORD +55 -0
- {megfile-3.1.1.dist-info → megfile-3.1.3.dist-info}/WHEEL +1 -1
- scripts/convert_results_to_sarif.py +45 -78
- scripts/generate_file.py +140 -64
- megfile-3.1.1.dist-info/RECORD +0 -55
- {megfile-3.1.1.dist-info → megfile-3.1.3.dist-info}/LICENSE +0 -0
- {megfile-3.1.1.dist-info → megfile-3.1.3.dist-info}/LICENSE.pyre +0 -0
- {megfile-3.1.1.dist-info → megfile-3.1.3.dist-info}/entry_points.txt +0 -0
- {megfile-3.1.1.dist-info → megfile-3.1.3.dist-info}/top_level.txt +0 -0
megfile/http.py
CHANGED
|
@@ -2,30 +2,31 @@ from megfile.http_path import HttpPath, get_http_session, http_open, is_http
|
|
|
2
2
|
from megfile.interfaces import PathLike, StatResult
|
|
3
3
|
|
|
4
4
|
__all__ = [
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
5
|
+
"get_http_session",
|
|
6
|
+
"is_http",
|
|
7
|
+
"http_open",
|
|
8
|
+
"http_stat",
|
|
9
|
+
"http_getsize",
|
|
10
|
+
"http_getmtime",
|
|
11
|
+
"http_exists",
|
|
12
12
|
]
|
|
13
13
|
|
|
14
14
|
|
|
15
15
|
def http_stat(path: PathLike, follow_symlinks=True) -> StatResult:
|
|
16
|
-
|
|
17
|
-
Get StatResult of http_url response, including size and mtime,
|
|
16
|
+
"""
|
|
17
|
+
Get StatResult of http_url response, including size and mtime,
|
|
18
|
+
referring to http_getsize and http_getmtime
|
|
18
19
|
|
|
19
20
|
:param path: Given path
|
|
20
21
|
:param follow_symlinks: Ignore this parameter, just for compatibility
|
|
21
22
|
:returns: StatResult
|
|
22
23
|
:raises: HttpPermissionError, HttpFileNotFoundError
|
|
23
|
-
|
|
24
|
+
"""
|
|
24
25
|
return HttpPath(path).stat(follow_symlinks)
|
|
25
26
|
|
|
26
27
|
|
|
27
28
|
def http_getsize(path: PathLike, follow_symlinks: bool = False) -> int:
|
|
28
|
-
|
|
29
|
+
"""
|
|
29
30
|
Get file size on the given http_url path.
|
|
30
31
|
|
|
31
32
|
If http response header don't support Content-Length, will return None
|
|
@@ -34,12 +35,12 @@ def http_getsize(path: PathLike, follow_symlinks: bool = False) -> int:
|
|
|
34
35
|
:param follow_symlinks: Ignore this parameter, just for compatibility
|
|
35
36
|
:returns: File size (in bytes)
|
|
36
37
|
:raises: HttpPermissionError, HttpFileNotFoundError
|
|
37
|
-
|
|
38
|
+
"""
|
|
38
39
|
return HttpPath(path).getsize(follow_symlinks)
|
|
39
40
|
|
|
40
41
|
|
|
41
42
|
def http_getmtime(path: PathLike, follow_symlinks: bool = False) -> float:
|
|
42
|
-
|
|
43
|
+
"""
|
|
43
44
|
Get Last-Modified time of the http request on the given http_url path.
|
|
44
45
|
|
|
45
46
|
If http response header don't support Last-Modified, will return None
|
|
@@ -48,7 +49,7 @@ def http_getmtime(path: PathLike, follow_symlinks: bool = False) -> float:
|
|
|
48
49
|
:param follow_symlinks: Ignore this parameter, just for compatibility
|
|
49
50
|
:returns: Last-Modified time (in Unix timestamp format)
|
|
50
51
|
:raises: HttpPermissionError, HttpFileNotFoundError
|
|
51
|
-
|
|
52
|
+
"""
|
|
52
53
|
return HttpPath(path).getmtime(follow_symlinks)
|
|
53
54
|
|
|
54
55
|
|
megfile/http_path.py
CHANGED
|
@@ -16,17 +16,10 @@ from megfile.lib.compat import fspath
|
|
|
16
16
|
from megfile.lib.http_prefetch_reader import DEFAULT_TIMEOUT, HttpPrefetchReader
|
|
17
17
|
from megfile.lib.s3_buffered_writer import DEFAULT_MAX_BUFFER_SIZE
|
|
18
18
|
from megfile.lib.url import get_url_scheme
|
|
19
|
-
from megfile.pathlike import PathLike
|
|
20
19
|
from megfile.smart_path import SmartPath
|
|
21
20
|
from megfile.utils import _is_pickle, binary_open
|
|
22
21
|
|
|
23
|
-
__all__ = [
|
|
24
|
-
'HttpPath',
|
|
25
|
-
'HttpsPath',
|
|
26
|
-
'get_http_session',
|
|
27
|
-
'is_http',
|
|
28
|
-
'http_open',
|
|
29
|
-
]
|
|
22
|
+
__all__ = ["HttpPath", "HttpsPath", "get_http_session", "is_http", "http_open"]
|
|
30
23
|
|
|
31
24
|
_logger = get_logger(__name__)
|
|
32
25
|
max_retries = HTTP_MAX_RETRY_TIMES
|
|
@@ -34,7 +27,7 @@ max_retries = HTTP_MAX_RETRY_TIMES
|
|
|
34
27
|
|
|
35
28
|
def get_http_session(
|
|
36
29
|
timeout: Optional[Union[int, Tuple[int, int]]] = DEFAULT_TIMEOUT,
|
|
37
|
-
status_forcelist: Iterable[int] = (500, 502, 503, 504)
|
|
30
|
+
status_forcelist: Iterable[int] = (500, 502, 503, 504),
|
|
38
31
|
) -> requests.Session:
|
|
39
32
|
session = requests.Session()
|
|
40
33
|
|
|
@@ -45,8 +38,8 @@ def get_http_session(
|
|
|
45
38
|
|
|
46
39
|
def before_callback(method, url, **kwargs):
|
|
47
40
|
_logger.debug(
|
|
48
|
-
|
|
49
|
-
|
|
41
|
+
"send http request: %s %r, with parameters: %s", method, url, kwargs
|
|
42
|
+
)
|
|
50
43
|
|
|
51
44
|
def retry_callback(
|
|
52
45
|
error,
|
|
@@ -68,36 +61,38 @@ def get_http_session(
|
|
|
68
61
|
json=None,
|
|
69
62
|
**kwargs,
|
|
70
63
|
):
|
|
71
|
-
if data and hasattr(data,
|
|
64
|
+
if data and hasattr(data, "seek"):
|
|
72
65
|
data.seek(0)
|
|
73
66
|
elif isinstance(data, Iterator):
|
|
74
|
-
_logger.warning(
|
|
67
|
+
_logger.warning("Can not retry http request with iterator data")
|
|
75
68
|
raise
|
|
76
69
|
if files:
|
|
77
70
|
|
|
78
71
|
def seek_or_reopen(file_object):
|
|
79
72
|
if isinstance(file_object, (str, bytes)):
|
|
80
73
|
return file_object
|
|
81
|
-
elif hasattr(file_object,
|
|
74
|
+
elif hasattr(file_object, "seek"):
|
|
82
75
|
file_object.seek(0)
|
|
83
76
|
return file_object
|
|
84
|
-
elif hasattr(file_object,
|
|
85
|
-
with SmartPath(file_object.name).open(
|
|
77
|
+
elif hasattr(file_object, "name"):
|
|
78
|
+
with SmartPath(file_object.name).open("rb") as f:
|
|
86
79
|
return BytesIO(f.read())
|
|
87
80
|
else:
|
|
88
81
|
_logger.warning(
|
|
89
|
-
|
|
82
|
+
"Can not retry http request, because the file object "
|
|
83
|
+
'is not seekable and not support "name"'
|
|
90
84
|
)
|
|
91
85
|
raise
|
|
92
86
|
|
|
93
87
|
for key, file_info in files.items():
|
|
94
|
-
if hasattr(file_info,
|
|
88
|
+
if hasattr(file_info, "seek"):
|
|
95
89
|
file_info.seek(0)
|
|
96
|
-
elif isinstance(file_info,
|
|
97
|
-
(tuple, list)) and len(file_info) >= 2:
|
|
90
|
+
elif isinstance(file_info, (tuple, list)) and len(file_info) >= 2:
|
|
98
91
|
file_info = list(file_info)
|
|
99
|
-
if
|
|
100
|
-
|
|
92
|
+
if (
|
|
93
|
+
isinstance(file_info[1], (tuple, list))
|
|
94
|
+
and len(file_info[1]) >= 2
|
|
95
|
+
):
|
|
101
96
|
file_info[1] = list(file_info[1])
|
|
102
97
|
file_info[1] = seek_or_reopen(file_info[1])
|
|
103
98
|
else:
|
|
@@ -116,47 +111,53 @@ def get_http_session(
|
|
|
116
111
|
|
|
117
112
|
|
|
118
113
|
def is_http(path: PathLike) -> bool:
|
|
119
|
-
|
|
114
|
+
"""http scheme definition: http(s)://domain/path
|
|
120
115
|
|
|
121
116
|
:param path: Path to be tested
|
|
122
117
|
:returns: True if path is http url, else False
|
|
123
|
-
|
|
118
|
+
"""
|
|
124
119
|
|
|
125
120
|
path = fspath(path)
|
|
126
|
-
if not isinstance(path, str) or not (
|
|
127
|
-
|
|
121
|
+
if not isinstance(path, str) or not (
|
|
122
|
+
path.startswith("http://") or path.startswith("https://")
|
|
123
|
+
):
|
|
128
124
|
return False
|
|
129
125
|
|
|
130
126
|
scheme = get_url_scheme(path)
|
|
131
|
-
return scheme ==
|
|
127
|
+
return scheme == "http" or scheme == "https"
|
|
132
128
|
|
|
133
129
|
|
|
134
130
|
def http_open(
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
131
|
+
path: PathLike,
|
|
132
|
+
mode: str = "rb",
|
|
133
|
+
*,
|
|
134
|
+
encoding: Optional[str] = None,
|
|
135
|
+
errors: Optional[str] = None,
|
|
136
|
+
max_concurrency: Optional[int] = None,
|
|
137
|
+
max_buffer_size: int = DEFAULT_MAX_BUFFER_SIZE,
|
|
138
|
+
forward_ratio: Optional[float] = None,
|
|
139
|
+
block_size: int = DEFAULT_BLOCK_SIZE,
|
|
140
|
+
**kwargs,
|
|
141
|
+
) -> Union[BufferedReader, HttpPrefetchReader]:
|
|
142
|
+
"""Open a BytesIO to read binary data of given http(s) url
|
|
146
143
|
|
|
147
144
|
.. note ::
|
|
148
145
|
|
|
149
|
-
Essentially, it reads data of http(s) url to memory by requests,
|
|
146
|
+
Essentially, it reads data of http(s) url to memory by requests,
|
|
147
|
+
and then return BytesIO to user.
|
|
150
148
|
|
|
151
149
|
:param path: Given path
|
|
152
150
|
:param mode: Only supports 'rb' mode now
|
|
153
|
-
:param encoding: encoding is the name of the encoding used to decode or encode
|
|
154
|
-
|
|
151
|
+
:param encoding: encoding is the name of the encoding used to decode or encode
|
|
152
|
+
the file. This should only be used in text mode.
|
|
153
|
+
:param errors: errors is an optional string that specifies how encoding and decoding
|
|
154
|
+
errors are to be handled—this cannot be used in binary mode.
|
|
155
155
|
:param max_concurrency: Max download thread number, None by default
|
|
156
156
|
:param max_buffer_size: Max cached buffer size in memory, 128MB by default
|
|
157
|
-
:param block_size: Size of single block, 8MB by default. Each block will be uploaded
|
|
157
|
+
:param block_size: Size of single block, 8MB by default. Each block will be uploaded
|
|
158
|
+
or downloaded by single thread.
|
|
158
159
|
:return: BytesIO initialized with http(s) data
|
|
159
|
-
|
|
160
|
+
"""
|
|
160
161
|
return HttpPath(path).open(
|
|
161
162
|
mode,
|
|
162
163
|
encoding=encoding,
|
|
@@ -164,68 +165,73 @@ def http_open(
|
|
|
164
165
|
max_concurrency=max_concurrency,
|
|
165
166
|
max_buffer_size=max_buffer_size,
|
|
166
167
|
forward_ratio=forward_ratio,
|
|
167
|
-
block_size=block_size
|
|
168
|
+
block_size=block_size,
|
|
169
|
+
)
|
|
168
170
|
|
|
169
171
|
|
|
170
172
|
@SmartPath.register
|
|
171
173
|
class HttpPath(URIPath):
|
|
172
|
-
|
|
173
174
|
protocol = "http"
|
|
174
175
|
|
|
175
176
|
def __init__(self, path: PathLike, *other_paths: PathLike):
|
|
176
177
|
super().__init__(path, *other_paths)
|
|
177
178
|
|
|
178
|
-
if fspath(path).startswith(
|
|
179
|
-
self.protocol =
|
|
179
|
+
if fspath(path).startswith("https://"):
|
|
180
|
+
self.protocol = "https"
|
|
180
181
|
self.request_kwargs = {}
|
|
181
182
|
|
|
182
183
|
@binary_open
|
|
183
184
|
def open(
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
185
|
+
self,
|
|
186
|
+
mode: str = "rb",
|
|
187
|
+
*,
|
|
188
|
+
max_concurrency: Optional[int] = None,
|
|
189
|
+
max_buffer_size: int = DEFAULT_MAX_BUFFER_SIZE,
|
|
190
|
+
forward_ratio: Optional[float] = None,
|
|
191
|
+
block_size: int = DEFAULT_BLOCK_SIZE,
|
|
192
|
+
**kwargs,
|
|
193
|
+
) -> Union[BufferedReader, HttpPrefetchReader]:
|
|
194
|
+
"""Open a BytesIO to read binary data of given http(s) url
|
|
193
195
|
|
|
194
196
|
.. note ::
|
|
195
197
|
|
|
196
|
-
Essentially, it reads data of http(s) url to memory by requests,
|
|
198
|
+
Essentially, it reads data of http(s) url to memory by requests,
|
|
199
|
+
and then return BytesIO to user.
|
|
197
200
|
|
|
198
201
|
:param mode: Only supports 'rb' mode now
|
|
199
|
-
:param encoding: encoding is the name of the encoding used to decode or encode
|
|
200
|
-
|
|
202
|
+
:param encoding: encoding is the name of the encoding used to decode or encode
|
|
203
|
+
the file. This should only be used in text mode.
|
|
204
|
+
:param errors: errors is an optional string that specifies how encoding and
|
|
205
|
+
decoding errors are to be handled—this cannot be used in binary mode.
|
|
201
206
|
:param max_concurrency: Max download thread number, None by default
|
|
202
207
|
:param max_buffer_size: Max cached buffer size in memory, 128MB by default
|
|
203
|
-
:param block_size: Size of single block, 8MB by default. Each block will
|
|
208
|
+
:param block_size: Size of single block, 8MB by default. Each block will
|
|
209
|
+
be uploaded or downloaded by single thread.
|
|
204
210
|
:return: BytesIO initialized with http(s) data
|
|
205
|
-
|
|
206
|
-
if mode not in (
|
|
207
|
-
raise ValueError(
|
|
211
|
+
"""
|
|
212
|
+
if mode not in ("rb",):
|
|
213
|
+
raise ValueError("unacceptable mode: %r" % mode)
|
|
208
214
|
|
|
209
215
|
response = None
|
|
210
216
|
request_kwargs = deepcopy(self.request_kwargs)
|
|
211
|
-
timeout = request_kwargs.pop(
|
|
212
|
-
stream = request_kwargs.pop(
|
|
217
|
+
timeout = request_kwargs.pop("timeout", DEFAULT_TIMEOUT)
|
|
218
|
+
stream = request_kwargs.pop("stream", True)
|
|
213
219
|
try:
|
|
214
|
-
response = get_http_session(
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
).get(
|
|
218
|
-
self.path_with_protocol, stream=stream, **request_kwargs)
|
|
220
|
+
response = get_http_session(timeout=timeout, status_forcelist=()).get(
|
|
221
|
+
self.path_with_protocol, stream=stream, **request_kwargs
|
|
222
|
+
)
|
|
219
223
|
response.raise_for_status()
|
|
220
224
|
except Exception as error:
|
|
221
225
|
if response:
|
|
222
226
|
response.close()
|
|
223
227
|
raise translate_http_error(error, self.path_with_protocol)
|
|
224
228
|
|
|
225
|
-
content_size = int(response.headers[
|
|
226
|
-
if (
|
|
227
|
-
|
|
228
|
-
|
|
229
|
+
content_size = int(response.headers["Content-Length"])
|
|
230
|
+
if (
|
|
231
|
+
response.headers.get("Accept-Ranges") == "bytes"
|
|
232
|
+
and content_size >= block_size * 2
|
|
233
|
+
and not response.headers.get("Content-Encoding")
|
|
234
|
+
):
|
|
229
235
|
response.close()
|
|
230
236
|
|
|
231
237
|
block_capacity = max_buffer_size // block_size
|
|
@@ -248,56 +254,56 @@ class HttpPath(URIPath):
|
|
|
248
254
|
return reader
|
|
249
255
|
|
|
250
256
|
response.raw.name = self.path_with_protocol
|
|
251
|
-
# TODO: When python version must bigger than 3.10,
|
|
257
|
+
# TODO: When python version must bigger than 3.10,
|
|
258
|
+
# use urllib3>=2.0.0 instead of 'Response'
|
|
252
259
|
# response.raw.auto_close = False
|
|
253
260
|
# response.raw.decode_content = True
|
|
254
261
|
# return BufferedReader(response.raw)
|
|
255
262
|
return BufferedReader(Response(response.raw)) # type: ignore
|
|
256
263
|
|
|
257
264
|
def stat(self, follow_symlinks=True) -> StatResult:
|
|
258
|
-
|
|
259
|
-
Get StatResult of http_url response, including size and mtime,
|
|
265
|
+
"""
|
|
266
|
+
Get StatResult of http_url response, including size and mtime,
|
|
267
|
+
referring to http_getsize and http_getmtime
|
|
260
268
|
|
|
261
269
|
:param follow_symlinks: Ignore this parameter, just for compatibility
|
|
262
270
|
:returns: StatResult
|
|
263
271
|
:raises: HttpPermissionError, HttpFileNotFoundError
|
|
264
|
-
|
|
272
|
+
"""
|
|
265
273
|
|
|
266
274
|
request_kwargs = deepcopy(self.request_kwargs)
|
|
267
|
-
timeout = request_kwargs.pop(
|
|
268
|
-
stream = request_kwargs.pop(
|
|
275
|
+
timeout = request_kwargs.pop("timeout", DEFAULT_TIMEOUT)
|
|
276
|
+
stream = request_kwargs.pop("stream", True)
|
|
269
277
|
|
|
270
278
|
try:
|
|
271
279
|
with get_http_session(timeout=timeout, status_forcelist=()).get(
|
|
272
|
-
|
|
273
|
-
|
|
280
|
+
self.path_with_protocol, stream=stream, **request_kwargs
|
|
281
|
+
) as response:
|
|
274
282
|
response.raise_for_status()
|
|
275
283
|
headers = response.headers
|
|
276
284
|
except Exception as error:
|
|
277
285
|
raise translate_http_error(error, self.path_with_protocol)
|
|
278
286
|
|
|
279
|
-
size = headers.get(
|
|
287
|
+
size = headers.get("Content-Length")
|
|
280
288
|
if size:
|
|
281
289
|
size = int(size)
|
|
282
290
|
else:
|
|
283
291
|
size = 0
|
|
284
292
|
|
|
285
|
-
last_modified = headers.get(
|
|
293
|
+
last_modified = headers.get("Last-Modified")
|
|
286
294
|
if last_modified:
|
|
287
295
|
last_modified = time.mktime(
|
|
288
|
-
time.strptime(last_modified, "%a, %d %b %Y %H:%M:%S %Z")
|
|
296
|
+
time.strptime(last_modified, "%a, %d %b %Y %H:%M:%S %Z")
|
|
297
|
+
)
|
|
289
298
|
else:
|
|
290
299
|
last_modified = 0.0
|
|
291
300
|
|
|
292
301
|
return StatResult(
|
|
293
|
-
size=size,
|
|
294
|
-
|
|
295
|
-
isdir=False,
|
|
296
|
-
islnk=False,
|
|
297
|
-
extra=headers)
|
|
302
|
+
size=size, mtime=last_modified, isdir=False, islnk=False, extra=headers
|
|
303
|
+
)
|
|
298
304
|
|
|
299
305
|
def getsize(self, follow_symlinks: bool = False) -> int:
|
|
300
|
-
|
|
306
|
+
"""
|
|
301
307
|
Get file size on the given http_url path.
|
|
302
308
|
|
|
303
309
|
If http response header don't support Content-Length, will return None
|
|
@@ -305,19 +311,19 @@ class HttpPath(URIPath):
|
|
|
305
311
|
:param follow_symlinks: Ignore this parameter, just for compatibility
|
|
306
312
|
:returns: File size (in bytes)
|
|
307
313
|
:raises: HttpPermissionError, HttpFileNotFoundError
|
|
308
|
-
|
|
314
|
+
"""
|
|
309
315
|
return self.stat().size
|
|
310
316
|
|
|
311
317
|
def getmtime(self, follow_symlinks: bool = False) -> float:
|
|
312
|
-
|
|
318
|
+
"""
|
|
313
319
|
Get Last-Modified time of the http request on the given http_url path.
|
|
314
|
-
|
|
320
|
+
|
|
315
321
|
If http response header don't support Last-Modified, will return None
|
|
316
322
|
|
|
317
323
|
:param follow_symlinks: Ignore this parameter, just for compatibility
|
|
318
324
|
:returns: Last-Modified time (in Unix timestamp format)
|
|
319
325
|
:raises: HttpPermissionError, HttpFileNotFoundError
|
|
320
|
-
|
|
326
|
+
"""
|
|
321
327
|
return self.stat().mtime
|
|
322
328
|
|
|
323
329
|
def exists(self, followlinks: bool = False) -> bool:
|
|
@@ -329,13 +335,13 @@ class HttpPath(URIPath):
|
|
|
329
335
|
:rtype: bool
|
|
330
336
|
"""
|
|
331
337
|
request_kwargs = deepcopy(self.request_kwargs)
|
|
332
|
-
timeout = request_kwargs.pop(
|
|
333
|
-
stream = request_kwargs.pop(
|
|
338
|
+
timeout = request_kwargs.pop("timeout", DEFAULT_TIMEOUT)
|
|
339
|
+
stream = request_kwargs.pop("stream", True)
|
|
334
340
|
|
|
335
341
|
try:
|
|
336
342
|
with get_http_session(timeout=timeout, status_forcelist=()).get(
|
|
337
|
-
|
|
338
|
-
|
|
343
|
+
self.path_with_protocol, stream=stream, **request_kwargs
|
|
344
|
+
) as response:
|
|
339
345
|
if response.status_code == 404:
|
|
340
346
|
return False
|
|
341
347
|
return True
|
|
@@ -345,12 +351,10 @@ class HttpPath(URIPath):
|
|
|
345
351
|
|
|
346
352
|
@SmartPath.register
|
|
347
353
|
class HttpsPath(HttpPath):
|
|
348
|
-
|
|
349
354
|
protocol = "https"
|
|
350
355
|
|
|
351
356
|
|
|
352
357
|
class Response(Readable[bytes]):
|
|
353
|
-
|
|
354
358
|
def __init__(self, raw: HTTPResponse) -> None:
|
|
355
359
|
super().__init__()
|
|
356
360
|
|
|
@@ -367,7 +371,7 @@ class Response(Readable[bytes]):
|
|
|
367
371
|
|
|
368
372
|
@property
|
|
369
373
|
def mode(self):
|
|
370
|
-
return
|
|
374
|
+
return "rb"
|
|
371
375
|
|
|
372
376
|
def tell(self) -> int:
|
|
373
377
|
return self._offset
|
|
@@ -378,7 +382,7 @@ class Response(Readable[bytes]):
|
|
|
378
382
|
|
|
379
383
|
def read(self, size: Optional[int] = None) -> bytes:
|
|
380
384
|
if size == 0:
|
|
381
|
-
return b
|
|
385
|
+
return b""
|
|
382
386
|
if size is not None and size < 0:
|
|
383
387
|
size = None
|
|
384
388
|
|
|
@@ -399,7 +403,7 @@ class Response(Readable[bytes]):
|
|
|
399
403
|
|
|
400
404
|
def readline(self, size: Optional[int] = None) -> bytes:
|
|
401
405
|
if size == 0:
|
|
402
|
-
return b
|
|
406
|
+
return b""
|
|
403
407
|
if size is not None and size < 0:
|
|
404
408
|
size = None
|
|
405
409
|
|
|
@@ -407,11 +411,11 @@ class Response(Readable[bytes]):
|
|
|
407
411
|
self._buffer.seek(0)
|
|
408
412
|
buffer = self._buffer.read()
|
|
409
413
|
self._clear_buffer()
|
|
410
|
-
if b
|
|
411
|
-
content = buffer[:buffer.index(b
|
|
414
|
+
if b"\n" in buffer:
|
|
415
|
+
content = buffer[: buffer.index(b"\n") + 1]
|
|
412
416
|
if size:
|
|
413
417
|
content = content[:size]
|
|
414
|
-
self._buffer.write(buffer[len(content):])
|
|
418
|
+
self._buffer.write(buffer[len(content) :])
|
|
415
419
|
elif size and len(buffer) >= size:
|
|
416
420
|
content = buffer[:size]
|
|
417
421
|
self._buffer.write(buffer[size:])
|