megfile 4.2.5__py3-none-any.whl → 5.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- megfile/__init__.py +13 -293
- megfile/cli.py +37 -20
- megfile/config.py +10 -1
- megfile/errors.py +2 -2
- megfile/fs_path.py +32 -3
- megfile/interfaces.py +21 -10
- megfile/lib/base_memory_handler.py +92 -0
- megfile/lib/glob.py +3 -3
- megfile/lib/http_prefetch_reader.py +22 -22
- megfile/lib/s3_memory_handler.py +14 -81
- megfile/lib/webdav_memory_handler.py +83 -0
- megfile/lib/webdav_prefetch_reader.py +115 -0
- megfile/pathlike.py +3 -4
- megfile/s3_path.py +40 -32
- megfile/sftp2_path.py +38 -62
- megfile/sftp_path.py +238 -1
- megfile/smart.py +70 -29
- megfile/smart_path.py +181 -85
- megfile/version.py +1 -1
- megfile/webdav_path.py +159 -165
- {megfile-4.2.5.dist-info → megfile-5.0.0.dist-info}/METADATA +27 -39
- megfile-5.0.0.dist-info/RECORD +51 -0
- megfile/fs.py +0 -627
- megfile/hdfs.py +0 -408
- megfile/http.py +0 -114
- megfile/s3.py +0 -540
- megfile/sftp.py +0 -821
- megfile/sftp2.py +0 -827
- megfile/stdio.py +0 -30
- megfile/webdav.py +0 -552
- megfile-4.2.5.dist-info/RECORD +0 -56
- {megfile-4.2.5.dist-info → megfile-5.0.0.dist-info}/WHEEL +0 -0
- {megfile-4.2.5.dist-info → megfile-5.0.0.dist-info}/entry_points.txt +0 -0
- {megfile-4.2.5.dist-info → megfile-5.0.0.dist-info}/licenses/LICENSE +0 -0
- {megfile-4.2.5.dist-info → megfile-5.0.0.dist-info}/licenses/LICENSE.pyre +0 -0
- {megfile-4.2.5.dist-info → megfile-5.0.0.dist-info}/top_level.txt +0 -0
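The deleted modules below (megfile/fs.py, megfile/s3.py, megfile/hdfs.py, ...) held the protocol-specific module-level wrappers; megfile/smart.py and the path classes are modified, not deleted. A minimal migration sketch, not taken from the diff itself — it assumes the smart_* facade keeps its 4.x names in 5.0.0 (the slimmed __init__.py at +13 -293 suggests importing from megfile.smart directly is the safer spelling):

    # Hypothetical usage; smart_exists/smart_open are assumed to survive in 5.0.0.
    from megfile.smart import smart_exists, smart_open

    if smart_exists("s3://bucket/key"):  # hypothetical path
        with smart_open("s3://bucket/key", "rb") as f:
            data = f.read()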
megfile/hdfs.py
DELETED
@@ -1,408 +0,0 @@
from typing import IO, BinaryIO, Iterator, List, Optional, Tuple

from megfile.config import READER_BLOCK_SIZE, READER_MAX_BUFFER_SIZE
from megfile.hdfs_path import (
    HdfsPath,
    is_hdfs,
)
from megfile.interfaces import FileEntry, PathLike, StatResult

__all__ = [
    "is_hdfs",
    "hdfs_glob",
    "hdfs_glob_stat",
    "hdfs_iglob",
    "hdfs_makedirs",
    "hdfs_exists",
    "hdfs_stat",
    "hdfs_getmtime",
    "hdfs_getsize",
    "hdfs_isdir",
    "hdfs_isfile",
    "hdfs_listdir",
    "hdfs_load_from",
    "hdfs_move",
    "hdfs_remove",
    "hdfs_scan",
    "hdfs_scan_stat",
    "hdfs_scandir",
    "hdfs_unlink",
    "hdfs_walk",
    "hdfs_getmd5",
    "hdfs_save_as",
    "hdfs_open",
]


def hdfs_exists(path: PathLike, followlinks: bool = False) -> bool:
    """
    Test if path exists

    If the bucket of path are not permitted to read, return False

    :param path: Given path
    :returns: True if path exists, else False
    """
    return HdfsPath(path).exists(followlinks)


def hdfs_stat(path: PathLike, follow_symlinks=True) -> StatResult:
    """
    Get StatResult of path file, including file size and mtime,
    referring to hdfs_getsize and hdfs_getmtime

    If path is not an existent path, which means hdfs_exist(path) returns False,
    then raise FileNotFoundError

    If attempt to get StatResult of complete hdfs, such as hdfs_dir_url == 'hdfs://',
    raise BucketNotFoundError

    :param path: Given path
    :returns: StatResult
    :raises: FileNotFoundError
    """
    return HdfsPath(path).stat(follow_symlinks)


def hdfs_getmtime(path: PathLike, follow_symlinks: bool = False) -> float:
    """
    Get last-modified time of the file on the given path path (in Unix timestamp
    format).
    If the path is an existent directory, return the latest modified time of all
    file in it. The mtime of empty directory is 1970-01-01 00:00:00

    If path is not an existent path, which means hdfs_exist(path) returns False,
    then raise FileNotFoundError

    :param path: Given path
    :returns: Last-modified time
    :raises: FileNotFoundError
    """
    return HdfsPath(path).getmtime(follow_symlinks)


def hdfs_getsize(path: PathLike, follow_symlinks: bool = False) -> int:
    """
    Get file size on the given path path (in bytes).
    If the path in a directory, return the sum of all file size in it,
    including file in subdirectories (if exist).

    The result excludes the size of directory itself. In other words,
    return 0 Byte on an empty directory path.

    If path is not an existent path, which means hdfs_exist(path) returns False,
    then raise FileNotFoundError

    :param path: Given path
    :returns: File size
    :raises: FileNotFoundError
    """
    return HdfsPath(path).getsize(follow_symlinks)


def hdfs_isdir(path: PathLike, followlinks: bool = False) -> bool:
    """
    Test if an hdfs url is directory
    Specific procedures are as follows:
    If there exists a suffix, of which ``os.path.join(path, suffix)`` is a file
    If the url is empty bucket or hdfs://

    :param path: Given path
    :param followlinks: whether followlinks is True or False, result is the same.
        Because hdfs symlink not support dir.
    :returns: True if path is hdfs directory, else False
    """
    return HdfsPath(path).is_dir(followlinks)


def hdfs_isfile(path: PathLike, followlinks: bool = False) -> bool:
    """
    Test if an path is file

    :param path: Given path
    :returns: True if path is hdfs file, else False
    """
    return HdfsPath(path).is_file(followlinks)


def hdfs_listdir(path: PathLike) -> List[str]:
    """
    Get all contents of given path.

    :param path: Given path
    :returns: All contents have prefix of path.
    :raises: FileNotFoundError, NotADirectoryError
    """
    return HdfsPath(path).listdir()


def hdfs_load_from(path: PathLike) -> BinaryIO:
    """Read all content in binary on specified path and write into memory

    User should close the BinaryIO manually

    :param path: Given path
    :returns: BinaryIO
    """
    return HdfsPath(path).load()


def hdfs_move(src_path: PathLike, dst_path: PathLike, overwrite: bool = True) -> None:
    """
    Move file/directory path from src_path to dst_path

    :param src_path: Given path
    :param dst_path: Given destination path
    """
    return HdfsPath(src_path).move(dst_path, overwrite)


def hdfs_remove(path: PathLike, missing_ok: bool = False) -> None:
    """
    Remove the file or directory on hdfs, `hdfs://` and `hdfs://bucket` are not
    permitted to remove

    :param path: Given path
    :param missing_ok: if False and target file/directory not exists,
        raise FileNotFoundError
    :raises: FileNotFoundError, UnsupportedError
    """
    return HdfsPath(path).remove(missing_ok)


def hdfs_scan(
    path: PathLike, missing_ok: bool = True, followlinks: bool = False
) -> Iterator[str]:
    """
    Iteratively traverse only files in given hdfs directory.
    Every iteration on generator yields a path string.

    If path is a file path, yields the file only
    If path is a non-existent path, return an empty generator
    If path is a bucket path, return all file paths in the bucket
    If path is an empty bucket, return an empty generator
    If path doesn't contain any bucket, which is path == 'hdfs://',
    raise UnsupportedError. walk() on complete hdfs is not supported in megfile

    :param path: Given path
    :param missing_ok: If False and there's no file in the directory,
        raise FileNotFoundError
    :raises: UnsupportedError
    :returns: A file path generator
    """
    return HdfsPath(path).scan(missing_ok, followlinks)


def hdfs_scan_stat(
    path: PathLike, missing_ok: bool = True, followlinks: bool = False
) -> Iterator[FileEntry]:
    """
    Iteratively traverse only files in given directory.
    Every iteration on generator yields a tuple of path string and file stat

    :param path: Given path
    :param missing_ok: If False and there's no file in the directory,
        raise FileNotFoundError
    :raises: UnsupportedError
    :returns: A file path generator
    """
    return HdfsPath(path).scan_stat(missing_ok, followlinks)


def hdfs_scandir(path: PathLike) -> Iterator[FileEntry]:
    """
    Get all contents of given path, the order of result is in arbitrary order.

    :param path: Given path
    :returns: All contents have prefix of path
    :raises: FileNotFoundError, NotADirectoryError
    """
    return HdfsPath(path).scandir()


def hdfs_unlink(path: PathLike, missing_ok: bool = False) -> None:
    """
    Remove the file on hdfs

    :param path: Given path
    :param missing_ok: if False and target file not exists, raise FileNotFoundError
    :raises: FileNotFoundError, IsADirectoryError
    """
    return HdfsPath(path).unlink(missing_ok)


def hdfs_walk(
    path: PathLike, followlinks: bool = False
) -> Iterator[Tuple[str, List[str], List[str]]]:
    """
    Iteratively traverse the given hdfs directory, in top-bottom order.
    In other words, firstly traverse parent directory, if subdirectories exist,
    traverse the subdirectories.

    Every iteration on generator yields a 3-tuple: (root, dirs, files)

    - root: Current hdfs path;
    - dirs: Name list of subdirectories in current directory.
    - files: Name list of files in current directory.

    If path is a file path, return an empty generator

    If path is a non-existent path, return an empty generator

    If path is a bucket path, bucket will be the top directory,
    and will be returned at first iteration of generator

    If path is an empty bucket, only yield one 3-tuple
    (notes: hdfs doesn't have empty directory)

    If path doesn't contain any bucket, which is path == 'hdfs://',
    raise UnsupportedError. walk() on complete hdfs is not supported in megfile

    :param path: Given path
    :param followlinks: whether followlinks is True or False, result is the same.
        Because hdfs not support symlink.
    :returns: A 3-tuple generator
    """
    return HdfsPath(path).walk(followlinks)


def hdfs_getmd5(
    path: PathLike, recalculate: bool = False, followlinks: bool = False
) -> str:
    """
    Get checksum of the file or dir.

    :param path: Given path
    :param recalculate: Ignore this parameter, just for compatibility
    :param followlinks: Ignore this parameter, just for compatibility
    :returns: checksum
    """
    return HdfsPath(path).md5(recalculate, followlinks)


def hdfs_save_as(file_object: BinaryIO, path: PathLike):
    """Write the opened binary stream to specified path,
    but the stream won't be closed

    :param path: Given path
    :param file_object: Stream to be read
    """
    return HdfsPath(path).save(file_object)


def hdfs_open(
    path: PathLike,
    mode: str = "r",
    *,
    buffering: Optional[int] = None,
    encoding: Optional[str] = None,
    errors: Optional[str] = None,
    max_workers: Optional[int] = None,
    max_buffer_size: int = READER_MAX_BUFFER_SIZE,
    block_forward: Optional[int] = None,
    block_size: int = READER_BLOCK_SIZE,
    **kwargs,
) -> IO:
    """
    Open a file on the specified path.

    :param path: Given path
    :param mode: Mode to open the file. Supports 'r', 'rb', 'w', 'wb', 'a', 'ab'.
    :param buffering: Optional integer used to set the buffering policy.
    :param encoding: Name of the encoding used to decode or encode the file.
        Should only be used in text mode.
    :param errors: Optional string specifying how encoding and decoding errors are
        to be handled. Cannot be used in binary mode.
    :param max_workers: Max download thread number, `None` by default,
        will use global thread pool with 8 threads.
    :param max_buffer_size: Max cached buffer size in memory, 128MB by default.
        Set to `0` will disable cache.
    :param block_forward: Number of blocks of data for reader cached from the
        offset position.
    :param block_size: Size of a single block for reader, default is 8MB.
    :returns: A file-like object.
    :raises ValueError: If an unacceptable mode is provided.
    """
    return HdfsPath(path).open(
        mode,
        buffering=buffering,
        encoding=encoding,
        errors=errors,
        max_workers=max_workers,
        max_buffer_size=max_buffer_size,
        block_forward=block_forward,
        block_size=block_size,
    )


def hdfs_glob(
    path: PathLike, recursive: bool = True, missing_ok: bool = True
) -> List[str]:
    """Return hdfs path list in ascending alphabetical order,
    in which path matches glob pattern

    Notes: Only glob in bucket. If trying to match bucket with wildcard characters,
    raise UnsupportedError

    :param recursive: If False, `**` will not search directory recursively
    :param missing_ok: If False and target path doesn't match any file,
        raise FileNotFoundError
    :raises: UnsupportedError, when bucket part contains wildcard characters
    :returns: A list contains paths match `path`
    """
    return list(hdfs_iglob(path, recursive=recursive, missing_ok=missing_ok))


def hdfs_glob_stat(
    path: PathLike, recursive: bool = True, missing_ok: bool = True
) -> Iterator[FileEntry]:
    """Return a generator contains tuples of path and file stat,
    in ascending alphabetical order, in which path matches glob pattern

    Notes: Only glob in bucket. If trying to match bucket with wildcard characters,
    raise UnsupportedError

    :param recursive: If False, `**` will not search directory recursively
    :param missing_ok: If False and target path doesn't match any file,
        raise FileNotFoundError
    :raises: UnsupportedError, when bucket part contains wildcard characters
    :returns: A generator contains tuples of path and file stat,
        in which paths match `path`
    """
    return HdfsPath(path).glob_stat(
        pattern="", recursive=recursive, missing_ok=missing_ok
    )


def hdfs_iglob(
    path: PathLike, recursive: bool = True, missing_ok: bool = True
) -> Iterator[str]:
    """Return hdfs path iterator in ascending alphabetical order,
    in which path matches glob pattern

    Notes: Only glob in bucket. If trying to match bucket with wildcard characters,
    raise UnsupportedError

    :param recursive: If False, `**` will not search directory recursively
    :param missing_ok: If False and target path doesn't match any file,
        raise FileNotFoundError
    :raises: UnsupportedError, when bucket part contains wildcard characters
    :returns: An iterator contains paths match `path`
    """
    for path_obj in HdfsPath(path).iglob(
        pattern="", recursive=recursive, missing_ok=missing_ok
    ):
        yield path_obj.path_with_protocol


def hdfs_makedirs(path: PathLike, exist_ok: bool = False):
    """
    Create an hdfs directory.
    Purely creating directory is invalid because it's unavailable on OSS.
    This function is to test the target bucket have WRITE access.

    :param path: Given path
    :param exist_ok: If False and target directory exists, raise S3FileExistsError
    :raises: FileExistsError
    """
    return HdfsPath(path).mkdir(parents=True, exist_ok=exist_ok)
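Every deleted hdfs_* helper above is a one-line delegation to HdfsPath, and megfile/hdfs_path.py is not in the deleted-file list. A sketch of the equivalent path-object spelling, assuming HdfsPath keeps these methods in 5.0.0:

    from megfile.hdfs_path import HdfsPath

    p = HdfsPath("hdfs://bucket/data/sample.txt")  # hypothetical path
    if p.exists():                 # was: hdfs_exists(p)
        print(p.stat().size)       # was: hdfs_stat(p).size
        with p.open("rb") as f:    # was: hdfs_open(p, "rb")
            head = f.read(1024)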
megfile/http.py
DELETED
@@ -1,114 +0,0 @@
from io import BufferedReader
from typing import Optional, Union

from megfile.config import READER_BLOCK_SIZE, READER_MAX_BUFFER_SIZE
from megfile.http_path import HttpPath, HttpPrefetchReader, get_http_session, is_http
from megfile.interfaces import PathLike, StatResult

__all__ = [
    "get_http_session",
    "is_http",
    "http_open",
    "http_stat",
    "http_getsize",
    "http_getmtime",
    "http_exists",
]


def http_open(
    path: PathLike,
    mode: str = "rb",
    *,
    encoding: Optional[str] = None,
    errors: Optional[str] = None,
    max_workers: Optional[int] = None,
    max_buffer_size: int = READER_MAX_BUFFER_SIZE,
    block_forward: Optional[int] = None,
    block_size: int = READER_BLOCK_SIZE,
    **kwargs,
) -> Union[BufferedReader, HttpPrefetchReader]:
    """Open a BytesIO to read binary data of given http(s) url

    .. note ::

        Essentially, it reads data of http(s) url to memory by requests,
        and then return BytesIO to user.

    :param path: Given path
    :param mode: Only supports 'r' or 'rb' mode now
    :param encoding: encoding is the name of the encoding used to decode or encode
        the file. This should only be used in text mode.
    :param errors: errors is an optional string that specifies how encoding and decoding
        errors are to be handled—this cannot be used in binary mode.
    :param max_workers: Max download thread number, `None` by default,
        will use global thread pool with 8 threads.
    :param max_buffer_size: Max cached buffer size in memory, 128MB by default.
        Set to `0` will disable cache.
    :param block_forward: How many blocks of data cached from offset position
    :param block_size: Size of single block, 8MB by default. Each block will be uploaded
        or downloaded by single thread.
    :return: A file-like object with http(s) data
    """
    return HttpPath(path).open(
        mode,
        encoding=encoding,
        errors=errors,
        max_workers=max_workers,
        max_buffer_size=max_buffer_size,
        block_forward=block_forward,
        block_size=block_size,
    )


def http_stat(path: PathLike, follow_symlinks=True) -> StatResult:
    """
    Get StatResult of http_url response, including size and mtime,
    referring to http_getsize and http_getmtime

    :param path: Given path
    :param follow_symlinks: Ignore this parameter, just for compatibility
    :returns: StatResult
    :raises: HttpPermissionError, HttpFileNotFoundError
    """
    return HttpPath(path).stat(follow_symlinks)


def http_getsize(path: PathLike, follow_symlinks: bool = False) -> int:
    """
    Get file size on the given http_url path.

    If http response header don't support Content-Length, will return None

    :param path: Given path
    :param follow_symlinks: Ignore this parameter, just for compatibility
    :returns: File size (in bytes)
    :raises: HttpPermissionError, HttpFileNotFoundError
    """
    return HttpPath(path).getsize(follow_symlinks)


def http_getmtime(path: PathLike, follow_symlinks: bool = False) -> float:
    """
    Get Last-Modified time of the http request on the given http_url path.

    If http response header don't support Last-Modified, will return None

    :param path: Given path
    :param follow_symlinks: Ignore this parameter, just for compatibility
    :returns: Last-Modified time (in Unix timestamp format)
    :raises: HttpPermissionError, HttpFileNotFoundError
    """
    return HttpPath(path).getmtime(follow_symlinks)


def http_exists(path: PathLike, followlinks: bool = False) -> bool:
    """Test if http path exists

    :param path: Given path
    :param followlinks: ignore this parameter, just for compatibility
    :type followlinks: bool, optional
    :return: return True if exists
    :rtype: bool
    """
    return HttpPath(path).exists(followlinks)