megfile 4.2.4__py3-none-any.whl → 5.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
megfile/s3.py DELETED
@@ -1,540 +0,0 @@
1
- from typing import BinaryIO, Callable, Iterator, List, Optional, Tuple
2
-
3
- from megfile.interfaces import Access, FileEntry, PathLike, StatResult
4
- from megfile.s3_path import (
5
- S3Path,
6
- get_endpoint_url,
7
- get_s3_client,
8
- get_s3_session,
9
- is_s3,
10
- parse_s3_url,
11
- s3_buffered_open,
12
- s3_cached_open,
13
- s3_concat,
14
- s3_download,
15
- s3_load_content,
16
- s3_memory_open,
17
- s3_open,
18
- s3_path_join,
19
- s3_pipe_open,
20
- s3_prefetch_open,
21
- s3_share_cache_open,
22
- s3_upload,
23
- )
24
-
25
# Public API of this module. The order is kept exactly as published.
__all__ = [
    # Names re-exported unchanged from megfile.s3_path.
    "parse_s3_url",
    "get_endpoint_url",
    "get_s3_session",
    "get_s3_client",
    "s3_path_join",
    "is_s3",
    "s3_buffered_open",
    "s3_cached_open",
    "s3_memory_open",
    "s3_pipe_open",
    "s3_prefetch_open",
    "s3_share_cache_open",
    "s3_open",
    "s3_upload",
    "s3_download",
    "s3_load_content",
    # Function wrappers defined below (s3_concat is also a re-export).
    "s3_readlink",
    "s3_glob",
    "s3_glob_stat",
    "s3_iglob",
    "s3_rename",
    "s3_makedirs",
    "s3_concat",
    "s3_lstat",
    "s3_access",
    "s3_exists",
    "s3_getmtime",
    "s3_getsize",
    "s3_isdir",
    "s3_isfile",
    "s3_listdir",
    "s3_load_from",
    "s3_hasbucket",
    "s3_move",
    "s3_remove",
    "s3_scan",
    "s3_scan_stat",
    "s3_scandir",
    "s3_stat",
    "s3_unlink",
    "s3_walk",
    "s3_getmd5",
    "s3_copy",
    "s3_sync",
    "s3_symlink",
    "s3_islink",
    "s3_save_as",
]
74
-
75
-
76
def s3_access(path: PathLike, mode: Access = Access.READ) -> bool:
    """
    Check whether ``path`` grants the permission described by ``mode``.

    Delegates to ``S3Path(path).access(mode)``.

    :param path: Given path
    :param mode: Access mode to test, ``Access.READ`` by default
    :returns: bool, whether the bucket of the s3 url has the requested
        read/write access
    """
    s3_path = S3Path(path)
    return s3_path.access(mode)
85
-
86
-
87
def s3_exists(path: PathLike, followlinks: bool = False) -> bool:
    """
    Tell whether the given s3 url exists.

    If the bucket of the s3 url is not permitted to be read, False is returned.

    :param path: Given path
    :param followlinks: Passed through unchanged to ``S3Path.exists``
    :returns: True if the s3 url exists, else False
    """
    s3_path = S3Path(path)
    return s3_path.exists(followlinks)
97
-
98
-
99
def s3_getmtime(path: PathLike, follow_symlinks: bool = False) -> float:
    """
    Return the last-modified time of the given s3 url as a Unix timestamp.

    For an existing directory the result is the latest modified time of all
    files in it. The mtime of an empty directory is 1970-01-01 00:00:00.

    If the s3 url does not exist, i.e. ``s3_exists(s3_url)`` returns False,
    S3FileNotFoundError is raised.

    :param path: Given path
    :param follow_symlinks: Passed through unchanged to ``S3Path.getmtime``
    :returns: Last-modified time
    :raises: S3FileNotFoundError, UnsupportedError
    """
    s3_path = S3Path(path)
    return s3_path.getmtime(follow_symlinks)
115
-
116
-
117
def s3_getsize(path: PathLike, follow_symlinks: bool = False) -> int:
    """
    Return the size in bytes of the given s3 url.

    If the path is a directory, the result is the sum of the sizes of all
    files in it, including files in subdirectories (if any).

    The size of the directory itself is not counted, so an empty directory
    path yields 0 bytes.

    If the s3 url does not exist, i.e. ``s3_exists(s3_url)`` returns False,
    S3FileNotFoundError is raised.

    :param path: Given path
    :param follow_symlinks: Passed through unchanged to ``S3Path.getsize``
    :returns: File size
    :raises: S3FileNotFoundError, UnsupportedError
    """
    s3_path = S3Path(path)
    return s3_path.getsize(follow_symlinks)
135
-
136
-
137
def s3_isdir(path: PathLike, followlinks: bool = False) -> bool:
    """
    Tell whether an s3 url is a directory.

    The url counts as a directory in either of these cases:

    - there exists a suffix for which ``os.path.join(s3_url, suffix)`` is a file
    - the url is an empty bucket or ``s3://``

    :param path: Given path
    :param followlinks: The result is the same whether this is True or False,
        because s3 symlinks do not support directories.
    :returns: True if path is an s3 directory, else False
    """
    s3_path = S3Path(path)
    return s3_path.is_dir(followlinks)
150
-
151
-
152
def s3_isfile(path: PathLike, followlinks: bool = False) -> bool:
    """
    Tell whether an s3 url is a file.

    :param path: Given path
    :param followlinks: Passed through unchanged to ``S3Path.is_file``
    :returns: True if path is an s3 file, else False
    """
    s3_path = S3Path(path)
    return s3_path.is_file(followlinks)
160
-
161
-
162
def s3_listdir(path: PathLike) -> List[str]:
    """
    List all contents of the given s3 url in ascending alphabetical order.

    :param path: Given path
    :returns: All contents that have the s3 url as prefix, in ascending
        alphabetical order
    :raises: S3FileNotFoundError, S3NotADirectoryError
    """
    s3_path = S3Path(path)
    return s3_path.listdir()
171
-
172
-
173
def s3_load_from(path: PathLike) -> BinaryIO:
    """Read the whole content of the given path in binary mode into memory.

    The caller is responsible for closing the returned BinaryIO.

    :param path: Given path
    :returns: BinaryIO
    """
    s3_path = S3Path(path)
    return s3_path.load()
182
-
183
-
184
def s3_hasbucket(path: PathLike) -> bool:
    """
    Tell whether the bucket of the given s3 url exists.

    :param path: Given path
    :returns: True if the bucket of the s3 url exists, else False
    """
    s3_path = S3Path(path)
    return s3_path.hasbucket()
192
-
193
-
194
def s3_move(src_url: PathLike, dst_url: PathLike, overwrite: bool = True) -> None:
    """
    Move a file or directory from ``src_url`` to ``dst_url``.

    :param src_url: Given path
    :param dst_url: Given destination path
    :param overwrite: Whether or not to overwrite the file when it exists
    """
    source = S3Path(src_url)
    return source.move(dst_url, overwrite)
203
-
204
-
205
def s3_remove(path: PathLike, missing_ok: bool = False) -> None:
    """
    Remove a file or directory on s3.

    Removing ``s3://`` or ``s3://bucket`` is not permitted.

    :param path: Given path
    :param missing_ok: If False and the target file/directory does not exist,
        raise S3FileNotFoundError
    :raises: S3PermissionError, S3FileNotFoundError, UnsupportedError
    """
    s3_path = S3Path(path)
    return s3_path.remove(missing_ok)
216
-
217
-
218
def s3_scan(
    path: PathLike, missing_ok: bool = True, followlinks: bool = False
) -> Iterator[str]:
    """
    Iterate over the files (only) under the given s3 directory, in
    alphabetical order. Each iteration yields a path string.

    - If s3_url is a file path, only that file is yielded.
    - If s3_url is a non-existent path, an empty generator is returned.
    - If s3_url is a bucket path, all file paths in the bucket are returned.
    - If s3_url is an empty bucket, an empty generator is returned.
    - If s3_url contains no bucket, i.e. s3_url == 's3://', UnsupportedError
      is raised. walk() on complete s3 is not supported in megfile.

    :param path: Given path
    :param missing_ok: If False and there's no file in the directory,
        raise FileNotFoundError
    :param followlinks: Passed through unchanged to ``S3Path.scan``
    :raises: UnsupportedError
    :returns: A file path generator
    """
    s3_path = S3Path(path)
    return s3_path.scan(missing_ok, followlinks)
243
-
244
-
245
def s3_scan_stat(
    path: PathLike, missing_ok: bool = True, followlinks: bool = False
) -> Iterator[FileEntry]:
    """
    Iterate over the files (only) under the given directory, in alphabetical
    order. Each iteration yields a ``FileEntry``, i.e. a path string together
    with its file stat.

    :param path: Given path
    :param missing_ok: If False and there's no file in the directory,
        raise FileNotFoundError
    :param followlinks: Passed through unchanged to ``S3Path.scan_stat``
    :raises: UnsupportedError
    :returns: A generator of file entries
    """
    s3_path = S3Path(path)
    return s3_path.scan_stat(missing_ok, followlinks)
259
-
260
-
261
def s3_scandir(path: PathLike) -> Iterator[FileEntry]:
    """
    Iterate over all contents of the given s3 url, in arbitrary order.

    :param path: Given path
    :returns: All contents that have the s3 url as prefix
    :raises: S3FileNotFoundError, S3NotADirectoryError
    """
    s3_path = S3Path(path)
    return s3_path.scandir()
270
-
271
-
272
def s3_stat(path: PathLike, follow_symlinks: bool = True) -> StatResult:
    """
    Get the StatResult of an s3 url, including file size and mtime
    (see s3_getsize and s3_getmtime).

    If the s3 url does not exist, i.e. ``s3_exists(s3_url)`` returns False,
    S3FileNotFoundError is raised.

    Attempting to stat complete s3, such as s3_dir_url == 's3://', raises
    S3BucketNotFoundError.

    :param path: Given path
    :param follow_symlinks: Passed through unchanged to ``S3Path.stat``;
        note that it defaults to True here, unlike s3_getsize/s3_getmtime
    :returns: StatResult
    :raises: S3FileNotFoundError, S3BucketNotFoundError
    """
    s3_path = S3Path(path)
    return s3_path.stat(follow_symlinks)
288
-
289
-
290
def s3_unlink(path: PathLike, missing_ok: bool = False) -> None:
    """
    Remove a file on s3.

    :param path: Given path
    :param missing_ok: If False and the target file does not exist,
        raise S3FileNotFoundError
    :raises: S3PermissionError, S3FileNotFoundError, S3IsADirectoryError
    """
    s3_path = S3Path(path)
    return s3_path.unlink(missing_ok)
300
-
301
-
302
def s3_walk(
    path: PathLike, followlinks: bool = False
) -> Iterator[Tuple[str, List[str], List[str]]]:
    """
    Walk the given s3 directory in top-bottom order: the parent directory is
    traversed first, then its subdirectories (if any) in alphabetical order.

    Each iteration yields a 3-tuple ``(root, dirs, files)``:

    - root: Current s3 path;
    - dirs: Names of the subdirectories in the current directory,
      sorted by name in ascending alphabetical order;
    - files: Names of the files in the current directory,
      sorted by name in ascending alphabetical order;

    Special cases:

    - If s3_url is a file path, an empty generator is returned.
    - If s3_url is a non-existent path, an empty generator is returned.
    - If s3_url is a bucket path, the bucket is the top directory and is
      returned at the first iteration of the generator.
    - If s3_url is an empty bucket, only one 3-tuple is yielded
      (note: s3 doesn't have empty directories).
    - If s3_url contains no bucket, i.e. s3_url == 's3://', UnsupportedError
      is raised. walk() on complete s3 is not supported in megfile.

    :param path: Given path
    :param followlinks: The result is the same whether this is True or False,
        because s3 symlinks do not support directories.
    :raises: UnsupportedError
    :returns: A 3-tuple generator
    """
    s3_path = S3Path(path)
    return s3_path.walk(followlinks)
338
-
339
-
340
def s3_getmd5(
    path: PathLike, recalculate: bool = False, followlinks: bool = False
) -> str:
    """
    Get the md5 meta info of files that were uploaded/copied via megfile.

    If the meta info is lost or non-existent, None is returned.
    NOTE(review): that conflicts with the ``str`` return annotation; confirm
    against ``S3Path.md5``.

    :param path: Given path
    :param recalculate: Calculate md5 in real-time or return the s3 etag
    :param followlinks: If True, calculate md5 for the real file
    :returns: md5 meta info
    """
    s3_path = S3Path(path)
    return s3_path.md5(recalculate, followlinks)
354
-
355
-
356
def s3_copy(
    src_url: PathLike,
    dst_url: PathLike,
    callback: Optional[Callable[[int], None]] = None,
    followlinks: bool = False,
    overwrite: bool = True,
) -> None:
    """File copy on S3

    Copy the content of the file at ``src_url`` to ``dst_url``.
    It is the caller's responsibility to ensure that s3_isfile(src_url)
    is True.

    :param src_url: Given path
    :param dst_url: Target file path
    :param callback: Called periodically during copy; its input parameter is
        the data size (in bytes) copied since the last call
    :param followlinks: False if a symlink is regarded as a file, else True
    :param overwrite: Whether or not to overwrite the file when it exists,
        default is True
    """
    source = S3Path(src_url)
    return source.copy(dst_url, callback, followlinks, overwrite)
375
-
376
-
377
def s3_sync(
    src_url: PathLike,
    dst_url: PathLike,
    followlinks: bool = False,
    force: bool = False,
    overwrite: bool = True,
) -> None:
    """
    Copy the file or directory at ``src_url`` to ``dst_url``.

    :param src_url: Given path
    :param dst_url: Given destination path
    :param followlinks: False if a symlink is regarded as a file, else True
    :param force: Sync files forcibly, without ignoring identical files;
        takes priority over 'overwrite', default is False
    :param overwrite: Whether or not to overwrite the file when it exists,
        default is True
    """
    source = S3Path(src_url)
    return source.sync(dst_url, followlinks, force, overwrite)
395
-
396
-
397
def s3_symlink(src_path: PathLike, dst_path: PathLike) -> None:
    """
    Create a symbolic link named ``dst_path`` that points to ``src_path``.

    :param src_path: Given path
    :param dst_path: Destination path
    :raises: S3NameTooLongError, S3BucketNotFoundError, S3IsADirectoryError
    """
    source = S3Path(src_path)
    return source.symlink(dst_path)
406
-
407
-
408
def s3_islink(path: PathLike) -> bool:
    """
    Tell whether a path is a link.

    :param path: Given path
    :returns: True if the path is a link, else False
    :raises: S3NotALinkError
    """
    s3_path = S3Path(path)
    return s3_path.is_symlink()
417
-
418
-
419
def s3_save_as(file_object: BinaryIO, path: PathLike):
    """Write an opened binary stream to the given path.

    The stream is not closed by this function.

    :param file_object: Stream to be read
    :param path: Given path
    """
    destination = S3Path(path)
    return destination.save(file_object)
427
-
428
-
429
def s3_readlink(path: PathLike) -> str:
    """
    Return a string representing the path to which the symbolic link points.

    The link is resolved with ``S3Path(path).readlink()`` and the result's
    ``path_with_protocol`` is returned.

    :param path: Given path of the symbolic link
    :returns: A string representing the path to which the symbolic link points
    :raises: S3NameTooLongError, S3BucketNotFoundError, S3IsADirectoryError,
        S3NotALinkError
    """
    link_target = S3Path(path).readlink()
    return link_target.path_with_protocol
438
-
439
-
440
def s3_rename(src_url: PathLike, dst_url: PathLike, overwrite: bool = True) -> None:
    """
    Move an s3 file path from ``src_url`` to ``dst_url``.

    :param src_url: Given source path
    :param dst_url: Given destination path
    :param overwrite: Whether or not to overwrite the file when it exists
    """
    source = S3Path(src_url)
    # The result of rename() is intentionally not returned.
    source.rename(dst_url, overwrite)
448
-
449
-
450
def s3_glob(
    path: PathLike,
    recursive: bool = True,
    missing_ok: bool = True,
) -> List[str]:
    """Return the list of s3 paths matching the glob pattern,
    in ascending alphabetical order.

    Notes: Globbing only happens inside a bucket. Trying to match the bucket
    part with wildcard characters raises UnsupportedError.

    :param path: Given path, may contain glob wildcards
    :param recursive: If False, `**` will not search directories recursively
    :param missing_ok: If False and the target path doesn't match any file,
        raise FileNotFoundError
    :raises: UnsupportedError, when bucket part contains wildcard characters
    :returns: A list containing the paths that match `s3_pathname`
    """
    # Materialise the lazy iterator produced by s3_iglob.
    matches = s3_iglob(path=path, recursive=recursive, missing_ok=missing_ok)
    return list(matches)
474
-
475
-
476
def s3_glob_stat(
    path: PathLike,
    recursive: bool = True,
    missing_ok: bool = True,
) -> Iterator[FileEntry]:
    """Return a generator of file entries (path and file stat) whose paths
    match the glob pattern, in ascending alphabetical order.

    Notes: Globbing only happens inside a bucket. Trying to match the bucket
    part with wildcard characters raises UnsupportedError.

    :param path: Given path, may contain glob wildcards
    :param recursive: If False, `**` will not search directories recursively
    :param missing_ok: If False and the target path doesn't match any file,
        raise FileNotFoundError
    :raises: UnsupportedError, when bucket part contains wildcard characters
    :returns: A generator of path/file-stat entries whose paths match
        `s3_pathname`
    """
    s3_path = S3Path(path)
    # The pattern is left empty; only `path` is supplied as the glob input.
    return s3_path.glob_stat(pattern="", recursive=recursive, missing_ok=missing_ok)
497
-
498
-
499
def s3_iglob(
    path: PathLike,
    recursive: bool = True,
    missing_ok: bool = True,
) -> Iterator[str]:
    """Return an iterator of s3 paths matching the glob pattern,
    in ascending alphabetical order.

    Notes: Globbing only happens inside a bucket. Trying to match the bucket
    part with wildcard characters raises UnsupportedError.

    :param path: Given path, may contain glob wildcards
    :param recursive: If False, `**` will not search directories recursively
    :param missing_ok: If False and the target path doesn't match any file,
        raise FileNotFoundError
    :raises: UnsupportedError, when bucket part contains wildcard characters
    :returns: An iterator over the paths that match `s3_pathname`
    """
    matches = S3Path(path).iglob(
        pattern="", recursive=recursive, missing_ok=missing_ok
    )
    # Convert each yielded path object to its string form with protocol.
    yield from (match.path_with_protocol for match in matches)
520
-
521
-
522
def s3_makedirs(path: PathLike, exist_ok: bool = False):
    """
    Create an s3 directory.

    Purely creating a directory is invalid because it's unavailable on OSS.
    This function is used to test that the target bucket has WRITE access.

    :param path: Given path
    :param exist_ok: If False and the target directory exists,
        raise S3FileExistsError
    :raises: S3BucketNotFoundError, S3FileExistsError
    """
    s3_path = S3Path(path)
    # parents=True is always passed, so intermediate levels are included.
    return s3_path.mkdir(parents=True, exist_ok=exist_ok)
533
-
534
-
535
def s3_lstat(path: PathLike) -> StatResult:
    """
    Like Path.stat(), except that when the path points to a symbolic link,
    the symbolic link's own information is returned rather than its target's.

    :param path: Given path
    :returns: StatResult
    """
    s3_path = S3Path(path)
    return s3_path.lstat()