megfile 3.1.0.post2__py3-none-any.whl → 3.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- docs/conf.py +2 -4
- megfile/__init__.py +394 -203
- megfile/cli.py +258 -238
- megfile/config.py +25 -21
- megfile/errors.py +124 -114
- megfile/fs.py +174 -140
- megfile/fs_path.py +462 -354
- megfile/hdfs.py +133 -101
- megfile/hdfs_path.py +290 -236
- megfile/http.py +15 -14
- megfile/http_path.py +111 -107
- megfile/interfaces.py +70 -65
- megfile/lib/base_prefetch_reader.py +94 -69
- megfile/lib/combine_reader.py +13 -12
- megfile/lib/compare.py +17 -13
- megfile/lib/compat.py +1 -5
- megfile/lib/fnmatch.py +29 -30
- megfile/lib/glob.py +54 -55
- megfile/lib/hdfs_prefetch_reader.py +40 -25
- megfile/lib/hdfs_tools.py +1 -3
- megfile/lib/http_prefetch_reader.py +69 -46
- megfile/lib/joinpath.py +5 -5
- megfile/lib/lazy_handler.py +7 -3
- megfile/lib/s3_buffered_writer.py +61 -52
- megfile/lib/s3_cached_handler.py +14 -13
- megfile/lib/s3_limited_seekable_writer.py +38 -28
- megfile/lib/s3_memory_handler.py +35 -29
- megfile/lib/s3_pipe_handler.py +25 -24
- megfile/lib/s3_prefetch_reader.py +71 -52
- megfile/lib/s3_share_cache_reader.py +37 -24
- megfile/lib/shadow_handler.py +8 -3
- megfile/lib/stdio_handler.py +9 -8
- megfile/lib/url.py +3 -3
- megfile/pathlike.py +259 -228
- megfile/s3.py +220 -153
- megfile/s3_path.py +977 -802
- megfile/sftp.py +190 -156
- megfile/sftp_path.py +540 -450
- megfile/smart.py +397 -330
- megfile/smart_path.py +100 -105
- megfile/stdio.py +10 -9
- megfile/stdio_path.py +32 -35
- megfile/utils/__init__.py +75 -54
- megfile/utils/mutex.py +11 -14
- megfile/version.py +1 -1
- {megfile-3.1.0.post2.dist-info → megfile-3.1.2.dist-info}/METADATA +5 -8
- megfile-3.1.2.dist-info/RECORD +55 -0
- {megfile-3.1.0.post2.dist-info → megfile-3.1.2.dist-info}/WHEEL +1 -1
- scripts/convert_results_to_sarif.py +45 -78
- scripts/generate_file.py +140 -64
- megfile-3.1.0.post2.dist-info/RECORD +0 -55
- {megfile-3.1.0.post2.dist-info → megfile-3.1.2.dist-info}/LICENSE +0 -0
- {megfile-3.1.0.post2.dist-info → megfile-3.1.2.dist-info}/LICENSE.pyre +0 -0
- {megfile-3.1.0.post2.dist-info → megfile-3.1.2.dist-info}/entry_points.txt +0 -0
- {megfile-3.1.0.post2.dist-info → megfile-3.1.2.dist-info}/top_level.txt +0 -0
megfile/smart.py
CHANGED
|
@@ -3,108 +3,134 @@ from collections import defaultdict
|
|
|
3
3
|
from functools import partial
|
|
4
4
|
from stat import S_ISDIR as stat_isdir
|
|
5
5
|
from stat import S_ISLNK as stat_islnk
|
|
6
|
-
from typing import
|
|
6
|
+
from typing import (
|
|
7
|
+
IO,
|
|
8
|
+
Any,
|
|
9
|
+
BinaryIO,
|
|
10
|
+
Callable,
|
|
11
|
+
Iterable,
|
|
12
|
+
Iterator,
|
|
13
|
+
List,
|
|
14
|
+
Optional,
|
|
15
|
+
Tuple,
|
|
16
|
+
)
|
|
7
17
|
|
|
8
18
|
from tqdm import tqdm
|
|
9
19
|
|
|
10
20
|
from megfile.errors import S3UnknownError
|
|
11
21
|
from megfile.fs import fs_copy, is_fs
|
|
12
|
-
from megfile.interfaces import
|
|
22
|
+
from megfile.interfaces import (
|
|
23
|
+
Access,
|
|
24
|
+
ContextIterator,
|
|
25
|
+
FileCacher,
|
|
26
|
+
FileEntry,
|
|
27
|
+
NullCacher,
|
|
28
|
+
PathLike,
|
|
29
|
+
StatResult,
|
|
30
|
+
)
|
|
13
31
|
from megfile.lib.combine_reader import CombineReader
|
|
14
32
|
from megfile.lib.compare import get_sync_type, is_same_file
|
|
15
33
|
from megfile.lib.compat import fspath
|
|
16
34
|
from megfile.lib.glob import globlize, ungloblize
|
|
17
|
-
from megfile.s3 import
|
|
35
|
+
from megfile.s3 import (
|
|
36
|
+
is_s3,
|
|
37
|
+
s3_concat,
|
|
38
|
+
s3_copy,
|
|
39
|
+
s3_download,
|
|
40
|
+
s3_load_content,
|
|
41
|
+
s3_open,
|
|
42
|
+
s3_upload,
|
|
43
|
+
)
|
|
18
44
|
from megfile.sftp import sftp_concat, sftp_copy, sftp_download, sftp_upload
|
|
19
45
|
from megfile.smart_path import SmartPath, get_traditional_path
|
|
20
46
|
from megfile.utils import combine, generate_cache_path
|
|
21
47
|
|
|
22
48
|
__all__ = [
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
49
|
+
"smart_access",
|
|
50
|
+
"smart_cache",
|
|
51
|
+
"smart_combine_open",
|
|
52
|
+
"smart_copy",
|
|
53
|
+
"smart_exists",
|
|
54
|
+
"smart_getmtime",
|
|
55
|
+
"smart_getsize",
|
|
56
|
+
"smart_glob_stat",
|
|
57
|
+
"smart_glob",
|
|
58
|
+
"smart_iglob",
|
|
59
|
+
"smart_isdir",
|
|
60
|
+
"smart_isfile",
|
|
61
|
+
"smart_islink",
|
|
62
|
+
"smart_listdir",
|
|
63
|
+
"smart_load_content",
|
|
64
|
+
"smart_save_content",
|
|
65
|
+
"smart_load_from",
|
|
66
|
+
"smart_load_text",
|
|
67
|
+
"smart_save_text",
|
|
68
|
+
"smart_makedirs",
|
|
69
|
+
"smart_open",
|
|
70
|
+
"smart_path_join",
|
|
71
|
+
"smart_remove",
|
|
72
|
+
"smart_move",
|
|
73
|
+
"smart_rename",
|
|
74
|
+
"smart_save_as",
|
|
75
|
+
"smart_scan_stat",
|
|
76
|
+
"smart_scan",
|
|
77
|
+
"smart_scandir",
|
|
78
|
+
"smart_stat",
|
|
79
|
+
"smart_sync",
|
|
80
|
+
"smart_sync_with_progress",
|
|
81
|
+
"smart_touch",
|
|
82
|
+
"smart_unlink",
|
|
83
|
+
"smart_walk",
|
|
84
|
+
"smart_getmd5",
|
|
85
|
+
"smart_realpath",
|
|
86
|
+
"smart_ismount",
|
|
87
|
+
"smart_relpath",
|
|
88
|
+
"smart_abspath",
|
|
89
|
+
"smart_isabs",
|
|
90
|
+
"smart_symlink",
|
|
91
|
+
"smart_readlink",
|
|
92
|
+
"register_copy_func",
|
|
93
|
+
"smart_concat",
|
|
94
|
+
"SmartCacher",
|
|
69
95
|
]
|
|
70
96
|
|
|
71
97
|
|
|
72
98
|
def smart_symlink(src_path: PathLike, dst_path: PathLike) -> None:
|
|
73
|
-
|
|
99
|
+
"""
|
|
74
100
|
Create a symbolic link pointing to src_path named path.
|
|
75
101
|
|
|
76
102
|
:param src_path: Source path
|
|
77
103
|
:param dst_path: Destination path
|
|
78
|
-
|
|
104
|
+
"""
|
|
79
105
|
return SmartPath(src_path).symlink(dst_path)
|
|
80
106
|
|
|
81
107
|
|
|
82
108
|
def smart_readlink(path: PathLike) -> PathLike:
|
|
83
|
-
|
|
109
|
+
"""
|
|
84
110
|
Return a string representing the path to which the symbolic link points.
|
|
85
111
|
:param path: Path to be read
|
|
86
112
|
:returns: Return a string representing the path to which the symbolic link points.
|
|
87
|
-
|
|
113
|
+
"""
|
|
88
114
|
return SmartPath(path).readlink()
|
|
89
115
|
|
|
90
116
|
|
|
91
117
|
def smart_isdir(path: PathLike, followlinks: bool = False) -> bool:
|
|
92
|
-
|
|
118
|
+
"""
|
|
93
119
|
Test if a file path or an s3 url is directory
|
|
94
120
|
|
|
95
121
|
:param path: Path to be tested
|
|
96
122
|
:returns: True if path is directory, else False
|
|
97
|
-
|
|
123
|
+
"""
|
|
98
124
|
return SmartPath(path).is_dir(followlinks=followlinks)
|
|
99
125
|
|
|
100
126
|
|
|
101
127
|
def smart_isfile(path: PathLike, followlinks: bool = False) -> bool:
|
|
102
|
-
|
|
128
|
+
"""
|
|
103
129
|
Test if a file path or an s3 url is file
|
|
104
130
|
|
|
105
131
|
:param path: Path to be tested
|
|
106
132
|
:returns: True if path is file, else False
|
|
107
|
-
|
|
133
|
+
"""
|
|
108
134
|
return SmartPath(path).is_file(followlinks=followlinks)
|
|
109
135
|
|
|
110
136
|
|
|
@@ -113,47 +139,49 @@ def smart_islink(path: PathLike) -> bool:
|
|
|
113
139
|
|
|
114
140
|
|
|
115
141
|
def smart_access(path: PathLike, mode: Access) -> bool:
|
|
116
|
-
|
|
142
|
+
"""
|
|
117
143
|
Test if path has access permission described by mode
|
|
118
144
|
|
|
119
145
|
:param path: Path to be tested
|
|
120
|
-
:param mode: Access mode(Access.READ, Access.WRITE, Access.BUCKETREAD,
|
|
146
|
+
:param mode: Access mode(Access.READ, Access.WRITE, Access.BUCKETREAD,
|
|
147
|
+
Access.BUCKETWRITE)
|
|
121
148
|
:returns: bool, if the path has read/write access.
|
|
122
|
-
|
|
149
|
+
"""
|
|
123
150
|
return SmartPath(path).access(mode)
|
|
124
151
|
|
|
125
152
|
|
|
126
153
|
def smart_exists(path: PathLike, followlinks: bool = False) -> bool:
|
|
127
|
-
|
|
154
|
+
"""
|
|
128
155
|
Test if path or s3_url exists
|
|
129
156
|
|
|
130
157
|
:param path: Path to be tested
|
|
131
158
|
:returns: True if path exists, else False
|
|
132
|
-
|
|
159
|
+
"""
|
|
133
160
|
return SmartPath(path).exists(followlinks=followlinks)
|
|
134
161
|
|
|
135
162
|
|
|
136
163
|
def smart_listdir(path: Optional[PathLike] = None) -> List[str]:
|
|
137
|
-
|
|
138
|
-
Get all contents of given s3_url or file path. The result is in
|
|
164
|
+
"""
|
|
165
|
+
Get all contents of given s3_url or file path. The result is in
|
|
166
|
+
ascending alphabetical order.
|
|
139
167
|
|
|
140
168
|
:param path: Given path
|
|
141
169
|
:returns: All contents of given s3_url or file path in ascending alphabetical order.
|
|
142
170
|
:raises: FileNotFoundError, NotADirectoryError
|
|
143
|
-
|
|
171
|
+
"""
|
|
144
172
|
if path is None:
|
|
145
173
|
return sorted(os.listdir(path))
|
|
146
174
|
return SmartPath(path).listdir()
|
|
147
175
|
|
|
148
176
|
|
|
149
177
|
def smart_scandir(path: Optional[PathLike] = None) -> Iterator[FileEntry]:
|
|
150
|
-
|
|
178
|
+
"""
|
|
151
179
|
Get all content of given s3_url or file path.
|
|
152
180
|
|
|
153
181
|
:param path: Given path
|
|
154
182
|
:returns: An iterator contains all contents have prefix path
|
|
155
183
|
:raises: FileNotFoundError, NotADirectoryError
|
|
156
|
-
|
|
184
|
+
"""
|
|
157
185
|
if path is None:
|
|
158
186
|
|
|
159
187
|
def create_generator():
|
|
@@ -161,7 +189,8 @@ def smart_scandir(path: Optional[PathLike] = None) -> Iterator[FileEntry]:
|
|
|
161
189
|
for entry in entries:
|
|
162
190
|
stat = entry.stat()
|
|
163
191
|
yield FileEntry(
|
|
164
|
-
entry.name,
|
|
192
|
+
entry.name,
|
|
193
|
+
entry.path,
|
|
165
194
|
StatResult(
|
|
166
195
|
size=stat.st_size,
|
|
167
196
|
ctime=stat.st_ctime,
|
|
@@ -169,89 +198,87 @@ def smart_scandir(path: Optional[PathLike] = None) -> Iterator[FileEntry]:
|
|
|
169
198
|
isdir=stat_isdir(stat.st_mode),
|
|
170
199
|
islnk=stat_islnk(stat.st_mode),
|
|
171
200
|
extra=stat,
|
|
172
|
-
)
|
|
201
|
+
),
|
|
202
|
+
)
|
|
173
203
|
|
|
174
204
|
return ContextIterator(create_generator())
|
|
175
205
|
return SmartPath(path).scandir()
|
|
176
206
|
|
|
177
207
|
|
|
178
208
|
def smart_getsize(path: PathLike) -> int:
|
|
179
|
-
|
|
209
|
+
"""
|
|
180
210
|
Get file size on the given s3_url or file path (in bytes).
|
|
181
|
-
|
|
182
|
-
|
|
211
|
+
|
|
212
|
+
If the path in a directory, return the sum of all file size in it, including file
|
|
213
|
+
in subdirectories (if exist).
|
|
214
|
+
|
|
215
|
+
The result excludes the size of directory itself. In other words, return 0 Byte on
|
|
216
|
+
an empty directory path.
|
|
183
217
|
|
|
184
218
|
:param path: Given path
|
|
185
219
|
:returns: File size
|
|
186
220
|
:raises: FileNotFoundError
|
|
187
|
-
|
|
221
|
+
"""
|
|
188
222
|
return SmartPath(path).getsize()
|
|
189
223
|
|
|
190
224
|
|
|
191
225
|
def smart_getmtime(path: PathLike) -> float:
|
|
192
|
-
|
|
193
|
-
Get last-modified time of the file on the given s3_url or file path (in Unix
|
|
194
|
-
|
|
226
|
+
"""
|
|
227
|
+
Get last-modified time of the file on the given s3_url or file path (in Unix
|
|
228
|
+
timestamp format).
|
|
229
|
+
|
|
230
|
+
If the path is an existent directory, return the latest modified time of
|
|
231
|
+
all file in it. The mtime of empty directory is 1970-01-01 00:00:00
|
|
195
232
|
|
|
196
233
|
:param path: Given path
|
|
197
234
|
:returns: Last-modified time
|
|
198
235
|
:raises: FileNotFoundError
|
|
199
|
-
|
|
236
|
+
"""
|
|
200
237
|
return SmartPath(path).getmtime()
|
|
201
238
|
|
|
202
239
|
|
|
203
240
|
def smart_stat(path: PathLike, follow_symlinks=True) -> StatResult:
|
|
204
|
-
|
|
241
|
+
"""
|
|
205
242
|
Get StatResult of s3_url or file path
|
|
206
243
|
|
|
207
244
|
:param path: Given path
|
|
208
245
|
:returns: StatResult
|
|
209
246
|
:raises: FileNotFoundError
|
|
210
|
-
|
|
247
|
+
"""
|
|
211
248
|
return SmartPath(path).stat(follow_symlinks=follow_symlinks)
|
|
212
249
|
|
|
213
250
|
|
|
214
251
|
def smart_lstat(path: PathLike) -> StatResult:
|
|
215
|
-
|
|
252
|
+
"""
|
|
216
253
|
Get StatResult of path but do not follow symbolic links
|
|
217
254
|
|
|
218
255
|
:param path: Given path
|
|
219
256
|
:returns: StatResult
|
|
220
257
|
:raises: FileNotFoundError
|
|
221
|
-
|
|
258
|
+
"""
|
|
222
259
|
return SmartPath(path).lstat()
|
|
223
260
|
|
|
224
261
|
|
|
225
262
|
_copy_funcs = {
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
},
|
|
230
|
-
'file': {
|
|
231
|
-
's3': s3_upload,
|
|
232
|
-
'file': fs_copy,
|
|
233
|
-
'sftp': sftp_upload,
|
|
234
|
-
},
|
|
235
|
-
'sftp': {
|
|
236
|
-
'file': sftp_download,
|
|
237
|
-
'sftp': sftp_copy,
|
|
238
|
-
},
|
|
263
|
+
"s3": {"s3": s3_copy, "file": s3_download},
|
|
264
|
+
"file": {"s3": s3_upload, "file": fs_copy, "sftp": sftp_upload},
|
|
265
|
+
"sftp": {"file": sftp_download, "sftp": sftp_copy},
|
|
239
266
|
}
|
|
240
267
|
|
|
241
268
|
|
|
242
269
|
def register_copy_func(
|
|
243
|
-
src_protocol: str,
|
|
244
|
-
dst_protocol: str,
|
|
245
|
-
copy_func: Optional[Callable] = None,
|
|
270
|
+
src_protocol: str, dst_protocol: str, copy_func: Optional[Callable] = None
|
|
246
271
|
) -> None:
|
|
247
|
-
|
|
248
|
-
Used to register copy func between protocols,
|
|
272
|
+
"""
|
|
273
|
+
Used to register copy func between protocols,
|
|
274
|
+
and do not allow duplicate registration
|
|
249
275
|
|
|
250
276
|
:param src_protocol: protocol name of source file, e.g. 's3'
|
|
251
277
|
:param dst_protocol: protocol name of destination file, e.g. 's3'
|
|
252
|
-
:param copy_func: copy func, its type is:
|
|
253
|
-
Callable[[str, str, Optional[Callable[[int], None]], Optional[bool],
|
|
254
|
-
|
|
278
|
+
:param copy_func: copy func, its type is:
|
|
279
|
+
Callable[[str, str, Optional[Callable[[int], None]], Optional[bool],
|
|
280
|
+
Optional[bool]], None]
|
|
281
|
+
"""
|
|
255
282
|
try:
|
|
256
283
|
_copy_funcs[src_protocol][dst_protocol]
|
|
257
284
|
except KeyError:
|
|
@@ -262,21 +289,24 @@ def register_copy_func(
|
|
|
262
289
|
raise error
|
|
263
290
|
else:
|
|
264
291
|
raise ValueError(
|
|
265
|
-
|
|
266
|
-
src_protocol, dst_protocol
|
|
292
|
+
"Copy Function has already existed: {}->{}".format(
|
|
293
|
+
src_protocol, dst_protocol
|
|
294
|
+
)
|
|
295
|
+
)
|
|
267
296
|
|
|
268
297
|
|
|
269
298
|
def _default_copy_func(
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
299
|
+
src_path: PathLike,
|
|
300
|
+
dst_path: PathLike,
|
|
301
|
+
callback: Optional[Callable[[int], None]] = None,
|
|
302
|
+
followlinks: bool = False,
|
|
303
|
+
overwrite: bool = True,
|
|
304
|
+
) -> None:
|
|
275
305
|
if not overwrite and smart_exists(dst_path):
|
|
276
306
|
return
|
|
277
307
|
|
|
278
|
-
with smart_open(src_path,
|
|
279
|
-
with smart_open(dst_path,
|
|
308
|
+
with smart_open(src_path, "rb", followlinks=followlinks) as fsrc:
|
|
309
|
+
with smart_open(dst_path, "wb") as fdst:
|
|
280
310
|
# This magic number is copied from copyfileobj
|
|
281
311
|
length = 16 * 1024
|
|
282
312
|
while True:
|
|
@@ -296,12 +326,13 @@ def _default_copy_func(
|
|
|
296
326
|
|
|
297
327
|
|
|
298
328
|
def smart_copy(
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
329
|
+
src_path: PathLike,
|
|
330
|
+
dst_path: PathLike,
|
|
331
|
+
callback: Optional[Callable[[int], None]] = None,
|
|
332
|
+
followlinks: bool = False,
|
|
333
|
+
overwrite: bool = True,
|
|
334
|
+
) -> None:
|
|
335
|
+
"""
|
|
305
336
|
Copy file from source path to destination path
|
|
306
337
|
|
|
307
338
|
Here are a few examples: ::
|
|
@@ -317,15 +348,20 @@ def smart_copy(
|
|
|
317
348
|
...
|
|
318
349
|
>>> src_path = 'test.png'
|
|
319
350
|
>>> dst_path = 'test1.png'
|
|
320
|
-
>>> smart_copy(
|
|
351
|
+
>>> smart_copy(
|
|
352
|
+
... src_path,
|
|
353
|
+
... dst_path,
|
|
354
|
+
... callback=Bar(total=smart_stat(src_path).size), followlinks=False
|
|
355
|
+
... )
|
|
321
356
|
856960it [00:00, 260592384.24it/s]
|
|
322
357
|
|
|
323
358
|
:param src_path: Given source path
|
|
324
359
|
:param dst_path: Given destination path
|
|
325
|
-
:param callback: Called periodically during copy, and the input parameter is the
|
|
360
|
+
:param callback: Called periodically during copy, and the input parameter is the
|
|
361
|
+
data size (in bytes) of copy since the last call
|
|
326
362
|
:param followlinks: False if regard symlink as file, else True
|
|
327
363
|
:param overwrite: whether or not overwrite file when exists, default is True
|
|
328
|
-
|
|
364
|
+
"""
|
|
329
365
|
# this function contains plenty of manual polymorphism
|
|
330
366
|
if smart_islink(src_path) and is_s3(dst_path) and not followlinks:
|
|
331
367
|
return
|
|
@@ -343,35 +379,38 @@ def smart_copy(
|
|
|
343
379
|
dst_path,
|
|
344
380
|
callback=callback,
|
|
345
381
|
followlinks=followlinks,
|
|
346
|
-
overwrite=overwrite
|
|
382
|
+
overwrite=overwrite,
|
|
383
|
+
)
|
|
347
384
|
except S3UnknownError as e:
|
|
348
|
-
if
|
|
385
|
+
if "cannot schedule new futures after interpreter shutdown" in str(e):
|
|
349
386
|
_default_copy_func(
|
|
350
387
|
src_path,
|
|
351
388
|
dst_path,
|
|
352
389
|
callback=callback,
|
|
353
390
|
followlinks=followlinks,
|
|
354
|
-
overwrite=overwrite
|
|
391
|
+
overwrite=overwrite,
|
|
392
|
+
)
|
|
355
393
|
else:
|
|
356
394
|
raise
|
|
357
395
|
|
|
358
396
|
|
|
359
397
|
def _smart_sync_single_file(items: dict):
|
|
360
|
-
src_root_path = items[
|
|
361
|
-
dst_root_path = items[
|
|
362
|
-
src_file_path = items[
|
|
363
|
-
callback = items[
|
|
364
|
-
followlinks = items[
|
|
365
|
-
callback_after_copy_file = items[
|
|
366
|
-
force = items[
|
|
367
|
-
overwrite = items[
|
|
398
|
+
src_root_path = items["src_root_path"]
|
|
399
|
+
dst_root_path = items["dst_root_path"]
|
|
400
|
+
src_file_path = items["src_file_path"]
|
|
401
|
+
callback = items["callback"]
|
|
402
|
+
followlinks = items["followlinks"]
|
|
403
|
+
callback_after_copy_file = items["callback_after_copy_file"]
|
|
404
|
+
force = items["force"]
|
|
405
|
+
overwrite = items["overwrite"]
|
|
368
406
|
|
|
369
407
|
content_path = os.path.relpath(src_file_path, start=src_root_path)
|
|
370
|
-
if len(content_path) and content_path !=
|
|
371
|
-
content_path = content_path.lstrip(
|
|
408
|
+
if len(content_path) and content_path != ".":
|
|
409
|
+
content_path = content_path.lstrip("/")
|
|
372
410
|
dst_abs_file_path = smart_path_join(dst_root_path, content_path)
|
|
373
411
|
else:
|
|
374
|
-
# if content_path is empty, which means smart_isfile(src_path) is True,
|
|
412
|
+
# if content_path is empty, which means smart_isfile(src_path) is True,
|
|
413
|
+
# this function is equal to smart_copy
|
|
375
414
|
dst_abs_file_path = dst_root_path
|
|
376
415
|
|
|
377
416
|
src_protocol, _ = SmartPath._extract_protocol(src_file_path)
|
|
@@ -383,9 +422,10 @@ def _smart_sync_single_file(items: dict):
|
|
|
383
422
|
elif not overwrite and smart_exists(dst_abs_file_path):
|
|
384
423
|
should_sync = False
|
|
385
424
|
elif smart_exists(dst_abs_file_path) and is_same_file(
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
425
|
+
smart_stat(src_file_path, follow_symlinks=followlinks),
|
|
426
|
+
smart_stat(dst_abs_file_path, follow_symlinks=followlinks),
|
|
427
|
+
get_sync_type(src_protocol, dst_protocol),
|
|
428
|
+
):
|
|
389
429
|
should_sync = False
|
|
390
430
|
except NotImplementedError:
|
|
391
431
|
pass
|
|
@@ -396,30 +436,32 @@ def _smart_sync_single_file(items: dict):
|
|
|
396
436
|
src_file_path,
|
|
397
437
|
dst_abs_file_path,
|
|
398
438
|
callback=copy_callback,
|
|
399
|
-
followlinks=followlinks
|
|
439
|
+
followlinks=followlinks,
|
|
440
|
+
)
|
|
400
441
|
if callback_after_copy_file:
|
|
401
442
|
callback_after_copy_file(src_file_path, dst_abs_file_path)
|
|
402
443
|
return should_sync
|
|
403
444
|
|
|
404
445
|
|
|
405
446
|
def smart_sync(
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
447
|
+
src_path: PathLike,
|
|
448
|
+
dst_path: PathLike,
|
|
449
|
+
callback: Optional[Callable[[str, int], None]] = None,
|
|
450
|
+
followlinks: bool = False,
|
|
451
|
+
callback_after_copy_file: Optional[Callable[[str, str], None]] = None,
|
|
452
|
+
src_file_stats: Optional[Iterable[FileEntry]] = None,
|
|
453
|
+
map_func: Callable[[Callable, Iterable], Any] = map,
|
|
454
|
+
force: bool = False,
|
|
455
|
+
overwrite: bool = True,
|
|
456
|
+
) -> None:
|
|
457
|
+
"""
|
|
416
458
|
Sync file or directory
|
|
417
459
|
|
|
418
460
|
.. note ::
|
|
419
461
|
|
|
420
462
|
When the parameter is file, this function bahaves like ``smart_copy``.
|
|
421
463
|
|
|
422
|
-
If file and directory of same name and same level, sync consider it's file first
|
|
464
|
+
If file and directory of same name and same level, sync consider it's file first
|
|
423
465
|
|
|
424
466
|
Here are a few examples: ::
|
|
425
467
|
|
|
@@ -437,7 +479,8 @@ def smart_sync(
|
|
|
437
479
|
... with self._lock:
|
|
438
480
|
... if path != self._now:
|
|
439
481
|
... self._file_index += 1
|
|
440
|
-
... print("copy file {}/{}:".format(self._file_index,
|
|
482
|
+
... print("copy file {}/{}:".format(self._file_index,
|
|
483
|
+
... self._total_file))
|
|
441
484
|
... if self._bar:
|
|
442
485
|
... self._bar.close()
|
|
443
486
|
... self._bar = tqdm(total=smart_stat(path).size)
|
|
@@ -448,22 +491,26 @@ def smart_sync(
|
|
|
448
491
|
|
|
449
492
|
:param src_path: Given source path
|
|
450
493
|
:param dst_path: Given destination path
|
|
451
|
-
:param callback: Called periodically during copy, and the input parameter is
|
|
494
|
+
:param callback: Called periodically during copy, and the input parameter is
|
|
495
|
+
the data size (in bytes) of copy since the last call
|
|
452
496
|
:param followlinks: False if regard symlink as file, else True
|
|
453
|
-
:param callback_after_copy_file: Called after copy success, and the input parameter
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
:param
|
|
497
|
+
:param callback_after_copy_file: Called after copy success, and the input parameter
|
|
498
|
+
is src file path and dst file path
|
|
499
|
+
:param src_file_stats: If this parameter is not None, only this parameter's files
|
|
500
|
+
will be synced,and src_path is the root_path of these files used to calculate
|
|
501
|
+
the path of the target file. This parameter is in order to reduce file traversal
|
|
502
|
+
times.
|
|
503
|
+
:param map_func: A Callable func like `map`. You can use ThreadPoolExecutor.map,
|
|
504
|
+
Pool.map and so on if you need concurrent capability. default is standard
|
|
505
|
+
library `map`.
|
|
506
|
+
:param force: Sync file forcible, do not ignore same files, priority is higher than
|
|
507
|
+
'overwrite', default is False
|
|
460
508
|
:param overwrite: whether or not overwrite file when exists, default is True
|
|
461
|
-
|
|
509
|
+
"""
|
|
462
510
|
if not smart_exists(src_path):
|
|
463
|
-
raise FileNotFoundError(f
|
|
511
|
+
raise FileNotFoundError(f"No match file: {src_path}")
|
|
464
512
|
|
|
465
|
-
src_path, dst_path = get_traditional_path(src_path), get_traditional_path(
|
|
466
|
-
dst_path)
|
|
513
|
+
src_path, dst_path = get_traditional_path(src_path), get_traditional_path(dst_path)
|
|
467
514
|
if not src_file_stats:
|
|
468
515
|
src_file_stats = smart_scan_stat(src_path, followlinks=followlinks)
|
|
469
516
|
|
|
@@ -487,37 +534,42 @@ def smart_sync(
|
|
|
487
534
|
|
|
488
535
|
|
|
489
536
|
def smart_sync_with_progress(
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
537
|
+
src_path,
|
|
538
|
+
dst_path,
|
|
539
|
+
callback: Optional[Callable[[str, int], None]] = None,
|
|
540
|
+
followlinks: bool = False,
|
|
541
|
+
map_func: Callable[[Callable, Iterable], Iterator] = map,
|
|
542
|
+
force: bool = False,
|
|
543
|
+
overwrite: bool = True,
|
|
544
|
+
):
|
|
545
|
+
"""
|
|
498
546
|
Sync file or directory with progress bar
|
|
499
547
|
|
|
500
548
|
:param src_path: Given source path
|
|
501
549
|
:param dst_path: Given destination path
|
|
502
|
-
:param callback: Called periodically during copy, and the input parameter is
|
|
550
|
+
:param callback: Called periodically during copy, and the input parameter is
|
|
551
|
+
the data size (in bytes) of copy since the last call
|
|
503
552
|
:param followlinks: False if regard symlink as file, else True
|
|
504
|
-
:param callback_after_copy_file: Called after copy success, and the input parameter
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
:param
|
|
553
|
+
:param callback_after_copy_file: Called after copy success, and the input parameter
|
|
554
|
+
is src file path and dst file path
|
|
555
|
+
:param src_file_stats: If this parameter is not None, only this parameter's files
|
|
556
|
+
will be synced, and src_path is the root_path of these files used to calculate
|
|
557
|
+
the path of the target file. This parameter is in order to reduce file traversal
|
|
558
|
+
times.
|
|
559
|
+
:param map_func: A Callable func like `map`. You can use ThreadPoolExecutor.map,
|
|
560
|
+
Pool.map and so on if you need concurrent capability. default is standard
|
|
561
|
+
library `map`.
|
|
562
|
+
:param force: Sync file forcible, do not ignore same files, priority is higher than
|
|
563
|
+
'overwrite', default is False
|
|
511
564
|
:param overwrite: whether or not overwrite file when exists, default is True
|
|
512
|
-
|
|
565
|
+
"""
|
|
513
566
|
if not smart_exists(src_path):
|
|
514
|
-
raise FileNotFoundError(f
|
|
567
|
+
raise FileNotFoundError(f"No match file: {src_path}")
|
|
515
568
|
|
|
516
|
-
src_path, dst_path = get_traditional_path(src_path), get_traditional_path(
|
|
517
|
-
dst_path)
|
|
569
|
+
src_path, dst_path = get_traditional_path(src_path), get_traditional_path(dst_path)
|
|
518
570
|
file_stats = list(smart_scan_stat(src_path, followlinks=followlinks))
|
|
519
571
|
tbar = tqdm(total=len(file_stats), ascii=True)
|
|
520
|
-
sbar = tqdm(unit=
|
|
572
|
+
sbar = tqdm(unit="B", ascii=True, unit_scale=True, unit_divisor=1024)
|
|
521
573
|
|
|
522
574
|
def tqdm_callback(current_src_path, length: int):
|
|
523
575
|
sbar.update(length)
|
|
@@ -543,27 +595,30 @@ def smart_sync_with_progress(
|
|
|
543
595
|
|
|
544
596
|
|
|
545
597
|
def smart_remove(path: PathLike, missing_ok: bool = False) -> None:
|
|
546
|
-
|
|
547
|
-
Remove the file or directory on s3 or fs, `s3://` and `s3://bucket` are
|
|
598
|
+
"""
|
|
599
|
+
Remove the file or directory on s3 or fs, `s3://` and `s3://bucket` are
|
|
600
|
+
not permitted to remove
|
|
548
601
|
|
|
549
602
|
:param path: Given path
|
|
550
|
-
:param missing_ok: if False and target file/directory not exists,
|
|
603
|
+
:param missing_ok: if False and target file/directory not exists,
|
|
604
|
+
raise FileNotFoundError
|
|
551
605
|
:raises: PermissionError, FileNotFoundError
|
|
552
|
-
|
|
606
|
+
"""
|
|
553
607
|
SmartPath(path).remove(missing_ok=missing_ok)
|
|
554
608
|
|
|
555
609
|
|
|
556
610
|
def smart_rename(
|
|
557
|
-
|
|
558
|
-
|
|
611
|
+
src_path: PathLike, dst_path: PathLike, overwrite: bool = True
|
|
612
|
+
) -> None:
|
|
613
|
+
"""
|
|
559
614
|
Move file on s3 or fs. `s3://` or `s3://bucket` is not allowed to move
|
|
560
615
|
|
|
561
616
|
:param src_path: Given source path
|
|
562
617
|
:param dst_path: Given destination path
|
|
563
618
|
:param overwrite: whether or not overwrite file when exists
|
|
564
|
-
|
|
619
|
+
"""
|
|
565
620
|
if smart_isdir(src_path):
|
|
566
|
-
raise IsADirectoryError(
|
|
621
|
+
raise IsADirectoryError("%r is a directory" % src_path)
|
|
567
622
|
src_protocol, _ = SmartPath._extract_protocol(src_path)
|
|
568
623
|
dst_protocol, _ = SmartPath._extract_protocol(dst_path)
|
|
569
624
|
if src_protocol == dst_protocol:
|
|
@@ -573,15 +628,14 @@ def smart_rename(
|
|
|
573
628
|
smart_unlink(src_path)
|
|
574
629
|
|
|
575
630
|
|
|
576
|
-
def smart_move(
|
|
577
|
-
|
|
578
|
-
'''
|
|
631
|
+
def smart_move(src_path: PathLike, dst_path: PathLike, overwrite: bool = True) -> None:
|
|
632
|
+
"""
|
|
579
633
|
Move file/directory on s3 or fs. `s3://` or `s3://bucket` is not allowed to move
|
|
580
634
|
|
|
581
635
|
:param src_path: Given source path
|
|
582
636
|
:param dst_path: Given destination path
|
|
583
637
|
:param overwrite: whether or not overwrite file when exists
|
|
584
|
-
|
|
638
|
+
"""
|
|
585
639
|
src_protocol, _ = SmartPath._extract_protocol(src_path)
|
|
586
640
|
dst_protocol, _ = SmartPath._extract_protocol(dst_path)
|
|
587
641
|
if src_protocol == dst_protocol:
|
|
@@ -592,41 +646,44 @@ def smart_move(
|
|
|
592
646
|
|
|
593
647
|
|
|
594
648
|
def smart_unlink(path: PathLike, missing_ok: bool = False) -> None:
|
|
595
|
-
|
|
649
|
+
"""
|
|
596
650
|
Remove the file on s3 or fs
|
|
597
651
|
|
|
598
652
|
:param path: Given path
|
|
599
653
|
:param missing_ok: if False and target file not exists, raise FileNotFoundError
|
|
600
654
|
:raises: PermissionError, FileNotFoundError, IsADirectoryError
|
|
601
|
-
|
|
655
|
+
"""
|
|
602
656
|
SmartPath(path).unlink(missing_ok=missing_ok)
|
|
603
657
|
|
|
604
658
|
|
|
605
659
|
def smart_makedirs(path: PathLike, exist_ok: bool = False) -> None:
|
|
606
|
-
|
|
660
|
+
"""
|
|
607
661
|
Create a directory if is on fs.
|
|
608
662
|
If on s3, it actually check if target exists, and check if bucket has WRITE access
|
|
609
663
|
|
|
610
664
|
:param path: Given path
|
|
611
665
|
:param missing_ok: if False and target directory not exists, raise FileNotFoundError
|
|
612
666
|
:raises: PermissionError, FileExistsError
|
|
613
|
-
|
|
667
|
+
"""
|
|
614
668
|
SmartPath(path).makedirs(exist_ok)
|
|
615
669
|
|
|
616
670
|
|
|
617
671
|
def smart_open(
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
672
|
+
path: PathLike,
|
|
673
|
+
mode: str = "r",
|
|
674
|
+
s3_open_func: Callable[[str, str], BinaryIO] = s3_open,
|
|
675
|
+
encoding: Optional[str] = None,
|
|
676
|
+
errors: Optional[str] = None,
|
|
677
|
+
**options,
|
|
678
|
+
) -> IO:
|
|
679
|
+
r"""
|
|
625
680
|
Open a file on the path
|
|
626
681
|
|
|
627
682
|
.. note ::
|
|
628
683
|
|
|
629
|
-
On fs, the difference between this function and ``io.open`` is that
|
|
684
|
+
On fs, the difference between this function and ``io.open`` is that
|
|
685
|
+
this function create directories automatically, instead of
|
|
686
|
+
raising FileNotFoundError
|
|
630
687
|
|
|
631
688
|
Currently, supported protocols are:
|
|
632
689
|
|
|
@@ -642,28 +699,35 @@ def smart_open(
|
|
|
642
699
|
|
|
643
700
|
>>> import cv2
|
|
644
701
|
>>> import numpy as np
|
|
645
|
-
>>> raw = smart_open(
|
|
646
|
-
|
|
702
|
+
>>> raw = smart_open(
|
|
703
|
+
... 'https://ss2.bdstatic.com/70cFvnSh_Q1YnxGkpoWK1HF6hhy'
|
|
704
|
+
... '/it/u=2275743969,3715493841&fm=26&gp=0.jpg'
|
|
705
|
+
... ).read()
|
|
706
|
+
>>> img = cv2.imdecode(np.frombuffer(raw, np.uint8),
|
|
707
|
+
... cv2.IMREAD_ANYDEPTH | cv2.IMREAD_COLOR)
|
|
647
708
|
|
|
648
709
|
:param path: Given path
|
|
649
710
|
:param mode: Mode to open file, supports r'[rwa][tb]?\+?'
|
|
650
|
-
:param s3_open_func: Function used to open s3_url. Require the function includes 2
|
|
651
|
-
|
|
652
|
-
:param
|
|
711
|
+
:param s3_open_func: Function used to open s3_url. Require the function includes 2
|
|
712
|
+
necessary parameters, file path and mode
|
|
713
|
+
:param encoding: encoding is the name of the encoding used to decode or encode
|
|
714
|
+
the file. This should only be used in text mode.
|
|
715
|
+
:param errors: errors is an optional string that specifies how encoding and decoding
|
|
716
|
+
errors are to be handled—this cannot be used in binary mode.
|
|
653
717
|
:returns: File-Like object
|
|
654
718
|
:raises: FileNotFoundError, IsADirectoryError, ValueError
|
|
655
|
-
|
|
719
|
+
"""
|
|
656
720
|
options = {
|
|
657
|
-
|
|
658
|
-
|
|
659
|
-
|
|
721
|
+
"s3_open_func": s3_open_func,
|
|
722
|
+
"encoding": encoding,
|
|
723
|
+
"errors": errors,
|
|
660
724
|
**options,
|
|
661
725
|
}
|
|
662
726
|
return SmartPath(path).open(mode, **options)
|
|
663
727
|
|
|
664
728
|
|
|
665
729
|
def smart_path_join(path: PathLike, *other_paths: PathLike) -> str:
|
|
666
|
-
|
|
730
|
+
"""
|
|
667
731
|
Concat 2 or more path to a complete path
|
|
668
732
|
|
|
669
733
|
:param path: Given path
|
|
@@ -672,26 +736,33 @@ def smart_path_join(path: PathLike, *other_paths: PathLike) -> str:
|
|
|
672
736
|
|
|
673
737
|
.. note ::
|
|
674
738
|
|
|
675
|
-
For URI, the difference between this function and ``os.path.join`` is that this
|
|
676
|
-
|
|
739
|
+
For URI, the difference between this function and ``os.path.join`` is that this
|
|
740
|
+
function ignores left side slash (which indicates absolute path) in
|
|
741
|
+
``other_paths`` and will directly concat.
|
|
742
|
+
|
|
743
|
+
e.g. os.path.join('s3://path', 'to', '/file') => '/file', and
|
|
744
|
+
smart_path_join('s3://path', 'to', '/file') => '/path/to/file'
|
|
745
|
+
|
|
677
746
|
But for fs path, this function behaves exactly like ``os.path.join``
|
|
747
|
+
|
|
678
748
|
e.g. smart_path_join('/path', 'to', '/file') => '/file'
|
|
679
|
-
|
|
749
|
+
"""
|
|
680
750
|
return fspath(SmartPath(path).joinpath(*other_paths))
|
|
681
751
|
|
|
682
752
|
|
|
683
753
|
def smart_walk(
|
|
684
|
-
|
|
685
|
-
followlinks: bool = False
|
|
754
|
+
path: PathLike, followlinks: bool = False
|
|
686
755
|
) -> Iterator[Tuple[str, List[str], List[str]]]:
|
|
687
|
-
|
|
756
|
+
"""
|
|
688
757
|
Generate the file names in a directory tree by walking the tree top-down.
|
|
689
758
|
For each directory in the tree rooted at directory path (including path itself),
|
|
690
759
|
it yields a 3-tuple (root, dirs, files).
|
|
691
760
|
|
|
692
|
-
root: a string of current path
|
|
693
|
-
dirs: name list of subdirectories (excluding '.' and '..' if they exist) in 'root'
|
|
694
|
-
|
|
761
|
+
- root: a string of current path
|
|
762
|
+
- dirs: name list of subdirectories (excluding '.' and '..' if they exist) in 'root'
|
|
763
|
+
The list is sorted by ascending alphabetical order
|
|
764
|
+
- files: name list of non-directory files (link is regarded as file) in 'root'.
|
|
765
|
+
The list is sorted by ascending alphabetical order
|
|
695
766
|
|
|
696
767
|
If path not exists, return an empty generator
|
|
697
768
|
If path is a file, return an empty generator
|
|
@@ -700,15 +771,14 @@ def smart_walk(
|
|
|
700
771
|
:param path: Given path
|
|
701
772
|
:raises: UnsupportedError
|
|
702
773
|
:returns: A 3-tuple generator
|
|
703
|
-
|
|
774
|
+
"""
|
|
704
775
|
return SmartPath(path).walk(followlinks=followlinks)
|
|
705
776
|
|
|
706
777
|
|
|
707
778
|
def smart_scan(
|
|
708
|
-
|
|
709
|
-
|
|
710
|
-
|
|
711
|
-
'''
|
|
779
|
+
path: PathLike, missing_ok: bool = True, followlinks: bool = False
|
|
780
|
+
) -> Iterator[str]:
|
|
781
|
+
"""
|
|
712
782
|
Iteratively traverse only files in given directory, in alphabetical order.
|
|
713
783
|
Every iteration on generator yields a path string.
|
|
714
784
|
|
|
@@ -717,37 +787,37 @@ def smart_scan(
|
|
|
717
787
|
If path is a bucket path, return all file paths in the bucket
|
|
718
788
|
|
|
719
789
|
:param path: Given path
|
|
720
|
-
:param missing_ok: If False and there's no file in the directory,
|
|
790
|
+
:param missing_ok: If False and there's no file in the directory,
|
|
791
|
+
raise FileNotFoundError
|
|
721
792
|
:raises: UnsupportedError
|
|
722
793
|
:returns: A file path generator
|
|
723
|
-
|
|
794
|
+
"""
|
|
724
795
|
return SmartPath(path).scan(missing_ok=missing_ok, followlinks=followlinks)
|
|
725
796
|
|
|
726
797
|
|
|
727
798
|
def smart_scan_stat(
|
|
728
|
-
|
|
729
|
-
|
|
730
|
-
|
|
731
|
-
'''
|
|
799
|
+
path: PathLike, missing_ok: bool = True, followlinks: bool = False
|
|
800
|
+
) -> Iterator[FileEntry]:
|
|
801
|
+
"""
|
|
732
802
|
Iteratively traverse only files in given directory, in alphabetical order.
|
|
733
803
|
Every iteration on generator yields a tuple of path string and file stat
|
|
734
804
|
|
|
735
805
|
:param path: Given path
|
|
736
|
-
:param missing_ok: If False and there's no file in the directory,
|
|
806
|
+
:param missing_ok: If False and there's no file in the directory,
|
|
807
|
+
raise FileNotFoundError
|
|
737
808
|
:raises: UnsupportedError
|
|
738
809
|
:returns: A file path generator
|
|
739
|
-
|
|
740
|
-
return SmartPath(path).scan_stat(
|
|
741
|
-
missing_ok=missing_ok, followlinks=followlinks)
|
|
810
|
+
"""
|
|
811
|
+
return SmartPath(path).scan_stat(missing_ok=missing_ok, followlinks=followlinks)
|
|
742
812
|
|
|
743
813
|
|
|
744
814
|
def _group_glob(globstr: PathLike) -> List[str]:
|
|
745
|
-
|
|
815
|
+
"""
|
|
746
816
|
Split pathname, and group them by protocol, return the glob list of same group.
|
|
747
817
|
|
|
748
818
|
:param globstr: A glob string
|
|
749
819
|
:returns: A glob list after being grouped by protocol
|
|
750
|
-
|
|
820
|
+
"""
|
|
751
821
|
globstr = fspath(globstr)
|
|
752
822
|
glob_dict = defaultdict(list)
|
|
753
823
|
expanded_glob = ungloblize(globstr)
|
|
@@ -764,165 +834,167 @@ def _group_glob(globstr: PathLike) -> List[str]:
|
|
|
764
834
|
|
|
765
835
|
|
|
766
836
|
def smart_glob(
|
|
767
|
-
|
|
768
|
-
|
|
769
|
-
|
|
770
|
-
|
|
771
|
-
|
|
837
|
+
pathname: PathLike, recursive: bool = True, missing_ok: bool = True
|
|
838
|
+
) -> List[str]:
|
|
839
|
+
"""
|
|
840
|
+
Given pathname may contain shell wildcard characters, return path list in ascending
|
|
841
|
+
alphabetical order, in which path matches glob pattern
|
|
772
842
|
|
|
773
843
|
:param pathname: A path pattern may contain shell wildcard characters
|
|
774
844
|
:param recursive: If False, this function will not glob recursively
|
|
775
|
-
:param missing_ok: If False and target path doesn't match any file,
|
|
776
|
-
|
|
845
|
+
:param missing_ok: If False and target path doesn't match any file,
|
|
846
|
+
raise FileNotFoundError
|
|
847
|
+
"""
|
|
777
848
|
# Split pathname, group by protocol, call glob respectively
|
|
778
849
|
# SmartPath(pathname).glob(recursive, missing_ok)
|
|
779
850
|
result = []
|
|
780
851
|
group_glob_list = _group_glob(pathname)
|
|
781
852
|
for glob_path in group_glob_list:
|
|
782
|
-
for path_obj in SmartPath(glob_path).glob(
|
|
783
|
-
|
|
784
|
-
|
|
853
|
+
for path_obj in SmartPath(glob_path).glob(
|
|
854
|
+
pattern="", recursive=recursive, missing_ok=missing_ok
|
|
855
|
+
):
|
|
785
856
|
result.append(path_obj.path)
|
|
786
857
|
return result
|
|
787
858
|
|
|
788
859
|
|
|
789
860
|
def smart_iglob(
|
|
790
|
-
|
|
791
|
-
|
|
792
|
-
|
|
793
|
-
|
|
794
|
-
|
|
861
|
+
pathname: PathLike, recursive: bool = True, missing_ok: bool = True
|
|
862
|
+
) -> Iterator[str]:
|
|
863
|
+
"""
|
|
864
|
+
Given pathname may contain shell wildcard characters, return path iterator in
|
|
865
|
+
ascending alphabetical order, in which path matches glob pattern
|
|
795
866
|
|
|
796
867
|
:param pathname: A path pattern may contain shell wildcard characters
|
|
797
868
|
:param recursive: If False, this function will not glob recursively
|
|
798
|
-
:param missing_ok: If False and target path doesn't match any file,
|
|
799
|
-
|
|
869
|
+
:param missing_ok: If False and target path doesn't match any file,
|
|
870
|
+
raise FileNotFoundError
|
|
871
|
+
"""
|
|
800
872
|
# Split pathname, group by protocol, call glob respectively
|
|
801
873
|
# SmartPath(pathname).glob(recursive, missing_ok)
|
|
802
874
|
group_glob_list = _group_glob(pathname)
|
|
803
875
|
for glob_path in group_glob_list:
|
|
804
|
-
for path_obj in SmartPath(glob_path).iglob(
|
|
805
|
-
|
|
806
|
-
|
|
876
|
+
for path_obj in SmartPath(glob_path).iglob(
|
|
877
|
+
pattern="", recursive=recursive, missing_ok=missing_ok
|
|
878
|
+
):
|
|
807
879
|
yield path_obj.path
|
|
808
880
|
|
|
809
881
|
|
|
810
882
|
def smart_glob_stat(
|
|
811
|
-
|
|
812
|
-
|
|
813
|
-
|
|
814
|
-
|
|
815
|
-
|
|
883
|
+
pathname: PathLike, recursive: bool = True, missing_ok: bool = True
|
|
884
|
+
) -> Iterator[FileEntry]:
|
|
885
|
+
"""
|
|
886
|
+
Given pathname may contain shell wildcard characters, return a list contains tuples
|
|
887
|
+
of path and file stat in ascending alphabetical order,
|
|
888
|
+
in which path matches glob pattern
|
|
816
889
|
|
|
817
890
|
:param pathname: A path pattern may contain shell wildcard characters
|
|
818
891
|
:param recursive: If False, this function will not glob recursively
|
|
819
|
-
:param missing_ok: If False and target path doesn't match any file,
|
|
820
|
-
|
|
892
|
+
:param missing_ok: If False and target path doesn't match any file,
|
|
893
|
+
raise FileNotFoundError
|
|
894
|
+
"""
|
|
821
895
|
# Split pathname, group by protocol, call glob respectively
|
|
822
896
|
# SmartPath(pathname).glob(recursive, missing_ok)
|
|
823
897
|
group_glob_list = _group_glob(pathname)
|
|
824
898
|
for glob_path in group_glob_list:
|
|
825
899
|
yield from SmartPath(glob_path).glob_stat(
|
|
826
|
-
pattern=
|
|
900
|
+
pattern="", recursive=recursive, missing_ok=missing_ok
|
|
901
|
+
)
|
|
827
902
|
|
|
828
903
|
|
|
829
904
|
def smart_save_as(file_object: BinaryIO, path: PathLike) -> None:
|
|
830
|
-
|
|
905
|
+
"""Write the opened binary stream to specified path, but the stream won't be closed
|
|
831
906
|
|
|
832
907
|
:param file_object: Stream to be read
|
|
833
908
|
:param path: Specified target path
|
|
834
|
-
|
|
909
|
+
"""
|
|
835
910
|
SmartPath(path).save(file_object)
|
|
836
911
|
|
|
837
912
|
|
|
838
913
|
def smart_load_from(path: PathLike) -> BinaryIO:
|
|
839
|
-
|
|
914
|
+
"""Read all content in binary on specified path and write into memory
|
|
840
915
|
|
|
841
916
|
User should close the BinaryIO manually
|
|
842
917
|
|
|
843
918
|
:param path: Specified path
|
|
844
919
|
:returns: BinaryIO
|
|
845
|
-
|
|
920
|
+
"""
|
|
846
921
|
return SmartPath(path).load()
|
|
847
922
|
|
|
848
923
|
|
|
849
924
|
def smart_combine_open(
|
|
850
|
-
|
|
851
|
-
|
|
852
|
-
|
|
853
|
-
'''Open a unified reader that supports multi file reading.
|
|
925
|
+
path_glob: str, mode: str = "rb", open_func=smart_open
|
|
926
|
+
) -> CombineReader:
|
|
927
|
+
"""Open a unified reader that supports multi file reading.
|
|
854
928
|
|
|
855
929
|
:param path_glob: A path may contain shell wildcard characters
|
|
856
930
|
:param mode: Mode to open file, supports 'rb'
|
|
857
931
|
:returns: A ```CombineReader```
|
|
858
|
-
|
|
859
|
-
file_objects = list(
|
|
860
|
-
open_func(path, mode) for path in sorted(smart_glob(path_glob)))
|
|
932
|
+
"""
|
|
933
|
+
file_objects = list(open_func(path, mode) for path in sorted(smart_glob(path_glob)))
|
|
861
934
|
return combine(file_objects, path_glob)
|
|
862
935
|
|
|
863
936
|
|
|
864
937
|
def smart_abspath(path: PathLike):
|
|
865
|
-
|
|
938
|
+
"""Return the absolute path of given path
|
|
866
939
|
|
|
867
940
|
:param path: Given path
|
|
868
941
|
:returns: Absolute path of given path
|
|
869
|
-
|
|
942
|
+
"""
|
|
870
943
|
return SmartPath(path).abspath()
|
|
871
944
|
|
|
872
945
|
|
|
873
946
|
def smart_realpath(path: PathLike):
|
|
874
|
-
|
|
947
|
+
"""Return the real path of given path
|
|
875
948
|
|
|
876
949
|
:param path: Given path
|
|
877
950
|
:returns: Real path of given path
|
|
878
|
-
|
|
951
|
+
"""
|
|
879
952
|
return SmartPath(path).realpath()
|
|
880
953
|
|
|
881
954
|
|
|
882
955
|
def smart_relpath(path: PathLike, start=None):
|
|
883
|
-
|
|
956
|
+
"""Return the relative path of given path
|
|
884
957
|
|
|
885
958
|
:param path: Given path
|
|
886
959
|
:param start: Given start directory
|
|
887
960
|
:returns: Relative path from start
|
|
888
|
-
|
|
961
|
+
"""
|
|
889
962
|
return SmartPath(path).relpath(start)
|
|
890
963
|
|
|
891
964
|
|
|
892
965
|
def smart_isabs(path: PathLike) -> bool:
|
|
893
|
-
|
|
966
|
+
"""Test whether a path is absolute
|
|
894
967
|
|
|
895
968
|
:param path: Given path
|
|
896
969
|
:returns: True if a path is absolute, else False
|
|
897
|
-
|
|
970
|
+
"""
|
|
898
971
|
return SmartPath(path).is_absolute()
|
|
899
972
|
|
|
900
973
|
|
|
901
974
|
def smart_ismount(path: PathLike) -> bool:
|
|
902
|
-
|
|
975
|
+
"""Test whether a path is a mount point
|
|
903
976
|
|
|
904
977
|
:param path: Given path
|
|
905
978
|
:returns: True if a path is a mount point, else False
|
|
906
|
-
|
|
979
|
+
"""
|
|
907
980
|
return SmartPath(path).is_mount()
|
|
908
981
|
|
|
909
982
|
|
|
910
983
|
def smart_load_content(
|
|
911
|
-
|
|
912
|
-
|
|
913
|
-
|
|
914
|
-
'''
|
|
984
|
+
path: PathLike, start: Optional[int] = None, stop: Optional[int] = None
|
|
985
|
+
) -> bytes:
|
|
986
|
+
"""
|
|
915
987
|
Get specified file from [start, stop) in bytes
|
|
916
988
|
|
|
917
989
|
:param path: Specified path
|
|
918
990
|
:param start: start index
|
|
919
991
|
:param stop: stop index
|
|
920
992
|
:returns: bytes content in range [start, stop)
|
|
921
|
-
|
|
993
|
+
"""
|
|
922
994
|
if is_s3(path):
|
|
923
995
|
return s3_load_content(path, start, stop)
|
|
924
996
|
|
|
925
|
-
with smart_open(path,
|
|
997
|
+
with smart_open(path, "rb") as fd:
|
|
926
998
|
if start:
|
|
927
999
|
fd.seek(start)
|
|
928
1000
|
offset = -1
|
|
@@ -932,97 +1004,92 @@ def smart_load_content(
|
|
|
932
1004
|
|
|
933
1005
|
|
|
934
1006
|
def smart_save_content(path: PathLike, content: bytes) -> None:
|
|
935
|
-
|
|
1007
|
+
"""Save bytes content to specified path
|
|
936
1008
|
|
|
937
1009
|
param path: Path to save content
|
|
938
|
-
|
|
939
|
-
with smart_open(path,
|
|
1010
|
+
"""
|
|
1011
|
+
with smart_open(path, "wb") as fd:
|
|
940
1012
|
fd.write(content)
|
|
941
1013
|
|
|
942
1014
|
|
|
943
1015
|
def smart_load_text(path: PathLike) -> str:
|
|
944
|
-
|
|
1016
|
+
"""
|
|
945
1017
|
Read content from path
|
|
946
1018
|
|
|
947
1019
|
param path: Path to be read
|
|
948
|
-
|
|
1020
|
+
"""
|
|
949
1021
|
with smart_open(path) as fd:
|
|
950
1022
|
return fd.read() # pytype: disable=bad-return-type
|
|
951
1023
|
|
|
952
1024
|
|
|
953
1025
|
def smart_save_text(path: PathLike, text: str) -> None:
|
|
954
|
-
|
|
1026
|
+
"""Save text to specified path
|
|
955
1027
|
|
|
956
1028
|
param path: Path to save text
|
|
957
|
-
|
|
958
|
-
with smart_open(path,
|
|
1029
|
+
"""
|
|
1030
|
+
with smart_open(path, "w") as fd:
|
|
959
1031
|
fd.write(text)
|
|
960
1032
|
|
|
961
1033
|
|
|
962
1034
|
class SmartCacher(FileCacher):
|
|
963
1035
|
cache_path = None
|
|
964
1036
|
|
|
965
|
-
def __init__(
|
|
966
|
-
|
|
967
|
-
|
|
968
|
-
raise ValueError('unacceptable mode: %r' % mode)
|
|
1037
|
+
def __init__(self, path: str, cache_path: Optional[str] = None, mode: str = "r"):
|
|
1038
|
+
if mode not in ("r", "w", "a"):
|
|
1039
|
+
raise ValueError("unacceptable mode: %r" % mode)
|
|
969
1040
|
if cache_path is None:
|
|
970
1041
|
cache_path = generate_cache_path(path)
|
|
971
|
-
if mode in (
|
|
1042
|
+
if mode in ("r", "a"):
|
|
972
1043
|
smart_copy(path, cache_path)
|
|
973
1044
|
self.name = path
|
|
974
1045
|
self.mode = mode
|
|
975
1046
|
self.cache_path = cache_path
|
|
976
1047
|
|
|
977
1048
|
def _close(self):
|
|
978
|
-
if self.cache_path is not None and
|
|
979
|
-
|
|
980
|
-
if self.mode in ('w', 'a'):
|
|
1049
|
+
if self.cache_path is not None and os.path.exists(self.cache_path):
|
|
1050
|
+
if self.mode in ("w", "a"):
|
|
981
1051
|
smart_copy(self.cache_path, self.name)
|
|
982
1052
|
os.unlink(self.cache_path)
|
|
983
1053
|
|
|
984
1054
|
|
|
985
1055
|
def smart_cache(path, cacher=SmartCacher, **options):
|
|
986
|
-
|
|
1056
|
+
"""Return a path to Posixpath Interface
|
|
987
1057
|
|
|
988
1058
|
param path: Path to cache
|
|
989
1059
|
param s3_cacher: Cacher for s3 path
|
|
990
1060
|
param options: Optional arguments for s3_cacher
|
|
991
|
-
|
|
1061
|
+
"""
|
|
992
1062
|
if not is_fs(path):
|
|
993
1063
|
return cacher(path, **options)
|
|
994
1064
|
return NullCacher(path)
|
|
995
1065
|
|
|
996
1066
|
|
|
997
1067
|
def smart_touch(path: PathLike):
|
|
998
|
-
|
|
1068
|
+
"""Create a new file on path
|
|
999
1069
|
|
|
1000
1070
|
param path: Path to create file
|
|
1001
|
-
|
|
1002
|
-
with smart_open(path,
|
|
1071
|
+
"""
|
|
1072
|
+
with smart_open(path, "w"):
|
|
1003
1073
|
pass
|
|
1004
1074
|
|
|
1005
1075
|
|
|
1006
1076
|
def smart_getmd5(path: PathLike, recalculate: bool = False):
|
|
1007
|
-
|
|
1077
|
+
"""Get md5 value of file
|
|
1008
1078
|
|
|
1009
1079
|
param path: File path
|
|
1010
1080
|
param recalculate: calculate md5 in real-time or not return s3 etag when path is s3
|
|
1011
|
-
|
|
1081
|
+
"""
|
|
1012
1082
|
return SmartPath(path).md5(recalculate=recalculate)
|
|
1013
1083
|
|
|
1014
1084
|
|
|
1015
|
-
_concat_funcs = {
|
|
1016
|
-
's3': s3_concat,
|
|
1017
|
-
'sftp': sftp_concat,
|
|
1018
|
-
}
|
|
1085
|
+
_concat_funcs = {"s3": s3_concat, "sftp": sftp_concat}
|
|
1019
1086
|
|
|
1020
1087
|
|
|
1021
1088
|
def _default_concat_func(src_paths: List[PathLike], dst_path: PathLike) -> None:
|
|
1022
1089
|
length = 16 * 1024
|
|
1023
|
-
with smart_open(dst_path,
|
|
1090
|
+
with smart_open(dst_path, "wb") as dst_fd:
|
|
1024
1091
|
for src_path in src_paths:
|
|
1025
|
-
with smart_open(src_path,
|
|
1092
|
+
with smart_open(src_path, "rb") as src_fd:
|
|
1026
1093
|
while True:
|
|
1027
1094
|
buf = src_fd.read(length)
|
|
1028
1095
|
if not buf:
|
|
@@ -1031,12 +1098,12 @@ def _default_concat_func(src_paths: List[PathLike], dst_path: PathLike) -> None:
|
|
|
1031
1098
|
|
|
1032
1099
|
|
|
1033
1100
|
def smart_concat(src_paths: List[PathLike], dst_path: PathLike) -> None:
|
|
1034
|
-
|
|
1101
|
+
"""
|
|
1035
1102
|
Concatenate src_paths to dst_path
|
|
1036
|
-
|
|
1103
|
+
|
|
1037
1104
|
:param src_paths: List of source paths
|
|
1038
1105
|
:param dst_path: Destination path
|
|
1039
|
-
|
|
1106
|
+
"""
|
|
1040
1107
|
if not src_paths:
|
|
1041
1108
|
return
|
|
1042
1109
|
|