megfile 3.1.1__py3-none-any.whl → 3.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- docs/conf.py +2 -4
- megfile/__init__.py +394 -203
- megfile/cli.py +258 -238
- megfile/config.py +25 -21
- megfile/errors.py +124 -114
- megfile/fs.py +174 -140
- megfile/fs_path.py +462 -354
- megfile/hdfs.py +133 -101
- megfile/hdfs_path.py +290 -236
- megfile/http.py +15 -14
- megfile/http_path.py +111 -107
- megfile/interfaces.py +70 -65
- megfile/lib/base_prefetch_reader.py +84 -65
- megfile/lib/combine_reader.py +12 -12
- megfile/lib/compare.py +17 -13
- megfile/lib/compat.py +1 -5
- megfile/lib/fnmatch.py +29 -30
- megfile/lib/glob.py +46 -54
- megfile/lib/hdfs_prefetch_reader.py +40 -25
- megfile/lib/hdfs_tools.py +1 -3
- megfile/lib/http_prefetch_reader.py +69 -46
- megfile/lib/joinpath.py +5 -5
- megfile/lib/lazy_handler.py +7 -3
- megfile/lib/s3_buffered_writer.py +58 -51
- megfile/lib/s3_cached_handler.py +13 -14
- megfile/lib/s3_limited_seekable_writer.py +37 -28
- megfile/lib/s3_memory_handler.py +34 -30
- megfile/lib/s3_pipe_handler.py +24 -25
- megfile/lib/s3_prefetch_reader.py +71 -52
- megfile/lib/s3_share_cache_reader.py +37 -24
- megfile/lib/shadow_handler.py +7 -3
- megfile/lib/stdio_handler.py +9 -8
- megfile/lib/url.py +3 -3
- megfile/pathlike.py +259 -228
- megfile/s3.py +220 -153
- megfile/s3_path.py +977 -802
- megfile/sftp.py +190 -156
- megfile/sftp_path.py +540 -450
- megfile/smart.py +397 -330
- megfile/smart_path.py +100 -105
- megfile/stdio.py +10 -9
- megfile/stdio_path.py +32 -35
- megfile/utils/__init__.py +73 -54
- megfile/utils/mutex.py +11 -14
- megfile/version.py +1 -1
- {megfile-3.1.1.dist-info → megfile-3.1.2.dist-info}/METADATA +5 -8
- megfile-3.1.2.dist-info/RECORD +55 -0
- {megfile-3.1.1.dist-info → megfile-3.1.2.dist-info}/WHEEL +1 -1
- scripts/convert_results_to_sarif.py +45 -78
- scripts/generate_file.py +140 -64
- megfile-3.1.1.dist-info/RECORD +0 -55
- {megfile-3.1.1.dist-info → megfile-3.1.2.dist-info}/LICENSE +0 -0
- {megfile-3.1.1.dist-info → megfile-3.1.2.dist-info}/LICENSE.pyre +0 -0
- {megfile-3.1.1.dist-info → megfile-3.1.2.dist-info}/entry_points.txt +0 -0
- {megfile-3.1.1.dist-info → megfile-3.1.2.dist-info}/top_level.txt +0 -0
megfile/cli.py
CHANGED
|
@@ -14,23 +14,48 @@ from megfile.config import DEFAULT_BLOCK_SIZE
|
|
|
14
14
|
from megfile.hdfs_path import DEFAULT_HDFS_TIMEOUT
|
|
15
15
|
from megfile.interfaces import FileEntry
|
|
16
16
|
from megfile.lib.glob import get_non_glob_dir, has_magic
|
|
17
|
-
from megfile.smart import
|
|
17
|
+
from megfile.smart import (
|
|
18
|
+
_smart_sync_single_file,
|
|
19
|
+
smart_copy,
|
|
20
|
+
smart_exists,
|
|
21
|
+
smart_getmd5,
|
|
22
|
+
smart_getmtime,
|
|
23
|
+
smart_getsize,
|
|
24
|
+
smart_glob_stat,
|
|
25
|
+
smart_isdir,
|
|
26
|
+
smart_isfile,
|
|
27
|
+
smart_makedirs,
|
|
28
|
+
smart_move,
|
|
29
|
+
smart_open,
|
|
30
|
+
smart_path_join,
|
|
31
|
+
smart_readlink,
|
|
32
|
+
smart_remove,
|
|
33
|
+
smart_rename,
|
|
34
|
+
smart_scan_stat,
|
|
35
|
+
smart_scandir,
|
|
36
|
+
smart_stat,
|
|
37
|
+
smart_sync,
|
|
38
|
+
smart_sync_with_progress,
|
|
39
|
+
smart_touch,
|
|
40
|
+
smart_unlink,
|
|
41
|
+
)
|
|
18
42
|
from megfile.smart_path import SmartPath
|
|
19
43
|
from megfile.utils import get_human_size
|
|
20
44
|
from megfile.version import VERSION
|
|
21
45
|
|
|
22
46
|
logging.basicConfig(level=logging.ERROR)
|
|
23
|
-
logging.getLogger(
|
|
47
|
+
logging.getLogger("megfile").setLevel(level=logging.INFO)
|
|
24
48
|
DEBUG = False
|
|
25
49
|
|
|
26
50
|
|
|
27
51
|
@click.group()
|
|
28
|
-
@click.option(
|
|
52
|
+
@click.option("--debug", is_flag=True, help="Enable debug mode.")
|
|
29
53
|
def cli(debug):
|
|
30
54
|
"""
|
|
31
55
|
Client for megfile.
|
|
32
|
-
|
|
33
|
-
If you install megfile with ``--user``,
|
|
56
|
+
|
|
57
|
+
If you install megfile with ``--user``,
|
|
58
|
+
you also need configure ``$HOME/.local/bin`` into ``$PATH``.
|
|
34
59
|
"""
|
|
35
60
|
global DEBUG
|
|
36
61
|
DEBUG = debug
|
|
@@ -62,19 +87,19 @@ def simple_echo(file_stat, base_path: str = "", full_path: bool = False):
|
|
|
62
87
|
|
|
63
88
|
|
|
64
89
|
def long_echo(file_stat, base_path: str = "", full_path: bool = False):
|
|
65
|
-
return
|
|
90
|
+
return "%12d %s %s" % (
|
|
66
91
|
file_stat.stat.size,
|
|
67
|
-
time.strftime(
|
|
68
|
-
|
|
69
|
-
|
|
92
|
+
time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(file_stat.stat.mtime)),
|
|
93
|
+
get_echo_path(file_stat, base_path, full_path),
|
|
94
|
+
)
|
|
70
95
|
|
|
71
96
|
|
|
72
97
|
def human_echo(file_stat, base_path: str = "", full_path: bool = False):
|
|
73
|
-
return
|
|
98
|
+
return "%10s %s %s" % (
|
|
74
99
|
get_human_size(file_stat.stat.size),
|
|
75
|
-
time.strftime(
|
|
76
|
-
|
|
77
|
-
|
|
100
|
+
time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(file_stat.stat.mtime)),
|
|
101
|
+
get_echo_path(file_stat, base_path, full_path),
|
|
102
|
+
)
|
|
78
103
|
|
|
79
104
|
|
|
80
105
|
def smart_list_stat(path):
|
|
@@ -114,67 +139,65 @@ def _ls(path: str, long: bool, recursive: bool, human_readable: bool):
|
|
|
114
139
|
link = smart_readlink(file_stat.path)
|
|
115
140
|
except FileNotFoundError as e:
|
|
116
141
|
link = repr(e)
|
|
117
|
-
output +=
|
|
142
|
+
output += " -> %s" % link
|
|
118
143
|
click.echo(output)
|
|
119
144
|
if long:
|
|
120
|
-
click.echo(f
|
|
145
|
+
click.echo(f"total({total_count}): {get_human_size(total_size)}")
|
|
121
146
|
|
|
122
147
|
|
|
123
|
-
@cli.command(short_help=
|
|
124
|
-
@click.argument(
|
|
148
|
+
@cli.command(short_help="List all the objects in the path.")
|
|
149
|
+
@click.argument("path")
|
|
125
150
|
@click.option(
|
|
126
|
-
|
|
127
|
-
|
|
151
|
+
"-l",
|
|
152
|
+
"--long",
|
|
128
153
|
is_flag=True,
|
|
129
|
-
help=
|
|
154
|
+
help="List all the objects in the path with size, modification time and path.",
|
|
130
155
|
)
|
|
131
156
|
@click.option(
|
|
132
|
-
|
|
133
|
-
|
|
157
|
+
"-r",
|
|
158
|
+
"--recursive",
|
|
134
159
|
is_flag=True,
|
|
135
|
-
help=
|
|
136
|
-
|
|
160
|
+
help="Command is performed on all files or objects "
|
|
161
|
+
"under the specified directory or prefix.",
|
|
137
162
|
)
|
|
138
163
|
@click.option(
|
|
139
|
-
|
|
140
|
-
|
|
164
|
+
"-h",
|
|
165
|
+
"--human-readable",
|
|
141
166
|
is_flag=True,
|
|
142
|
-
help=
|
|
167
|
+
help="Displays file sizes in human readable format.",
|
|
168
|
+
)
|
|
143
169
|
def ls(path: str, long: bool, recursive: bool, human_readable: bool):
|
|
144
170
|
_ls(path, long=long, recursive=recursive, human_readable=human_readable)
|
|
145
171
|
|
|
146
172
|
|
|
147
|
-
@cli.command(short_help=
|
|
148
|
-
@click.argument(
|
|
173
|
+
@cli.command(short_help="List all the objects in the path.")
|
|
174
|
+
@click.argument("path")
|
|
149
175
|
@click.option(
|
|
150
|
-
|
|
151
|
-
|
|
176
|
+
"-r",
|
|
177
|
+
"--recursive",
|
|
152
178
|
is_flag=True,
|
|
153
|
-
help=
|
|
154
|
-
|
|
179
|
+
help="Command is performed on all files or objects under "
|
|
180
|
+
"the specified directory or prefix.",
|
|
155
181
|
)
|
|
156
182
|
def ll(path: str, recursive: bool):
|
|
157
183
|
_ls(path, long=True, recursive=recursive, human_readable=True)
|
|
158
184
|
|
|
159
185
|
|
|
160
|
-
@cli.command(
|
|
161
|
-
|
|
162
|
-
@click.argument(
|
|
163
|
-
@click.argument('dst_path')
|
|
186
|
+
@cli.command(short_help="Copy files from source to dest, skipping already copied.")
|
|
187
|
+
@click.argument("src_path")
|
|
188
|
+
@click.argument("dst_path")
|
|
164
189
|
@click.option(
|
|
165
|
-
|
|
166
|
-
|
|
190
|
+
"-r",
|
|
191
|
+
"--recursive",
|
|
167
192
|
is_flag=True,
|
|
168
|
-
help=
|
|
169
|
-
|
|
193
|
+
help="Command is performed on all files or objects "
|
|
194
|
+
"under the specified directory or prefix.",
|
|
170
195
|
)
|
|
171
196
|
@click.option(
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
@click.option('-g', '--progress-bar', is_flag=True, help='Show progress bar.')
|
|
177
|
-
@click.option('--skip', is_flag=True, help='Skip existed files.')
|
|
197
|
+
"-T", "--no-target-directory", is_flag=True, help="treat dst_path as a normal file."
|
|
198
|
+
)
|
|
199
|
+
@click.option("-g", "--progress-bar", is_flag=True, help="Show progress bar.")
|
|
200
|
+
@click.option("--skip", is_flag=True, help="Skip existed files.")
|
|
178
201
|
def cp(
|
|
179
202
|
src_path: str,
|
|
180
203
|
dst_path: str,
|
|
@@ -183,12 +206,10 @@ def cp(
|
|
|
183
206
|
progress_bar: bool,
|
|
184
207
|
skip: bool,
|
|
185
208
|
):
|
|
186
|
-
if not no_target_directory and (dst_path.endswith(
|
|
187
|
-
smart_isdir(dst_path)):
|
|
209
|
+
if not no_target_directory and (dst_path.endswith("/") or smart_isdir(dst_path)):
|
|
188
210
|
dst_path = smart_path_join(dst_path, os.path.basename(src_path))
|
|
189
211
|
if recursive:
|
|
190
|
-
with ThreadPoolExecutor(max_workers=(os.cpu_count() or 1) *
|
|
191
|
-
2) as executor:
|
|
212
|
+
with ThreadPoolExecutor(max_workers=(os.cpu_count() or 1) * 2) as executor:
|
|
192
213
|
if progress_bar:
|
|
193
214
|
smart_sync_with_progress(
|
|
194
215
|
src_path,
|
|
@@ -196,7 +217,8 @@ def cp(
|
|
|
196
217
|
followlinks=True,
|
|
197
218
|
map_func=executor.map,
|
|
198
219
|
force=True,
|
|
199
|
-
overwrite=not skip
|
|
220
|
+
overwrite=not skip,
|
|
221
|
+
)
|
|
200
222
|
else:
|
|
201
223
|
smart_sync(
|
|
202
224
|
src_path,
|
|
@@ -204,44 +226,43 @@ def cp(
|
|
|
204
226
|
followlinks=True,
|
|
205
227
|
map_func=executor.map,
|
|
206
228
|
force=True,
|
|
207
|
-
overwrite=not skip
|
|
229
|
+
overwrite=not skip,
|
|
230
|
+
)
|
|
208
231
|
else:
|
|
209
232
|
if progress_bar:
|
|
210
233
|
file_size = smart_stat(src_path).size
|
|
211
234
|
sbar = tqdm(
|
|
212
235
|
total=file_size,
|
|
213
|
-
unit=
|
|
236
|
+
unit="B",
|
|
214
237
|
ascii=True,
|
|
215
238
|
unit_scale=True,
|
|
216
|
-
unit_divisor=1024
|
|
239
|
+
unit_divisor=1024,
|
|
240
|
+
)
|
|
217
241
|
|
|
218
242
|
def callback(length: int):
|
|
219
243
|
sbar.update(length)
|
|
220
244
|
|
|
221
|
-
smart_copy(
|
|
222
|
-
src_path, dst_path, callback=callback, overwrite=not skip)
|
|
245
|
+
smart_copy(src_path, dst_path, callback=callback, overwrite=not skip)
|
|
223
246
|
sbar.close()
|
|
224
247
|
else:
|
|
225
248
|
smart_copy(src_path, dst_path, overwrite=not skip)
|
|
226
249
|
|
|
227
250
|
|
|
228
|
-
@cli.command(short_help=
|
|
229
|
-
@click.argument(
|
|
230
|
-
@click.argument(
|
|
251
|
+
@cli.command(short_help="Move files from source to dest.")
|
|
252
|
+
@click.argument("src_path")
|
|
253
|
+
@click.argument("dst_path")
|
|
231
254
|
@click.option(
|
|
232
|
-
|
|
233
|
-
|
|
255
|
+
"-r",
|
|
256
|
+
"--recursive",
|
|
234
257
|
is_flag=True,
|
|
235
|
-
help=
|
|
236
|
-
|
|
258
|
+
help="Command is performed on all files or objects "
|
|
259
|
+
"under the specified directory or prefix.",
|
|
237
260
|
)
|
|
238
261
|
@click.option(
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
@click.option('-g', '--progress-bar', is_flag=True, help='Show progress bar.')
|
|
244
|
-
@click.option('--skip', is_flag=True, help='Skip existed files.')
|
|
262
|
+
"-T", "--no-target-directory", is_flag=True, help="treat dst_path as a normal file."
|
|
263
|
+
)
|
|
264
|
+
@click.option("-g", "--progress-bar", is_flag=True, help="Show progress bar.")
|
|
265
|
+
@click.option("--skip", is_flag=True, help="Skip existed files.")
|
|
245
266
|
def mv(
|
|
246
267
|
src_path: str,
|
|
247
268
|
dst_path: str,
|
|
@@ -250,8 +271,7 @@ def mv(
|
|
|
250
271
|
progress_bar: bool,
|
|
251
272
|
skip: bool,
|
|
252
273
|
):
|
|
253
|
-
if not no_target_directory and (dst_path.endswith(
|
|
254
|
-
smart_isdir(dst_path)):
|
|
274
|
+
if not no_target_directory and (dst_path.endswith("/") or smart_isdir(dst_path)):
|
|
255
275
|
dst_path = smart_path_join(dst_path, os.path.basename(src_path))
|
|
256
276
|
if progress_bar:
|
|
257
277
|
src_protocol, _ = SmartPath._extract_protocol(src_path)
|
|
@@ -264,7 +284,8 @@ def mv(
|
|
|
264
284
|
t.update(1)
|
|
265
285
|
else:
|
|
266
286
|
smart_sync_with_progress(
|
|
267
|
-
src_path, dst_path, followlinks=True, overwrite=not skip
|
|
287
|
+
src_path, dst_path, followlinks=True, overwrite=not skip
|
|
288
|
+
)
|
|
268
289
|
smart_remove(src_path)
|
|
269
290
|
else:
|
|
270
291
|
if src_protocol == dst_protocol:
|
|
@@ -275,16 +296,16 @@ def mv(
|
|
|
275
296
|
file_size = smart_stat(src_path).size
|
|
276
297
|
sbar = tqdm(
|
|
277
298
|
total=file_size,
|
|
278
|
-
unit=
|
|
299
|
+
unit="B",
|
|
279
300
|
ascii=True,
|
|
280
301
|
unit_scale=True,
|
|
281
|
-
unit_divisor=1024
|
|
302
|
+
unit_divisor=1024,
|
|
303
|
+
)
|
|
282
304
|
|
|
283
305
|
def callback(length: int):
|
|
284
306
|
sbar.update(length)
|
|
285
307
|
|
|
286
|
-
smart_copy(
|
|
287
|
-
src_path, dst_path, callback=callback, overwrite=not skip)
|
|
308
|
+
smart_copy(src_path, dst_path, callback=callback, overwrite=not skip)
|
|
288
309
|
smart_unlink(src_path)
|
|
289
310
|
sbar.close()
|
|
290
311
|
else:
|
|
@@ -292,75 +313,74 @@ def mv(
|
|
|
292
313
|
move_func(src_path, dst_path, overwrite=not skip)
|
|
293
314
|
|
|
294
315
|
|
|
295
|
-
@cli.command(short_help=
|
|
296
|
-
@click.argument(
|
|
316
|
+
@cli.command(short_help="Remove files from path.")
|
|
317
|
+
@click.argument("path")
|
|
297
318
|
@click.option(
|
|
298
|
-
|
|
299
|
-
|
|
319
|
+
"-r",
|
|
320
|
+
"--recursive",
|
|
300
321
|
is_flag=True,
|
|
301
|
-
help=
|
|
302
|
-
|
|
322
|
+
help="Command is performed on all files or objects "
|
|
323
|
+
"under the specified directory or prefix.",
|
|
303
324
|
)
|
|
304
325
|
def rm(path: str, recursive: bool):
|
|
305
326
|
remove_func = smart_remove if recursive else smart_unlink
|
|
306
327
|
remove_func(path)
|
|
307
328
|
|
|
308
329
|
|
|
309
|
-
@cli.command(
|
|
310
|
-
|
|
311
|
-
@click.argument(
|
|
312
|
-
@click.
|
|
313
|
-
@click.option('-g', '--progress-bar', is_flag=True, help='Show progress bar.')
|
|
330
|
+
@cli.command(short_help="Make source and dest identical, modifying destination only.")
|
|
331
|
+
@click.argument("src_path")
|
|
332
|
+
@click.argument("dst_path")
|
|
333
|
+
@click.option("-g", "--progress-bar", is_flag=True, help="Show progress bar.")
|
|
314
334
|
@click.option(
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
type=click.INT,
|
|
318
|
-
default=8,
|
|
319
|
-
help='Number of concurrent workers.')
|
|
335
|
+
"-w", "--worker", type=click.INT, default=8, help="Number of concurrent workers."
|
|
336
|
+
)
|
|
320
337
|
@click.option(
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
@click.option('-q', '--quiet', is_flag=True, help='Not show any progress log.')
|
|
326
|
-
@click.option('--skip', is_flag=True, help='Skip existed files.')
|
|
338
|
+
"-f", "--force", is_flag=True, help="Copy files forcible, ignore same files."
|
|
339
|
+
)
|
|
340
|
+
@click.option("-q", "--quiet", is_flag=True, help="Not show any progress log.")
|
|
341
|
+
@click.option("--skip", is_flag=True, help="Skip existed files.")
|
|
327
342
|
def sync(
|
|
328
|
-
|
|
329
|
-
|
|
343
|
+
src_path: str,
|
|
344
|
+
dst_path: str,
|
|
345
|
+
progress_bar: bool,
|
|
346
|
+
worker: int,
|
|
347
|
+
force: bool,
|
|
348
|
+
quiet: bool,
|
|
349
|
+
skip: bool,
|
|
350
|
+
):
|
|
330
351
|
with ThreadPoolExecutor(max_workers=worker) as executor:
|
|
331
352
|
if has_magic(src_path):
|
|
332
353
|
src_root_path = get_non_glob_dir(src_path)
|
|
333
354
|
if not smart_exists(src_root_path):
|
|
334
|
-
raise FileNotFoundError(f
|
|
355
|
+
raise FileNotFoundError(f"No match file: {src_path}")
|
|
335
356
|
|
|
336
357
|
def scan_func(path):
|
|
337
358
|
for glob_file_entry in smart_glob_stat(path):
|
|
338
359
|
if glob_file_entry.is_file():
|
|
339
360
|
yield glob_file_entry
|
|
340
361
|
else:
|
|
341
|
-
for file_entry in smart_scan_stat(
|
|
342
|
-
|
|
362
|
+
for file_entry in smart_scan_stat(
|
|
363
|
+
glob_file_entry.path, followlinks=True
|
|
364
|
+
):
|
|
343
365
|
yield file_entry
|
|
366
|
+
|
|
344
367
|
else:
|
|
345
368
|
if not smart_exists(src_path):
|
|
346
|
-
raise FileNotFoundError(f
|
|
369
|
+
raise FileNotFoundError(f"No match file: {src_path}")
|
|
347
370
|
src_root_path = src_path
|
|
348
371
|
scan_func = partial(smart_scan_stat, followlinks=True)
|
|
349
372
|
|
|
350
373
|
if progress_bar and not quiet:
|
|
351
|
-
print(
|
|
374
|
+
print("building progress bar", end="\r")
|
|
352
375
|
file_entries = []
|
|
353
376
|
total_count = total_size = 0
|
|
354
|
-
for total_count, file_entry in enumerate(scan_func(src_path),
|
|
355
|
-
start=1):
|
|
377
|
+
for total_count, file_entry in enumerate(scan_func(src_path), start=1):
|
|
356
378
|
if total_count > 1024 * 128:
|
|
357
379
|
file_entries = []
|
|
358
380
|
else:
|
|
359
381
|
file_entries.append(file_entry)
|
|
360
382
|
total_size += file_entry.stat.size
|
|
361
|
-
print(
|
|
362
|
-
f'building progress bar, find {total_count} files',
|
|
363
|
-
end='\r')
|
|
383
|
+
print(f"building progress bar, find {total_count} files", end="\r")
|
|
364
384
|
|
|
365
385
|
if not file_entries:
|
|
366
386
|
file_entries = scan_func(src_path)
|
|
@@ -373,11 +393,12 @@ def sync(
|
|
|
373
393
|
else:
|
|
374
394
|
tbar = tqdm(total=total_count, ascii=True)
|
|
375
395
|
sbar = tqdm(
|
|
376
|
-
unit=
|
|
396
|
+
unit="B",
|
|
377
397
|
ascii=True,
|
|
378
398
|
unit_scale=True,
|
|
379
399
|
unit_divisor=1024,
|
|
380
|
-
total=total_size
|
|
400
|
+
total=total_size,
|
|
401
|
+
)
|
|
381
402
|
|
|
382
403
|
def callback(_filename: str, length: int):
|
|
383
404
|
sbar.update(length)
|
|
@@ -395,7 +416,9 @@ def sync(
|
|
|
395
416
|
callback_after_copy_file=callback_after_copy_file,
|
|
396
417
|
force=force,
|
|
397
418
|
overwrite=not skip,
|
|
398
|
-
)
|
|
419
|
+
)
|
|
420
|
+
for file_entry in file_entries
|
|
421
|
+
)
|
|
399
422
|
list(executor.map(_smart_sync_single_file, params_iter))
|
|
400
423
|
if not quiet:
|
|
401
424
|
tbar.close()
|
|
@@ -405,36 +428,33 @@ def sync(
|
|
|
405
428
|
|
|
406
429
|
|
|
407
430
|
@cli.command(short_help="Make the path if it doesn't already exist.")
|
|
408
|
-
@click.argument(
|
|
431
|
+
@click.argument("path")
|
|
409
432
|
def mkdir(path: str):
|
|
410
433
|
smart_makedirs(path)
|
|
411
434
|
|
|
412
435
|
|
|
413
436
|
@cli.command(short_help="Make the file if it doesn't already exist.")
|
|
414
|
-
@click.argument(
|
|
437
|
+
@click.argument("path")
|
|
415
438
|
def touch(path: str):
|
|
416
439
|
smart_touch(path)
|
|
417
440
|
|
|
418
441
|
|
|
419
|
-
@cli.command(short_help=
|
|
420
|
-
@click.argument(
|
|
442
|
+
@cli.command(short_help="Concatenate any files and send them to stdout.")
|
|
443
|
+
@click.argument("path")
|
|
421
444
|
def cat(path: str):
|
|
422
|
-
with smart_open(path,
|
|
445
|
+
with smart_open(path, "rb") as f:
|
|
423
446
|
shutil.copyfileobj(f, sys.stdout.buffer) # pytype: disable=wrong-arg-types
|
|
424
447
|
|
|
425
448
|
|
|
426
449
|
@cli.command(
|
|
427
|
-
short_help=
|
|
450
|
+
short_help="Concatenate any files and send first n lines of them to stdout."
|
|
428
451
|
)
|
|
429
|
-
@click.argument(
|
|
452
|
+
@click.argument("path")
|
|
430
453
|
@click.option(
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
type=click.INT,
|
|
434
|
-
default=10,
|
|
435
|
-
help='print the first NUM lines')
|
|
454
|
+
"-n", "--lines", type=click.INT, default=10, help="print the first NUM lines"
|
|
455
|
+
)
|
|
436
456
|
def head(path: str, lines: int):
|
|
437
|
-
with smart_open(path,
|
|
457
|
+
with smart_open(path, "rb") as f:
|
|
438
458
|
for _ in range(lines):
|
|
439
459
|
try:
|
|
440
460
|
content = f.readline()
|
|
@@ -442,36 +462,32 @@ def head(path: str, lines: int):
|
|
|
442
462
|
break
|
|
443
463
|
except EOFError:
|
|
444
464
|
break
|
|
445
|
-
click.echo(content.strip(b
|
|
465
|
+
click.echo(content.strip(b"\n"))
|
|
446
466
|
|
|
447
467
|
|
|
448
468
|
@cli.command(
|
|
449
|
-
short_help=
|
|
450
|
-
|
|
469
|
+
short_help="Concatenate any files and send last n lines of them to stdout."
|
|
470
|
+
)
|
|
471
|
+
@click.argument("path")
|
|
451
472
|
@click.option(
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
type=click.INT,
|
|
455
|
-
default=10,
|
|
456
|
-
help='print the last NUM lines')
|
|
473
|
+
"-n", "--lines", type=click.INT, default=10, help="print the last NUM lines"
|
|
474
|
+
)
|
|
457
475
|
@click.option(
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
is_flag=True,
|
|
461
|
-
help='output appended data as the file grows')
|
|
476
|
+
"-f", "--follow", is_flag=True, help="output appended data as the file grows"
|
|
477
|
+
)
|
|
462
478
|
def tail(path: str, lines: int, follow: bool):
|
|
463
479
|
line_list = []
|
|
464
|
-
with smart_open(path,
|
|
480
|
+
with smart_open(path, "rb") as f:
|
|
465
481
|
f.seek(0, os.SEEK_END)
|
|
466
482
|
file_size = f.tell()
|
|
467
483
|
f.seek(0, os.SEEK_SET)
|
|
468
484
|
|
|
469
|
-
for current_offset in range(
|
|
470
|
-
|
|
471
|
-
|
|
485
|
+
for current_offset in range(
|
|
486
|
+
file_size - DEFAULT_BLOCK_SIZE, 0 - DEFAULT_BLOCK_SIZE, -DEFAULT_BLOCK_SIZE
|
|
487
|
+
):
|
|
472
488
|
current_offset = max(0, current_offset)
|
|
473
489
|
f.seek(current_offset)
|
|
474
|
-
block_lines = f.read(DEFAULT_BLOCK_SIZE).split(b
|
|
490
|
+
block_lines = f.read(DEFAULT_BLOCK_SIZE).split(b"\n")
|
|
475
491
|
if len(line_list) > 0:
|
|
476
492
|
block_lines[-1] += line_list[0]
|
|
477
493
|
block_lines.extend(line_list[1:])
|
|
@@ -489,7 +505,7 @@ def tail(path: str, lines: int, follow: bool):
|
|
|
489
505
|
if follow:
|
|
490
506
|
offset = file_size
|
|
491
507
|
while True:
|
|
492
|
-
with smart_open(path,
|
|
508
|
+
with smart_open(path, "rb") as f:
|
|
493
509
|
f.seek(offset)
|
|
494
510
|
line = f.readline()
|
|
495
511
|
offset = f.tell()
|
|
@@ -499,17 +515,17 @@ def tail(path: str, lines: int, follow: bool):
|
|
|
499
515
|
click.echo(line, nl=False)
|
|
500
516
|
|
|
501
517
|
|
|
502
|
-
@cli.command(short_help=
|
|
503
|
-
@click.argument(
|
|
504
|
-
@click.option(
|
|
505
|
-
@click.option(
|
|
506
|
-
'-o', '--stdout', is_flag=True, help='File content to standard output')
|
|
518
|
+
@cli.command(short_help="Write bytes from stdin to file.")
|
|
519
|
+
@click.argument("path")
|
|
520
|
+
@click.option("-a", "--append", is_flag=True, help="Append to the given file")
|
|
521
|
+
@click.option("-o", "--stdout", is_flag=True, help="File content to standard output")
|
|
507
522
|
def to(path: str, append: bool, stdout: bool):
|
|
508
|
-
mode =
|
|
523
|
+
mode = "wb"
|
|
509
524
|
if append:
|
|
510
|
-
mode =
|
|
511
|
-
with smart_open(
|
|
512
|
-
|
|
525
|
+
mode = "ab"
|
|
526
|
+
with smart_open("stdio://0", "rb") as stdin, smart_open(
|
|
527
|
+
path, mode
|
|
528
|
+
) as f, smart_open("stdio://1", "wb") as stdout_fd:
|
|
513
529
|
length = 16 * 1024
|
|
514
530
|
while True:
|
|
515
531
|
buf = stdin.read(length)
|
|
@@ -520,175 +536,179 @@ def to(path: str, append: bool, stdout: bool):
|
|
|
520
536
|
stdout_fd.write(buf)
|
|
521
537
|
|
|
522
538
|
|
|
523
|
-
@cli.command(
|
|
524
|
-
|
|
525
|
-
@click.argument('path')
|
|
539
|
+
@cli.command(short_help="Produce an md5sum file for all the objects in the path.")
|
|
540
|
+
@click.argument("path")
|
|
526
541
|
def md5sum(path: str):
|
|
527
542
|
click.echo(smart_getmd5(path, recalculate=True))
|
|
528
543
|
|
|
529
544
|
|
|
530
|
-
@cli.command(
|
|
531
|
-
|
|
532
|
-
@click.argument('path')
|
|
545
|
+
@cli.command(short_help="Return the total size and number of objects in remote:path.")
|
|
546
|
+
@click.argument("path")
|
|
533
547
|
def size(path: str):
|
|
534
548
|
click.echo(smart_getsize(path))
|
|
535
549
|
|
|
536
550
|
|
|
537
|
-
@cli.command(
|
|
538
|
-
|
|
539
|
-
@click.argument('path')
|
|
551
|
+
@cli.command(short_help="Return the mtime and number of objects in remote:path.")
|
|
552
|
+
@click.argument("path")
|
|
540
553
|
def mtime(path: str):
|
|
541
554
|
click.echo(smart_getmtime(path))
|
|
542
555
|
|
|
543
556
|
|
|
544
|
-
@cli.command(short_help=
|
|
545
|
-
@click.argument(
|
|
557
|
+
@cli.command(short_help="Return the stat and number of objects in remote:path.")
|
|
558
|
+
@click.argument("path")
|
|
546
559
|
def stat(path: str):
|
|
547
560
|
click.echo(smart_stat(path))
|
|
548
561
|
|
|
549
562
|
|
|
550
|
-
@cli.command(short_help=
|
|
563
|
+
@cli.command(short_help="Return the megfile version.")
|
|
551
564
|
def version():
|
|
552
565
|
click.echo(VERSION)
|
|
553
566
|
|
|
554
567
|
|
|
555
|
-
@cli.group(short_help=
|
|
568
|
+
@cli.group(short_help="Return the config file")
|
|
556
569
|
def config():
|
|
557
570
|
pass
|
|
558
571
|
|
|
559
572
|
|
|
560
|
-
@config.command(short_help=
|
|
573
|
+
@config.command(short_help="Return the config file for s3")
|
|
561
574
|
@click.option(
|
|
562
|
-
|
|
563
|
-
|
|
575
|
+
"-p",
|
|
576
|
+
"--path",
|
|
564
577
|
type=str,
|
|
565
|
-
default=
|
|
566
|
-
help=
|
|
578
|
+
default="~/.aws/credentials",
|
|
579
|
+
help="s3 config file, default is $HOME/.aws/credentials",
|
|
567
580
|
)
|
|
568
581
|
@click.option(
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
@click.argument(
|
|
572
|
-
@click.
|
|
573
|
-
@click.option(
|
|
574
|
-
@click.option(
|
|
582
|
+
"-n", "--profile-name", type=str, default="default", help="s3 config file"
|
|
583
|
+
)
|
|
584
|
+
@click.argument("aws_access_key_id")
|
|
585
|
+
@click.argument("aws_secret_access_key")
|
|
586
|
+
@click.option("-e", "--endpoint-url", help="endpoint-url")
|
|
587
|
+
@click.option("-s", "--addressing-style", help="addressing-style")
|
|
588
|
+
@click.option("--no-cover", is_flag=True, help="Not cover the same-name config")
|
|
575
589
|
def s3(
|
|
576
|
-
|
|
577
|
-
|
|
590
|
+
path,
|
|
591
|
+
profile_name,
|
|
592
|
+
aws_access_key_id,
|
|
593
|
+
aws_secret_access_key,
|
|
594
|
+
endpoint_url,
|
|
595
|
+
addressing_style,
|
|
596
|
+
no_cover,
|
|
597
|
+
):
|
|
578
598
|
path = os.path.expanduser(path)
|
|
579
599
|
|
|
580
600
|
config_dict = {
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
601
|
+
"name": profile_name,
|
|
602
|
+
"aws_access_key_id": aws_access_key_id,
|
|
603
|
+
"aws_secret_access_key": aws_secret_access_key,
|
|
584
604
|
}
|
|
585
605
|
s3 = {}
|
|
586
606
|
if endpoint_url:
|
|
587
|
-
s3.update({
|
|
607
|
+
s3.update({"endpoint_url": endpoint_url})
|
|
588
608
|
if addressing_style:
|
|
589
|
-
s3.update({
|
|
609
|
+
s3.update({"addressing_style": addressing_style})
|
|
590
610
|
if s3:
|
|
591
|
-
config_dict.update({
|
|
611
|
+
config_dict.update({"s3": s3})
|
|
592
612
|
|
|
593
613
|
def dumps(config_dict: dict) -> str:
|
|
594
|
-
content =
|
|
595
|
-
content +=
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
if
|
|
600
|
-
content +=
|
|
601
|
-
s3: dict = config_dict[
|
|
602
|
-
if
|
|
603
|
-
content +=
|
|
604
|
-
if
|
|
605
|
-
content +=
|
|
606
|
-
s3['addressing_style'])
|
|
614
|
+
content = "[{}]\n".format(config_dict["name"])
|
|
615
|
+
content += "aws_access_key_id = {}\n".format(config_dict["aws_access_key_id"])
|
|
616
|
+
content += "aws_secret_access_key = {}\n".format(
|
|
617
|
+
config_dict["aws_secret_access_key"]
|
|
618
|
+
)
|
|
619
|
+
if "s3" in config_dict.keys():
|
|
620
|
+
content += "\ns3 = \n"
|
|
621
|
+
s3: dict = config_dict["s3"]
|
|
622
|
+
if "endpoint_url" in s3.keys():
|
|
623
|
+
content += " endpoint_url = {}\n".format(s3["endpoint_url"])
|
|
624
|
+
if "addressing_style" in s3.keys():
|
|
625
|
+
content += " addressing_style = {}\n".format(s3["addressing_style"])
|
|
607
626
|
return content
|
|
608
627
|
|
|
609
628
|
os.makedirs(os.path.dirname(path), exist_ok=True) # make sure dirpath exist
|
|
610
|
-
if not os.path.exists(path): #If this file doesn't exist.
|
|
629
|
+
if not os.path.exists(path): # If this file doesn't exist.
|
|
611
630
|
content_str = dumps(config_dict)
|
|
612
|
-
with open(path,
|
|
631
|
+
with open(path, "w") as fp:
|
|
613
632
|
fp.write(content_str)
|
|
614
|
-
click.echo(f
|
|
633
|
+
click.echo(f"Your oss config has been saved into {path}")
|
|
615
634
|
return
|
|
616
635
|
|
|
617
636
|
# This file is already exists.
|
|
618
637
|
# (Considering the occasion that profile_name has been used)
|
|
619
638
|
used = False
|
|
620
|
-
with open(path,
|
|
639
|
+
with open(path, "r") as fp:
|
|
621
640
|
text = fp.read()
|
|
622
|
-
sections = text.strip().split(
|
|
641
|
+
sections = text.strip().split("[")
|
|
623
642
|
|
|
624
643
|
if len(sections[0]) <= 1:
|
|
625
644
|
sections = sections[1:]
|
|
626
645
|
|
|
627
646
|
for i in range(0, len(sections)):
|
|
628
647
|
section = sections[i]
|
|
629
|
-
cur_name = section.split(
|
|
648
|
+
cur_name = section.split("]")[0]
|
|
630
649
|
# Given profile_name has been used.
|
|
631
650
|
if cur_name == profile_name:
|
|
632
651
|
if no_cover: # default True(cover the same-name config).
|
|
633
|
-
raise NameError(f
|
|
652
|
+
raise NameError(f"profile-name has been used: {profile_name}")
|
|
634
653
|
used = True
|
|
635
654
|
sections[i] = dumps(config_dict)
|
|
636
655
|
continue
|
|
637
|
-
sections[i] =
|
|
638
|
-
click.echo(f
|
|
639
|
-
text =
|
|
640
|
-
if not used: #Given profile_name not been used.
|
|
641
|
-
text +=
|
|
642
|
-
with open(path,
|
|
656
|
+
sections[i] = "\n" + ("[" + section).strip() + "\n"
|
|
657
|
+
click.echo(f"The {profile_name} config has been updated.")
|
|
658
|
+
text = "\n".join(sections)
|
|
659
|
+
if not used: # Given profile_name not been used.
|
|
660
|
+
text += "\n" + dumps(config_dict)
|
|
661
|
+
with open(path, "w") as fp:
|
|
643
662
|
fp.write(text)
|
|
644
|
-
click.echo(f
|
|
663
|
+
click.echo(f"Your oss config has been saved into {path}")
|
|
645
664
|
|
|
646
665
|
|
|
647
|
-
@config.command(short_help=
|
|
648
|
-
@click.argument(
|
|
666
|
+
@config.command(short_help="Return the config file for s3")
|
|
667
|
+
@click.argument("url")
|
|
649
668
|
@click.option(
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
default=
|
|
653
|
-
help=
|
|
669
|
+
"-p",
|
|
670
|
+
"--path",
|
|
671
|
+
default="~/.hdfscli.cfg",
|
|
672
|
+
help="s3 config file, default is $HOME/.hdfscli.cfg",
|
|
654
673
|
)
|
|
655
|
-
@click.option(
|
|
656
|
-
@click.option(
|
|
657
|
-
@click.option(
|
|
658
|
-
@click.option(
|
|
674
|
+
@click.option("-n", "--profile-name", default="default", help="s3 config file")
|
|
675
|
+
@click.option("-u", "--user", help="user name")
|
|
676
|
+
@click.option("-r", "--root", help="hdfs path's root dir")
|
|
677
|
+
@click.option("-t", "--token", help="token for requesting hdfs server")
|
|
659
678
|
@click.option(
|
|
660
|
-
|
|
661
|
-
|
|
662
|
-
help=f"request hdfs server timeout, default {DEFAULT_HDFS_TIMEOUT}"
|
|
663
|
-
|
|
679
|
+
"-o",
|
|
680
|
+
"--timeout",
|
|
681
|
+
help=f"request hdfs server timeout, default {DEFAULT_HDFS_TIMEOUT}",
|
|
682
|
+
)
|
|
683
|
+
@click.option("--no-cover", is_flag=True, help="Not cover the same-name config")
|
|
664
684
|
def hdfs(url, path, profile_name, user, root, token, timeout, no_cover):
|
|
665
685
|
path = os.path.expanduser(path)
|
|
666
686
|
current_config = {
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
687
|
+
"url": url,
|
|
688
|
+
"user": user,
|
|
689
|
+
"root": root,
|
|
690
|
+
"token": token,
|
|
691
|
+
"timeout": timeout,
|
|
672
692
|
}
|
|
673
693
|
profile_name = f"{profile_name}.alias"
|
|
674
694
|
config = configparser.ConfigParser()
|
|
675
695
|
if os.path.exists(path):
|
|
676
696
|
config.read(path)
|
|
677
|
-
if
|
|
678
|
-
config[
|
|
697
|
+
if "global" not in config.sections():
|
|
698
|
+
config["global"] = {"default.alias": "default"}
|
|
679
699
|
if profile_name in config.sections():
|
|
680
700
|
if no_cover:
|
|
681
|
-
raise NameError(f
|
|
701
|
+
raise NameError(f"profile-name has been used: {profile_name[:-6]}")
|
|
682
702
|
else:
|
|
683
703
|
config[profile_name] = {}
|
|
684
704
|
for key, value in current_config.items():
|
|
685
705
|
if value:
|
|
686
706
|
config[profile_name][key] = value
|
|
687
|
-
with open(path,
|
|
707
|
+
with open(path, "w") as fp:
|
|
688
708
|
config.write(fp)
|
|
689
|
-
click.echo(f
|
|
709
|
+
click.echo(f"Your hdfs config has been saved into {path}")
|
|
690
710
|
|
|
691
711
|
|
|
692
|
-
if __name__ ==
|
|
712
|
+
if __name__ == "__main__":
|
|
693
713
|
# Usage: python -m megfile.cli
|
|
694
714
|
safe_cli() # pragma: no cover
|