megfile 3.1.1__py3-none-any.whl → 3.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55):
  1. docs/conf.py +2 -4
  2. megfile/__init__.py +394 -203
  3. megfile/cli.py +258 -238
  4. megfile/config.py +25 -21
  5. megfile/errors.py +124 -114
  6. megfile/fs.py +174 -140
  7. megfile/fs_path.py +462 -354
  8. megfile/hdfs.py +133 -101
  9. megfile/hdfs_path.py +290 -236
  10. megfile/http.py +15 -14
  11. megfile/http_path.py +111 -107
  12. megfile/interfaces.py +70 -65
  13. megfile/lib/base_prefetch_reader.py +84 -65
  14. megfile/lib/combine_reader.py +12 -12
  15. megfile/lib/compare.py +17 -13
  16. megfile/lib/compat.py +1 -5
  17. megfile/lib/fnmatch.py +29 -30
  18. megfile/lib/glob.py +46 -54
  19. megfile/lib/hdfs_prefetch_reader.py +40 -25
  20. megfile/lib/hdfs_tools.py +1 -3
  21. megfile/lib/http_prefetch_reader.py +69 -46
  22. megfile/lib/joinpath.py +5 -5
  23. megfile/lib/lazy_handler.py +7 -3
  24. megfile/lib/s3_buffered_writer.py +58 -51
  25. megfile/lib/s3_cached_handler.py +13 -14
  26. megfile/lib/s3_limited_seekable_writer.py +37 -28
  27. megfile/lib/s3_memory_handler.py +34 -30
  28. megfile/lib/s3_pipe_handler.py +24 -25
  29. megfile/lib/s3_prefetch_reader.py +71 -52
  30. megfile/lib/s3_share_cache_reader.py +37 -24
  31. megfile/lib/shadow_handler.py +7 -3
  32. megfile/lib/stdio_handler.py +9 -8
  33. megfile/lib/url.py +3 -3
  34. megfile/pathlike.py +259 -228
  35. megfile/s3.py +220 -153
  36. megfile/s3_path.py +977 -802
  37. megfile/sftp.py +190 -156
  38. megfile/sftp_path.py +540 -450
  39. megfile/smart.py +397 -330
  40. megfile/smart_path.py +100 -105
  41. megfile/stdio.py +10 -9
  42. megfile/stdio_path.py +32 -35
  43. megfile/utils/__init__.py +73 -54
  44. megfile/utils/mutex.py +11 -14
  45. megfile/version.py +1 -1
  46. {megfile-3.1.1.dist-info → megfile-3.1.2.dist-info}/METADATA +5 -8
  47. megfile-3.1.2.dist-info/RECORD +55 -0
  48. {megfile-3.1.1.dist-info → megfile-3.1.2.dist-info}/WHEEL +1 -1
  49. scripts/convert_results_to_sarif.py +45 -78
  50. scripts/generate_file.py +140 -64
  51. megfile-3.1.1.dist-info/RECORD +0 -55
  52. {megfile-3.1.1.dist-info → megfile-3.1.2.dist-info}/LICENSE +0 -0
  53. {megfile-3.1.1.dist-info → megfile-3.1.2.dist-info}/LICENSE.pyre +0 -0
  54. {megfile-3.1.1.dist-info → megfile-3.1.2.dist-info}/entry_points.txt +0 -0
  55. {megfile-3.1.1.dist-info → megfile-3.1.2.dist-info}/top_level.txt +0 -0
megfile/cli.py CHANGED
@@ -14,23 +14,48 @@ from megfile.config import DEFAULT_BLOCK_SIZE
14
14
  from megfile.hdfs_path import DEFAULT_HDFS_TIMEOUT
15
15
  from megfile.interfaces import FileEntry
16
16
  from megfile.lib.glob import get_non_glob_dir, has_magic
17
- from megfile.smart import _smart_sync_single_file, smart_copy, smart_exists, smart_getmd5, smart_getmtime, smart_getsize, smart_glob_stat, smart_isdir, smart_isfile, smart_makedirs, smart_move, smart_open, smart_path_join, smart_readlink, smart_remove, smart_rename, smart_scan_stat, smart_scandir, smart_stat, smart_sync, smart_sync_with_progress, smart_touch, smart_unlink
17
+ from megfile.smart import (
18
+ _smart_sync_single_file,
19
+ smart_copy,
20
+ smart_exists,
21
+ smart_getmd5,
22
+ smart_getmtime,
23
+ smart_getsize,
24
+ smart_glob_stat,
25
+ smart_isdir,
26
+ smart_isfile,
27
+ smart_makedirs,
28
+ smart_move,
29
+ smart_open,
30
+ smart_path_join,
31
+ smart_readlink,
32
+ smart_remove,
33
+ smart_rename,
34
+ smart_scan_stat,
35
+ smart_scandir,
36
+ smart_stat,
37
+ smart_sync,
38
+ smart_sync_with_progress,
39
+ smart_touch,
40
+ smart_unlink,
41
+ )
18
42
  from megfile.smart_path import SmartPath
19
43
  from megfile.utils import get_human_size
20
44
  from megfile.version import VERSION
21
45
 
22
46
  logging.basicConfig(level=logging.ERROR)
23
- logging.getLogger('megfile').setLevel(level=logging.INFO)
47
+ logging.getLogger("megfile").setLevel(level=logging.INFO)
24
48
  DEBUG = False
25
49
 
26
50
 
27
51
  @click.group()
28
- @click.option('--debug', is_flag=True, help='Enable debug mode.')
52
+ @click.option("--debug", is_flag=True, help="Enable debug mode.")
29
53
  def cli(debug):
30
54
  """
31
55
  Client for megfile.
32
-
33
- If you install megfile with ``--user``, you also need configure ``$HOME/.local/bin`` into ``$PATH``.
56
+
57
+ If you install megfile with ``--user``,
58
+ you also need configure ``$HOME/.local/bin`` into ``$PATH``.
34
59
  """
35
60
  global DEBUG
36
61
  DEBUG = debug
@@ -62,19 +87,19 @@ def simple_echo(file_stat, base_path: str = "", full_path: bool = False):
62
87
 
63
88
 
64
89
  def long_echo(file_stat, base_path: str = "", full_path: bool = False):
65
- return '%12d %s %s' % (
90
+ return "%12d %s %s" % (
66
91
  file_stat.stat.size,
67
- time.strftime(
68
- "%Y-%m-%d %H:%M:%S", time.localtime(file_stat.stat.mtime)),
69
- get_echo_path(file_stat, base_path, full_path))
92
+ time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(file_stat.stat.mtime)),
93
+ get_echo_path(file_stat, base_path, full_path),
94
+ )
70
95
 
71
96
 
72
97
  def human_echo(file_stat, base_path: str = "", full_path: bool = False):
73
- return '%10s %s %s' % (
98
+ return "%10s %s %s" % (
74
99
  get_human_size(file_stat.stat.size),
75
- time.strftime(
76
- "%Y-%m-%d %H:%M:%S", time.localtime(file_stat.stat.mtime)),
77
- get_echo_path(file_stat, base_path, full_path))
100
+ time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(file_stat.stat.mtime)),
101
+ get_echo_path(file_stat, base_path, full_path),
102
+ )
78
103
 
79
104
 
80
105
  def smart_list_stat(path):
@@ -114,67 +139,65 @@ def _ls(path: str, long: bool, recursive: bool, human_readable: bool):
114
139
  link = smart_readlink(file_stat.path)
115
140
  except FileNotFoundError as e:
116
141
  link = repr(e)
117
- output += ' -> %s' % link
142
+ output += " -> %s" % link
118
143
  click.echo(output)
119
144
  if long:
120
- click.echo(f'total({total_count}): {get_human_size(total_size)}')
145
+ click.echo(f"total({total_count}): {get_human_size(total_size)}")
121
146
 
122
147
 
123
- @cli.command(short_help='List all the objects in the path.')
124
- @click.argument('path')
148
+ @cli.command(short_help="List all the objects in the path.")
149
+ @click.argument("path")
125
150
  @click.option(
126
- '-l',
127
- '--long',
151
+ "-l",
152
+ "--long",
128
153
  is_flag=True,
129
- help='List all the objects in the path with size, modification time and path.'
154
+ help="List all the objects in the path with size, modification time and path.",
130
155
  )
131
156
  @click.option(
132
- '-r',
133
- '--recursive',
157
+ "-r",
158
+ "--recursive",
134
159
  is_flag=True,
135
- help=
136
- 'Command is performed on all files or objects under the specified directory or prefix.'
160
+ help="Command is performed on all files or objects "
161
+ "under the specified directory or prefix.",
137
162
  )
138
163
  @click.option(
139
- '-h',
140
- '--human-readable',
164
+ "-h",
165
+ "--human-readable",
141
166
  is_flag=True,
142
- help='Displays file sizes in human readable format.')
167
+ help="Displays file sizes in human readable format.",
168
+ )
143
169
  def ls(path: str, long: bool, recursive: bool, human_readable: bool):
144
170
  _ls(path, long=long, recursive=recursive, human_readable=human_readable)
145
171
 
146
172
 
147
- @cli.command(short_help='List all the objects in the path.')
148
- @click.argument('path')
173
+ @cli.command(short_help="List all the objects in the path.")
174
+ @click.argument("path")
149
175
  @click.option(
150
- '-r',
151
- '--recursive',
176
+ "-r",
177
+ "--recursive",
152
178
  is_flag=True,
153
- help=
154
- 'Command is performed on all files or objects under the specified directory or prefix.'
179
+ help="Command is performed on all files or objects under "
180
+ "the specified directory or prefix.",
155
181
  )
156
182
  def ll(path: str, recursive: bool):
157
183
  _ls(path, long=True, recursive=recursive, human_readable=True)
158
184
 
159
185
 
160
- @cli.command(
161
- short_help='Copy files from source to dest, skipping already copied.')
162
- @click.argument('src_path')
163
- @click.argument('dst_path')
186
+ @cli.command(short_help="Copy files from source to dest, skipping already copied.")
187
+ @click.argument("src_path")
188
+ @click.argument("dst_path")
164
189
  @click.option(
165
- '-r',
166
- '--recursive',
190
+ "-r",
191
+ "--recursive",
167
192
  is_flag=True,
168
- help=
169
- 'Command is performed on all files or objects under the specified directory or prefix.'
193
+ help="Command is performed on all files or objects "
194
+ "under the specified directory or prefix.",
170
195
  )
171
196
  @click.option(
172
- '-T',
173
- '--no-target-directory',
174
- is_flag=True,
175
- help='treat dst_path as a normal file.')
176
- @click.option('-g', '--progress-bar', is_flag=True, help='Show progress bar.')
177
- @click.option('--skip', is_flag=True, help='Skip existed files.')
197
+ "-T", "--no-target-directory", is_flag=True, help="treat dst_path as a normal file."
198
+ )
199
+ @click.option("-g", "--progress-bar", is_flag=True, help="Show progress bar.")
200
+ @click.option("--skip", is_flag=True, help="Skip existed files.")
178
201
  def cp(
179
202
  src_path: str,
180
203
  dst_path: str,
@@ -183,12 +206,10 @@ def cp(
183
206
  progress_bar: bool,
184
207
  skip: bool,
185
208
  ):
186
- if not no_target_directory and (dst_path.endswith('/') or
187
- smart_isdir(dst_path)):
209
+ if not no_target_directory and (dst_path.endswith("/") or smart_isdir(dst_path)):
188
210
  dst_path = smart_path_join(dst_path, os.path.basename(src_path))
189
211
  if recursive:
190
- with ThreadPoolExecutor(max_workers=(os.cpu_count() or 1) *
191
- 2) as executor:
212
+ with ThreadPoolExecutor(max_workers=(os.cpu_count() or 1) * 2) as executor:
192
213
  if progress_bar:
193
214
  smart_sync_with_progress(
194
215
  src_path,
@@ -196,7 +217,8 @@ def cp(
196
217
  followlinks=True,
197
218
  map_func=executor.map,
198
219
  force=True,
199
- overwrite=not skip)
220
+ overwrite=not skip,
221
+ )
200
222
  else:
201
223
  smart_sync(
202
224
  src_path,
@@ -204,44 +226,43 @@ def cp(
204
226
  followlinks=True,
205
227
  map_func=executor.map,
206
228
  force=True,
207
- overwrite=not skip)
229
+ overwrite=not skip,
230
+ )
208
231
  else:
209
232
  if progress_bar:
210
233
  file_size = smart_stat(src_path).size
211
234
  sbar = tqdm(
212
235
  total=file_size,
213
- unit='B',
236
+ unit="B",
214
237
  ascii=True,
215
238
  unit_scale=True,
216
- unit_divisor=1024)
239
+ unit_divisor=1024,
240
+ )
217
241
 
218
242
  def callback(length: int):
219
243
  sbar.update(length)
220
244
 
221
- smart_copy(
222
- src_path, dst_path, callback=callback, overwrite=not skip)
245
+ smart_copy(src_path, dst_path, callback=callback, overwrite=not skip)
223
246
  sbar.close()
224
247
  else:
225
248
  smart_copy(src_path, dst_path, overwrite=not skip)
226
249
 
227
250
 
228
- @cli.command(short_help='Move files from source to dest.')
229
- @click.argument('src_path')
230
- @click.argument('dst_path')
251
+ @cli.command(short_help="Move files from source to dest.")
252
+ @click.argument("src_path")
253
+ @click.argument("dst_path")
231
254
  @click.option(
232
- '-r',
233
- '--recursive',
255
+ "-r",
256
+ "--recursive",
234
257
  is_flag=True,
235
- help=
236
- 'Command is performed on all files or objects under the specified directory or prefix.'
258
+ help="Command is performed on all files or objects "
259
+ "under the specified directory or prefix.",
237
260
  )
238
261
  @click.option(
239
- '-T',
240
- '--no-target-directory',
241
- is_flag=True,
242
- help='treat dst_path as a normal file.')
243
- @click.option('-g', '--progress-bar', is_flag=True, help='Show progress bar.')
244
- @click.option('--skip', is_flag=True, help='Skip existed files.')
262
+ "-T", "--no-target-directory", is_flag=True, help="treat dst_path as a normal file."
263
+ )
264
+ @click.option("-g", "--progress-bar", is_flag=True, help="Show progress bar.")
265
+ @click.option("--skip", is_flag=True, help="Skip existed files.")
245
266
  def mv(
246
267
  src_path: str,
247
268
  dst_path: str,
@@ -250,8 +271,7 @@ def mv(
250
271
  progress_bar: bool,
251
272
  skip: bool,
252
273
  ):
253
- if not no_target_directory and (dst_path.endswith('/') or
254
- smart_isdir(dst_path)):
274
+ if not no_target_directory and (dst_path.endswith("/") or smart_isdir(dst_path)):
255
275
  dst_path = smart_path_join(dst_path, os.path.basename(src_path))
256
276
  if progress_bar:
257
277
  src_protocol, _ = SmartPath._extract_protocol(src_path)
@@ -264,7 +284,8 @@ def mv(
264
284
  t.update(1)
265
285
  else:
266
286
  smart_sync_with_progress(
267
- src_path, dst_path, followlinks=True, overwrite=not skip)
287
+ src_path, dst_path, followlinks=True, overwrite=not skip
288
+ )
268
289
  smart_remove(src_path)
269
290
  else:
270
291
  if src_protocol == dst_protocol:
@@ -275,16 +296,16 @@ def mv(
275
296
  file_size = smart_stat(src_path).size
276
297
  sbar = tqdm(
277
298
  total=file_size,
278
- unit='B',
299
+ unit="B",
279
300
  ascii=True,
280
301
  unit_scale=True,
281
- unit_divisor=1024)
302
+ unit_divisor=1024,
303
+ )
282
304
 
283
305
  def callback(length: int):
284
306
  sbar.update(length)
285
307
 
286
- smart_copy(
287
- src_path, dst_path, callback=callback, overwrite=not skip)
308
+ smart_copy(src_path, dst_path, callback=callback, overwrite=not skip)
288
309
  smart_unlink(src_path)
289
310
  sbar.close()
290
311
  else:
@@ -292,75 +313,74 @@ def mv(
292
313
  move_func(src_path, dst_path, overwrite=not skip)
293
314
 
294
315
 
295
- @cli.command(short_help='Remove files from path.')
296
- @click.argument('path')
316
+ @cli.command(short_help="Remove files from path.")
317
+ @click.argument("path")
297
318
  @click.option(
298
- '-r',
299
- '--recursive',
319
+ "-r",
320
+ "--recursive",
300
321
  is_flag=True,
301
- help=
302
- 'Command is performed on all files or objects under the specified directory or prefix.'
322
+ help="Command is performed on all files or objects "
323
+ "under the specified directory or prefix.",
303
324
  )
304
325
  def rm(path: str, recursive: bool):
305
326
  remove_func = smart_remove if recursive else smart_unlink
306
327
  remove_func(path)
307
328
 
308
329
 
309
- @cli.command(
310
- short_help='Make source and dest identical, modifying destination only.')
311
- @click.argument('src_path')
312
- @click.argument('dst_path')
313
- @click.option('-g', '--progress-bar', is_flag=True, help='Show progress bar.')
330
+ @cli.command(short_help="Make source and dest identical, modifying destination only.")
331
+ @click.argument("src_path")
332
+ @click.argument("dst_path")
333
+ @click.option("-g", "--progress-bar", is_flag=True, help="Show progress bar.")
314
334
  @click.option(
315
- '-w',
316
- '--worker',
317
- type=click.INT,
318
- default=8,
319
- help='Number of concurrent workers.')
335
+ "-w", "--worker", type=click.INT, default=8, help="Number of concurrent workers."
336
+ )
320
337
  @click.option(
321
- '-f',
322
- '--force',
323
- is_flag=True,
324
- help='Copy files forcible, ignore same files.')
325
- @click.option('-q', '--quiet', is_flag=True, help='Not show any progress log.')
326
- @click.option('--skip', is_flag=True, help='Skip existed files.')
338
+ "-f", "--force", is_flag=True, help="Copy files forcible, ignore same files."
339
+ )
340
+ @click.option("-q", "--quiet", is_flag=True, help="Not show any progress log.")
341
+ @click.option("--skip", is_flag=True, help="Skip existed files.")
327
342
  def sync(
328
- src_path: str, dst_path: str, progress_bar: bool, worker: int,
329
- force: bool, quiet: bool, skip: bool):
343
+ src_path: str,
344
+ dst_path: str,
345
+ progress_bar: bool,
346
+ worker: int,
347
+ force: bool,
348
+ quiet: bool,
349
+ skip: bool,
350
+ ):
330
351
  with ThreadPoolExecutor(max_workers=worker) as executor:
331
352
  if has_magic(src_path):
332
353
  src_root_path = get_non_glob_dir(src_path)
333
354
  if not smart_exists(src_root_path):
334
- raise FileNotFoundError(f'No match file: {src_path}')
355
+ raise FileNotFoundError(f"No match file: {src_path}")
335
356
 
336
357
  def scan_func(path):
337
358
  for glob_file_entry in smart_glob_stat(path):
338
359
  if glob_file_entry.is_file():
339
360
  yield glob_file_entry
340
361
  else:
341
- for file_entry in smart_scan_stat(glob_file_entry.path,
342
- followlinks=True):
362
+ for file_entry in smart_scan_stat(
363
+ glob_file_entry.path, followlinks=True
364
+ ):
343
365
  yield file_entry
366
+
344
367
  else:
345
368
  if not smart_exists(src_path):
346
- raise FileNotFoundError(f'No match file: {src_path}')
369
+ raise FileNotFoundError(f"No match file: {src_path}")
347
370
  src_root_path = src_path
348
371
  scan_func = partial(smart_scan_stat, followlinks=True)
349
372
 
350
373
  if progress_bar and not quiet:
351
- print('building progress bar', end='\r')
374
+ print("building progress bar", end="\r")
352
375
  file_entries = []
353
376
  total_count = total_size = 0
354
- for total_count, file_entry in enumerate(scan_func(src_path),
355
- start=1):
377
+ for total_count, file_entry in enumerate(scan_func(src_path), start=1):
356
378
  if total_count > 1024 * 128:
357
379
  file_entries = []
358
380
  else:
359
381
  file_entries.append(file_entry)
360
382
  total_size += file_entry.stat.size
361
- print(
362
- f'building progress bar, find {total_count} files',
363
- end='\r')
383
+ print(f"building progress bar, find {total_count} files", end="\r")
364
384
 
365
385
  if not file_entries:
366
386
  file_entries = scan_func(src_path)
@@ -373,11 +393,12 @@ def sync(
373
393
  else:
374
394
  tbar = tqdm(total=total_count, ascii=True)
375
395
  sbar = tqdm(
376
- unit='B',
396
+ unit="B",
377
397
  ascii=True,
378
398
  unit_scale=True,
379
399
  unit_divisor=1024,
380
- total=total_size)
400
+ total=total_size,
401
+ )
381
402
 
382
403
  def callback(_filename: str, length: int):
383
404
  sbar.update(length)
@@ -395,7 +416,9 @@ def sync(
395
416
  callback_after_copy_file=callback_after_copy_file,
396
417
  force=force,
397
418
  overwrite=not skip,
398
- ) for file_entry in file_entries)
419
+ )
420
+ for file_entry in file_entries
421
+ )
399
422
  list(executor.map(_smart_sync_single_file, params_iter))
400
423
  if not quiet:
401
424
  tbar.close()
@@ -405,36 +428,33 @@ def sync(
405
428
 
406
429
 
407
430
  @cli.command(short_help="Make the path if it doesn't already exist.")
408
- @click.argument('path')
431
+ @click.argument("path")
409
432
  def mkdir(path: str):
410
433
  smart_makedirs(path)
411
434
 
412
435
 
413
436
  @cli.command(short_help="Make the file if it doesn't already exist.")
414
- @click.argument('path')
437
+ @click.argument("path")
415
438
  def touch(path: str):
416
439
  smart_touch(path)
417
440
 
418
441
 
419
- @cli.command(short_help='Concatenate any files and send them to stdout.')
420
- @click.argument('path')
442
+ @cli.command(short_help="Concatenate any files and send them to stdout.")
443
+ @click.argument("path")
421
444
  def cat(path: str):
422
- with smart_open(path, 'rb') as f:
445
+ with smart_open(path, "rb") as f:
423
446
  shutil.copyfileobj(f, sys.stdout.buffer) # pytype: disable=wrong-arg-types
424
447
 
425
448
 
426
449
  @cli.command(
427
- short_help='Concatenate any files and send first n lines of them to stdout.'
450
+ short_help="Concatenate any files and send first n lines of them to stdout."
428
451
  )
429
- @click.argument('path')
452
+ @click.argument("path")
430
453
  @click.option(
431
- '-n',
432
- '--lines',
433
- type=click.INT,
434
- default=10,
435
- help='print the first NUM lines')
454
+ "-n", "--lines", type=click.INT, default=10, help="print the first NUM lines"
455
+ )
436
456
  def head(path: str, lines: int):
437
- with smart_open(path, 'rb') as f:
457
+ with smart_open(path, "rb") as f:
438
458
  for _ in range(lines):
439
459
  try:
440
460
  content = f.readline()
@@ -442,36 +462,32 @@ def head(path: str, lines: int):
442
462
  break
443
463
  except EOFError:
444
464
  break
445
- click.echo(content.strip(b'\n'))
465
+ click.echo(content.strip(b"\n"))
446
466
 
447
467
 
448
468
  @cli.command(
449
- short_help='Concatenate any files and send last n lines of them to stdout.')
450
- @click.argument('path')
469
+ short_help="Concatenate any files and send last n lines of them to stdout."
470
+ )
471
+ @click.argument("path")
451
472
  @click.option(
452
- '-n',
453
- '--lines',
454
- type=click.INT,
455
- default=10,
456
- help='print the last NUM lines')
473
+ "-n", "--lines", type=click.INT, default=10, help="print the last NUM lines"
474
+ )
457
475
  @click.option(
458
- '-f',
459
- '--follow',
460
- is_flag=True,
461
- help='output appended data as the file grows')
476
+ "-f", "--follow", is_flag=True, help="output appended data as the file grows"
477
+ )
462
478
  def tail(path: str, lines: int, follow: bool):
463
479
  line_list = []
464
- with smart_open(path, 'rb') as f:
480
+ with smart_open(path, "rb") as f:
465
481
  f.seek(0, os.SEEK_END)
466
482
  file_size = f.tell()
467
483
  f.seek(0, os.SEEK_SET)
468
484
 
469
- for current_offset in range(file_size - DEFAULT_BLOCK_SIZE,
470
- 0 - DEFAULT_BLOCK_SIZE,
471
- -DEFAULT_BLOCK_SIZE):
485
+ for current_offset in range(
486
+ file_size - DEFAULT_BLOCK_SIZE, 0 - DEFAULT_BLOCK_SIZE, -DEFAULT_BLOCK_SIZE
487
+ ):
472
488
  current_offset = max(0, current_offset)
473
489
  f.seek(current_offset)
474
- block_lines = f.read(DEFAULT_BLOCK_SIZE).split(b'\n')
490
+ block_lines = f.read(DEFAULT_BLOCK_SIZE).split(b"\n")
475
491
  if len(line_list) > 0:
476
492
  block_lines[-1] += line_list[0]
477
493
  block_lines.extend(line_list[1:])
@@ -489,7 +505,7 @@ def tail(path: str, lines: int, follow: bool):
489
505
  if follow:
490
506
  offset = file_size
491
507
  while True:
492
- with smart_open(path, 'rb') as f:
508
+ with smart_open(path, "rb") as f:
493
509
  f.seek(offset)
494
510
  line = f.readline()
495
511
  offset = f.tell()
@@ -499,17 +515,17 @@ def tail(path: str, lines: int, follow: bool):
499
515
  click.echo(line, nl=False)
500
516
 
501
517
 
502
- @cli.command(short_help='Write bytes from stdin to file.')
503
- @click.argument('path')
504
- @click.option('-a', '--append', is_flag=True, help='Append to the given file')
505
- @click.option(
506
- '-o', '--stdout', is_flag=True, help='File content to standard output')
518
+ @cli.command(short_help="Write bytes from stdin to file.")
519
+ @click.argument("path")
520
+ @click.option("-a", "--append", is_flag=True, help="Append to the given file")
521
+ @click.option("-o", "--stdout", is_flag=True, help="File content to standard output")
507
522
  def to(path: str, append: bool, stdout: bool):
508
- mode = 'wb'
523
+ mode = "wb"
509
524
  if append:
510
- mode = 'ab'
511
- with smart_open('stdio://0', 'rb') as stdin, smart_open(
512
- path, mode) as f, smart_open('stdio://1', 'wb') as stdout_fd:
525
+ mode = "ab"
526
+ with smart_open("stdio://0", "rb") as stdin, smart_open(
527
+ path, mode
528
+ ) as f, smart_open("stdio://1", "wb") as stdout_fd:
513
529
  length = 16 * 1024
514
530
  while True:
515
531
  buf = stdin.read(length)
@@ -520,175 +536,179 @@ def to(path: str, append: bool, stdout: bool):
520
536
  stdout_fd.write(buf)
521
537
 
522
538
 
523
- @cli.command(
524
- short_help='Produce an md5sum file for all the objects in the path.')
525
- @click.argument('path')
539
+ @cli.command(short_help="Produce an md5sum file for all the objects in the path.")
540
+ @click.argument("path")
526
541
  def md5sum(path: str):
527
542
  click.echo(smart_getmd5(path, recalculate=True))
528
543
 
529
544
 
530
- @cli.command(
531
- short_help='Return the total size and number of objects in remote:path.')
532
- @click.argument('path')
545
+ @cli.command(short_help="Return the total size and number of objects in remote:path.")
546
+ @click.argument("path")
533
547
  def size(path: str):
534
548
  click.echo(smart_getsize(path))
535
549
 
536
550
 
537
- @cli.command(
538
- short_help='Return the mtime and number of objects in remote:path.')
539
- @click.argument('path')
551
+ @cli.command(short_help="Return the mtime and number of objects in remote:path.")
552
+ @click.argument("path")
540
553
  def mtime(path: str):
541
554
  click.echo(smart_getmtime(path))
542
555
 
543
556
 
544
- @cli.command(short_help='Return the stat and number of objects in remote:path.')
545
- @click.argument('path')
557
+ @cli.command(short_help="Return the stat and number of objects in remote:path.")
558
+ @click.argument("path")
546
559
  def stat(path: str):
547
560
  click.echo(smart_stat(path))
548
561
 
549
562
 
550
- @cli.command(short_help='Return the megfile version.')
563
+ @cli.command(short_help="Return the megfile version.")
551
564
  def version():
552
565
  click.echo(VERSION)
553
566
 
554
567
 
555
- @cli.group(short_help='Return the config file')
568
+ @cli.group(short_help="Return the config file")
556
569
  def config():
557
570
  pass
558
571
 
559
572
 
560
- @config.command(short_help='Return the config file for s3')
573
+ @config.command(short_help="Return the config file for s3")
561
574
  @click.option(
562
- '-p',
563
- '--path',
575
+ "-p",
576
+ "--path",
564
577
  type=str,
565
- default='~/.aws/credentials',
566
- help='s3 config file, default is $HOME/.aws/credentials',
578
+ default="~/.aws/credentials",
579
+ help="s3 config file, default is $HOME/.aws/credentials",
567
580
  )
568
581
  @click.option(
569
- '-n', '--profile-name', type=str, default='default', help='s3 config file')
570
- @click.argument('aws_access_key_id')
571
- @click.argument('aws_secret_access_key')
572
- @click.option('-e', '--endpoint-url', help='endpoint-url')
573
- @click.option('-s', '--addressing-style', help='addressing-style')
574
- @click.option('--no-cover', is_flag=True, help='Not cover the same-name config')
582
+ "-n", "--profile-name", type=str, default="default", help="s3 config file"
583
+ )
584
+ @click.argument("aws_access_key_id")
585
+ @click.argument("aws_secret_access_key")
586
+ @click.option("-e", "--endpoint-url", help="endpoint-url")
587
+ @click.option("-s", "--addressing-style", help="addressing-style")
588
+ @click.option("--no-cover", is_flag=True, help="Not cover the same-name config")
575
589
  def s3(
576
- path, profile_name, aws_access_key_id, aws_secret_access_key,
577
- endpoint_url, addressing_style, no_cover):
590
+ path,
591
+ profile_name,
592
+ aws_access_key_id,
593
+ aws_secret_access_key,
594
+ endpoint_url,
595
+ addressing_style,
596
+ no_cover,
597
+ ):
578
598
  path = os.path.expanduser(path)
579
599
 
580
600
  config_dict = {
581
- 'name': profile_name,
582
- 'aws_access_key_id': aws_access_key_id,
583
- 'aws_secret_access_key': aws_secret_access_key,
601
+ "name": profile_name,
602
+ "aws_access_key_id": aws_access_key_id,
603
+ "aws_secret_access_key": aws_secret_access_key,
584
604
  }
585
605
  s3 = {}
586
606
  if endpoint_url:
587
- s3.update({'endpoint_url': endpoint_url})
607
+ s3.update({"endpoint_url": endpoint_url})
588
608
  if addressing_style:
589
- s3.update({'addressing_style': addressing_style})
609
+ s3.update({"addressing_style": addressing_style})
590
610
  if s3:
591
- config_dict.update({'s3': s3})
611
+ config_dict.update({"s3": s3})
592
612
 
593
613
  def dumps(config_dict: dict) -> str:
594
- content = '[{}]\n'.format(config_dict['name'])
595
- content += 'aws_access_key_id = {}\n'.format(
596
- config_dict['aws_access_key_id'])
597
- content += 'aws_secret_access_key = {}\n'.format(
598
- config_dict['aws_secret_access_key'])
599
- if 's3' in config_dict.keys():
600
- content += '\ns3 = \n'
601
- s3: dict = config_dict['s3']
602
- if 'endpoint_url' in s3.keys():
603
- content += ' endpoint_url = {}\n'.format(s3['endpoint_url'])
604
- if 'addressing_style' in s3.keys():
605
- content += ' addressing_style = {}\n'.format(
606
- s3['addressing_style'])
614
+ content = "[{}]\n".format(config_dict["name"])
615
+ content += "aws_access_key_id = {}\n".format(config_dict["aws_access_key_id"])
616
+ content += "aws_secret_access_key = {}\n".format(
617
+ config_dict["aws_secret_access_key"]
618
+ )
619
+ if "s3" in config_dict.keys():
620
+ content += "\ns3 = \n"
621
+ s3: dict = config_dict["s3"]
622
+ if "endpoint_url" in s3.keys():
623
+ content += " endpoint_url = {}\n".format(s3["endpoint_url"])
624
+ if "addressing_style" in s3.keys():
625
+ content += " addressing_style = {}\n".format(s3["addressing_style"])
607
626
  return content
608
627
 
609
628
  os.makedirs(os.path.dirname(path), exist_ok=True) # make sure dirpath exist
610
- if not os.path.exists(path): #If this file doesn't exist.
629
+ if not os.path.exists(path): # If this file doesn't exist.
611
630
  content_str = dumps(config_dict)
612
- with open(path, 'w') as fp:
631
+ with open(path, "w") as fp:
613
632
  fp.write(content_str)
614
- click.echo(f'Your oss config has been saved into {path}')
633
+ click.echo(f"Your oss config has been saved into {path}")
615
634
  return
616
635
 
617
636
  # This file is already exists.
618
637
  # (Considering the occasion that profile_name has been used)
619
638
  used = False
620
- with open(path, 'r') as fp:
639
+ with open(path, "r") as fp:
621
640
  text = fp.read()
622
- sections = text.strip().split('[')
641
+ sections = text.strip().split("[")
623
642
 
624
643
  if len(sections[0]) <= 1:
625
644
  sections = sections[1:]
626
645
 
627
646
  for i in range(0, len(sections)):
628
647
  section = sections[i]
629
- cur_name = section.split(']')[0]
648
+ cur_name = section.split("]")[0]
630
649
  # Given profile_name has been used.
631
650
  if cur_name == profile_name:
632
651
  if no_cover: # default True(cover the same-name config).
633
- raise NameError(f'profile-name has been used: {profile_name}')
652
+ raise NameError(f"profile-name has been used: {profile_name}")
634
653
  used = True
635
654
  sections[i] = dumps(config_dict)
636
655
  continue
637
- sections[i] = '\n' + ('[' + section).strip() + '\n'
638
- click.echo(f'The {profile_name} config has been updated.')
639
- text = '\n'.join(sections)
640
- if not used: #Given profile_name not been used.
641
- text += '\n' + dumps(config_dict)
642
- with open(path, 'w') as fp:
656
+ sections[i] = "\n" + ("[" + section).strip() + "\n"
657
+ click.echo(f"The {profile_name} config has been updated.")
658
+ text = "\n".join(sections)
659
+ if not used: # Given profile_name not been used.
660
+ text += "\n" + dumps(config_dict)
661
+ with open(path, "w") as fp:
643
662
  fp.write(text)
644
- click.echo(f'Your oss config has been saved into {path}')
663
+ click.echo(f"Your oss config has been saved into {path}")
645
664
 
646
665
 
647
- @config.command(short_help='Return the config file for s3')
648
- @click.argument('url')
666
+ @config.command(short_help="Return the config file for s3")
667
+ @click.argument("url")
649
668
  @click.option(
650
- '-p',
651
- '--path',
652
- default='~/.hdfscli.cfg',
653
- help='s3 config file, default is $HOME/.hdfscli.cfg',
669
+ "-p",
670
+ "--path",
671
+ default="~/.hdfscli.cfg",
672
+ help="s3 config file, default is $HOME/.hdfscli.cfg",
654
673
  )
655
- @click.option('-n', '--profile-name', default='default', help='s3 config file')
656
- @click.option('-u', '--user', help='user name')
657
- @click.option('-r', '--root', help="hdfs path's root dir")
658
- @click.option('-t', '--token', help="token for requesting hdfs server")
674
+ @click.option("-n", "--profile-name", default="default", help="s3 config file")
675
+ @click.option("-u", "--user", help="user name")
676
+ @click.option("-r", "--root", help="hdfs path's root dir")
677
+ @click.option("-t", "--token", help="token for requesting hdfs server")
659
678
  @click.option(
660
- '-o',
661
- '--timeout',
662
- help=f"request hdfs server timeout, default {DEFAULT_HDFS_TIMEOUT}")
663
- @click.option('--no-cover', is_flag=True, help='Not cover the same-name config')
679
+ "-o",
680
+ "--timeout",
681
+ help=f"request hdfs server timeout, default {DEFAULT_HDFS_TIMEOUT}",
682
+ )
683
+ @click.option("--no-cover", is_flag=True, help="Not cover the same-name config")
664
684
  def hdfs(url, path, profile_name, user, root, token, timeout, no_cover):
665
685
  path = os.path.expanduser(path)
666
686
  current_config = {
667
- 'url': url,
668
- 'user': user,
669
- 'root': root,
670
- 'token': token,
671
- 'timeout': timeout,
687
+ "url": url,
688
+ "user": user,
689
+ "root": root,
690
+ "token": token,
691
+ "timeout": timeout,
672
692
  }
673
693
  profile_name = f"{profile_name}.alias"
674
694
  config = configparser.ConfigParser()
675
695
  if os.path.exists(path):
676
696
  config.read(path)
677
- if 'global' not in config.sections():
678
- config['global'] = {'default.alias': 'default'}
697
+ if "global" not in config.sections():
698
+ config["global"] = {"default.alias": "default"}
679
699
  if profile_name in config.sections():
680
700
  if no_cover:
681
- raise NameError(f'profile-name has been used: {profile_name[:-6]}')
701
+ raise NameError(f"profile-name has been used: {profile_name[:-6]}")
682
702
  else:
683
703
  config[profile_name] = {}
684
704
  for key, value in current_config.items():
685
705
  if value:
686
706
  config[profile_name][key] = value
687
- with open(path, 'w') as fp:
707
+ with open(path, "w") as fp:
688
708
  config.write(fp)
689
- click.echo(f'Your hdfs config has been saved into {path}')
709
+ click.echo(f"Your hdfs config has been saved into {path}")
690
710
 
691
711
 
692
- if __name__ == '__main__':
712
+ if __name__ == "__main__":
693
713
  # Usage: python -m megfile.cli
694
714
  safe_cli() # pragma: no cover