cloud-files 4.27.0__py3-none-any.whl → 6.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,6 @@
1
1
  from collections import defaultdict
2
2
  from concurrent.futures import ProcessPoolExecutor
3
+ from datetime import datetime, timezone
3
4
  from functools import partial
4
5
  import itertools
5
6
  import json
@@ -16,8 +17,7 @@ import sys
16
17
  # High Sierra and above when we are using
17
18
  # a thread before forking. Instead, don't fork,
18
19
  # spawn entirely new processes.
19
- import multiprocess.context as ctx
20
- ctx._force_start_method('spawn')
20
+ mp.set_start_method("spawn", force=True)
21
21
 
22
22
  import click
23
23
  import pathos.pools
@@ -25,9 +25,10 @@ import pathos.pools
25
25
  import cloudfiles
26
26
  import cloudfiles.paths
27
27
  from cloudfiles import CloudFiles
28
+ from cloudfiles.monitoring import TransmissionMonitor, IOSampler, IOEnum
28
29
  from cloudfiles.resumable_tools import ResumableTransfer
29
30
  from cloudfiles.compression import transcode
30
- from cloudfiles.paths import extract, get_protocol
31
+ from cloudfiles.paths import extract, get_protocol, find_common_buckets
31
32
  from cloudfiles.lib import (
32
33
  mkdir, toabs, sip, toiter,
33
34
  first, red, green,
@@ -83,15 +84,20 @@ def license():
83
84
  print(f.read())
84
85
 
85
86
  @main.command()
86
- @click.option('--shortpath', is_flag=True, default=False, help='Don\'t print the common base path for each listed path.')
87
- @click.option('--flat', is_flag=True, default=False, help='Only produce a single level of directory hierarchy.')
88
- @click.option('-e','--expr',is_flag=True, default=False, help='Use a limited regexp language (e.g. [abc123]\{3\}) to generate prefixes.')
87
+ @click.option('--shortpath', is_flag=True, default=False, help='Don\'t print the common base path for each listed path.',show_default=True)
88
+ @click.option('--flat', is_flag=True, default=False, help='Only produce a single level of directory hierarchy.',show_default=True)
89
+ @click.option('-e','--expr',is_flag=True, default=False, help=r'Use a limited regexp language (e.g. [abc123]{3}) to generate prefixes.', show_default=True)
90
+ @click.option('--no-auth',is_flag=True, default=False, help='Uses the http API for read-only operations.', show_default=True)
89
91
  @click.argument("cloudpath")
90
- def ls(shortpath, flat, expr, cloudpath):
92
+ def ls(shortpath, flat, expr, cloudpath, no_auth):
91
93
  """Recursively lists the contents of a directory."""
92
94
  cloudpath = normalize_path(cloudpath)
93
95
 
94
- _, flt, prefix = get_mfp(cloudpath, True)
96
+ no_sign_request = no_auth # only affects s3
97
+ if no_auth and 's3://' not in cloudpath:
98
+ cloudpath = cloudfiles.paths.to_https_protocol(cloudpath)
99
+
100
+ _, flt, prefix, suffix = get_mfp(cloudpath, True)
95
101
  epath = extract(cloudpath)
96
102
  if len(epath.path) > 0:
97
103
  if prefix == "" and flt == False:
@@ -100,7 +106,7 @@ def ls(shortpath, flat, expr, cloudpath):
100
106
 
101
107
  flat = flat or flt
102
108
 
103
- cf = CloudFiles(cloudpath)
109
+ cf = CloudFiles(cloudpath, no_sign_request=no_sign_request)
104
110
  iterables = []
105
111
  if expr:
106
112
  # TODO: make this a reality using a parser
@@ -122,6 +128,10 @@ def ls(shortpath, flat, expr, cloudpath):
122
128
  iterables = [ cf.list(prefix=prefix, flat=flat) ]
123
129
 
124
130
  iterables = itertools.chain(*iterables)
131
+
132
+ if suffix:
133
+ iterables = ( x for x in iterables if x.endswith(suffix) )
134
+
125
135
  for pathset in sip(iterables, 1000):
126
136
  if not shortpath:
127
137
  pathset = [ cloudpathjoin(cloudpath, pth) for pth in pathset ]
@@ -146,6 +156,7 @@ def exprgen(prefix, matches):
146
156
 
147
157
  return finished_prefixes
148
158
 
159
+ SUFFIX_REGEXP = re.compile(r'\*([\w\d\-\._]+)$')
149
160
 
150
161
  def get_mfp(path, recursive):
151
162
  """many,flat,prefix"""
@@ -153,6 +164,13 @@ def get_mfp(path, recursive):
153
164
  flat = not recursive
154
165
  many = recursive
155
166
  prefix = ""
167
+ suffix = ""
168
+
169
+ matches = SUFFIX_REGEXP.search(path)
170
+ if matches is not None:
171
+ suffix = matches.groups()[0]
172
+ path = path.removesuffix(suffix)
173
+
156
174
  if path[-2:] == "**":
157
175
  many = True
158
176
  flat = False
@@ -162,7 +180,20 @@ def get_mfp(path, recursive):
162
180
  flat = True
163
181
  prefix = os.path.basename(path[:-1])
164
182
 
165
- return (many, flat, prefix)
183
+ return (many, flat, prefix, suffix)
184
+
185
+ @main.command("mkdir")
186
+ @click.argument("paths", nargs=-1)
187
+ def _mkdir(paths):
188
+ """
189
+ Create paths on the local file system.
190
+ """
191
+ for path in paths:
192
+ path = normalize_path(path)
193
+ protocol = get_protocol(path)
194
+
195
+ if protocol == "file":
196
+ mkdir(path.replace("file://", "", 1))
166
197
 
167
198
  @main.command()
168
199
  @click.argument("source", nargs=-1)
@@ -173,38 +204,61 @@ def get_mfp(path, recursive):
173
204
  @click.option('-b', '--block-size', default=128, help="Number of files to download at a time.", show_default=True)
174
205
  @click.option('--part-bytes', default=int(1e8), help="Composite upload threshold in bytes. Splits a file into pieces for some cloud services like gs and s3.", show_default=True)
175
206
  @click.option('--no-sign-request', is_flag=True, default=False, help="Use s3 in anonymous mode (don't sign requests) for the source.", show_default=True)
207
+ @click.option('--resumable', is_flag=True, default=False, help="http->file transfers will dowload to .part files while they are in progress.", show_default=True)
208
+ @click.option('--flight-time', is_flag=True, default=False, help="Save a Gantt chart of the file transfer to the local directory.", show_default=True)
209
+ @click.option('--io-rate', is_flag=True, default=False, help="Save a chart of bitrate estimated based on file sizes and transmission duration.", show_default=True)
210
+ @click.option('--machine-io-rate', is_flag=True, default=False, help="Save a chart of bitrate based on 4 Hz sampling OS network counters for the entire machine.", show_default=True)
211
+ @click.option('--machine-io-rate-buffer-sec', default=600, help="Circular buffer length in seconds. Only allocated if chart enabled. 1 sec = 96 bytes", show_default=True)
176
212
  @click.pass_context
177
213
  def cp(
178
214
  ctx, source, destination,
179
215
  recursive, compression, progress,
180
216
  block_size, part_bytes, no_sign_request,
217
+ resumable,
218
+ flight_time, io_rate,
219
+ machine_io_rate, machine_io_rate_buffer_sec,
181
220
  ):
182
221
  """
183
222
  Copy one or more files from a source to destination.
184
223
 
185
224
  If source is "-" read newline delimited filenames from stdin.
186
225
  If destination is "-" output to stdout.
187
-
188
- Note that for gs:// to gs:// transfers, the gsutil
189
- tool is more efficient because the files never leave
190
- Google's network.
191
226
  """
192
227
  use_stdout = (destination == '-')
193
228
  if len(source) > 1 and not ispathdir(destination) and not use_stdout:
194
229
  print("cloudfiles: destination must be a directory for multiple source files.")
195
230
  return
196
231
 
232
+ network_sampler = None
233
+ if machine_io_rate:
234
+ network_sampler = IOSampler(
235
+ buffer_sec=machine_io_rate_buffer_sec,
236
+ interval=0.25,
237
+ )
238
+ network_sampler.start_sampling()
239
+
197
240
  for src in source:
198
241
  _cp_single(
199
242
  ctx, src, destination, recursive,
200
243
  compression, progress, block_size,
201
- part_bytes, no_sign_request
244
+ part_bytes, no_sign_request,
245
+ resumable, flight_time, io_rate,
202
246
  )
203
247
 
248
+ if machine_io_rate:
249
+ filename = f"./cloudfiles-cp-measured-io-{_timestamp()}.png"
250
+ network_sampler.stop_sampling()
251
+ network_sampler.plot_histogram(
252
+ resolution=1.0,
253
+ filename=filename,
254
+ )
255
+ print(f"Saved chart: {filename}")
256
+
204
257
  def _cp_single(
205
258
  ctx, source, destination, recursive,
206
259
  compression, progress, block_size,
207
- part_bytes, no_sign_request
260
+ part_bytes, no_sign_request,
261
+ resumable, gantt, io_rate,
208
262
  ):
209
263
  use_stdin = (source == '-')
210
264
  use_stdout = (destination == '-')
@@ -214,8 +268,8 @@ def _cp_single(
214
268
 
215
269
  nsrc = normalize_path(source)
216
270
  ndest = normalize_path(destination)
217
-
218
- issrcdir = (ispathdir(source) or CloudFiles(nsrc).isdir()) and use_stdin == False
271
+
272
+ issrcdir = (use_stdin == False) and (ispathdir(source) or CloudFiles(nsrc).isdir())
219
273
  isdestdir = (ispathdir(destination) or CloudFiles(ndest).isdir())
220
274
 
221
275
  recursive = recursive and issrcdir
@@ -238,7 +292,7 @@ def _cp_single(
238
292
 
239
293
  # The else clause here is to handle single file transfers
240
294
  srcpath = nsrc if issrcdir else os.path.dirname(nsrc)
241
- many, flat, prefix = get_mfp(nsrc, recursive)
295
+ many, flat, prefix, suffix = get_mfp(nsrc, recursive)
242
296
 
243
297
  if issrcdir and not many:
244
298
  print(f"cloudfiles: {source} is a directory (not copied).")
@@ -250,7 +304,10 @@ def _cp_single(
250
304
  xferpaths = [ x.replace("\n", "") for x in xferpaths ]
251
305
  prefix = os.path.commonprefix(xferpaths)
252
306
  xferpaths = [ x.replace(prefix, "") for x in xferpaths ]
253
- srcpath = cloudpathjoin(srcpath, prefix)
307
+ srcpath = prefix
308
+ if srcpath == "":
309
+ print(f"cloudfiles: No common prefix found. Currently only one bucket at a time is supported for STDIN.")
310
+ return
254
311
  elif many:
255
312
  xferpaths = CloudFiles(
256
313
  srcpath, no_sign_request=no_sign_request
@@ -270,8 +327,15 @@ def _cp_single(
270
327
  compression = False
271
328
 
272
329
  if not isinstance(xferpaths, str):
330
+ if suffix:
331
+ xferpaths = ( x for x in xferpaths if x.endswith(suffix) )
332
+
273
333
  if parallel == 1:
274
- _cp(srcpath, destpath, compression, progress, block_size, part_bytes, no_sign_request, xferpaths)
334
+ _cp(
335
+ srcpath, destpath, compression,
336
+ progress, block_size, part_bytes,
337
+ no_sign_request, resumable, gantt, io_rate, xferpaths
338
+ )
275
339
  return
276
340
 
277
341
  total = None
@@ -281,14 +345,22 @@ def _cp_single(
281
345
  pass
282
346
 
283
347
  if use_stdout:
284
- fn = partial(_cp_stdout, no_sign_request, srcpath)
348
+ fn = partial(_cp_stdout, srcpath, no_sign_request, False, False)
285
349
  else:
286
- fn = partial(_cp, srcpath, destpath, compression, False, block_size, part_bytes, no_sign_request)
350
+ fn = partial(
351
+ _cp, srcpath, destpath, compression, False,
352
+ block_size, part_bytes, no_sign_request, resumable, False, False,
353
+ )
287
354
 
355
+ tms = []
288
356
  with tqdm(desc="Transferring", total=total, disable=(not progress)) as pbar:
289
357
  with pathos.pools.ProcessPool(parallel) as executor:
290
- for _ in executor.imap(fn, sip(xferpaths, block_size)):
358
+ for tm in executor.imap(fn, sip(xferpaths, block_size)):
291
359
  pbar.update(block_size)
360
+ tms.append(tm)
361
+
362
+ tm = TransmissionMonitor.merge(tms)
363
+ del tms
292
364
  else:
293
365
  cfsrc = CloudFiles(srcpath, progress=progress, no_sign_request=no_sign_request)
294
366
  if not cfsrc.exists(xferpaths):
@@ -296,7 +368,7 @@ def _cp_single(
296
368
  return
297
369
 
298
370
  if use_stdout:
299
- _cp_stdout(srcpath, xferpaths)
371
+ _cp_stdout(srcpath, no_sign_request, gantt, io_rate, xferpaths)
300
372
  return
301
373
 
302
374
  cfdest = CloudFiles(
@@ -310,26 +382,238 @@ def _cp_single(
310
382
  else:
311
383
  new_path = os.path.basename(ndest)
312
384
 
313
- cfsrc.transfer_to(cfdest, paths=[{
385
+ tm = cfsrc.transfer_to(cfdest, paths=[{
314
386
  "path": xferpaths,
315
387
  "dest_path": new_path,
316
- }], reencode=compression)
388
+ }], reencode=compression, resumable=resumable)
389
+
390
+ ts = _timestamp()
391
+
392
+ if io_rate:
393
+ filename = f"./cloudfiles-cp-est-io-{ts}.png"
394
+ tm.plot_histogram(filename=filename)
395
+ print(f"Saved chart: {filename}")
317
396
 
318
- def _cp(src, dst, compression, progress, block_size, part_bytes, no_sign_request, paths):
397
+ if gantt:
398
+ filename = f"./cloudfiles-cp-flight-time-{ts}.png"
399
+ tm.plot_gantt(filename=filename)
400
+ print(f"Saved chart: {filename}")
401
+
402
+ def _cp(
403
+ src, dst, compression, progress,
404
+ block_size, part_bytes,
405
+ no_sign_request, resumable, gantt, io_rate,
406
+ paths
407
+ ):
319
408
  cfsrc = CloudFiles(src, progress=progress, composite_upload_threshold=part_bytes, no_sign_request=no_sign_request)
320
409
  cfdest = CloudFiles(dst, progress=progress, composite_upload_threshold=part_bytes)
321
- cfsrc.transfer_to(
410
+ tm = cfsrc.transfer_to(
322
411
  cfdest, paths=paths,
323
- reencode=compression, block_size=block_size
412
+ reencode=compression, block_size=block_size,
413
+ resumable=resumable,
324
414
  )
325
415
 
326
- def _cp_stdout(src, no_sign_request, paths):
416
+ ts = _timestamp()
417
+
418
+ if io_rate:
419
+ filename = f"./cloudfiles-cp-est-io-{ts}.png"
420
+ tm.plot_histogram(filename=filename)
421
+ print(f"Saved chart: {filename}")
422
+
423
+ if gantt:
424
+ filename = f"./cloudfiles-cp-flight-time-{ts}.png"
425
+ tm.plot_gantt(filename=filename)
426
+ print(f"Saved chart: {filename}")
427
+
428
+ return tm
429
+
430
+ def _timestamp():
431
+ now = datetime.now(timezone.utc)
432
+ return now.strftime("%Y-%m-%d_%H-%M-%S.%f")[:-5] + "Z"
433
+
434
+ def _cp_stdout(src, no_sign_request, gantt, io_rate, paths):
327
435
  paths = toiter(paths)
328
436
  cf = CloudFiles(src, progress=False, no_sign_request=no_sign_request)
329
- for res in cf.get(paths):
437
+ results, tm = cf.get(paths, return_recording=True)
438
+
439
+ ts = _timestamp()
440
+
441
+ if io_rate:
442
+ tm.plot_histogram(filename=f"./cloudfiles-cp-est-io-{ts}.png")
443
+
444
+ if gantt:
445
+ tm.plot_gantt(filename=f"./cloudfiles-cp-flight-time-{ts}.png")
446
+
447
+ for res in results:
330
448
  content = res["content"].decode("utf8")
331
449
  sys.stdout.write(content)
332
450
 
451
+ return tm
452
+
453
+ @main.command()
454
+ @click.argument("source", nargs=-1)
455
+ @click.argument("destination", nargs=1)
456
+ @click.option('--progress', is_flag=True, default=False, help="Show transfer progress.", show_default=True)
457
+ @click.option('-b', '--block-size', default=128, help="Number of files to download at a time.", show_default=True)
458
+ @click.option('--part-bytes', default=int(1e8), help="Composite upload threshold in bytes. Splits a file into pieces for some cloud services like gs and s3.", show_default=True)
459
+ @click.option('--no-sign-request', is_flag=True, default=False, help="Use s3 in anonymous mode (don't sign requests) for the source.", show_default=True)
460
+ @click.pass_context
461
+ def mv(
462
+ ctx, source, destination,
463
+ progress, block_size,
464
+ part_bytes, no_sign_request,
465
+ ):
466
+ """
467
+ Move one or more files from a source to destination.
468
+
469
+ If source is "-" read newline delimited filenames from stdin.
470
+ If destination is "-" output to stdout.
471
+ """
472
+ if len(source) > 1 and not ispathdir(destination):
473
+ print("cloudfiles: destination must be a directory for multiple source files.")
474
+ return
475
+
476
+ ctx.ensure_object(dict)
477
+ parallel = int(ctx.obj.get("parallel", 1))
478
+
479
+ for src in source:
480
+ _mv_single(
481
+ src, destination,
482
+ progress, block_size,
483
+ part_bytes, no_sign_request,
484
+ parallel
485
+ )
486
+
487
+ def _mv_single(
488
+ source, destination,
489
+ progress, block_size,
490
+ part_bytes, no_sign_request,
491
+ parallel
492
+ ):
493
+ use_stdin = (source == '-')
494
+
495
+ nsrc = normalize_path(source)
496
+ ndest = normalize_path(destination)
497
+
498
+ issrcdir = (ispathdir(source) or CloudFiles(nsrc).isdir()) and use_stdin == False
499
+ isdestdir = (ispathdir(destination) or CloudFiles(ndest).isdir())
500
+
501
+ ensrc = cloudfiles.paths.extract(nsrc)
502
+ endest = cloudfiles.paths.extract(ndest)
503
+
504
+ if ensrc.protocol == "file" and endest.protocol == "file" and issrcdir:
505
+ shutil.move(nsrc.replace("file://", ""), ndest.replace("file://", ""))
506
+ return
507
+
508
+ recursive = issrcdir
509
+
510
+ # For more information see:
511
+ # https://cloud.google.com/storage/docs/gsutil/commands/cp#how-names-are-constructed
512
+ # Try to follow cp rules. If the directory exists,
513
+ # copy the base source directory into the dest directory
514
+ # If the directory does not exist, then we copy into
515
+ # the dest directory.
516
+ # Both x* and x** should not copy the base directory
517
+ if recursive and nsrc[-1] != "*":
518
+ if isdestdir:
519
+ if nsrc[-1] == '/':
520
+ nsrc = nsrc[:-1]
521
+ ndest = cloudpathjoin(ndest, os.path.basename(nsrc))
522
+
523
+ # The else clause here is to handle single file transfers
524
+ srcpath = nsrc if issrcdir else os.path.dirname(nsrc)
525
+ many, flat, prefix, suffix = get_mfp(nsrc, recursive)
526
+
527
+ if issrcdir and not many:
528
+ print(f"cloudfiles: {source} is a directory (not copied).")
529
+ return
530
+
531
+ xferpaths = os.path.basename(nsrc)
532
+ if use_stdin:
533
+ xferpaths = sys.stdin.readlines()
534
+ xferpaths = [ x.replace("\n", "") for x in xferpaths ]
535
+ prefix = os.path.commonprefix(xferpaths)
536
+ xferpaths = [ x.replace(prefix, "") for x in xferpaths ]
537
+ srcpath = prefix
538
+ if srcpath == "":
539
+ print(f"cloudfiles: No common prefix found. Currently only one bucket at a time is supported for STDIN.")
540
+ return
541
+ elif many:
542
+ xferpaths = CloudFiles(
543
+ srcpath, no_sign_request=no_sign_request
544
+ ).list(prefix=prefix, flat=flat)
545
+
546
+ destpath = ndest
547
+ if isinstance(xferpaths, str):
548
+ destpath = ndest if isdestdir else os.path.dirname(ndest)
549
+ elif not isdestdir:
550
+ if os.path.exists(ndest.replace("file://", "")):
551
+ print(f"cloudfiles: {ndest} is not a directory (not copied).")
552
+ return
553
+
554
+ if not isinstance(xferpaths, str):
555
+ if suffix:
556
+ xferpaths = ( x for x in xferpaths if x.endswith(suffix) )
557
+
558
+ if parallel == 1:
559
+ _mv(srcpath, destpath, progress, block_size, part_bytes, no_sign_request, xferpaths)
560
+ return
561
+
562
+ total = None
563
+ try:
564
+ total = len(xferpaths)
565
+ except TypeError:
566
+ pass
567
+
568
+ fn = partial(_mv, srcpath, destpath, False, block_size, part_bytes, no_sign_request)
569
+
570
+ with tqdm(desc="Moving", total=total, disable=(not progress)) as pbar:
571
+ with pathos.pools.ProcessPool(parallel) as executor:
572
+ for _ in executor.imap(fn, sip(xferpaths, block_size)):
573
+ pbar.update(block_size)
574
+ else:
575
+ cfsrc = CloudFiles(srcpath, progress=progress, no_sign_request=no_sign_request)
576
+ if not cfsrc.exists(xferpaths):
577
+ print(f"cloudfiles: source path not found: {cfsrc.abspath(xferpaths).replace('file://','')}")
578
+ return
579
+
580
+ cfdest = CloudFiles(
581
+ destpath,
582
+ progress=progress,
583
+ composite_upload_threshold=part_bytes,
584
+ )
585
+
586
+ cfsrc.move(xferpaths, ndest)
587
+
588
+ def _mv(src, dst, progress, block_size, part_bytes, no_sign_request, paths):
589
+ cfsrc = CloudFiles(src, progress=progress, composite_upload_threshold=part_bytes, no_sign_request=no_sign_request)
590
+ cfdest = CloudFiles(dst, progress=progress, composite_upload_threshold=part_bytes)
591
+ cfsrc.moves(
592
+ cfdest, paths=paths, block_size=block_size
593
+ )
594
+
595
+ @main.command()
596
+ @click.argument("sources", nargs=-1)
597
+ @click.option('--progress', is_flag=True, default=False, help="Show transfer progress.", show_default=True)
598
+ @click.option('--no-sign-request', is_flag=True, default=False, help="Use s3 in anonymous mode (don't sign requests) for the source.", show_default=True)
599
+ @click.pass_context
600
+ def touch(
601
+ ctx, sources,
602
+ progress, no_sign_request,
603
+ ):
604
+ """Create file if it doesn't exist."""
605
+ sources = list(map(normalize_path, sources))
606
+ sources = [ src.replace("precomputed://", "") for src in sources ]
607
+ pbar = tqdm(total=len(sources), desc="Touch", disable=(not progress))
608
+
609
+ clustered = find_common_buckets(sources)
610
+
611
+ with pbar:
612
+ for bucket, items in clustered.items():
613
+ cf = CloudFiles(bucket, no_sign_request=no_sign_request, progress=False)
614
+ cf.touch(items)
615
+ pbar.update(len(items))
616
+
333
617
  @main.group("xfer")
334
618
  def xfergroup():
335
619
  """
@@ -449,7 +733,7 @@ def rm(ctx, paths, recursive, progress, block_size):
449
733
  singles = []
450
734
  multiples = []
451
735
  for path in paths:
452
- many, flat, prefix = get_mfp(path, recursive)
736
+ many, flat, prefix, suffix = get_mfp(path, recursive)
453
737
  if ispathdir(path) and not many:
454
738
  print(f"cloudfiles: {path}: is a directory.")
455
739
  return
@@ -468,7 +752,8 @@ def _rm_singles(paths, progress, parallel, block_size):
468
752
  for path in paths:
469
753
  npath = normalize_path(path)
470
754
  extracted = cloudfiles.paths.extract(npath)
471
- cfgroups[(extracted.protocol, extracted.bucket)].append(extracted.path)
755
+ normalized_protocol = cloudfiles.paths.asprotocolpath(extracted)
756
+ cfgroups[(normalized_protocol, extracted.bucket)].append(extracted.path)
472
757
 
473
758
  for group, paths in cfgroups.items():
474
759
  cfpath = f"{group[0]}://{group[1]}/"
@@ -489,13 +774,15 @@ def _rm_many(path, recursive, progress, parallel, block_size):
489
774
  isdir = (ispathdir(path) or CloudFiles(npath).isdir())
490
775
  recursive = recursive and isdir
491
776
 
492
- many, flat, prefix = get_mfp(path, recursive)
777
+ many, flat, prefix, suffix = get_mfp(path, recursive)
493
778
 
494
779
  cfpath = npath if isdir else os.path.dirname(npath)
495
780
  xferpaths = os.path.basename(npath)
496
781
 
497
782
  if many:
498
783
  xferpaths = CloudFiles(cfpath).list(prefix=prefix, flat=flat)
784
+ if suffix:
785
+ xferpaths = ( x for x in xferpaths if x.endswith(suffix) )
499
786
 
500
787
  if parallel == 1 or not many:
501
788
  __rm(cfpath, progress, xferpaths)
@@ -515,14 +802,22 @@ def __rm(cloudpath, progress, paths):
515
802
  @click.option('-c', '--grand-total', is_flag=True, default=False, help="Sum a grand total of all inputs.")
516
803
  @click.option('-s', '--summarize', is_flag=True, default=False, help="Sum a total for each input argument.")
517
804
  @click.option('-h', '--human-readable', is_flag=True, default=False, help='"Human-readable" output. Use unit suffixes: Bytes, KiB, MiB, GiB, TiB, PiB, and EiB.')
518
- def du(paths, grand_total, summarize, human_readable):
805
+ @click.option('-N', '--count-files', is_flag=True, default=False, help='Also report the number of files.')
806
+ def du(paths, grand_total, summarize, human_readable, count_files):
519
807
  """Display disk usage statistics."""
520
808
  results = []
809
+
810
+ list_data = False
811
+
521
812
  for path in paths:
522
813
  npath = normalize_path(path)
523
814
  if ispathdir(path):
524
815
  cf = CloudFiles(npath)
525
- results.append(cf.size(cf.list()))
816
+ if summarize:
817
+ results.append(cf.subtree_size())
818
+ else:
819
+ list_data = True
820
+ results.append(cf.size(cf.list()))
526
821
  else:
527
822
  cf = CloudFiles(os.path.dirname(npath))
528
823
  sz = cf.size(os.path.basename(npath))
@@ -551,8 +846,15 @@ def du(paths, grand_total, summarize, human_readable):
551
846
  return f"{(val / 2**60):.2f} EiB"
552
847
 
553
848
  summary = {}
849
+ num_files = 0
554
850
  for path, res in zip(paths, results):
555
- summary[path] = sum(res.values())
851
+ if list_data:
852
+ summary[path] = sum(res.values())
853
+ num_files += len(res)
854
+ else:
855
+ summary[path] = res["num_bytes"]
856
+ num_files += res["N"]
857
+
556
858
  if summarize:
557
859
  print(f"{SI(summary[path])}\t{path}")
558
860
 
@@ -562,7 +864,10 @@ def du(paths, grand_total, summarize, human_readable):
562
864
  print(f"{SI(size)}\t{pth}")
563
865
 
564
866
  if grand_total:
565
- print(f"{SI(sum(summary.values()))}\ttotal")
867
+ print(f"{SI(sum(summary.values()))}\tbytes total")
868
+
869
+ if count_files:
870
+ print(f"{num_files}\tfiles total")
566
871
 
567
872
  @main.command()
568
873
  @click.argument('paths', nargs=-1)
@@ -572,10 +877,13 @@ def head(paths):
572
877
  for path in paths:
573
878
  npath = normalize_path(path)
574
879
  npath = re.sub(r'\*+$', '', path)
575
- many, flat, prefix = get_mfp(path, False)
880
+ many, flat, prefix, suffix = get_mfp(path, False)
576
881
  if many:
577
882
  cf = CloudFiles(npath)
578
- res = cf.head(cf.list(prefix=prefix, flat=flat))
883
+ lst = cf.list(prefix=prefix, flat=flat)
884
+ if suffix:
885
+ lst = ( x for x in lst if x.endswith(suffix) )
886
+ res = cf.head(lst)
579
887
  results.update(res)
580
888
  else:
581
889
  cf = CloudFiles(os.path.dirname(npath))
@@ -1,26 +0,0 @@
1
- cloudfiles/__init__.py,sha256=pLB4CcV2l3Jgv_ni1520Np1pfzFj8Cpr87vNxFT3rNI,493
2
- cloudfiles/buckets.py,sha256=eRAYdDfvVpNyJyK5ryDRMwgNJUeEuFBJ6doWU2JkAcA,74
3
- cloudfiles/cloudfiles.py,sha256=QlrQlU94gqNhKWyOuP0xe58UEwk2x8wtZ7n9LKiyLpM,44854
4
- cloudfiles/compression.py,sha256=pqYdpu5vfFv-094BpfZ2pgRjVu7ESM9pAZC09P6E8bY,6150
5
- cloudfiles/connectionpools.py,sha256=aL8RiSjRepECfgAFmJcz80aJFKbou7hsbuEgugDKwB8,4814
6
- cloudfiles/exceptions.py,sha256=H2IcMlZoy2Bsn-6wCPwyLDjg66LZCyxtcf3s_p21FDw,770
7
- cloudfiles/gcs.py,sha256=_njJ7TpqwrHCjPHRGkBN5alCrCWKM2m9qdy5DhxMZ7U,3718
8
- cloudfiles/interfaces.py,sha256=4ICm7tnS8HHK5IKw-HYD653V-qndprPRK8e1cxUYKgA,36782
9
- cloudfiles/lib.py,sha256=fEqL5APu_WQhl2yxqQbwE7msHdu7U8pstAJw6LgoKO0,5142
10
- cloudfiles/paths.py,sha256=xadVh5Vw8wAack1cws5dzVIlYQ3r8h8lrP43umUSuT0,10547
11
- cloudfiles/resumable_tools.py,sha256=pK-VcoPjQ2BjGjvlvH4dDCBf6lNsqHG-weiBgxVFbzA,5838
12
- cloudfiles/scheduler.py,sha256=DqDANmOpB3NdzFgJDNMMibRIkCrXQqIh2XGL8GWoc9c,3668
13
- cloudfiles/secrets.py,sha256=791b5a8nWSBYtlleGzKeoYIR5jl-FI1bw6INRM4Wy-0,5295
14
- cloudfiles/threaded_queue.py,sha256=Nl4vfXhQ6nDLF8PZpSSBpww0M2zWtcd4DLs3W3BArBw,7082
15
- cloudfiles/typing.py,sha256=f3ZYkNfN9poxhGu5j-P0KCxjCCqSn9HAg5KiIPkjnCg,416
16
- cloudfiles_cli/LICENSE,sha256=Jna4xYE8CCQmaxjr5Fs-wmUBnIQJ1DGcNn9MMjbkprk,1538
17
- cloudfiles_cli/__init__.py,sha256=Wftt3R3F21QsHtWqx49ODuqT9zcSr0em7wk48kcH0WM,29
18
- cloudfiles_cli/cloudfiles_cli.py,sha256=eETIOK4QyztQcpA4ZRny21SobLtcrPDlzZ_JaKBmmmA,28449
19
- cloud_files-4.27.0.dist-info/AUTHORS,sha256=7E2vC894bbLPO_kvUuEB2LFZZbIxZn23HabxH7x0Hgo,266
20
- cloud_files-4.27.0.dist-info/LICENSE,sha256=Jna4xYE8CCQmaxjr5Fs-wmUBnIQJ1DGcNn9MMjbkprk,1538
21
- cloud_files-4.27.0.dist-info/METADATA,sha256=7xdozjXt0yT1OKZV47M9dWeip9PUx9Wj9adUcO0qZ_M,26804
22
- cloud_files-4.27.0.dist-info/WHEEL,sha256=ewwEueio1C2XeHTvT17n8dZUJgOvyCWCt0WVNLClP9o,92
23
- cloud_files-4.27.0.dist-info/entry_points.txt,sha256=xlirb1FVhn1mbcv4IoyMEGumDqKOA4VMVd3drsRQxIg,51
24
- cloud_files-4.27.0.dist-info/pbr.json,sha256=C4Xk2iNLylqeAReQ_DjwEN6rVj4PC_6x96XsvMs7138,46
25
- cloud_files-4.27.0.dist-info/top_level.txt,sha256=xPyrST3okJbsmdCF5IC2gYAVxg_aD5AYVTnNo8UuoZU,26
26
- cloud_files-4.27.0.dist-info/RECORD,,
@@ -1 +0,0 @@
1
- {"git_version": "e4b04bf", "is_release": true}
cloudfiles/buckets.py DELETED
@@ -1,10 +0,0 @@
1
-
2
-
3
-
4
-
5
-
6
- class Bucket:
7
- def __init__(self, cloudpath, secrets=None):
8
- pass
9
-
10
-