cloud-files 5.4.1__py3-none-any.whl → 5.6.0__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
cloudfiles/scheduler.py CHANGED
@@ -137,7 +137,12 @@ def schedule_jobs(
137
137
  or (hasattr(fns, "__len__") and len(fns) <= 1)
138
138
  ):
139
139
  return schedule_single_threaded_jobs(fns, progress, total, count_return)
140
-
140
+
141
+ if isinstance(total, int):
142
+ concurrency = min(concurrency, max(total, 1))
143
+ elif hasattr(fns, "__len__"):
144
+ concurrency = min(concurrency, max(len(fns), 1))
145
+
141
146
  if green == True or (green is None and gevent.monkey.saved):
142
147
  return schedule_green_jobs(fns, concurrency, progress, total, count_return)
143
148
 
@@ -1,5 +1,6 @@
1
1
  from collections import defaultdict
2
2
  from concurrent.futures import ProcessPoolExecutor
3
+ from datetime import datetime, timezone
3
4
  from functools import partial
4
5
  import itertools
5
6
  import json
@@ -24,6 +25,7 @@ import pathos.pools
24
25
  import cloudfiles
25
26
  import cloudfiles.paths
26
27
  from cloudfiles import CloudFiles
28
+ from cloudfiles.monitoring import TransmissionMonitor, IOSampler, IOEnum
27
29
  from cloudfiles.resumable_tools import ResumableTransfer
28
30
  from cloudfiles.compression import transcode
29
31
  from cloudfiles.paths import extract, get_protocol, find_common_buckets
@@ -178,12 +180,18 @@ def get_mfp(path, recursive):
178
180
  @click.option('--part-bytes', default=int(1e8), help="Composite upload threshold in bytes. Splits a file into pieces for some cloud services like gs and s3.", show_default=True)
179
181
  @click.option('--no-sign-request', is_flag=True, default=False, help="Use s3 in anonymous mode (don't sign requests) for the source.", show_default=True)
180
182
  @click.option('--resumable', is_flag=True, default=False, help="http->file transfers will dowload to .part files while they are in progress.", show_default=True)
183
+ @click.option('--flight-time', is_flag=True, default=False, help="Save a Gantt chart of the file transfer to the local directory.", show_default=True)
184
+ @click.option('--io-rate', is_flag=True, default=False, help="Save a chart of bitrate estimated based on file sizes and transmission duration.", show_default=True)
185
+ @click.option('--machine-io-rate', is_flag=True, default=False, help="Save a chart of bitrate based on 4 Hz sampling OS network counters for the entire machine.", show_default=True)
186
+ @click.option('--machine-io-rate-buffer-sec', default=600, help="Circular buffer length in seconds. Only allocated if chart enabled. 1 sec = 96 bytes", show_default=True)
181
187
  @click.pass_context
182
188
  def cp(
183
189
  ctx, source, destination,
184
190
  recursive, compression, progress,
185
191
  block_size, part_bytes, no_sign_request,
186
- resumable,
192
+ resumable,
193
+ flight_time, io_rate,
194
+ machine_io_rate, machine_io_rate_buffer_sec,
187
195
  ):
188
196
  """
189
197
  Copy one or more files from a source to destination.
@@ -196,19 +204,36 @@ def cp(
196
204
  print("cloudfiles: destination must be a directory for multiple source files.")
197
205
  return
198
206
 
207
+ network_sampler = None
208
+ if machine_io_rate:
209
+ network_sampler = IOSampler(
210
+ buffer_sec=machine_io_rate_buffer_sec,
211
+ interval=0.25,
212
+ )
213
+ network_sampler.start_sampling()
214
+
199
215
  for src in source:
200
216
  _cp_single(
201
217
  ctx, src, destination, recursive,
202
218
  compression, progress, block_size,
203
219
  part_bytes, no_sign_request,
204
- resumable,
220
+ resumable, flight_time, io_rate,
205
221
  )
206
222
 
223
+ if machine_io_rate:
224
+ filename = f"./cloudfiles-cp-measured-io-{_timestamp()}.png"
225
+ network_sampler.stop_sampling()
226
+ network_sampler.plot_histogram(
227
+ resolution=1.0,
228
+ filename=filename,
229
+ )
230
+ print(f"Saved chart: {filename}")
231
+
207
232
  def _cp_single(
208
233
  ctx, source, destination, recursive,
209
234
  compression, progress, block_size,
210
235
  part_bytes, no_sign_request,
211
- resumable,
236
+ resumable, gantt, io_rate,
212
237
  ):
213
238
  use_stdin = (source == '-')
214
239
  use_stdout = (destination == '-')
@@ -281,7 +306,7 @@ def _cp_single(
281
306
  _cp(
282
307
  srcpath, destpath, compression,
283
308
  progress, block_size, part_bytes,
284
- no_sign_request, resumable, xferpaths
309
+ no_sign_request, resumable, gantt, io_rate, xferpaths
285
310
  )
286
311
  return
287
312
 
@@ -292,17 +317,22 @@ def _cp_single(
292
317
  pass
293
318
 
294
319
  if use_stdout:
295
- fn = partial(_cp_stdout, srcpath, no_sign_request)
320
+ fn = partial(_cp_stdout, srcpath, no_sign_request, False, False)
296
321
  else:
297
322
  fn = partial(
298
323
  _cp, srcpath, destpath, compression, False,
299
- block_size, part_bytes, no_sign_request, resumable
324
+ block_size, part_bytes, no_sign_request, resumable, False, False,
300
325
  )
301
326
 
327
+ tms = []
302
328
  with tqdm(desc="Transferring", total=total, disable=(not progress)) as pbar:
303
329
  with pathos.pools.ProcessPool(parallel) as executor:
304
- for _ in executor.imap(fn, sip(xferpaths, block_size)):
330
+ for tm in executor.imap(fn, sip(xferpaths, block_size)):
305
331
  pbar.update(block_size)
332
+ tms.append(tm)
333
+
334
+ tm = TransmissionMonitor.merge(tms)
335
+ del tms
306
336
  else:
307
337
  cfsrc = CloudFiles(srcpath, progress=progress, no_sign_request=no_sign_request)
308
338
  if not cfsrc.exists(xferpaths):
@@ -310,7 +340,7 @@ def _cp_single(
310
340
  return
311
341
 
312
342
  if use_stdout:
313
- _cp_stdout(srcpath, xferpaths)
343
+ _cp_stdout(srcpath, no_sign_request, gantt, io_rate, xferpaths)
314
344
  return
315
345
 
316
346
  cfdest = CloudFiles(
@@ -324,32 +354,74 @@ def _cp_single(
324
354
  else:
325
355
  new_path = os.path.basename(ndest)
326
356
 
327
- cfsrc.transfer_to(cfdest, paths=[{
357
+ tm = cfsrc.transfer_to(cfdest, paths=[{
328
358
  "path": xferpaths,
329
359
  "dest_path": new_path,
330
360
  }], reencode=compression, resumable=resumable)
331
361
 
362
+ ts = _timestamp()
363
+
364
+ if io_rate:
365
+ filename = f"./cloudfiles-cp-est-io-{ts}.png"
366
+ tm.plot_histogram(filename=filename)
367
+ print(f"Saved chart: {filename}")
368
+
369
+ if gantt:
370
+ filename = f"./cloudfiles-cp-flight-time-{ts}.png"
371
+ tm.plot_gantt(filename=filename)
372
+ print(f"Saved chart: {filename}")
373
+
332
374
  def _cp(
333
375
  src, dst, compression, progress,
334
376
  block_size, part_bytes,
335
- no_sign_request, resumable,
377
+ no_sign_request, resumable, gantt, io_rate,
336
378
  paths
337
379
  ):
338
380
  cfsrc = CloudFiles(src, progress=progress, composite_upload_threshold=part_bytes, no_sign_request=no_sign_request)
339
381
  cfdest = CloudFiles(dst, progress=progress, composite_upload_threshold=part_bytes)
340
- cfsrc.transfer_to(
382
+ tm = cfsrc.transfer_to(
341
383
  cfdest, paths=paths,
342
384
  reencode=compression, block_size=block_size,
343
385
  resumable=resumable,
344
386
  )
345
387
 
346
- def _cp_stdout(src, no_sign_request, paths):
388
+ ts = _timestamp()
389
+
390
+ if io_rate:
391
+ filename = f"./cloudfiles-cp-est-io-{ts}.png"
392
+ tm.plot_histogram(filename=filename)
393
+ print(f"Saved chart: {filename}")
394
+
395
+ if gantt:
396
+ filename = f"./cloudfiles-cp-flight-time-{ts}.png"
397
+ tm.plot_gantt(filename=filename)
398
+ print(f"Saved chart: {filename}")
399
+
400
+ return tm
401
+
402
+ def _timestamp():
403
+ now = datetime.now(timezone.utc)
404
+ return now.strftime("%Y-%m-%d_%H-%M-%S.%f")[:-5] + "Z"
405
+
406
+ def _cp_stdout(src, no_sign_request, gantt, io_rate, paths):
347
407
  paths = toiter(paths)
348
408
  cf = CloudFiles(src, progress=False, no_sign_request=no_sign_request)
349
- for res in cf.get(paths):
409
+ results, tm = cf.get(paths, return_recording=True)
410
+
411
+ ts = _timestamp()
412
+
413
+ if io_rate:
414
+ tm.plot_histogram(filename=f"./cloudfiles-cp-est-io-{ts}.png")
415
+
416
+ if gantt:
417
+ tm.plot_gantt(filename=f"./cloudfiles-cp-flight-time-{ts}.png")
418
+
419
+ for res in results:
350
420
  content = res["content"].decode("utf8")
351
421
  sys.stdout.write(content)
352
422
 
423
+ return tm
424
+
353
425
  @main.command()
354
426
  @click.argument("source", nargs=-1)
355
427
  @click.argument("destination", nargs=1)
@@ -648,7 +720,8 @@ def _rm_singles(paths, progress, parallel, block_size):
648
720
  for path in paths:
649
721
  npath = normalize_path(path)
650
722
  extracted = cloudfiles.paths.extract(npath)
651
- cfgroups[(extracted.protocol, extracted.bucket)].append(extracted.path)
723
+ normalized_protocol = cloudfiles.paths.asprotocolpath(extracted)
724
+ cfgroups[(normalized_protocol, extracted.bucket)].append(extracted.path)
652
725
 
653
726
  for group, paths in cfgroups.items():
654
727
  cfpath = f"{group[0]}://{group[1]}/"
@@ -1 +0,0 @@
1
- {"git_version": "e4d56b4", "is_release": true}