cloud-files 5.4.1__py3-none-any.whl → 5.6.0__py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- {cloud_files-5.4.1.dist-info → cloud_files-5.6.0.dist-info}/METADATA +72 -2
- {cloud_files-5.4.1.dist-info → cloud_files-5.6.0.dist-info}/RECORD +13 -12
- cloud_files-5.6.0.dist-info/pbr.json +1 -0
- cloudfiles/cloudfiles.py +245 -53
- cloudfiles/interfaces.py +55 -38
- cloudfiles/monitoring.py +724 -0
- cloudfiles/scheduler.py +6 -1
- cloudfiles_cli/cloudfiles_cli.py +87 -14
- cloud_files-5.4.1.dist-info/pbr.json +0 -1
- {cloud_files-5.4.1.dist-info → cloud_files-5.6.0.dist-info}/AUTHORS +0 -0
- {cloud_files-5.4.1.dist-info → cloud_files-5.6.0.dist-info}/LICENSE +0 -0
- {cloud_files-5.4.1.dist-info → cloud_files-5.6.0.dist-info}/WHEEL +0 -0
- {cloud_files-5.4.1.dist-info → cloud_files-5.6.0.dist-info}/entry_points.txt +0 -0
- {cloud_files-5.4.1.dist-info → cloud_files-5.6.0.dist-info}/top_level.txt +0 -0
cloudfiles/scheduler.py
CHANGED
|
@@ -137,7 +137,12 @@ def schedule_jobs(
|
|
|
137
137
|
or (hasattr(fns, "__len__") and len(fns) <= 1)
|
|
138
138
|
):
|
|
139
139
|
return schedule_single_threaded_jobs(fns, progress, total, count_return)
|
|
140
|
-
|
|
140
|
+
|
|
141
|
+
if isinstance(total, int):
|
|
142
|
+
concurrency = min(concurrency, max(total, 1))
|
|
143
|
+
elif hasattr(fns, "__len__"):
|
|
144
|
+
concurrency = min(concurrency, max(len(fns), 1))
|
|
145
|
+
|
|
141
146
|
if green == True or (green is None and gevent.monkey.saved):
|
|
142
147
|
return schedule_green_jobs(fns, concurrency, progress, total, count_return)
|
|
143
148
|
|
cloudfiles_cli/cloudfiles_cli.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
from collections import defaultdict
|
|
2
2
|
from concurrent.futures import ProcessPoolExecutor
|
|
3
|
+
from datetime import datetime, timezone
|
|
3
4
|
from functools import partial
|
|
4
5
|
import itertools
|
|
5
6
|
import json
|
|
@@ -24,6 +25,7 @@ import pathos.pools
|
|
|
24
25
|
import cloudfiles
|
|
25
26
|
import cloudfiles.paths
|
|
26
27
|
from cloudfiles import CloudFiles
|
|
28
|
+
from cloudfiles.monitoring import TransmissionMonitor, IOSampler, IOEnum
|
|
27
29
|
from cloudfiles.resumable_tools import ResumableTransfer
|
|
28
30
|
from cloudfiles.compression import transcode
|
|
29
31
|
from cloudfiles.paths import extract, get_protocol, find_common_buckets
|
|
@@ -178,12 +180,18 @@ def get_mfp(path, recursive):
|
|
|
178
180
|
@click.option('--part-bytes', default=int(1e8), help="Composite upload threshold in bytes. Splits a file into pieces for some cloud services like gs and s3.", show_default=True)
|
|
179
181
|
@click.option('--no-sign-request', is_flag=True, default=False, help="Use s3 in anonymous mode (don't sign requests) for the source.", show_default=True)
|
|
180
182
|
@click.option('--resumable', is_flag=True, default=False, help="http->file transfers will dowload to .part files while they are in progress.", show_default=True)
|
|
183
|
+
@click.option('--flight-time', is_flag=True, default=False, help="Save a Gantt chart of the file transfer to the local directory.", show_default=True)
|
|
184
|
+
@click.option('--io-rate', is_flag=True, default=False, help="Save a chart of bitrate estimated based on file sizes and transmission duration.", show_default=True)
|
|
185
|
+
@click.option('--machine-io-rate', is_flag=True, default=False, help="Save a chart of bitrate based on 4 Hz sampling OS network counters for the entire machine.", show_default=True)
|
|
186
|
+
@click.option('--machine-io-rate-buffer-sec', default=600, help="Circular buffer length in seconds. Only allocated if chart enabled. 1 sec = 96 bytes", show_default=True)
|
|
181
187
|
@click.pass_context
|
|
182
188
|
def cp(
|
|
183
189
|
ctx, source, destination,
|
|
184
190
|
recursive, compression, progress,
|
|
185
191
|
block_size, part_bytes, no_sign_request,
|
|
186
|
-
resumable,
|
|
192
|
+
resumable,
|
|
193
|
+
flight_time, io_rate,
|
|
194
|
+
machine_io_rate, machine_io_rate_buffer_sec,
|
|
187
195
|
):
|
|
188
196
|
"""
|
|
189
197
|
Copy one or more files from a source to destination.
|
|
@@ -196,19 +204,36 @@ def cp(
|
|
|
196
204
|
print("cloudfiles: destination must be a directory for multiple source files.")
|
|
197
205
|
return
|
|
198
206
|
|
|
207
|
+
network_sampler = None
|
|
208
|
+
if machine_io_rate:
|
|
209
|
+
network_sampler = IOSampler(
|
|
210
|
+
buffer_sec=machine_io_rate_buffer_sec,
|
|
211
|
+
interval=0.25,
|
|
212
|
+
)
|
|
213
|
+
network_sampler.start_sampling()
|
|
214
|
+
|
|
199
215
|
for src in source:
|
|
200
216
|
_cp_single(
|
|
201
217
|
ctx, src, destination, recursive,
|
|
202
218
|
compression, progress, block_size,
|
|
203
219
|
part_bytes, no_sign_request,
|
|
204
|
-
resumable,
|
|
220
|
+
resumable, flight_time, io_rate,
|
|
205
221
|
)
|
|
206
222
|
|
|
223
|
+
if machine_io_rate:
|
|
224
|
+
filename = f"./cloudfiles-cp-measured-io-{_timestamp()}.png"
|
|
225
|
+
network_sampler.stop_sampling()
|
|
226
|
+
network_sampler.plot_histogram(
|
|
227
|
+
resolution=1.0,
|
|
228
|
+
filename=filename,
|
|
229
|
+
)
|
|
230
|
+
print(f"Saved chart: {filename}")
|
|
231
|
+
|
|
207
232
|
def _cp_single(
|
|
208
233
|
ctx, source, destination, recursive,
|
|
209
234
|
compression, progress, block_size,
|
|
210
235
|
part_bytes, no_sign_request,
|
|
211
|
-
resumable,
|
|
236
|
+
resumable, gantt, io_rate,
|
|
212
237
|
):
|
|
213
238
|
use_stdin = (source == '-')
|
|
214
239
|
use_stdout = (destination == '-')
|
|
@@ -281,7 +306,7 @@ def _cp_single(
|
|
|
281
306
|
_cp(
|
|
282
307
|
srcpath, destpath, compression,
|
|
283
308
|
progress, block_size, part_bytes,
|
|
284
|
-
no_sign_request, resumable, xferpaths
|
|
309
|
+
no_sign_request, resumable, gantt, io_rate, xferpaths
|
|
285
310
|
)
|
|
286
311
|
return
|
|
287
312
|
|
|
@@ -292,17 +317,22 @@ def _cp_single(
|
|
|
292
317
|
pass
|
|
293
318
|
|
|
294
319
|
if use_stdout:
|
|
295
|
-
fn = partial(_cp_stdout, srcpath, no_sign_request)
|
|
320
|
+
fn = partial(_cp_stdout, srcpath, no_sign_request, False, False)
|
|
296
321
|
else:
|
|
297
322
|
fn = partial(
|
|
298
323
|
_cp, srcpath, destpath, compression, False,
|
|
299
|
-
block_size, part_bytes, no_sign_request, resumable
|
|
324
|
+
block_size, part_bytes, no_sign_request, resumable, False, False,
|
|
300
325
|
)
|
|
301
326
|
|
|
327
|
+
tms = []
|
|
302
328
|
with tqdm(desc="Transferring", total=total, disable=(not progress)) as pbar:
|
|
303
329
|
with pathos.pools.ProcessPool(parallel) as executor:
|
|
304
|
-
for
|
|
330
|
+
for tm in executor.imap(fn, sip(xferpaths, block_size)):
|
|
305
331
|
pbar.update(block_size)
|
|
332
|
+
tms.append(tm)
|
|
333
|
+
|
|
334
|
+
tm = TransmissionMonitor.merge(tms)
|
|
335
|
+
del tms
|
|
306
336
|
else:
|
|
307
337
|
cfsrc = CloudFiles(srcpath, progress=progress, no_sign_request=no_sign_request)
|
|
308
338
|
if not cfsrc.exists(xferpaths):
|
|
@@ -310,7 +340,7 @@ def _cp_single(
|
|
|
310
340
|
return
|
|
311
341
|
|
|
312
342
|
if use_stdout:
|
|
313
|
-
_cp_stdout(srcpath, xferpaths)
|
|
343
|
+
_cp_stdout(srcpath, no_sign_request, gantt, io_rate, xferpaths)
|
|
314
344
|
return
|
|
315
345
|
|
|
316
346
|
cfdest = CloudFiles(
|
|
@@ -324,32 +354,74 @@ def _cp_single(
|
|
|
324
354
|
else:
|
|
325
355
|
new_path = os.path.basename(ndest)
|
|
326
356
|
|
|
327
|
-
cfsrc.transfer_to(cfdest, paths=[{
|
|
357
|
+
tm = cfsrc.transfer_to(cfdest, paths=[{
|
|
328
358
|
"path": xferpaths,
|
|
329
359
|
"dest_path": new_path,
|
|
330
360
|
}], reencode=compression, resumable=resumable)
|
|
331
361
|
|
|
362
|
+
ts = _timestamp()
|
|
363
|
+
|
|
364
|
+
if io_rate:
|
|
365
|
+
filename = f"./cloudfiles-cp-est-io-{ts}.png"
|
|
366
|
+
tm.plot_histogram(filename=filename)
|
|
367
|
+
print(f"Saved chart: {filename}")
|
|
368
|
+
|
|
369
|
+
if gantt:
|
|
370
|
+
filename = f"./cloudfiles-cp-flight-time-{ts}.png"
|
|
371
|
+
tm.plot_gantt(filename=filename)
|
|
372
|
+
print(f"Saved chart: {filename}")
|
|
373
|
+
|
|
332
374
|
def _cp(
|
|
333
375
|
src, dst, compression, progress,
|
|
334
376
|
block_size, part_bytes,
|
|
335
|
-
no_sign_request, resumable,
|
|
377
|
+
no_sign_request, resumable, gantt, io_rate,
|
|
336
378
|
paths
|
|
337
379
|
):
|
|
338
380
|
cfsrc = CloudFiles(src, progress=progress, composite_upload_threshold=part_bytes, no_sign_request=no_sign_request)
|
|
339
381
|
cfdest = CloudFiles(dst, progress=progress, composite_upload_threshold=part_bytes)
|
|
340
|
-
cfsrc.transfer_to(
|
|
382
|
+
tm = cfsrc.transfer_to(
|
|
341
383
|
cfdest, paths=paths,
|
|
342
384
|
reencode=compression, block_size=block_size,
|
|
343
385
|
resumable=resumable,
|
|
344
386
|
)
|
|
345
387
|
|
|
346
|
-
|
|
388
|
+
ts = _timestamp()
|
|
389
|
+
|
|
390
|
+
if io_rate:
|
|
391
|
+
filename = f"./cloudfiles-cp-est-io-{ts}.png"
|
|
392
|
+
tm.plot_histogram(filename=filename)
|
|
393
|
+
print(f"Saved chart: {filename}")
|
|
394
|
+
|
|
395
|
+
if gantt:
|
|
396
|
+
filename = f"./cloudfiles-cp-flight-time-{ts}.png"
|
|
397
|
+
tm.plot_gantt(filename=filename)
|
|
398
|
+
print(f"Saved chart: {filename}")
|
|
399
|
+
|
|
400
|
+
return tm
|
|
401
|
+
|
|
402
|
+
def _timestamp():
|
|
403
|
+
now = datetime.now(timezone.utc)
|
|
404
|
+
return now.strftime("%Y-%m-%d_%H-%M-%S.%f")[:-5] + "Z"
|
|
405
|
+
|
|
406
|
+
def _cp_stdout(src, no_sign_request, gantt, io_rate, paths):
|
|
347
407
|
paths = toiter(paths)
|
|
348
408
|
cf = CloudFiles(src, progress=False, no_sign_request=no_sign_request)
|
|
349
|
-
|
|
409
|
+
results, tm = cf.get(paths, return_recording=True)
|
|
410
|
+
|
|
411
|
+
ts = _timestamp()
|
|
412
|
+
|
|
413
|
+
if io_rate:
|
|
414
|
+
tm.plot_histogram(filename=f"./cloudfiles-cp-est-io-{ts}.png")
|
|
415
|
+
|
|
416
|
+
if gantt:
|
|
417
|
+
tm.plot_gantt(filename=f"./cloudfiles-cp-flight-time-{ts}.png")
|
|
418
|
+
|
|
419
|
+
for res in results:
|
|
350
420
|
content = res["content"].decode("utf8")
|
|
351
421
|
sys.stdout.write(content)
|
|
352
422
|
|
|
423
|
+
return tm
|
|
424
|
+
|
|
353
425
|
@main.command()
|
|
354
426
|
@click.argument("source", nargs=-1)
|
|
355
427
|
@click.argument("destination", nargs=1)
|
|
@@ -648,7 +720,8 @@ def _rm_singles(paths, progress, parallel, block_size):
|
|
|
648
720
|
for path in paths:
|
|
649
721
|
npath = normalize_path(path)
|
|
650
722
|
extracted = cloudfiles.paths.extract(npath)
|
|
651
|
-
|
|
723
|
+
normalized_protocol = cloudfiles.paths.asprotocolpath(extracted)
|
|
724
|
+
cfgroups[(normalized_protocol, extracted.bucket)].append(extracted.path)
|
|
652
725
|
|
|
653
726
|
for group, paths in cfgroups.items():
|
|
654
727
|
cfpath = f"{group[0]}://{group[1]}/"
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"git_version": "e4d56b4", "is_release": true}
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|