cloud-files 5.5.0__py3-none-any.whl → 5.6.1__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to a supported public registry. It is provided for informational purposes only.
cloudfiles/cloudfiles.py CHANGED
@@ -7,8 +7,9 @@ from typing import (
 
 from queue import Queue
 from collections import defaultdict
-from functools import partial, wraps
+from functools import partial, wraps, reduce
 import inspect
+import io
 import math
 import multiprocessing
 import itertools
@@ -18,6 +19,7 @@ import posixpath
 import re
 import shutil
 import types
+import time
 
 import orjson
 import pathos.pools
@@ -32,6 +34,7 @@ from .lib import (
   duplicates, first, sip, touch,
   md5, crc32c, decode_crc32c_b64
 )
+from .monitoring import TransmissionMonitor, IOEnum
 from .paths import ALIASES, find_common_buckets
 from .secrets import CLOUD_FILES_DIR, CLOUD_FILES_LOCK_DIR
 from .threaded_queue import ThreadedQueue, DEFAULT_THREADS
@@ -154,25 +157,37 @@ def parallel_execute(
     multiprocessing.set_start_method("spawn", force=True)
 
   results = []
+  tms = []
   try:
     with pathos.pools.ProcessPool(parallel) as executor:
       for res in executor.imap(fn, sip(inputs, block_size)):
-        if isinstance(res, int):
-          pbar.update(res)
-        elif isinstance(res, list):
-          pbar.update(len(res))
+        update = res
+        if isinstance(res, tuple):
+          update = res[0]
+
+        if isinstance(update, int):
+          pbar.update(update)
+        elif isinstance(update, list):
+          pbar.update(len(update))
         else:
          pbar.update(block_size)
 
        if returns_list:
-          results.extend(res)
+          if isinstance(res, tuple):
+            results.extend(res[0])
+            tms.append(res[1])
+          else:
+            results.extend(res)
  finally:
    if platform.system().lower() == "darwin":
      os.environ["no_proxy"] = no_proxy
    pbar.close()
 
  if returns_list:
-    return results
+    if len(tms):
+      return (results, TransmissionMonitor.merge(tms))
+    else:
+      return results
 
 def get_interface_class(protocol):
  if protocol in INTERFACES:
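
Note on the parallel_execute change above: each worker process may now return a (results, TransmissionMonitor) tuple, and the parent process merges the per-process monitors with TransmissionMonitor.merge(). A minimal sketch of that aggregation, using only the constructor and merge() call shown in this diff:

from cloudfiles.monitoring import TransmissionMonitor, IOEnum

# one monitor per worker process, as collected from executor.imap()
per_process = [TransmissionMonitor(IOEnum.RX), TransmissionMonitor(IOEnum.RX)]

# combine into a single monitor covering the whole parallel run
combined = TransmissionMonitor.merge(per_process)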
@@ -232,12 +247,55 @@ class CloudFiles:
   currently supports local filesystem, Google Cloud Storage,
   Amazon S3 interfaces, and reading from arbitrary HTTP
   servers.
+
+  cloudpath: a parent directory of the files you want to fetch
+    specified as:
+      e.g. gs://bucket/dir/
+           s3://bucket/dir/
+           s3://https://myendpoint.com/dir/
+           file://./dir
+           ./dir
+           https://some.host.edu/dir/
+           mem://bucket/dir
+    Key:
+      gs: Google Cloud Storage
+      s3: Amazon S3
+      file: Local Filesystem (including network mounts)
+      mem: In-Memory storage
+
+  progress: display progress bar measured in files
+  green: whether to use green threads (uses gevent library)
+  secrets: you can provide GCS, S3, CAVE, etc credentials
+    via the constructor here instead of the default secrets
+    files
+  num_threads: number of threads to launch for remote server
+    IO. No effect on local file fetching (always single threaded
+    for maximum performance).
+  use_https: use the public https API for GCS and S3 instead of
+    boto or google-storage-python
+  endpoint: for S3 emulators, you can provide a different endpoint
+    like https://s3-storage.university.edu. This can also be specified
+    in the secrets file.
+  parallel: number of separate processes to launch (each will use num_threads)
+  request_payer: bill your s3 usage to someone other than the bucket owner
+  locking: for local filesystems, you can use advisory file locking to avoid
+    separate cloudfiles instances from interfering with each other
+  lock_dir: you can specify your own directory for the advisory lock files
+  composite_upload_threshold: GCS and S3 both support multi-part uploads.
+    For files larger than this threshold, use that facility.
+  no_sign_request: (s3 only) don't sign the request with credentials
   """
   def __init__(
-    self, cloudpath:str, progress:bool = False,
-    green:Optional[bool] = None, secrets:SecretsType = None, num_threads:int = 20,
-    use_https:bool = False, endpoint:Optional[str] = None,
-    parallel:ParallelType = 1, request_payer:Optional[str] = None,
+    self,
+    cloudpath:str,
+    progress:bool = False,
+    green:Optional[bool] = None,
+    secrets:SecretsType = None,
+    num_threads:int = 20,
+    use_https:bool = False,
+    endpoint:Optional[str] = None,
+    parallel:ParallelType = 1,
+    request_payer:Optional[str] = None,
     locking:Optional[bool] = None,
     lock_dir:Optional[str] = None,
     composite_upload_threshold:int = int(1e8),
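
The expanded class docstring above spells out the supported path schemes and constructor options. A hedged construction sketch using only parameters shown in this hunk (bucket names and the endpoint URL are placeholders):

from cloudfiles import CloudFiles

# Google Cloud Storage bucket with a progress bar and 20 download threads
cf = CloudFiles("gs://my-bucket/dataset/", progress=True, num_threads=20)

# S3-compatible storage behind a custom endpoint (e.g. an on-premise emulator)
cf_s3 = CloudFiles(
  "s3://my-bucket/dataset/",
  endpoint="https://s3-storage.university.edu",
)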
@@ -323,11 +381,16 @@ class CloudFiles:
 
   @parallelize(desc="Download", returns_list=True)
   def get(
-    self, paths:GetPathType, total:Optional[int] = None,
-    raw:bool = False, progress:Optional[bool] = None,
+    self,
+    paths:GetPathType,
+    total:Optional[int] = None,
+    raw:bool = False,
+    progress:Optional[bool] = None,
     parallel:Optional[ParallelType] = None,
-    return_dict:bool = False, raise_errors:bool = True,
-    part_size:Optional[int] = None
+    return_dict:bool = False,
+    raise_errors:bool = True,
+    part_size:Optional[int] = None,
+    return_recording:bool = False,
   ) -> Union[dict,bytes,List[dict]]:
     """
     Download one or more files. Return order is not guaranteed to match input.
@@ -353,6 +416,10 @@
       extra information. Errors will be raised immediately.
     raise_errors: Raise the first error immediately instead
       of returning them as part of the output.
+    return_recording: Also return a TransmissionMonitor object that
+      records the start and end times and the transmitted size of
+      each object (i.e. before decompression) stored in an interval
+      tree. This enables post-hoc analysis of performance.
 
     Returns:
       if return_dict:
@@ -370,12 +437,18 @@
             'raw': boolean,
           }
         ]
+
+      if return_recording:
+        return (ABOVE, TransmissionMonitor)
+      else:
+        return ABOVE
     """
     paths, multiple_return = toiter(paths, is_iter=True)
     progress = nvl(progress, self.progress)
     # return_dict prevents the user from having a chance
     # to inspect errors, so we must raise here.
     raise_errors = raise_errors or return_dict or (not multiple_return)
+    tm = TransmissionMonitor(IOEnum.RX)
 
     def check_md5(path, content, server_hash):
       if server_hash is None:
@@ -405,12 +478,17 @@
       encoding = None
       server_hash = None
       server_hash_type = None
+      num_bytes_rx = 0
       try:
+        flight_id = tm.start_io(1)
+
         with self._get_connection() as conn:
           content, encoding, server_hash, server_hash_type = conn.get_file(
             path, start=start, end=end, part_size=part_size
           )
 
+        num_bytes_rx = len(content) if content is not None else 0
+
         # md5s don't match for partial reads
         if start is None and end is None:
           if server_hash_type == "md5":
@@ -422,6 +500,9 @@
         content = compression.decompress(content, encoding, filename=path)
       except Exception as err:
         error = err
+        tm.end_error(flight_id)
+
+      tm.end_io(flight_id, num_bytes_rx)
 
       if raise_errors and error:
         raise error
@@ -441,11 +522,16 @@
     if total == 1:
       ret = download(first(paths))
       if return_dict:
-        return { ret["path"]: ret["content"] }
+        ret = { ret["path"]: ret["content"] }
       elif multiple_return:
-        return [ ret ]
+        ret = [ ret ]
+      else:
+        ret = ret['content']
+
+      if return_recording:
+        return (ret, tm)
       else:
-        return ret['content']
+        return ret
 
     num_threads = self.num_threads
     if self.protocol == "file":
@@ -461,10 +547,14 @@
       green=self.green,
     )
 
+    ret = results
     if return_dict:
-      return { res["path"]: res["content"] for res in results }
+      ret = { res["path"]: res["content"] for res in results }
+
+    if return_recording:
+      return (ret, tm)
 
-    return results
+    return ret
 
   def get_json(
     self, paths:GetPathType, total:Optional[int] = None
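
Taken together, the get() changes above let callers opt into transmission telemetry: with return_recording=True the method returns a (results, TransmissionMonitor) pair instead of the results alone. A hedged usage sketch (bucket and file names are placeholders; the monitor's analysis API is not shown in this diff):

from cloudfiles import CloudFiles

cf = CloudFiles("gs://my-bucket/dataset/")

# unchanged default behavior
files = cf.get(["a.bin", "b.bin"])

# opt in to recording: a (results, TransmissionMonitor) tuple comes back
files, tm = cf.get(["a.bin", "b.bin"], return_recording=True)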
@@ -511,12 +601,19 @@
 
   @parallelize(desc="Upload")
   def puts(
-    self, files:PutType,
-    content_type:Optional[str] = None, compress:CompressType = None,
-    compression_level:Optional[int] = None, cache_control:Optional[str] = None,
-    total:Optional[int] = None, raw:bool = False, progress:Optional[bool] = None,
-    parallel:ParallelType = 1, storage_class:Optional[str] = None
-  ) -> int:
+    self,
+    files:PutType,
+    content_type:Optional[str] = None,
+    compress:CompressType = None,
+    compression_level:Optional[int] = None,
+    cache_control:Optional[str] = None,
+    total:Optional[int] = None,
+    raw:bool = False,
+    progress:Optional[bool] = None,
+    parallel:ParallelType = 1,
+    storage_class:Optional[str] = None,
+    return_recording:bool = False,
+  ) -> Union[int, tuple[int,TransmissionMonitor]]:
     """
     Writes one or more files at a given location.
 
@@ -551,11 +648,22 @@
       function call. If progress is a string, it sets the
       text of the progress bar.
     parallel: number of concurrent processes (0 means all cores)
-
-    Returns: number of files uploaded
+    return_recording: Also return a TransmissionMonitor object that
+      records the start and end times and the transmitted size of
+      each object (i.e. before decompression) stored in an interval
+      tree. This enables post-hoc analysis of performance.
+
+    Returns:
+      N = number of files uploaded
+      tm = TransmissionMonitor
+      if return_recording:
+        return (N, tm)
+      else:
+        return N
     """
     files = toiter(files)
     progress = nvl(progress, self.progress)
+    tm = TransmissionMonitor(IOEnum.TX)
 
     def todict(file):
       if isinstance(file, tuple):
@@ -563,6 +671,7 @@
       return file
 
     def uploadfn(file):
+      start_time = time.monotonic()
       file = todict(file)
 
       file_compress = file.get('compress', compress)
@@ -577,11 +686,19 @@
         compress_level=file.get('compression_level', compression_level),
       )
 
+      num_bytes_tx = 0
+      if hasattr(content, "__len__"):
+        num_bytes_tx = len(content)
+      elif isinstance(content, io.IOBase):
+        num_bytes_tx = os.fstat(content.fileno()).st_size
+
+      flight_id = tm.start_io(num_bytes_tx, start_time)
+
       if (
         self.protocol == "gs"
         and (
           (hasattr(content, "read") and hasattr(content, "seek"))
-          or (hasattr(content, "__len__") and len(content) > self.composite_upload_threshold)
+          or (num_bytes_tx > self.composite_upload_threshold)
         )
       ):
         gcs.composite_upload(
@@ -608,6 +725,8 @@
           storage_class=file.get('storage_class', storage_class)
         )
 
+      tm.end_io(flight_id, num_bytes_tx)
+
     if not isinstance(files, (types.GeneratorType, zip)):
       dupes = duplicates([ todict(file)['path'] for file in files ])
       if dupes:
@@ -617,7 +736,10 @@
 
     if total == 1:
       uploadfn(first(files))
-      return 1
+      if return_recording:
+        return (1,tm)
+      else:
+        return 1
 
     fns = ( partial(uploadfn, file) for file in files )
     desc = self._progress_description("Upload")
@@ -628,7 +750,11 @@
       total=total,
       green=self.green,
     )
-    return len(results)
+
+    if return_recording:
+      return (len(results), tm)
+    else:
+      return len(results)
 
   def put(
     self,
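
puts() gains the same opt-in: with return_recording=True the upload count comes back alongside a TransmissionMonitor. A hedged sketch (bucket and payloads are placeholders):

from cloudfiles import CloudFiles

cf = CloudFiles("gs://my-bucket/dataset/")

files = [
  ("a.bin", b"\x00" * 1024),  # (path, content) tuples are accepted
  ("b.bin", b"\x01" * 2048),
]

# default: number of files uploaded
count = cf.puts(files, compress="gzip")

# with recording: (count, TransmissionMonitor)
count, tm = cf.puts(files, compress="gzip", return_recording=True)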
@@ -666,9 +792,13 @@
     self, files:PutType,
     compress:CompressType = None,
     compression_level:Optional[int] = None,
-    cache_control:Optional[str] = None, total:Optional[int] = None,
-    raw:bool = False, progress:Optional[bool] = None, parallel:ParallelType = 1,
-    storage_class:Optional[str] = None
+    cache_control:Optional[str] = None,
+    total:Optional[int] = None,
+    raw:bool = False,
+    progress:Optional[bool] = None,
+    parallel:ParallelType = 1,
+    storage_class:Optional[str] = None,
+    return_recording:bool = False,
   ) -> int:
     """
     Write one or more files as JSON.
@@ -697,7 +827,7 @@
       compress=compress, compression_level=compression_level,
       content_type='application/json', storage_class=storage_class,
       total=total, raw=raw,
-      progress=progress, parallel=parallel
+      progress=progress, parallel=parallel, return_recording=return_recording,
     )
 
   def put_json(
@@ -1008,7 +1138,7 @@
     allow_missing:bool = False,
     progress:Optional[bool] = None,
     resumable:bool = False,
-  ) -> None:
+  ) -> TransmissionMonitor:
     """
     Transfer all files from this CloudFiles storage
     to the destination CloudFiles in batches sized
@@ -1071,7 +1201,7 @@
     allow_missing:bool = False,
     progress:Optional[bool] = None,
     resumable:bool = False,
-  ) -> None:
+  ) -> TransmissionMonitor:
     """
     Transfer all files from the source CloudFiles storage
     to this CloudFiles in batches sized in the
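
transfer_to and transfer_from now return the TransmissionMonitor collected during the copy rather than None. A hedged sketch, assuming transfer_to accepts a destination cloudpath as its first argument (both cloudpaths are placeholders):

from cloudfiles import CloudFiles

src = CloudFiles("gs://my-bucket/raw/")

# returns a TransmissionMonitor describing the transfer
tm = src.transfer_to("file:///tmp/mirror/")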
@@ -1135,7 +1265,7 @@
       and self.protocol == "file"
       and reencode is None
     ):
-      self.__transfer_file_to_file(
+      return self.__transfer_file_to_file(
         cf_src, self, paths, total,
         pbar, block_size, allow_missing
       )
@@ -1144,7 +1274,7 @@
       and self.protocol == "file"
       and reencode is None
     ):
-      self.__transfer_remote_to_file(
+      return self.__transfer_remote_to_file(
        cf_src, self, paths, total,
        pbar, block_size, content_type,
        allow_missing, resumable,
@@ -1154,7 +1284,7 @@
       and self.protocol != "file"
       and reencode is None
     ):
-      self.__transfer_file_to_remote(
+      return self.__transfer_file_to_remote(
        cf_src, self, paths, total,
        pbar, block_size, content_type,
        allow_missing,
@@ -1170,13 +1300,13 @@
       )
       and reencode is None
     ):
-      self.__transfer_cloud_internal(
+      return self.__transfer_cloud_internal(
        cf_src, self, paths,
        total, pbar, block_size,
        allow_missing,
      )
    else:
-      self.__transfer_general(
+      return self.__transfer_general(
        cf_src, self, paths, total,
        pbar, block_size,
        reencode, content_type,
@@ -1188,7 +1318,7 @@
     total, pbar, block_size,
     reencode, content_type,
     allow_missing
-  ):
+  ) -> TransmissionMonitor:
     """
     Downloads the file into RAM, transforms
     the data, and uploads it. This is the slowest and
@@ -1197,6 +1327,7 @@
     pair of endpoints as well as transcoding compression
     formats.
     """
+    upload_tms = []
     for block_paths in sip(paths, block_size):
       for path in block_paths:
         if isinstance(path, dict):
@@ -1220,26 +1351,32 @@
           item["path"] = item["tags"]["dest_path"]
           del item["tags"]["dest_path"]
           yield item
-      self.puts(
+      (ct, batch_tm) = self.puts(
         renameiter(),
         raw=True,
         progress=False,
         compress=reencode,
         content_type=content_type,
+        return_recording=True,
       )
       pbar.update(len(block_paths))
+      upload_tms.append(batch_tm)
+
+    return TransmissionMonitor.merge(upload_tms)
 
   def __transfer_file_to_file(
     self, cf_src, cf_dest, paths,
     total, pbar, block_size, allow_missing
-  ):
+  ) -> TransmissionMonitor:
     """
     shutil.copyfile, starting in Python 3.8, uses
     special OS kernel functions to accelerate file copies
     """
+    tm = TransmissionMonitor(IOEnum.TX)
     srcdir = cf_src.cloudpath.replace("file://", "")
     destdir = mkdir(cf_dest.cloudpath.replace("file://", ""))
     for path in paths:
+      start_time = time.monotonic()
       if isinstance(path, dict):
         src = os.path.join(srcdir, path["path"])
         dest = os.path.join(destdir, path["dest_path"])
@@ -1253,6 +1390,15 @@
       if dest_ext_compress != dest_ext:
         dest += dest_ext_compress
 
+      num_bytes_tx = 0
+      try:
+        if src:
+          num_bytes_tx = os.path.getsize(src)
+      except FileNotFoundError:
+        pass
+
+      flight_id = tm.start_io(num_bytes_tx, start_time)
+
       try:
         shutil.copyfile(src, dest) # avoids user space
       except FileNotFoundError:
@@ -1260,16 +1406,26 @@
           with open(dest, "wb") as f:
             f.write(b'')
         else:
+          tm.end_error(flight_id)
           raise
+      finally:
+        tm.end_io(flight_id, num_bytes_tx)
 
       pbar.update(1)
 
+    return tm
+
   def __transfer_remote_to_file(
     self, cf_src, cf_dest, paths,
     total, pbar, block_size, content_type,
     allow_missing, resumable,
-  ):
+  ) -> TransmissionMonitor:
+
+    tm = TransmissionMonitor(IOEnum.RX)
+
     def thunk_save(key):
+      nonlocal tm
+      flight_id = tm.start_io(1)
       with cf_src._get_connection() as conn:
         if isinstance(key, dict):
           dest_key = key.get("dest_path", key["path"])
@@ -1279,14 +1435,17 @@
           dest_key = key
 
         dest_key = os.path.join(cf_dest._path.path, dest_key)
-        found = conn.save_file(src_key, dest_key, resumable=resumable)
+        (found, num_bytes_rx) = conn.save_file(src_key, dest_key, resumable=resumable)
+
+        tm.end_io(flight_id, num_bytes_rx)
 
         if found == False and not allow_missing:
+          tm.end_error(flight_id)
           raise FileNotFoundError(src_key)
 
         return int(found)
 
-    results = schedule_jobs(
+    schedule_jobs(
       fns=( partial(thunk_save, path) for path in paths ),
       progress=pbar,
       concurrency=self.num_threads,
@@ -1294,7 +1453,7 @@
       green=self.green,
       count_return=True,
     )
-    return len(results)
+    return tm
 
   def __transfer_file_to_remote(
     self, cf_src, cf_dest, paths,
@@ -1306,6 +1465,7 @@
     so that GCS and S3 can do low-memory chunked multi-part
     uploads if necessary.
     """
+    tms = []
     srcdir = cf_src.cloudpath.replace("file://", "")
     for block_paths in sip(paths, block_size):
       to_upload = []
@@ -1336,13 +1496,22 @@
         "content": handle,
         "compress": encoding,
       })
-      cf_dest.puts(to_upload, raw=True, progress=False, content_type=content_type)
+      (ct, batch_tm) = cf_dest.puts(
+        to_upload,
+        raw=True,
+        progress=False,
+        content_type=content_type,
+        return_recording=True,
+      )
       for item in to_upload:
         handle = item["content"]
         if hasattr(handle, "close"):
           handle.close()
+      tms.append(batch_tm)
       pbar.update(len(block_paths))
 
+    return TransmissionMonitor.merge(tms)
+
   def __transfer_cloud_internal(
     self, cf_src, cf_dest, paths,
     total, pbar, block_size, allow_missing
@@ -1355,7 +1524,11 @@
     of the cloud, this is much slower and more expensive
     than necessary.
     """
+    tm = TransmissionMonitor(IOEnum.TX)
+
     def thunk_copy(key):
+      nonlocal tm
+      flight_id = tm.start_io(1)
       with cf_src._get_connection() as conn:
         if isinstance(key, dict):
           dest_key = key.get("dest_path", key["path"])
@@ -1365,14 +1538,17 @@
           dest_key = key
 
         dest_key = posixpath.join(cf_dest._path.path, dest_key)
-        found = conn.copy_file(src_key, cf_dest._path.bucket, dest_key)
+        (found, num_bytes_tx) = conn.copy_file(src_key, cf_dest._path.bucket, dest_key)
+
+        tm.end_io(flight_id, num_bytes_tx)
 
         if found == False and not allow_missing:
+          tm.end_error(flight_id)
           raise FileNotFoundError(src_key)
 
         return int(found)
 
-    results = schedule_jobs(
+    schedule_jobs(
       fns=( partial(thunk_copy, path) for path in paths ),
       progress=pbar,
       concurrency=self.num_threads,
@@ -1380,7 +1556,7 @@
       green=self.green,
       count_return=True,
     )
-    return len(results)
+    return tm
 
   def move(self, src:str, dest:str):
     """Move (rename) src to dest.
@@ -1490,6 +1666,16 @@
       return os.path.join(*paths)
     return posixpath.join(*paths)
 
+  def dirname(self, path:str) -> str:
+    if self._path.protocol == "file":
+      return os.path.dirname(path)
+    return posixpath.dirname(path)
+
+  def basename(self, path:str) -> str:
+    if self._path.protocol == "file":
+      return os.path.basename(path)
+    return posixpath.basename(path)
+
   def __getitem__(self, key) -> Union[dict,bytes,List[dict]]:
     if isinstance(key, tuple) and len(key) == 2 and isinstance(key[1], slice) and isinstance(key[0], str):
       return self.get({ 'path': key[0], 'start': key[1].start, 'end': key[1].stop })
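
The new dirname and basename helpers mirror join(): they dispatch to os.path for file:// paths and to posixpath otherwise. A hedged sketch (bucket is a placeholder):

from cloudfiles import CloudFiles

cf = CloudFiles("gs://my-bucket/dataset/")

cf.dirname("images/0001.png")   # -> "images"
cf.basename("images/0001.png")  # -> "0001.png"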
@@ -1656,6 +1842,12 @@ class CloudFile:
   def join(self, *args):
     return self.cf.join(*args)
 
+  def dirname(self, *args):
+    return self.cf.dirname(*args)
+
+  def basename(self, *args):
+    return self.cf.basename(*args)
+
   def touch(self):
     return self.cf.touch(self.filename)