ciocore 9.0.0b2__py2.py3-none-any.whl → 9.1.0b2__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ciocore might be problematic.

ciocore/package_tree.py CHANGED
@@ -290,6 +290,10 @@ class PackageTree(object):
     def __bool__(self):
         return True if self._tree["children"] else False
 
+    def __nonzero__(self):
+        # Python 2.7
+        return self.__bool__()
+
     def as_dict(self):
         """
         Returns:
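
For context, the added __nonzero__ simply delegates to __bool__, so truthiness checks on a PackageTree behave the same under Python 2.7 and Python 3. A minimal illustrative sketch (the constructor argument is a hypothetical placeholder, not the package's documented API):

    tree = PackageTree(software_data)   # software_data: placeholder input
    if tree:                            # Python 2.7 falls back to __nonzero__ here
        print("package tree has children")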
@@ -2,6 +2,7 @@ import datetime
 import json
 import logging
 import os
+import pathlib
 import requests.exceptions
 import sys
 import time
@@ -25,13 +26,12 @@ from ciocore import (
     exceptions,
 )
 
+from . import thread_queue_job
+
 from .upload_stats import UploadStats
 
 logger = logging.getLogger("{}.uploader".format(loggeria.CONDUCTOR_LOGGER_NAME))
 
-SINGLEPART = "singlepart"
-MULTIPART = "multipart"
-
 
 class MD5Worker(worker.ThreadWorker):
     """
@@ -329,28 +329,61 @@ class FileStatWorker(worker.ThreadWorker):
         """
 
         if job:
+
+            kms_key_name = job.get('kmsKeyName')
+
             # iterate through singlepart urls
             for singlepart_upload in job.get("singlePartURLs", []):
                 path = singlepart_upload["filePath"]
                 file_size = singlepart_upload["fileSize"]
                 upload_url = singlepart_upload["preSignedURL"]
+                md5 = self.metric_store.get_dict("file_md5s", path)
 
                 self.metric_store.increment("bytes_to_upload", file_size, path)
                 self.metric_store.increment("num_files_to_upload")
                 logger.debug("Singlepart, adding task %s", path)
 
-                self.put_job((path, file_size, upload_url, SINGLEPART))
+                upload_tq_job = thread_queue_job.UploadThreadQueueJob(path,
+                                                                      file_size,
+                                                                      presigned_url=upload_url,
+                                                                      file_md5=md5,
+                                                                      upload_id=None,
+                                                                      part_size=file_size,
+                                                                      part_index=1,
+                                                                      kms_key_name=kms_key_name)
+
+                self.put_job(upload_tq_job)
 
             # iterate through multipart
             for multipart_upload in job.get("multiPartURLs", []):
                 path = multipart_upload["filePath"]
                 file_size = multipart_upload["fileSize"]
 
+                part = multipart_upload
+                total_parts = len(multipart_upload['parts'])
+                md5 = self.metric_store.get_dict("file_md5s", path)
+
+                for chunk in multipart_upload['parts']:
+                    logger.debug("Multipart, adding task %s (part %s)", path, chunk['partNumber'])
+
+                    upload_tq_job = thread_queue_job.UploadThreadQueueJob(
+                        path=path,
+                        file_size=file_size,
+                        presigned_url=chunk['url'],
+                        file_md5=md5,
+                        upload_id=multipart_upload['uploadID'],
+                        part_size=multipart_upload['partSize'],
+                        total_parts=total_parts,
+                        part_index=chunk['partNumber'],
+                        kms_key_name=kms_key_name)
+
+
+                    part['parts'] = chunk
+                    self.put_job(upload_tq_job)
+
                 self.metric_store.increment("bytes_to_upload", file_size, path)
                 self.metric_store.increment("num_files_to_upload")
-                logger.debug("Multipart, adding task %s", path)
-                self.put_job((path, file_size, multipart_upload, MULTIPART))
-
+
             # make sure we return None, so no message is automatically added to the out_queue
             return None
 
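For reference, the job dict that FileStatWorker consumes here appears to carry an optional kmsKeyName plus singlePartURLs and multiPartURLs lists. The sketch below is assembled only from the keys read in the code above; every value is a placeholder, not a real response:

    {
        "kmsKeyName": "example-kms-key",      # optional
        "singlePartURLs": [
            {"filePath": "/path/to/small_file", "fileSize": 1024, "preSignedURL": "https://example/upload"}
        ],
        "multiPartURLs": [
            {
                "filePath": "/path/to/large_file",
                "fileSize": 104857600,
                "uploadID": "example-upload-id",
                "partSize": 8388608,
                "parts": [
                    {"partNumber": 1, "url": "https://example/part-1"},
                    {"partNumber": 2, "url": "https://example/part-2"}
                ]
            }
        ]
    }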
@@ -382,45 +415,30 @@ class UploadWorker(worker.ThreadWorker):
             self.metric_store.increment("bytes_uploaded", len(data), filename)
 
     def do_work(self, job, thread_int):
+
         if job:
-            kms_key_name = None
-
-            try:
-                filename = job[0]
-                file_size = job[1]
-                upload = job[2]
-                upload_type = job[3]
-
-            except Exception:
-                logger.error("Issue with job (%s): %s", len(job), job)
-                raise
-
-            if len(job) > 4:
-                kms_key_name = job[4]
-
-            md5 = self.metric_store.get_dict("file_md5s", filename)
+
+            md5 = self.metric_store.get_dict("file_md5s", job.path)
 
             try:
-                if upload_type == SINGLEPART:
-                    return self.do_singlepart_upload(
-                        upload, filename, file_size, md5, kms_key_name
-                    )
-                elif upload_type == MULTIPART:
-                    return self.do_multipart_upload(upload, filename, md5)
-
-                raise Exception(
-                    "upload_type is '%s' expected %s or %s"
-                    % (upload_type, SINGLEPART, MULTIPART)
-                )
+                if job.is_multipart():
+                    return self.do_multipart_upload(job)
+
+                else:
+                    return self.do_singlepart_upload(job)
 
             except Exception as err_msg:
-                real_md5 = common.get_base64_md5(filename)
+                real_md5 = common.get_base64_md5(job.path)
+
+                exc_tb = sys.exc_info()[2]
+                exception_line_num = exc_tb.tb_lineno
+                exception_file = pathlib.Path(exc_tb.tb_frame.f_code.co_filename).name
 
                 if isinstance(err_msg, requests.exceptions.HTTPError):
-                    error_message = f"Upload of {filename} failed with a response code {err_msg.response.status_code} ({err_msg.response.reason}) (expected '{md5}', got '{real_md5}')"
+                    error_message = f"Upload of {job.path} failed with a response code {err_msg.response.status_code} ({err_msg.response.reason}) (expected '{job.md5}', got '{real_md5}')"
                 else:
                     error_message = (
-                        f"Upload of {filename} failed. (expected '{md5}', got '{real_md5}') {str(err_msg)}"
+                        f"Upload of {job.path} failed. (expected '{job.file_md5}', got '{real_md5}') {str(err_msg)} [{exception_file}-{exception_line_num}]"
                     )
 
                 logger.error(error_message)
@@ -429,9 +447,7 @@ class UploadWorker(worker.ThreadWorker):
         return worker.EMPTY_JOB
 
     @common.DecRetry(retry_exceptions=api_client.CONNECTION_EXCEPTIONS, tries=5)
-    def do_singlepart_upload(
-        self, upload_url, filename, file_size, md5, kms_key_name=None
-    ):
+    def do_singlepart_upload(self, job):
         """
         Note that for GCS we don't rely on the make_request's own retry mechanism because we need to
         recreate the chunked_reader generator before retrying the request. Instead, we wrap this
@@ -441,19 +457,23 @@
        headers that S3 does not accept.
        """
 
-        if ("amazonaws" in upload_url) or ("coreweave" in upload_url):
+        tq_job = thread_queue_job.MultiPartThreadQueueJob( md5=job.file_md5,
+                                                           path=job.path,
+                                                           total_parts=job.total_parts)
+
+        if job.is_vendor_aws() or job.is_vendor_cw():
             # must declare content-length ourselves due to zero byte bug in requests library.
             # api_client.make_prepared_request docstring.
             headers = {
                 "Content-Type": "application/octet-stream",
-                "Content-Length": str(file_size),
+                "Content-Length": str(job.file_size),
             }
 
-            with open(filename, "rb") as fh:
+            with open(job.path, "rb") as fh:
                 # TODO: support chunked
                 response = self.api_client.make_prepared_request(
                     verb="PUT",
-                    url=upload_url,
+                    url=job.presigned_url,
                     headers=headers,
                     params=None,
                     data=fh,
@@ -467,25 +487,26 @@
             response.close()
 
             # report upload progress
-            self.metric_store.increment("bytes_uploaded", file_size, filename)
-
-            return response
+            self.metric_store.increment("bytes_uploaded", job.file_size, job.path)
+
         else:
             headers = {"Content-MD5": md5, "Content-Type": "application/octet-stream"}
 
-            if kms_key_name:
-                headers["x-goog-encryption-kms-key-name"] = kms_key_name
+            if job.kms_key_name is not None:
+                headers["x-goog-encryption-kms-key-name"] = job.kms_key_name
 
-            return self.api_client.make_request(
-                conductor_url=upload_url,
+            self.api_client.make_request(
+                conductor_url=job.presigned_url,
                 headers=headers,
-                data=self.chunked_reader(filename),
+                data=self.chunked_reader(job.path),
                 verb="PUT",
                 tries=1,
                 use_api_key=True,
             )
+
+        return tq_job
 
-    def do_multipart_upload(self, upload, filename, md5):
+    def do_multipart_upload(self, job):
         """
         Files will be split into partSize returned by the FileAPI and hydrated once all parts are
         uploaded. On successful part upload, response headers will contain an ETag. This value must
@@ -493,42 +514,32 @@
         """
         uploads = []
         complete_payload = {
-            "uploadID": upload["uploadID"],
-            "hash": md5,
+            "uploadID": job.upload_id,
+            "hash": job.file_md5,
             "completedParts": [],
             "project": self.project,
         }
 
-        # iterate over parts and upload
-        for part in upload["parts"]:
-            resp_headers = self._do_multipart_upload(
-                upload_url=part["url"],
-                filename=filename,
-                part_number=part["partNumber"],
-                part_size=upload["partSize"],
-            )
+        tq_job = thread_queue_job.MultiPartThreadQueueJob(path=job.path,
+                                                          md5=job.file_md5,
+                                                          total_parts=job.total_parts,
+                                                          part_index=job.part_index)
+        tq_job.upload_id = job.upload_id
+        tq_job.project = self.project
 
-            if resp_headers:
-                uploads.append(upload["uploadID"])
-                completed_part = {
-                    "partNumber": part["partNumber"],
-                    "etag": resp_headers["ETag"].strip('"'),
-                }
-                complete_payload["completedParts"].append(completed_part)
 
-        # Complete multipart upload in order to hydrate file for availability
-        uri_path = "/api/v2/files/multipart/complete"
-        headers = {"Content-Type": "application/json"}
-        self.api_client.make_request(
-            uri_path=uri_path,
-            verb="POST",
-            headers=headers,
-            data=json.dumps(complete_payload),
-            raise_on_error=True,
-            use_api_key=True,
+        resp_headers = self._do_multipart_upload(
+            upload_url=job.presigned_url,
+            filename=job.path,
+            part_number=job.part_index,
+            part_size=job.part_size,
         )
 
-        return uploads
+        if resp_headers:
+            tq_job.part = job.part_index
+            tq_job.etag = resp_headers["ETag"].strip('"')
+
+        return tq_job
 
     @common.DecRetry(retry_exceptions=api_client.CONNECTION_EXCEPTIONS, tries=5)
     def _do_multipart_upload(self, upload_url, filename, part_number, part_size):
@@ -563,6 +574,73 @@ class UploadWorker(worker.ThreadWorker):
 
         return response.headers
 
+
+class MultiPartSiphonWorker(worker.ThreadWorker):
+    def __init__(self, *args, **kwargs):
+        super(MultiPartSiphonWorker, self).__init__(*args, **kwargs)
+
+        self.api_client = api_client.ApiClient()
+        self.multipart_siphon = {}
+
+    def do_work(self, job, thread_int):
+        """
+        Process files that have already been uploaded.
+
+        If it's a single-part file, add the job to the out queue, so that it can
+        be used to determine if the Upload entity is complete.
+
+        If it's a multi-part upload, collect all the parts together. Once all the
+        parts have been accumulated, mark it as complete and add the file to the
+        out queue.
+        """
+
+        if job:
+
+            if not job.is_multipart():
+                logger.debug("Job is not multipart (%s, %s)", job.total_parts, job.part_index)
+
+            else:
+
+                if job.md5 not in self.multipart_siphon:
+                    self.multipart_siphon[job.md5] = []
+
+                    # Add to the task count for this worker.
+                    # -1 because a task has already been added for a single file
+                    # but not all its parts.
+                    old_task_count = self.task_count
+                    self.task_count += job.total_parts - 1
+                    logger.debug("Incrementing task count to %s from %s", self.task_count, old_task_count)
+
+                self.multipart_siphon[job.md5].append(job)
+
+                if len(self.multipart_siphon[job.md5]) == job.total_parts:
+
+                    complete_payload = {
+                        "uploadID": job.upload_id,
+                        "hash": job.md5,
+                        "completedParts": thread_queue_job.MultiPartThreadQueueJob.aggregate_parts(self.multipart_siphon[job.md5]),
+                        "project": job.project,
+                    }
+
+                    # Complete multipart upload in order to hydrate file for availability
+                    uri_path = "/api/v2/files/multipart/complete"
+                    headers = {"Content-Type": "application/json"}
+                    self.api_client.make_request(
+                        uri_path=uri_path,
+                        verb="POST",
+                        headers=headers,
+                        data=json.dumps(complete_payload),
+                        raise_on_error=True,
+                        use_api_key=True,
+                    )
+
+                    logger.debug("JSON payload: '%s'", json.dumps(complete_payload))
+
+                    return job
+
+        # make sure we return None, so no message is automatically added to the out_queue
+        return None
+
     def is_complete(self):
         # Get the number of files already uploaded as they are not passed to the Upload
         # worker
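
For context, once every part of a multipart file has been collected, the siphon worker posts a completion payload to /api/v2/files/multipart/complete. A rough sketch of that payload, using only the fields set in the code above (all values are placeholders):

    {
        "uploadID": "example-upload-id",
        "hash": "base64-md5-of-file",
        "completedParts": [
            {"partNumber": 1, "etag": "etag-for-part-1"},
            {"partNumber": 2, "etag": "etag-for-part-2"}
        ],
        "project": "example_project"
    }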
@@ -580,11 +658,11 @@ class UploadWorker(worker.ThreadWorker):
                 self.task_count,
             )
 
-            return (queue_size + already_completed_uploads) >= self.task_count
+            return (queue_size) >= self.task_count
 
         else:
             logger.debug("Is complete?: files not initialized yet")
-            return False
+            return False
 
 
 class Uploader(object):
@@ -638,6 +716,7 @@ class Uploader(object):
             ),
             (FileStatWorker, [], {"thread_count": 1}),
             (UploadWorker, [], {"thread_count": self.args["thread_count"]}),
+            (MultiPartSiphonWorker, [], {"thread_count": 1})
         ]
 
         manager = worker.JobManager(job_description)
@@ -760,6 +839,31 @@ class Uploader(object):
         file_map = {path: None for path in processed_filepaths}
         self.handle_upload_response(project=None, upload_files=file_map)
 
+        if common.SIGINT_EXIT or self.cancel:
+            print("\nUpload cancelled\n")
+
+        else:
+            print("\nUpload of {} file completed\n".format(len(file_map)))
+
+        error_messages = []
+
+        for exception in self.error_messages:
+            error_messages.append(str(exception[1]))
+            print("".join(traceback.format_tb(exception[2])))
+            logger.error("".join(traceback.format_tb(exception[2])))
+
+        if error_messages:
+
+            log_file = loggeria.LOG_PATH
+            sys.stderr.write("\nError uploading files:\n")
+
+            for err_msg in error_messages:
+                sys.stderr.write("\t{}\n".format(err_msg))
+
+            sys.stderr.write("\nSee log {} for more details\n\n".format(log_file))
+
+        self.error_messages = []
+
     def handle_upload_response(self, project, upload_files, upload_id=None):
         """
         This is a really confusing method and should probably be split into to clear logic
@@ -818,8 +922,7 @@ class Uploader(object):
             time.sleep(5)
 
         # Shutdown the manager once all jobs are done
-        if not self.cancel and not self.manager.error:
-            logger.debug("Waiting for Manager to join")
+        if not (self.cancel or self.manager.error or common.SIGINT_EXIT):
             self.manager.join()
 
         upload_stats = UploadStats.create(
@@ -0,0 +1,101 @@
+import logging
+from ciocore import loggeria
+
+logger = logging.getLogger("{}.uploader".format(loggeria.CONDUCTOR_LOGGER_NAME))
+
+class ThreadQueueJob():
+    pass
+
+class UploadThreadQueueJob(ThreadQueueJob):
+
+    def __init__(self, path, file_size, presigned_url, file_md5=None, upload_id=None, part_size=None, total_parts=1, part_index=1, kms_key_name=None):
+
+        super().__init__()
+
+        self.path = path
+        self.file_size = file_size
+        self.upload_id = upload_id
+        self.presigned_url = presigned_url
+        self.file_md5 = file_md5
+        self.part_size = part_size
+        self.part_index = part_index
+        self.total_parts = total_parts
+        self.kms_key_name = kms_key_name
+
+        logger.info("Creating %s (%s): %s", str(self.__class__), str(self), str(self.__dict__))
+
+    def is_multipart(self):
+        return self.total_parts != 1
+
+    def is_vendor_aws(self):
+        return "amazonaws" in self.presigned_url
+
+    def is_vendor_cw(self):
+        return "coreweave" in self.presigned_url
+
+    @classmethod
+    def create_from_response(cls, response):
+
+        new_thread_queue_jobs = []
+
+        for part_type, file_request_list in response.items():
+
+            for file_request in file_request_list:
+                if part_type == "multiPartURLs":
+
+                    for part in file_request["parts"]:
+                        new_tqj = cls( path=file_request['filePath'],
+                                       file_size = file_request['filePath'],
+                                       presigned_url = file_request['preSignedURL'],
+                                       file_md5 = file_request['preSignedURL'],
+                                       upload_id = file_request['preSignedURL'],
+                                       part_size = file_request['preSignedURL'],
+                                       part_index = file_request['preSignedURL'])
+
+
+                else:
+                    new_tqj = cls( path=file_request['filePath'],
+                                   file_size = file_request['filePath'],
+                                   presigned_url = file_request['preSignedURL'])
+
+                new_thread_queue_jobs.append(new_tqj)
+
+
+
+class MultiPartThreadQueueJob(ThreadQueueJob):
+
+    def __init__(self, path, md5, total_parts=1, part_index=1):
+
+        super().__init__()
+
+        self.upload_id = None
+        self.md5 = md5
+        self.project = None
+        self.path = path
+        self.part_index = part_index
+        self.etag = None
+        self.total_parts = total_parts
+
+        logger.info("Creating %s (%s): %s", str(self.__class__), str(self), str(self.__dict__))
+
+    def is_multipart(self):
+        return self.total_parts != 1
+
+    # def __str__(self):
+    #     return
+
+    @staticmethod
+    def aggregate_parts(parts):
+        """
+        Helper function to take all the parts of a multipart upload and put
+        them into a format that's expected for the HTTP call.
+        """
+
+        completed_parts_payload = []
+
+        for part in parts:
+            completed_parts_payload.append({'partNumber': part.part,
+                                            'etag': part.etag}
+                                           )
+
+        return completed_parts_payload
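
As a rough usage sketch (illustrative only, not shipped code), uploader.py constructs one UploadThreadQueueJob per presigned URL and later wraps each part's result in a MultiPartThreadQueueJob; the URL and size values below are placeholders:

    # assuming the new module is imported as in uploader.py: from . import thread_queue_job
    part_job = thread_queue_job.UploadThreadQueueJob(
        "/path/to/large_file",
        104857600,
        presigned_url="https://example/part-1",
        file_md5="base64-md5-of-file",
        upload_id="example-upload-id",
        part_size=8388608,
        total_parts=2,
        part_index=1,
        kms_key_name=None)

    part_job.is_multipart()      # True, because total_parts != 1
    part_job.is_vendor_aws()     # True only if "amazonaws" appears in the presigned URL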
ciocore/worker.py CHANGED
@@ -621,7 +621,11 @@ class JobManager():
             q_size = self.work_queues[index].qsize()
             worker_threads = self.workers[index].threads
 
-            num_active_threads = len([thd for thd in worker_threads if thd.is_alive()])
+            # thread.isAlive() was renamed to is_alive() in Python 3.9
+            try:
+                num_active_threads = len([thd for thd in worker_threads if thd.isAlive()])
+            except AttributeError:
+                num_active_threads = len([thd for thd in worker_threads if thd.is_alive()])
 
             msg += '%s \titems in queue: %s' % (q_size, worker_class.__name__)
             msg += '\t\t%s threads' % num_active_threads
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ciocore
-Version: 9.0.0b2
+Version: 9.1.0b2
 Summary: Core functionality for Conductor's client tools
 Home-page: https://github.com/ConductorTechnologies/ciocore
 Author: conductor
@@ -9,8 +9,9 @@ Classifier: Operating System :: OS Independent
 Classifier: Programming Language :: Python
 Classifier: Topic :: Multimedia :: Graphics :: 3D Rendering
 Description-Content-Type: text/markdown
-Requires-Dist: requests>=2.31.0
-Requires-Dist: pyjwt==2.9.0
+Requires-Dist: requests[use_chardet_on_py3]==2.28.1
+Requires-Dist: pyjwt==1.7.1
+Requires-Dist: future>=0.18.2
 Requires-Dist: cioseq<1.0.0,>=0.4.1
 Requires-Dist: Click<9.0.0,>=8.1.3
 Requires-Dist: markdown<4.0.0,>=3.5.2
@@ -51,10 +52,10 @@ See [CONTRIBUTING](CONTRIBUTING.md)
 ## Changelog
 
 ## Unreleased:
+* Adds required changes to parallelize multi-part uploads
+* Cleans up the output when explicit paths are uploaded
+* Fixes logic so managers doesn't erroneously try and call join a second time if cancelled
 
-* 9.0.0-beta.1
-  * Use the new required jwt parameters
-  * Removing py2.7 compatibility
 
 ## Version:8.3.2 -- 01 Oct 2024