ciocore 9.1.0b1__py2.py3-none-any.whl → 9.1.0b2__py2.py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.

Potentially problematic release: this version of ciocore might be problematic.

ciocore/VERSION CHANGED
@@ -1 +1 @@
-9.1.0-beta.1
+9.1.0-beta.2
ciocore/uploader/_uploader.py CHANGED
@@ -26,13 +26,12 @@ from ciocore import (
     exceptions,
 )
 
+from . import thread_queue_job
+
 from .upload_stats import UploadStats
 
 logger = logging.getLogger("{}.uploader".format(loggeria.CONDUCTOR_LOGGER_NAME))
 
-SINGLEPART = "singlepart"
-MULTIPART = "multipart"
-
 
 class MD5Worker(worker.ThreadWorker):
     """
@@ -330,28 +329,61 @@ class FileStatWorker(worker.ThreadWorker):
         """
 
         if job:
+
+            kms_key_name = job.get('kmsKeyName')
+
             # iterate through singlepart urls
             for singlepart_upload in job.get("singlePartURLs", []):
                 path = singlepart_upload["filePath"]
                 file_size = singlepart_upload["fileSize"]
                 upload_url = singlepart_upload["preSignedURL"]
+                md5 = self.metric_store.get_dict("file_md5s", path)
 
                 self.metric_store.increment("bytes_to_upload", file_size, path)
                 self.metric_store.increment("num_files_to_upload")
                 logger.debug("Singlepart, adding task %s", path)
 
-                self.put_job((path, file_size, upload_url, SINGLEPART))
+                upload_tq_job = thread_queue_job.UploadThreadQueueJob(path,
+                                                                      file_size,
+                                                                      presigned_url=upload_url,
+                                                                      file_md5=md5,
+                                                                      upload_id=None,
+                                                                      part_size=file_size,
+                                                                      part_index=1,
+                                                                      kms_key_name=kms_key_name)
+
+                self.put_job(upload_tq_job)
 
             # iterate through multipart
             for multipart_upload in job.get("multiPartURLs", []):
                 path = multipart_upload["filePath"]
                 file_size = multipart_upload["fileSize"]
 
+                part = multipart_upload
+                total_parts = len(multipart_upload['parts'])
+                md5 = self.metric_store.get_dict("file_md5s", path)
+
+                for chunk in multipart_upload['parts']:
+                    logger.debug("Multipart, adding task %s (part %s)", path, chunk['partNumber'])
+
+                    upload_tq_job = thread_queue_job.UploadThreadQueueJob(
+                        path=path,
+                        file_size=file_size,
+                        presigned_url=chunk['url'],
+                        file_md5=md5,
+                        upload_id=multipart_upload['uploadID'],
+                        part_size=multipart_upload['partSize'],
+                        total_parts=total_parts,
+                        part_index=chunk['partNumber'],
+                        kms_key_name=kms_key_name)
+
+
+                    part['parts'] = chunk
+                    self.put_job(upload_tq_job)
+
                 self.metric_store.increment("bytes_to_upload", file_size, path)
                 self.metric_store.increment("num_files_to_upload")
-                logger.debug("Multipart, adding task %s", path)
-                self.put_job((path, file_size, multipart_upload, MULTIPART))
-
+
         # make sure we return None, so no message is automatically added to the out_queue
         return None
 
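Note on the new job object: ciocore/uploader/thread_queue_job.py is unchanged between these two releases (its RECORD entry below carries the same hash), so its contents are not part of this diff. The sketch below is a hypothetical reconstruction of an UploadThreadQueueJob that would satisfy the call sites above and the is_multipart/is_vendor checks used further down; every field and method here is inferred from the diff, not copied from the wheel.

# Hypothetical sketch only; NOT the packaged thread_queue_job module.
# Names are inferred from the call sites in this diff.


class UploadThreadQueueJob(object):
    def __init__(self, path, file_size, presigned_url=None, file_md5=None,
                 upload_id=None, part_size=None, total_parts=1, part_index=1,
                 kms_key_name=None):
        self.path = path
        self.file_size = file_size
        self.presigned_url = presigned_url
        self.file_md5 = file_md5
        self.upload_id = upload_id        # None for single-part uploads
        self.part_size = part_size
        self.total_parts = total_parts    # left at 1 for single-part uploads
        self.part_index = part_index
        self.kms_key_name = kms_key_name

    def is_multipart(self):
        return self.total_parts > 1

    def is_vendor_aws(self):
        # 9.1.0b1 tested the raw URL the same way: "amazonaws" in upload_url
        return "amazonaws" in (self.presigned_url or "")

    def is_vendor_cw(self):
        return "coreweave" in (self.presigned_url or "")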
@@ -383,45 +415,30 @@ class UploadWorker(worker.ThreadWorker):
             self.metric_store.increment("bytes_uploaded", len(data), filename)
 
     def do_work(self, job, thread_int):
+
         if job:
-            kms_key_name = None
+
+            md5 = self.metric_store.get_dict("file_md5s", job.path)
 
             try:
-                filename = job[0]
-                file_size = job[1]
-                upload = job[2]
-                upload_type = job[3]
-
-            except Exception:
-                logger.error("Issue with job (%s): %s", len(job), job)
-                raise
-
-            if len(job) > 4:
-                kms_key_name = job[4]
-
-            md5 = self.metric_store.get_dict("file_md5s", filename)
-
-            try:
-                if upload_type == SINGLEPART:
-                    return self.do_singlepart_upload(
-                        upload, filename, file_size, md5, kms_key_name
-                    )
-                elif upload_type == MULTIPART:
-                    return self.do_multipart_upload(upload, filename, md5)
-
-                raise Exception(
-                    "upload_type is '%s' expected %s or %s"
-                    % (upload_type, SINGLEPART, MULTIPART)
-                )
+                if job.is_multipart():
+                    return self.do_multipart_upload(job)
+
+                else:
+                    return self.do_singlepart_upload(job)
 
             except Exception as err_msg:
-                real_md5 = common.get_base64_md5(filename)
+                real_md5 = common.get_base64_md5(job.path)
+
+                exc_tb = sys.exc_info()[2]
+                exception_line_num = exc_tb.tb_lineno
+                exception_file = pathlib.Path(exc_tb.tb_frame.f_code.co_filename).name
 
                 if isinstance(err_msg, requests.exceptions.HTTPError):
-                    error_message = f"Upload of {filename} failed with a response code {err_msg.response.status_code} ({err_msg.response.reason}) (expected '{md5}', got '{real_md5}')"
+                    error_message = f"Upload of {job.path} failed with a response code {err_msg.response.status_code} ({err_msg.response.reason}) (expected '{job.md5}', got '{real_md5}')"
                 else:
                     error_message = (
-                        f"Upload of {filename} failed. (expected '{md5}', got '{real_md5}') {str(err_msg)}"
+                        f"Upload of {job.path} failed. (expected '{job.file_md5}', got '{real_md5}') {str(err_msg)} [{exception_file}-{exception_line_num}]"
                    )
 
                 logger.error(error_message)
@@ -430,9 +447,7 @@ class UploadWorker(worker.ThreadWorker):
                 return worker.EMPTY_JOB
 
     @common.DecRetry(retry_exceptions=api_client.CONNECTION_EXCEPTIONS, tries=5)
-    def do_singlepart_upload(
-        self, upload_url, filename, file_size, md5, kms_key_name=None
-    ):
+    def do_singlepart_upload(self, job):
         """
         Note that for GCS we don't rely on the make_request's own retry mechanism because we need to
         recreate the chunked_reader generator before retrying the request. Instead, we wrap this
@@ -442,19 +457,23 @@
         headers that S3 does not accept.
         """
 
-        if ("amazonaws" in upload_url) or ("coreweave" in upload_url):
+        tq_job = thread_queue_job.MultiPartThreadQueueJob( md5=job.file_md5,
+                                                           path=job.path,
+                                                           total_parts=job.total_parts)
+
+        if job.is_vendor_aws() or job.is_vendor_cw():
             # must declare content-length ourselves due to zero byte bug in requests library.
             # api_client.make_prepared_request docstring.
             headers = {
                 "Content-Type": "application/octet-stream",
-                "Content-Length": str(file_size),
+                "Content-Length": str(job.file_size),
             }
 
-            with open(filename, "rb") as fh:
+            with open(job.path, "rb") as fh:
                 # TODO: support chunked
                 response = self.api_client.make_prepared_request(
                     verb="PUT",
-                    url=upload_url,
+                    url=job.presigned_url,
                     headers=headers,
                     params=None,
                     data=fh,
@@ -468,25 +487,26 @@
             response.close()
 
             # report upload progress
-            self.metric_store.increment("bytes_uploaded", file_size, filename)
-
-            return response
+            self.metric_store.increment("bytes_uploaded", job.file_size, job.path)
+
         else:
             headers = {"Content-MD5": md5, "Content-Type": "application/octet-stream"}
 
-            if kms_key_name:
-                headers["x-goog-encryption-kms-key-name"] = kms_key_name
+            if job.kms_key_name is not None:
+                headers["x-goog-encryption-kms-key-name"] = job.kms_key_name
 
-            return self.api_client.make_request(
-                conductor_url=upload_url,
+            self.api_client.make_request(
+                conductor_url=job.presigned_url,
                 headers=headers,
-                data=self.chunked_reader(filename),
+                data=self.chunked_reader(job.path),
                 verb="PUT",
                 tries=1,
                 use_api_key=True,
             )
+
+        return tq_job
 
-    def do_multipart_upload(self, upload, filename, md5):
+    def do_multipart_upload(self, job):
         """
         Files will be split into partSize returned by the FileAPI and hydrated once all parts are
         uploaded. On successful part upload, response headers will contain an ETag. This value must
@@ -494,42 +514,32 @@
         """
         uploads = []
         complete_payload = {
-            "uploadID": upload["uploadID"],
-            "hash": md5,
+            "uploadID": job.upload_id,
+            "hash": job.file_md5,
             "completedParts": [],
             "project": self.project,
         }
 
-        # iterate over parts and upload
-        for part in upload["parts"]:
-            resp_headers = self._do_multipart_upload(
-                upload_url=part["url"],
-                filename=filename,
-                part_number=part["partNumber"],
-                part_size=upload["partSize"],
-            )
+        tq_job = thread_queue_job.MultiPartThreadQueueJob(path=job.path,
+                                                          md5=job.file_md5,
+                                                          total_parts=job.total_parts,
+                                                          part_index=job.part_index)
+        tq_job.upload_id = job.upload_id
+        tq_job.project = self.project
 
-            if resp_headers:
-                uploads.append(upload["uploadID"])
-                completed_part = {
-                    "partNumber": part["partNumber"],
-                    "etag": resp_headers["ETag"].strip('"'),
-                }
-                complete_payload["completedParts"].append(completed_part)
 
-        # Complete multipart upload in order to hydrate file for availability
-        uri_path = "/api/v2/files/multipart/complete"
-        headers = {"Content-Type": "application/json"}
-        self.api_client.make_request(
-            uri_path=uri_path,
-            verb="POST",
-            headers=headers,
-            data=json.dumps(complete_payload),
-            raise_on_error=True,
-            use_api_key=True,
+        resp_headers = self._do_multipart_upload(
+            upload_url=job.presigned_url,
+            filename=job.path,
+            part_number=job.part_index,
+            part_size=job.part_size,
         )
 
-        return uploads
+        if resp_headers:
+            tq_job.part = job.part_index
+            tq_job.etag = resp_headers["ETag"].strip('"')
+
+        return tq_job
 
     @common.DecRetry(retry_exceptions=api_client.CONNECTION_EXCEPTIONS, tries=5)
     def _do_multipart_upload(self, upload_url, filename, part_number, part_size):
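MultiPartThreadQueueJob comes from the same unchanged thread_queue_job module. From its construction in do_singlepart_upload and do_multipart_upload above, and its consumption in the MultiPartSiphonWorker below, a plausible shape (again an assumption, not extracted from the wheel) is:

# Hypothetical sketch; the real class lives in thread_queue_job.py,
# which this diff does not show.


class MultiPartThreadQueueJob(object):
    def __init__(self, path=None, md5=None, total_parts=1, part_index=None):
        self.path = path
        self.md5 = md5
        self.total_parts = total_parts
        self.part_index = part_index
        self.upload_id = None   # assigned in do_multipart_upload
        self.project = None     # assigned in do_multipart_upload
        self.part = None        # part number, set once the part PUT succeeds
        self.etag = None        # ETag header returned by the storage backend

    def is_multipart(self):
        return self.total_parts > 1

    @classmethod
    def aggregate_parts(cls, jobs):
        # Rebuild the "completedParts" payload for /api/v2/files/multipart/complete
        # in the same shape the 9.1.0b1 inline loop produced: partNumber plus etag.
        return [
            {"partNumber": j.part, "etag": j.etag}
            for j in sorted(jobs, key=lambda j: j.part)
        ]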
@@ -564,6 +574,73 @@ class UploadWorker(worker.ThreadWorker):
 
         return response.headers
 
+
+class MultiPartSiphonWorker(worker.ThreadWorker):
+    def __init__(self, *args, **kwargs):
+        super(MultiPartSiphonWorker, self).__init__(*args, **kwargs)
+
+        self.api_client = api_client.ApiClient()
+        self.multipart_siphon = {}
+
+    def do_work(self, job, thread_int):
+        """
+        Process files that have already been uploaded.
+
+        If it's a single-part file, add the job to the out queue, so that it can
+        be used to determine if the Upload entity is complete.
+
+        If it's a multi-part upload, collect all the parts together. Once all the
+        parts have been accumulated, mark it as complete and add the file to the
+        out queue.
+        """
+
+        if job:
+
+            if not job.is_multipart():
+                logger.debug("Job is not multipart (%s, %s)", job.total_parts, job.part_index)
+
+            else:
+
+                if job.md5 not in self.multipart_siphon:
+                    self.multipart_siphon[job.md5] = []
+
+                    # Add to the task count for this worker.
+                    # -1 because a task has already been added for a single file
+                    # but not all its parts.
+                    old_task_count = self.task_count
+                    self.task_count += job.total_parts - 1
+                    logger.debug("Incrementing task count to %s from %s", self.task_count, old_task_count)
+
+                self.multipart_siphon[job.md5].append(job)
+
+                if len(self.multipart_siphon[job.md5]) == job.total_parts:
+
+                    complete_payload = {
+                        "uploadID": job.upload_id,
+                        "hash": job.md5,
+                        "completedParts": thread_queue_job.MultiPartThreadQueueJob.aggregate_parts(self.multipart_siphon[job.md5]),
+                        "project": job.project,
+                    }
+
+                    # Complete multipart upload in order to hydrate file for availability
+                    uri_path = "/api/v2/files/multipart/complete"
+                    headers = {"Content-Type": "application/json"}
+                    self.api_client.make_request(
+                        uri_path=uri_path,
+                        verb="POST",
+                        headers=headers,
+                        data=json.dumps(complete_payload),
+                        raise_on_error=True,
+                        use_api_key=True,
+                    )
+
+                    logger.debug("JSON payload: '%s'", json.dumps(complete_payload))
+
+            return job
+
+        # make sure we return None, so no message is automatically added to the out_queue
+        return None
+
     def is_complete(self):
         # Get the number of files already uploaded as they are not passed to the Upload
         # worker
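The subtle piece of the new worker is its task accounting: the siphon's task_count initially reflects one task per file, and the first time a part of a given file arrives it is grown by the file's remaining parts. A standalone rerun of that bookkeeping for a single hypothetical 3-part file (values are illustrative, not from the diff):

# Toy mirror of MultiPartSiphonWorker.do_work's accounting.

multipart_siphon = {}
state = {"task_count": 1}  # one task was counted upstream for the whole file

def siphon(md5, part_index, total_parts):
    """Return True once every part of the file has been collected."""
    parts = multipart_siphon.setdefault(md5, [])
    if not parts:
        # First part seen for this file: grow the expected task total,
        # -1 because one task was already counted for the file itself.
        state["task_count"] += total_parts - 1
    parts.append(part_index)
    return len(parts) == total_parts

assert siphon("abc123", 1, 3) is False   # task_count is now 3
assert siphon("abc123", 2, 3) is False
assert siphon("abc123", 3, 3) is True    # all parts held: POST the /multipart/complete payload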
@@ -581,11 +658,11 @@ class UploadWorker(worker.ThreadWorker):
                 self.task_count,
             )
 
-            return (queue_size + already_completed_uploads) >= self.task_count
+            return (queue_size) >= self.task_count
 
         else:
             logger.debug("Is complete?: files not initialized yet")
-            return False
+            return False
 
 
 class Uploader(object):
@@ -639,6 +716,7 @@ class Uploader(object):
             ),
             (FileStatWorker, [], {"thread_count": 1}),
             (UploadWorker, [], {"thread_count": self.args["thread_count"]}),
+            (MultiPartSiphonWorker, [], {"thread_count": 1})
         ]
 
         manager = worker.JobManager(job_description)
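With MultiPartSiphonWorker appended to job_description, the stages presumably run in declaration order, each worker's out queue feeding the next; the JobManager wiring itself lives in ciocore's worker module, which this diff does not touch.

# Assumed pipeline order after this change (stage names from the hunks above):
#   FileStatWorker -> UploadWorker -> MultiPartSiphonWorker
#   per-part UploadThreadQueueJob -> per-part MultiPartThreadQueueJob ->
#   one /api/v2/files/multipart/complete call per file;
#   single-part jobs pass through the siphon unchanged.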
ciocore-9.1.0b1.dist-info/METADATA → ciocore-9.1.0b2.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ciocore
-Version: 9.1.0b1
+Version: 9.1.0b2
 Summary: Core functionality for Conductor's client tools
 Home-page: https://github.com/ConductorTechnologies/ciocore
 Author: conductor
ciocore-9.1.0b1.dist-info/RECORD → ciocore-9.1.0b2.dist-info/RECORD RENAMED
@@ -1,4 +1,4 @@
-ciocore/VERSION,sha256=TCUlj36m9-VV6atKK8EETx-2I8BQY_6CSKTVDGTHeqo,12
+ciocore/VERSION,sha256=V-Kb1TSOiqU51Ac0b71EzlP2Ib7nl3rdmaEL7bhM4iY,12
 ciocore/__init__.py,sha256=aTP7LeeosQA8BZE67gDV4jgfTK5zxmwZRjiTRu_ZWj0,646
 ciocore/api_client.py,sha256=SBxEwAiwn2XtH7T_ipefUbWhczXjoNdNbQBur1RV-Bw,32810
 ciocore/cli.py,sha256=jZ1lOKQiUcrMhsVmD9SVmPMFwHtgDF4SaoAf2-PBS54,15449
@@ -95,7 +95,7 @@ ciocore/downloader/perpetual_downloader.py,sha256=cD7lnBH75-c-ZVVPHZc1vSnDhgJOnG
 ciocore/downloader/registry.py,sha256=_JIOuqpWkJkgJGN33nt-DCvqN9Gw3xeFhzPq4RUxIoE,2903
 ciocore/downloader/reporter.py,sha256=p1NK9k6iQ-jt7lRvZR0xFz0cGb2yo8tQcjlvYKR9SWM,4501
 ciocore/uploader/__init__.py,sha256=hxRFJf5Lo86rtRObFXSjjot8nybQd-SebSfYCbgZwow,24
-ciocore/uploader/_uploader.py,sha256=Xz1sos76FK-BKsgPuErlKQ8HsCAtBKp3pLbCtRVrr9E,38250
+ciocore/uploader/_uploader.py,sha256=Kt4toITJHZDMjRLqRyw_lwe_HOoWz2AigMp2k5heHBI,42291
 ciocore/uploader/thread_queue_job.py,sha256=MzOcetttfWtDfwy-M0_ARwUf8_OjaGjyy-dA_WgNTPE,3416
 ciocore/uploader/upload_stats/__init__.py,sha256=Lg1y4zq1i0cwc6Hh2K1TAQDYymLff49W-uIo1xjcvdI,5309
 ciocore/uploader/upload_stats/stats_formats.py,sha256=giNirtObU66VALWghPFSRhg3q_vw5MvESsnXhb_I3y8,2402
@@ -122,8 +122,8 @@ tests/test_uploader.py,sha256=B1llTJt_fqR6e_V_Jxfw9z73QgkFlEPU87xLYGzt-TQ,2914
 tests/test_validator.py,sha256=2fY66ayNc08PGyj2vTI-V_1yeCWJDngkj2zkUM5TTCI,1526
 tests/mocks/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 tests/mocks/glob.py,sha256=J2MH7nqi6NJOHuGdVWxhfeBd700_Ckj6cLh_8jSNkfg,215
-ciocore-9.1.0b1.dist-info/METADATA,sha256=jd_61cbX9eiHy2SgX19hXCdf-gqvcvaOJP_gOwZVG-M,19085
-ciocore-9.1.0b1.dist-info/WHEEL,sha256=qUzzGenXXuJTzyjFah76kDVqDvnk-YDzY00svnrl84w,109
-ciocore-9.1.0b1.dist-info/entry_points.txt,sha256=cCqcALMYbC4d8545V9w0Zysfg9MVuKWhzDQ2er4UfGE,47
-ciocore-9.1.0b1.dist-info/top_level.txt,sha256=SvlM5JlqULzAz00JZWfiUhfjhqDzYzSWssA87zdJl0o,14
-ciocore-9.1.0b1.dist-info/RECORD,,
+ciocore-9.1.0b2.dist-info/METADATA,sha256=g67y_5StIUtJ3HXVWUsECxrGN5X6LrT8NU96amgI_fg,19085
+ciocore-9.1.0b2.dist-info/WHEEL,sha256=qUzzGenXXuJTzyjFah76kDVqDvnk-YDzY00svnrl84w,109
+ciocore-9.1.0b2.dist-info/entry_points.txt,sha256=cCqcALMYbC4d8545V9w0Zysfg9MVuKWhzDQ2er4UfGE,47
+ciocore-9.1.0b2.dist-info/top_level.txt,sha256=SvlM5JlqULzAz00JZWfiUhfjhqDzYzSWssA87zdJl0o,14
+ciocore-9.1.0b2.dist-info/RECORD,,