pyPreservica 2.0.3__py3-none-any.whl → 3.3.3__py3-none-any.whl

This diff shows the content of publicly available package versions as released to the supported public registries; it is provided for informational purposes only and reflects the changes between package versions as they appear in those registries.

Potentially problematic release.


This version of pyPreservica might be problematic.

pyPreservica/uploadAPI.py CHANGED
@@ -13,7 +13,7 @@ import shutil
 import tempfile
 import uuid
 import xml
-from datetime import datetime
+from datetime import datetime, timedelta, timezone
 from time import sleep
 from xml.dom import minidom
 from xml.etree import ElementTree
@@ -22,10 +22,12 @@ from xml.etree.ElementTree import Element, SubElement
 import boto3
 import s3transfer.tasks
 import s3transfer.upload
-
+from botocore.session import get_session
 from boto3.s3.transfer import TransferConfig, S3Transfer
 from botocore.config import Config
-from botocore.exceptions import ClientError
+from botocore.credentials import RefreshableCredentials
+from botocore.exceptions import ClientError, NoCredentialsError, PartialCredentialsError
+from dateutil.tz import tzlocal
 from s3transfer import S3UploadFailedError
 from tqdm import tqdm
 
@@ -36,7 +38,7 @@ logger = logging.getLogger(__name__)
 
 MB = 1024 * 1024
 GB = 1024 ** 3
-transfer_config = TransferConfig(multipart_threshold=int((1 * GB) / 16))
+transfer_config = TransferConfig(multipart_threshold=int(32 * MB))
 
 CONTENT_FOLDER = "content"
 PRESERVATION_CONTENT_FOLDER = "p1"
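
Note: the old threshold, int((1 * GB) / 16), is 64 MB, so this change halves the file size at which uploads switch to S3 multipart transfer. A quick arithmetic check (standalone snippet, not code from the package):

    MB = 1024 * 1024
    GB = 1024 ** 3
    assert int((1 * GB) / 16) == 64 * MB   # old threshold: 64 MB
    assert int(32 * MB) == 32 * MB         # new threshold: 32 MB, half the old value
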
@@ -57,8 +59,7 @@ def upload_file(self, filename, bucket, key, callback=None, extra_args=None):
         raise ValueError('Filename must be a string')
 
     subscribers = self._get_subscribers(callback)
-    future = self._manager.upload(
-        filename, bucket, key, extra_args, subscribers)
+    future = self._manager.upload(filename, bucket, key, extra_args, subscribers)
     try:
         return future.result()
     # If a client error was raised, add the backwards compatibility layer
@@ -66,9 +67,7 @@ def upload_file(self, filename, bucket, key, callback=None, extra_args=None):
     # ever thrown for upload_parts but now can be thrown for any related
     # client error.
     except ClientError as e:
-        raise S3UploadFailedError(
-            "Failed to upload %s to %s: %s" % (
-                filename, '/'.join([bucket, key]), e))
+        raise S3UploadFailedError("Failed to upload %s to %s: %s" % (filename, '/'.join([bucket, key]), e))
 
 
 class PutObjectTask(s3transfer.tasks.Task):
@@ -82,13 +81,9 @@ class PutObjectTask(s3transfer.tasks.Task):
 class CompleteMultipartUploadTask(s3transfer.tasks.Task):
     # Copied from s3transfer/tasks.py, changed to return a result.
     def _main(self, client, bucket, key, upload_id, parts, extra_args):
-        return client.complete_multipart_upload(
-            Bucket=bucket,
-            Key=key,
-            UploadId=upload_id,
-            MultipartUpload={"Parts": parts},
-            **extra_args,
-        )
+        return client.complete_multipart_upload(Bucket=bucket, Key=key, UploadId=upload_id,
+                                                MultipartUpload={"Parts": parts},
+                                                **extra_args, )
 
 
 s3transfer.upload.PutObjectTask = PutObjectTask
@@ -105,11 +100,11 @@ def prettify(elem):
 
 def __create_io__(xip=None, file_name=None, parent_folder=None, **kwargs):
     if xip is None:
-        xip = Element('XIP')
+        xip = Element('xip:XIP')
+        xip.set('xmlns:xip', 'http://preservica.com/XIP/v6.0')
     assert xip is not None
-    xip.set('xmlns', 'http://preservica.com/XIP/v6.0')
-    io = SubElement(xip, 'InformationObject')
-    ref = SubElement(io, 'Ref')
+    io = SubElement(xip, 'xip:InformationObject')
+    ref = SubElement(io, 'xip:Ref')
 
     if 'IO_Identifier_callback' in kwargs:
         ident_callback = kwargs.get('IO_Identifier_callback')
@@ -117,15 +112,15 @@ def __create_io__(xip=None, file_name=None, parent_folder=None, **kwargs):
     else:
         ref.text = str(uuid.uuid4())
 
-    title = SubElement(io, 'Title')
+    title = SubElement(io, 'xip:Title')
     title.text = kwargs.get('Title', file_name)
-    description = SubElement(io, 'Description')
+    description = SubElement(io, 'xip:Description')
     description.text = kwargs.get('Description', file_name)
-    security = SubElement(io, 'SecurityTag')
+    security = SubElement(io, 'xip:SecurityTag')
     security.text = kwargs.get('SecurityTag', "open")
-    custom_type = SubElement(io, 'CustomType')
+    custom_type = SubElement(io, 'xip:CustomType')
     custom_type.text = kwargs.get('CustomType', "")
-    parent = SubElement(io, 'Parent')
+    parent = SubElement(io, 'xip:Parent')
 
     if hasattr(parent_folder, "reference"):
         parent.text = parent_folder.reference
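
Note: this is the pattern repeated throughout the rest of the diff — XIP elements are now created with a literal xip: prefix, and the namespace is declared explicitly as xmlns:xip on the root element instead of as a default xmlns. A minimal standalone sketch of the serialized result (illustrative only, not code from the package):

    import xml.etree.ElementTree as ET

    root = ET.Element('xip:XIP')                               # prefixed tag, kept verbatim by ElementTree
    root.set('xmlns:xip', 'http://preservica.com/XIP/v6.0')    # explicit prefix declaration
    ET.SubElement(root, 'xip:InformationObject')

    print(ET.tostring(root).decode())
    # <xip:XIP xmlns:xip="http://preservica.com/XIP/v6.0"><xip:InformationObject /></xip:XIP>
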
@@ -136,76 +131,76 @@ def __create_io__(xip=None, file_name=None, parent_folder=None, **kwargs):
 
 
 def __make_representation__(xip, rep_name, rep_type, io_ref):
-    representation = SubElement(xip, 'Representation')
-    io_link = SubElement(representation, 'InformationObject')
+    representation = SubElement(xip, 'xip:Representation')
+    io_link = SubElement(representation, 'xip:InformationObject')
     io_link.text = io_ref
-    access_name = SubElement(representation, 'Name')
+    access_name = SubElement(representation, 'xip:Name')
     access_name.text = rep_name
-    access_type = SubElement(representation, 'Type')
+    access_type = SubElement(representation, 'xip:Type')
     access_type.text = rep_type
-    content_objects = SubElement(representation, 'ContentObjects')
-    content_object = SubElement(content_objects, 'ContentObject')
+    content_objects = SubElement(representation, 'xip:ContentObjects')
+    content_object = SubElement(content_objects, 'xip:ContentObject')
     content_object_ref = str(uuid.uuid4())
     content_object.text = content_object_ref
     return content_object_ref
 
 
 def __make_content_objects__(xip, content_title, co_ref, io_ref, tag, content_description, content_type):
-    content_object = SubElement(xip, 'ContentObject')
-    ref_element = SubElement(content_object, "Ref")
+    content_object = SubElement(xip, 'xip:ContentObject')
+    ref_element = SubElement(content_object, "xip:Ref")
     ref_element.text = co_ref
-    title = SubElement(content_object, "Title")
+    title = SubElement(content_object, "xip:Title")
     title.text = content_title
-    description = SubElement(content_object, "Description")
+    description = SubElement(content_object, "xip:Description")
     description.text = content_description
-    security_tag = SubElement(content_object, "SecurityTag")
+    security_tag = SubElement(content_object, "xip:SecurityTag")
     security_tag.text = tag
-    custom_type = SubElement(content_object, "CustomType")
+    custom_type = SubElement(content_object, "xip:CustomType")
     custom_type.text = content_type
-    parent = SubElement(content_object, "Parent")
+    parent = SubElement(content_object, "xip:Parent")
     parent.text = io_ref
 
 
 def __make_generation__(xip, filename, co_ref, generation_label, location=None):
-    generation = SubElement(xip, 'Generation', {"original": "true", "active": "true"})
-    content_object = SubElement(generation, "ContentObject")
+    generation = SubElement(xip, 'xip:Generation', {"original": "true", "active": "true"})
+    content_object = SubElement(generation, "xip:ContentObject")
     content_object.text = co_ref
-    label = SubElement(generation, "Label")
+    label = SubElement(generation, "xip:Label")
     if generation_label:
         label.text = generation_label
     else:
         label.text = os.path.splitext(filename)[0]
-    effective_date = SubElement(generation, "EffectiveDate")
+    effective_date = SubElement(generation, "xip:EffectiveDate")
     effective_date.text = datetime.now().isoformat()
-    bitstreams = SubElement(generation, "Bitstreams")
-    bitstream = SubElement(bitstreams, "Bitstream")
+    bitstreams = SubElement(generation, "xip:Bitstreams")
+    bitstream = SubElement(bitstreams, "xip:Bitstream")
     bitstream.text = f"{location}/{filename}"
-    SubElement(generation, "Formats")
-    SubElement(generation, "Properties")
+    SubElement(generation, "xip:Formats")
+    SubElement(generation, "xip:Properties")
 
 
 def __make_bitstream__(xip, file_name, full_path, callback, location=None):
-    bitstream = SubElement(xip, 'Bitstream')
-    filename_element = SubElement(bitstream, "Filename")
+    bitstream = SubElement(xip, 'xip:Bitstream')
+    filename_element = SubElement(bitstream, "xip:Filename")
     filename_element.text = file_name
-    filesize = SubElement(bitstream, "FileSize")
+    filesize = SubElement(bitstream, "xip:FileSize")
     file_stats = os.stat(full_path)
     filesize.text = str(file_stats.st_size)
-    physical_location = SubElement(bitstream, "PhysicalLocation")
+    physical_location = SubElement(bitstream, "xip:PhysicalLocation")
     physical_location.text = location
-    fixities = SubElement(bitstream, "Fixities")
+    fixities = SubElement(bitstream, "xip:Fixities")
     fixity_result = callback(file_name, full_path)
     if type(fixity_result) == tuple:
-        fixity = SubElement(fixities, "Fixity")
-        fixity_algorithm_ref = SubElement(fixity, "FixityAlgorithmRef")
-        fixity_value = SubElement(fixity, "FixityValue")
+        fixity = SubElement(fixities, "xip:Fixity")
+        fixity_algorithm_ref = SubElement(fixity, "xip:FixityAlgorithmRef")
+        fixity_value = SubElement(fixity, "xip:FixityValue")
         fixity_algorithm_ref.text = fixity_result[0]
         fixity_value.text = fixity_result[1]
     elif type(fixity_result) == dict:
         for key, val in fixity_result.items():
-            fixity = SubElement(fixities, "Fixity")
-            fixity_algorithm_ref = SubElement(fixity, "FixityAlgorithmRef")
-            fixity_value = SubElement(fixity, "FixityValue")
+            fixity = SubElement(fixities, "xip:Fixity")
+            fixity_algorithm_ref = SubElement(fixity, "xip:FixityAlgorithmRef")
+            fixity_value = SubElement(fixity, "xip:FixityValue")
             fixity_algorithm_ref.text = key
             fixity_value.text = val
     else:
@@ -214,17 +209,17 @@ def __make_bitstream__(xip, file_name, full_path, callback, location=None):
 
 
 def __make_representation_multiple_co__(xip, rep_name, rep_type, rep_files, io_ref):
-    representation = SubElement(xip, 'Representation')
-    io_link = SubElement(representation, 'InformationObject')
+    representation = SubElement(xip, 'xip:Representation')
+    io_link = SubElement(representation, 'xip:InformationObject')
     io_link.text = io_ref
-    access_name = SubElement(representation, 'Name')
+    access_name = SubElement(representation, 'xip:Name')
     access_name.text = rep_name
-    access_type = SubElement(representation, 'Type')
+    access_type = SubElement(representation, 'xip:Type')
     access_type.text = rep_type
-    content_objects = SubElement(representation, 'ContentObjects')
+    content_objects = SubElement(representation, 'xip:ContentObjects')
     refs_dict = {}
     for f in rep_files:
-        content_object = SubElement(content_objects, 'ContentObject')
+        content_object = SubElement(content_objects, 'xip:ContentObject')
         content_object_ref = str(uuid.uuid4())
         content_object.text = content_object_ref
         refs_dict[content_object_ref] = f
@@ -248,12 +243,9 @@ def cvs_to_cmis_xslt(csv_file, xml_namespace, root_element, title="Metadata Titl
                 headers.add(xml_tag)
                 break
 
-    namespaces = {"version": "2.0",
-                  "xmlns:xsl": "http://www.w3.org/1999/XSL/Transform",
-                  "xmlns:fn": "http://www.w3.org/2005/xpath-functions",
-                  "xmlns:xs": "http://www.w3.org/2001/XMLSchema",
-                  "xmlns:csv": xml_namespace,
-                  "xmlns": "http://www.tessella.com/sdb/cmis/metadata",
+    namespaces = {"version": "2.0", "xmlns:xsl": "http://www.w3.org/1999/XSL/Transform",
+                  "xmlns:fn": "http://www.w3.org/2005/xpath-functions", "xmlns:xs": "http://www.w3.org/2001/XMLSchema",
+                  "xmlns:csv": xml_namespace, "xmlns": "http://www.tessella.com/sdb/cmis/metadata",
                   "exclude-result-prefixes": "csv"}
 
     if additional_namespaces is not None:
@@ -323,8 +315,7 @@ def cvs_to_xsd(csv_file, xml_namespace, root_element, export_folder=None, additi
                 headers.add(xml_tag)
                 break
 
-    namespaces = {"xmlns:xs": "http://www.w3.org/2001/XMLSchema",
-                  "attributeFormDefault": "unqualified",
+    namespaces = {"xmlns:xs": "http://www.w3.org/2001/XMLSchema", "attributeFormDefault": "unqualified",
                   "elementFormDefault": "qualified",
                   "targetNamespace": xml_namespace}
 
@@ -399,9 +390,7 @@ def csv_to_search_xml(csv_file, xml_namespace, root_element, title="Metadata Tit
         else:
             xpath_expression = f"//{short_name}:{root_element}/{short_name}:{header}"
 
-        attr = {"indexName": header, "displayName": header,
-                "xpath": xpath_expression,
-                "indexType": "STRING_DEFAULT"}
+        attr = {"indexName": header, "displayName": header, "xpath": xpath_expression, "indexType": "STRING_DEFAULT"}
         xml_term = xml.etree.ElementTree.SubElement(xml_index, "term", attr)
 
     if additional_namespaces is not None:
@@ -470,8 +459,9 @@ def cvs_to_xml(csv_file, xml_namespace, root_element, file_name_column="filename
         yield name
 
 
-def generic_asset_package(preservation_files_dict=None, access_files_dict=None, export_folder=None,
-                          parent_folder=None, compress=True, **kwargs):
+def generic_asset_package(preservation_files_dict=None, access_files_dict=None, export_folder=None, parent_folder=None,
+                          compress=True,
+                          **kwargs):
     # some basic validation
     if export_folder is None:
         export_folder = tempfile.gettempdir()
@@ -492,7 +482,7 @@ def generic_asset_package(preservation_files_dict=None, access_files_dict=None,
     content_type = kwargs.get('CustomType', "")
 
     if not compress:
-        shutil.register_archive_format("szip", _make_stored_zipfile, None, "UnCompressed ZIP file")
+        shutil.register_archive_format(name="szip", function=_make_stored_zipfile, extra_args=None, description="UnCompressed ZIP file")
 
     has_preservation_files = bool((preservation_files_dict is not None) and (len(preservation_files_dict) > 0))
     has_access_files = bool((access_files_dict is not None) and (len(access_files_dict) > 0))
@@ -570,7 +560,8 @@ def generic_asset_package(preservation_files_dict=None, access_files_dict=None,
             access_content_description = access_content_title.get("filename", default_content_objects_title)
 
             __make_content_objects__(xip, access_content_title, content_ref, io_ref, security_tag,
-                                     access_content_description, content_type)
+                                     access_content_description,
+                                     content_type)
 
     if has_preservation_files:
         for representation_name in preservation_representation_refs_dict.keys():
@@ -622,12 +613,12 @@ def generic_asset_package(preservation_files_dict=None, access_files_dict=None,
         for identifier_key, identifier_value in identifier_map.items():
             if identifier_key:
                 if identifier_value:
-                    identifier = SubElement(xip, 'Identifier')
-                    id_type = SubElement(identifier, "Type")
+                    identifier = SubElement(xip, 'xip:Identifier')
+                    id_type = SubElement(identifier, "xip:Type")
                     id_type.text = identifier_key
-                    id_value = SubElement(identifier, "Value")
+                    id_value = SubElement(identifier, "xip:Value")
                     id_value.text = identifier_value
-                    id_io = SubElement(identifier, "Entity")
+                    id_io = SubElement(identifier, "xip:Entity")
                     id_io.text = io_ref
 
     if 'Asset_Metadata' in kwargs:
@@ -637,22 +628,22 @@ def generic_asset_package(preservation_files_dict=None, access_files_dict=None,
             if metadata_path:
                 if os.path.exists(metadata_path) and os.path.isfile(metadata_path):
                     descriptive_metadata = xml.etree.ElementTree.parse(source=metadata_path)
-                    metadata = SubElement(xip, 'Metadata', {'schemaUri': metadata_ns})
-                    metadata_ref = SubElement(metadata, 'Ref')
+                    metadata = SubElement(xip, 'xip:Metadata', {'schemaUri': metadata_ns})
+                    metadata_ref = SubElement(metadata, 'xip:Ref')
                     metadata_ref.text = str(uuid.uuid4())
-                    entity = SubElement(metadata, 'Entity')
+                    entity = SubElement(metadata, 'xip:Entity')
                     entity.text = io_ref
-                    content = SubElement(metadata, 'Content')
+                    content = SubElement(metadata, 'xip:Content')
                     content.append(descriptive_metadata.getroot())
                 elif isinstance(metadata_path, str):
                     try:
                         descriptive_metadata = xml.etree.ElementTree.fromstring(metadata_path)
-                        metadata = SubElement(xip, 'Metadata', {'schemaUri': metadata_ns})
-                        metadata_ref = SubElement(metadata, 'Ref')
+                        metadata = SubElement(xip, 'xip:Metadata', {'schemaUri': metadata_ns})
+                        metadata_ref = SubElement(metadata, 'xip:Ref')
                         metadata_ref.text = str(uuid.uuid4())
-                        entity = SubElement(metadata, 'Entity')
+                        entity = SubElement(metadata, 'xip:Entity')
                         entity.text = io_ref
-                        content = SubElement(metadata, 'Content')
+                        content = SubElement(metadata, 'xip:Content')
                         content.append(descriptive_metadata)
                     except RuntimeError:
                         logging.info(f"Could not parse asset metadata in namespace {metadata_ns}")
@@ -736,71 +727,72 @@ def multi_asset_package(asset_file_list=None, export_folder=None, parent_folder=
     os.mkdir(os.path.join(inner_folder, CONTENT_FOLDER))
 
     asset_map = dict()
-    xip = Element('XIP')
+    xip = Element('xip:XIP')
+    xip.set('xmlns:xip', 'http://preservica.com/XIP/v6.0')
     for file in asset_file_list:
         default_asset_title = os.path.splitext(os.path.basename(file))[0]
         xip, io_ref = __create_io__(xip, file_name=default_asset_title, parent_folder=parent_folder, **kwargs)
         asset_map[file] = io_ref
-        representation = SubElement(xip, 'Representation')
-        io_link = SubElement(representation, 'InformationObject')
+        representation = SubElement(xip, 'xip:Representation')
+        io_link = SubElement(representation, 'xip:InformationObject')
         io_link.text = io_ref
-        access_name = SubElement(representation, 'Name')
+        access_name = SubElement(representation, 'xip:Name')
         access_name.text = "Preservation"
-        access_type = SubElement(representation, 'Type')
+        access_type = SubElement(representation, 'xip:Type')
         access_type.text = "Preservation"
-        content_objects = SubElement(representation, 'ContentObjects')
-        content_object = SubElement(content_objects, 'ContentObject')
+        content_objects = SubElement(representation, 'xip:ContentObjects')
+        content_object = SubElement(content_objects, 'xip:ContentObject')
         content_object_ref = str(uuid.uuid4())
         content_object.text = content_object_ref
 
         default_content_objects_title = os.path.splitext(os.path.basename(file))[0]
-        content_object = SubElement(xip, 'ContentObject')
-        ref_element = SubElement(content_object, "Ref")
+        content_object = SubElement(xip, 'xip:ContentObject')
+        ref_element = SubElement(content_object, "xip:Ref")
         ref_element.text = content_object_ref
-        title = SubElement(content_object, "Title")
+        title = SubElement(content_object, "xip:Title")
         title.text = default_content_objects_title
-        description = SubElement(content_object, "Description")
+        description = SubElement(content_object, "xip:Description")
         description.text = default_content_objects_title
-        security_tag_element = SubElement(content_object, "SecurityTag")
+        security_tag_element = SubElement(content_object, "xip:SecurityTag")
         security_tag_element.text = security_tag
-        custom_type = SubElement(content_object, "CustomType")
+        custom_type = SubElement(content_object, "xip:CustomType")
         custom_type.text = content_type
-        parent = SubElement(content_object, "Parent")
+        parent = SubElement(content_object, "xip:Parent")
         parent.text = io_ref
 
-        generation = SubElement(xip, 'Generation', {"original": "true", "active": "true"})
-        content_object = SubElement(generation, "ContentObject")
+        generation = SubElement(xip, 'xip:Generation', {"original": "true", "active": "true"})
+        content_object = SubElement(generation, "xip:ContentObject")
         content_object.text = content_object_ref
-        label = SubElement(generation, "Label")
+        label = SubElement(generation, "xip:Label")
         label.text = os.path.splitext(os.path.basename(file))[0]
-        effective_date = SubElement(generation, "EffectiveDate")
+        effective_date = SubElement(generation, "xip:EffectiveDate")
         effective_date.text = datetime.now().isoformat()
-        bitstreams = SubElement(generation, "Bitstreams")
-        bitstream = SubElement(bitstreams, "Bitstream")
+        bitstreams = SubElement(generation, "xip:Bitstreams")
+        bitstream = SubElement(bitstreams, "xip:Bitstream")
         bitstream.text = os.path.basename(file)
-        SubElement(generation, "Formats")
-        SubElement(generation, "Properties")
+        SubElement(generation, "xip:Formats")
+        SubElement(generation, "xip:Properties")
 
-        bitstream = SubElement(xip, 'Bitstream')
-        filename_element = SubElement(bitstream, "Filename")
+        bitstream = SubElement(xip, 'xip:Bitstream')
+        filename_element = SubElement(bitstream, "xip:Filename")
         filename_element.text = os.path.basename(file)
-        filesize = SubElement(bitstream, "FileSize")
+        filesize = SubElement(bitstream, "xip:FileSize")
         file_stats = os.stat(file)
         filesize.text = str(file_stats.st_size)
-        physical_location = SubElement(bitstream, "PhysicalLocation")
-        fixities = SubElement(bitstream, "Fixities")
+        physical_location = SubElement(bitstream, "xip:PhysicalLocation")
+        fixities = SubElement(bitstream, "xip:Fixities")
         fixity_result = fixity_callback(filename_element.text, file)
         if type(fixity_result) == tuple:
-            fixity = SubElement(fixities, "Fixity")
-            fixity_algorithm_ref = SubElement(fixity, "FixityAlgorithmRef")
-            fixity_value = SubElement(fixity, "FixityValue")
+            fixity = SubElement(fixities, "xip:Fixity")
+            fixity_algorithm_ref = SubElement(fixity, "xip:FixityAlgorithmRef")
+            fixity_value = SubElement(fixity, "xip:FixityValue")
             fixity_algorithm_ref.text = fixity_result[0]
            fixity_value.text = fixity_result[1]
        elif type(fixity_result) == dict:
            for key, val in fixity_result.items():
-                fixity = SubElement(fixities, "Fixity")
-                fixity_algorithm_ref = SubElement(fixity, "FixityAlgorithmRef")
-                fixity_value = SubElement(fixity, "FixityValue")
+                fixity = SubElement(fixities, "xip:Fixity")
+                fixity_algorithm_ref = SubElement(fixity, "xip:FixityAlgorithmRef")
+                fixity_value = SubElement(fixity, "xip:FixityValue")
                fixity_algorithm_ref.text = key
                fixity_value.text = val
        else:
@@ -814,12 +806,12 @@ def multi_asset_package(asset_file_list=None, export_folder=None, parent_folder=
         for identifier_key, identifier_value in identifier_map_values.items():
             if identifier_key:
                 if identifier_value:
-                    identifier = SubElement(xip, 'Identifier')
-                    id_type = SubElement(identifier, "Type")
+                    identifier = SubElement(xip, 'xip:Identifier')
+                    id_type = SubElement(identifier, "xip:Type")
                     id_type.text = identifier_key
-                    id_value = SubElement(identifier, "Value")
+                    id_value = SubElement(identifier, "xip:Value")
                     id_value.text = identifier_value
-                    id_io = SubElement(identifier, "Entity")
+                    id_io = SubElement(identifier, "xip:Entity")
                     id_io.text = io_ref
 
         src_file = file
@@ -839,8 +831,9 @@ def multi_asset_package(asset_file_list=None, export_folder=None, parent_folder=
     return top_level_folder + ".zip"
 
 
-def complex_asset_package(preservation_files_list=None, access_files_list=None, export_folder=None,
-                          parent_folder=None, compress=True, **kwargs):
+def complex_asset_package(preservation_files_list=None, access_files_list=None, export_folder=None, parent_folder=None,
+                          compress=True,
+                          **kwargs):
     """
 
     Create a Preservica package containing a single Asset from a multiple preservation files
@@ -888,6 +881,8 @@
     'Preservation_Representation_Name' Name of the Preservation Representation
     'Access_Representation_Name' Name of the Access Representation
     """
+    xml.etree.ElementTree.register_namespace("xip", "http://preservica.com/XIP/v6.0")
+
     # some basic validation
     if export_folder is None:
         export_folder = tempfile.gettempdir()
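
Note: register_namespace affects elements whose tags use ElementTree's {uri}local form, such as metadata fragments parsed from files; it makes them serialize with the same xip: prefix as the hand-built elements. A short standalone illustration (not code from the package):

    import xml.etree.ElementTree as ET

    ET.register_namespace("xip", "http://preservica.com/XIP/v6.0")
    frag = ET.fromstring('<Ref xmlns="http://preservica.com/XIP/v6.0"/>')
    print(ET.tostring(frag).decode())
    # <xip:Ref xmlns:xip="http://preservica.com/XIP/v6.0" />
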
@@ -916,17 +911,22 @@
     if has_preservation_files:
         if default_asset_title is None:
             default_asset_title = os.path.splitext(os.path.basename(preservation_files_list[0]))[0]
-
         # create the asset
-        xip, io_ref = __create_io__(file_name=default_asset_title, parent_folder=parent_folder, **kwargs)
+        if io_ref is None:
+            xip, io_ref = __create_io__(file_name=default_asset_title, parent_folder=parent_folder, **kwargs)
 
     if has_access_files:
         if default_asset_title is None:
             default_asset_title = os.path.splitext(os.path.basename(access_files_list[0]))[0]
-
         if io_ref is None:
             xip, io_ref = __create_io__(file_name=default_asset_title, parent_folder=parent_folder, **kwargs)
 
+    if io_ref is None:
+        default_asset_title = kwargs.get('Title', None)
+        if default_asset_title is None:
+            default_asset_title = "New Asset"
+        xip, io_ref = __create_io__(file_name=default_asset_title, parent_folder=parent_folder, **kwargs)
+
     if has_preservation_files:
         # add the content objects
         representation_name = kwargs.get('Preservation_Representation_Name', "Preservation")
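
Note: with this final fallback, complex_asset_package no longer requires any content files; if both file lists are empty it still creates the Information Object, named from the Title keyword or "New Asset". A hedged usage sketch (the folder value and metadata file are placeholders, and metadata-only packages are an assumption of this reading):

    # Assumes complex_asset_package is importable from pyPreservica, as in its docs
    package = complex_asset_package(preservation_files_list=[], access_files_list=[],
                                    parent_folder=folder,            # placeholder Folder object
                                    Title="Metadata only asset",
                                    Asset_Metadata={"https://example.com/ns": "metadata.xml"})
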
@@ -938,7 +938,8 @@
         # add the content objects
         access_name = kwargs.get('Access_Representation_Name', "Access")
         access_refs_dict = __make_representation_multiple_co__(xip, rep_name=access_name, rep_type="Access",
-                                                               rep_files=access_files_list, io_ref=io_ref)
+                                                               rep_files=access_files_list,
+                                                               io_ref=io_ref)
 
     if has_preservation_files:
 
@@ -955,7 +956,8 @@
             preservation_content_description = preservation_content_description[filename]
 
             __make_content_objects__(xip, preservation_content_title, content_ref, io_ref, security_tag,
-                                     preservation_content_description, content_type)
+                                     preservation_content_description,
+                                     content_type)
 
     if has_access_files:
 
@@ -1018,40 +1020,51 @@
         for identifier_key, identifier_value in identifier_map.items():
             if identifier_key:
                 if identifier_value:
-                    identifier = SubElement(xip, 'Identifier')
-                    id_type = SubElement(identifier, "Type")
+                    identifier = SubElement(xip, 'xip:Identifier')
+                    id_type = SubElement(identifier, "xip:Type")
                     id_type.text = identifier_key
-                    id_value = SubElement(identifier, "Value")
+                    id_value = SubElement(identifier, "xip:Value")
                     id_value.text = identifier_value
-                    id_io = SubElement(identifier, "Entity")
+                    id_io = SubElement(identifier, "xip:Entity")
                     id_io.text = io_ref
 
     if 'Asset_Metadata' in kwargs:
         metadata_map = kwargs.get('Asset_Metadata')
         for metadata_ns, metadata_path in metadata_map.items():
             if metadata_ns:
-                if metadata_path:
+                if metadata_path and isinstance(metadata_path, str):
                     if os.path.exists(metadata_path) and os.path.isfile(metadata_path):
                         descriptive_metadata = xml.etree.ElementTree.parse(source=metadata_path)
-                        metadata = SubElement(xip, 'Metadata', {'schemaUri': metadata_ns})
-                        metadata_ref = SubElement(metadata, 'Ref')
+                        metadata = SubElement(xip, 'xip:Metadata', {'schemaUri': metadata_ns})
+                        metadata_ref = SubElement(metadata, 'xip:Ref')
                         metadata_ref.text = str(uuid.uuid4())
-                        entity = SubElement(metadata, 'Entity')
+                        entity = SubElement(metadata, 'xip:Entity')
                         entity.text = io_ref
-                        content = SubElement(metadata, 'Content')
+                        content = SubElement(metadata, 'xip:Content')
                         content.append(descriptive_metadata.getroot())
                     elif isinstance(metadata_path, str):
                         try:
                             descriptive_metadata = xml.etree.ElementTree.fromstring(metadata_path)
-                            metadata = SubElement(xip, 'Metadata', {'schemaUri': metadata_ns})
-                            metadata_ref = SubElement(metadata, 'Ref')
+                            metadata = SubElement(xip, 'xip:Metadata', {'schemaUri': metadata_ns})
+                            metadata_ref = SubElement(metadata, 'xip:Ref')
                             metadata_ref.text = str(uuid.uuid4())
-                            entity = SubElement(metadata, 'Entity')
+                            entity = SubElement(metadata, 'xip:Entity')
                             entity.text = io_ref
-                            content = SubElement(metadata, 'Content')
+                            content = SubElement(metadata, 'xip:Content')
                             content.append(descriptive_metadata)
                         except RuntimeError:
                             logging.info(f"Could not parse asset metadata in namespace {metadata_ns}")
+                if metadata_path and isinstance(metadata_path, list):
+                    for path in metadata_path:
+                        if os.path.exists(path) and os.path.isfile(path):
+                            descriptive_metadata = xml.etree.ElementTree.parse(source=path)
+                            metadata = SubElement(xip, 'xip:Metadata', {'schemaUri': metadata_ns})
+                            metadata_ref = SubElement(metadata, 'xip:Ref')
+                            metadata_ref.text = str(uuid.uuid4())
+                            entity = SubElement(metadata, 'xip:Entity')
+                            entity.text = io_ref
+                            content = SubElement(metadata, 'xip:Content')
+                            content.append(descriptive_metadata.getroot())
 
     if xip is not None:
         export_folder = export_folder
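
Note: Asset_Metadata values may now be a list of XML file paths for one namespace; each file becomes its own xip:Metadata block attached to the asset. A hedged usage sketch (namespace, paths and folder are placeholders):

    package = complex_asset_package(preservation_files_list=["image.tiff"],
                                    parent_folder=folder,                     # placeholder Folder object
                                    Asset_Metadata={"https://example.com/ns":
                                                    ["descriptive1.xml", "descriptive2.xml"]})
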
@@ -1146,382 +1159,14 @@ def _unpad(s)
 
 class UploadAPI(AuthenticatedAPI):
 
-    def ingest_tweet(self, twitter_user=None, tweet_id: int = 0, twitter_consumer_key=None,
-                     twitter_secret_key=None, folder=None, callback=None, **kwargs):
-
-        """
-        Ingest tweets from a twitter stream by twitter username
-
-        :param tweet_id:
-        :param str twitter_user: Twitter Username
-        :param str twitter_consumer_key: Optional asset title
-        :param str twitter_secret_key: Optional asset description
-        :param str folder: Folder to ingest into
-        :param callback callback: Optional upload progress callback
-        :raises RuntimeError:
-
-
-        """
-
-        def get_image(m, has_video_element):
-            media_url_https_ = m["media_url_https"]
-            if media_url_https_:
-                req = requests.get(media_url_https_)
-                if req.status_code == requests.codes.ok:
-                    if has_video_element:
-                        image_name_ = f"{{{media_id_str}}}_[{twitter_user}]_thumb.jpg"
-                    else:
-                        image_name_ = f"{{{media_id_str}}}_[{twitter_user}].jpg"
-                    image_name_document_ = open(image_name_, "wb")
-                    image_name_document_.write(req.content)
-                    image_name_document_.close()
-                    return image_name_
-
-        def get_video(m):
-            video_info_ = m["video_info"]
-            variants_ = video_info_["variants"]
-            for v_ in variants_:
-                video_url_ = v_["url"]
-                req = requests.get(video_url_)
-                if req.status_code == requests.codes.ok:
-                    video_name_ = f"{{{media_id_str}}}_[{twitter_user}].mp4"
-                    video_name_document_ = open(video_name_, "wb")
-                    video_name_document_.write(req.content)
-                    video_name_document_.close()
-                    return video_name_, True
-
-        entity_client = pyPreservica.EntityAPI(username=self.username, password=self.password, server=self.server,
-                                               tenant=self.tenant)
-        if hasattr(folder, "reference"):
-            folder = entity_client.folder(folder.reference)
-        else:
-            folder = entity_client.folder(folder)
-        try:
-            import tweepy
-        except ImportError:
-            logger.error("Package tweepy is required for twitter harvesting. pip install --upgrade tweepy")
-            raise RuntimeError("Package tweepy is required for twitter harvesting. pip install --upgrade tweepy")
-        config = configparser.ConfigParser()
-        config.read('credentials.properties')
-        if twitter_consumer_key is None:
-            twitter_consumer_key = os.environ.get('TWITTER_CONSUMER_KEY')
-            if twitter_consumer_key is None:
-                try:
-                    twitter_consumer_key = config['credentials']['TWITTER_CONSUMER_KEY']
-                except KeyError:
-                    logger.error("No valid TWITTER_CONSUMER_KEY found in method arguments, "
-                                 "environment variables or credentials.properties file")
-                    raise RuntimeError("No valid TWITTER_CONSUMER_KEY found in method arguments, "
-                                       "environment variables or credentials.properties file")
-        if twitter_secret_key is None:
-            twitter_secret_key = os.environ.get('TWITTER_SECRET_KEY')
-            if twitter_secret_key is None:
-                try:
-                    twitter_secret_key = config['credentials']['TWITTER_SECRET_KEY']
-                except KeyError:
-                    logger.error("No valid TWITTER_SECRET_KEY found in method arguments, "
-                                 "environment variables or credentials.properties file")
-                    raise RuntimeError("No valid TWITTER_SECRET_KEY found in method arguments, "
-                                       "environment variables or credentials.properties file")
-
-        api = None
-        try:
-            auth = tweepy.AppAuthHandler(twitter_consumer_key, twitter_secret_key)
-            api = tweepy.API(auth, wait_on_rate_limit=True)
-        except TweepError:
-            logger.error("No valid Twitter API keys. Could not authenticate")
-            raise RuntimeError("No valid Twitter API keys. Could not authenticate")
-        if api is not None:
-            logger.debug(api)
-            tweet = api.get_status(tweet_id, tweet_mode="extended", include_entities=True)
-            created_at = tweet.created_at
-            id_str = tweet.id_str
-            author = tweet.author.name
-            tweet_entities = tweet.entities
-            hashtags = dict()
-            if 'hashtags' in tweet_entities:
-                hashtags = tweet.entities['hashtags']
-            entities = entity_client.identifier("tweet_id", id_str.strip())
-            if len(entities) > 0:
-                logger.warning("Tweet already exists, skipping....")
-                return
-            logger.info(f"Processing tweet {id_str} ...")
-            tid = tweet.id
-            content_objects = list()
-            full_tweet = api.get_status(tid, tweet_mode="extended", include_entities=True)
-            text = tweet.full_text
-            full_text = full_tweet.full_text
-            file_name = f"{{{id_str}}}_[{twitter_user}].json"
-            json_doc = json.dumps(full_tweet._json)
-            json_file = open(file_name, "wt", encoding="utf-8")
-            json_file.write(json_doc)
-            json_file.close()
-            content_objects.append(file_name)
-            if hasattr(full_tweet, "extended_entities"):
-                extended_entities = full_tweet.extended_entities
-                if "media" in extended_entities:
-                    media = extended_entities["media"]
-                    for med in media:
-                        media_id_str = med["id_str"]
-                        has_video = False
-                        if "video_info" in med:
-                            co, has_video = get_video(med)
-                            content_objects.append(co)
-                            if has_video:
-                                co = get_image(med, has_video)
-                                content_objects.append(co)
-                            continue
-                        if "media_url_https" in med:
-                            co = get_image(med, has_video)
-                            content_objects.append(co)
-            identifiers = dict()
-            asset_metadata = dict()
-            identifiers["tweet_id"] = id_str
-
-            user = full_tweet._json['user']
-
-            if full_tweet._json.get('retweeted_status'):
-                retweeted_status = full_tweet._json['retweeted_status']
-                if retweeted_status.get("extended_entities"):
-                    extended_entities = retweeted_status["extended_entities"]
-                    if "media" in extended_entities:
-                        media = extended_entities["media"]
-                        for med in media:
-                            media_id_str = med["id_str"]
-                            has_video = False
-                            if "video_info" in med:
-                                co, has_video = get_video(med)
-                                content_objects.append(co)
-                                continue
-                            if "media_url_https" in med:
-                                co = get_image(med, has_video)
-                                content_objects.append(co)
-
-            xml_object = xml.etree.ElementTree.Element('tweet', {"xmlns": "http://www.preservica.com/tweets/v1"})
-            xml.etree.ElementTree.SubElement(xml_object, "id").text = id_str
-            xml.etree.ElementTree.SubElement(xml_object, "full_text").text = full_text
-            xml.etree.ElementTree.SubElement(xml_object, "created_at").text = str(created_at)
-            xml.etree.ElementTree.SubElement(xml_object, "screen_name_sender").text = user.get('screen_name')
-            for h in hashtags:
-                xml.etree.ElementTree.SubElement(xml_object, "hashtag").text = str(h['text'])
-
-            xml.etree.ElementTree.SubElement(xml_object, "name").text = author
-            xml.etree.ElementTree.SubElement(xml_object, "retweet").text = str(full_tweet._json['retweet_count'])
-            xml.etree.ElementTree.SubElement(xml_object, "likes").text = str(full_tweet._json['favorite_count'])
-
-            xml_request = xml.etree.ElementTree.tostring(xml_object, encoding='utf-8')
-
-            metadata_document = open("metadata.xml", "wt", encoding="utf-8")
-            metadata_document.write(xml_request.decode("utf-8"))
-            metadata_document.close()
-
-            asset_metadata["http://www.preservica.com/tweets/v1"] = "metadata.xml"
-
-            security_tag = kwargs.get("SecurityTag", "open")
-            asset_title = kwargs.get("Title", text)
-            asset_description = kwargs.get("Description", full_text)
-
-            p = complex_asset_package(preservation_files_list=content_objects, parent_folder=folder,
-                                      Title=asset_title, Description=asset_description, CustomType="Tweet",
-                                      Identifiers=identifiers, Asset_Metadata=asset_metadata,
-                                      SecurityTag=security_tag)
-            self.upload_zip_package(p, folder=folder, callback=callback)
-            for ob in content_objects:
-                os.remove(ob)
-            os.remove("metadata.xml")
-
-    def ingest_twitter_feed(self, twitter_user=None, num_tweets: int = 25, twitter_consumer_key=None,
-                            twitter_secret_key=None, folder=None, callback=None, **kwargs):
-
-        """
-        Ingest tweets from a twitter stream by twitter username
-
-        :param str twitter_user: Twitter Username
-        :param int num_tweets: The number of tweets from the stream
-        :param str twitter_consumer_key: Optional asset title
-        :param str twitter_secret_key: Optional asset description
-        :param str folder: Folder to ingest into
-        :param callback callback: Optional upload progress callback
-        :raises RuntimeError:
 
 
-        """
-
-        def get_image(m, has_video_element):
-            media_url_https_ = m["media_url_https"]
-            if media_url_https_:
-                req = requests.get(media_url_https_)
-                if req.status_code == requests.codes.ok:
-                    if has_video_element:
-                        image_name_ = f"{{{media_id_str}}}_[{twitter_user}]_thumb.jpg"
-                    else:
-                        image_name_ = f"{{{media_id_str}}}_[{twitter_user}].jpg"
-                    image_name_document_ = open(image_name_, "wb")
-                    image_name_document_.write(req.content)
-                    image_name_document_.close()
-                    return image_name_
-
-        def get_video(m):
-            video_info_ = m["video_info"]
-            variants_ = video_info_["variants"]
-            for v_ in variants_:
-                if v_['content_type'] == 'video/mp4':
-                    video_url_ = v_["url"]
-                    with requests.get(video_url_, stream=True) as req:
-                        video_name_ = f"{{{media_id_str}}}_[{twitter_user}].mp4"
-                        with open(video_name_, 'wb') as video_name_document_:
-                            for chunk in req.iter_content(chunk_size=1024):
-                                video_name_document_.write(chunk)
-                            video_name_document_.flush()
-                    return video_name_, True
-
-        entity_client = pyPreservica.EntityAPI(username=self.username, password=self.password, server=self.server,
-                                               tenant=self.tenant)
-        if hasattr(folder, "reference"):
-            folder = entity_client.folder(folder.reference)
-        else:
-            folder = entity_client.folder(folder)
-        try:
-            import tweepy
-        except ImportError:
-            logger.error("Package tweepy is required for twitter harvesting. pip install --upgrade tweepy")
-            raise RuntimeError("Package tweepy is required for twitter harvesting. pip install --upgrade tweepy")
-        config = configparser.ConfigParser()
-        config.read('credentials.properties')
-        if twitter_consumer_key is None:
-            twitter_consumer_key = os.environ.get('TWITTER_CONSUMER_KEY')
-            if twitter_consumer_key is None:
-                try:
-                    twitter_consumer_key = config['credentials']['TWITTER_CONSUMER_KEY']
-                except KeyError:
-                    logger.error("No valid TWITTER_CONSUMER_KEY found in method arguments, "
-                                 "environment variables or credentials.properties file")
-                    raise RuntimeError("No valid TWITTER_CONSUMER_KEY found in method arguments, "
-                                       "environment variables or credentials.properties file")
-        if twitter_secret_key is None:
-            twitter_secret_key = os.environ.get('TWITTER_SECRET_KEY')
-            if twitter_secret_key is None:
-                try:
-                    twitter_secret_key = config['credentials']['TWITTER_SECRET_KEY']
-                except KeyError:
-                    logger.error("No valid TWITTER_SECRET_KEY found in method arguments, "
-                                 "environment variables or credentials.properties file")
-                    raise RuntimeError("No valid TWITTER_SECRET_KEY found in method arguments, "
-                                       "environment variables or credentials.properties file")
-
-        api = None
-        try:
-            auth = tweepy.AppAuthHandler(twitter_consumer_key, twitter_secret_key)
-            api = tweepy.API(auth, wait_on_rate_limit=True)
-        except TweepError:
-            logger.error("No valid Twitter API keys. Could not authenticate")
-            raise RuntimeError("No valid Twitter API keys. Could not authenticate")
-        if api is not None:
-            logger.debug(api)
-            for tweet in tweepy.Cursor(api.user_timeline, id=twitter_user).items(int(num_tweets)):
-                created_at = tweet.created_at
-                id_str = tweet.id_str
-                author = tweet.author.name
-                tweet_entities = tweet.entities
-                hashtags = dict()
-                if 'hashtags' in tweet_entities:
-                    hashtags = tweet.entities['hashtags']
-                entities = entity_client.identifier("tweet_id", id_str.strip())
-                if len(entities) > 0:
-                    logger.warning("Tweet already exists, skipping....")
-                    continue
-                logger.info(f"Processing tweet {id_str} ...")
-                tid = tweet.id
-                content_objects = list()
-                full_tweet = api.get_status(tid, tweet_mode="extended", include_entities=True)
-                text = tweet.text
-                logger.debug(text)
-                full_text = full_tweet.full_text
-                file_name = f"{{{id_str}}}_[{twitter_user}].json"
-                json_doc = json.dumps(full_tweet._json)
-                json_file = open(file_name, "wt", encoding="utf-8")
-                json_file.write(json_doc)
-                json_file.close()
-                content_objects.append(file_name)
-                if hasattr(full_tweet, "extended_entities"):
-                    extended_entities = full_tweet.extended_entities
-                    if "media" in extended_entities:
-                        media = extended_entities["media"]
-                        for med in media:
-                            media_id_str = med["id_str"]
-                            has_video = False
-                            if "video_info" in med:
-                                co, has_video = get_video(med)
-                                content_objects.append(co)
-                                if has_video:
-                                    co = get_image(med, has_video)
-                                    content_objects.append(co)
-                                continue
-                            if "media_url_https" in med:
-                                co = get_image(med, has_video)
-                                content_objects.append(co)
-                identifiers = {}
-                asset_metadata = {}
-                identifiers["tweet_id"] = id_str
-
-                user = full_tweet._json['user']
-
-                if full_tweet._json.get('retweeted_status'):
-                    retweeted_status = full_tweet._json['retweeted_status']
-                    if retweeted_status.get("extended_entities"):
-                        extended_entities = retweeted_status["extended_entities"]
-                        if "media" in extended_entities:
-                            media = extended_entities["media"]
-                            for med in media:
-                                media_id_str = med["id_str"]
-                                has_video = False
-                                if "video_info" in med:
-                                    co, has_video = get_video(med)
-                                    content_objects.append(co)
-                                    continue
-                                if "media_url_https" in med:
-                                    co = get_image(med, has_video)
-                                    content_objects.append(co)
-
-                xml_object = xml.etree.ElementTree.Element('tweet', {"xmlns": "http://www.preservica.com/tweets/v1"})
-                xml.etree.ElementTree.SubElement(xml_object, "id").text = id_str
-                xml.etree.ElementTree.SubElement(xml_object, "full_text").text = full_text
-                xml.etree.ElementTree.SubElement(xml_object, "created_at").text = str(created_at)
-                xml.etree.ElementTree.SubElement(xml_object, "screen_name_sender").text = user.get('screen_name')
-                for h in hashtags:
-                    xml.etree.ElementTree.SubElement(xml_object, "hashtag").text = str(h['text'])
-
-                xml.etree.ElementTree.SubElement(xml_object, "name").text = author
-                xml.etree.ElementTree.SubElement(xml_object, "retweet").text = str(full_tweet._json['retweet_count'])
-                xml.etree.ElementTree.SubElement(xml_object, "likes").text = str(full_tweet._json['favorite_count'])
-
-                xml_request = xml.etree.ElementTree.tostring(xml_object, encoding='utf-8')
-
-                metadata_document = open("metadata.xml", "wt", encoding="utf-8")
-                metadata_document.write(xml_request.decode("utf-8"))
-                metadata_document.close()
-
-                asset_metadata["http://www.preservica.com/tweets/v1"] = "metadata.xml"
-
-                security_tag = kwargs.get("SecurityTag", "open")
-                asset_title = kwargs.get("Title", text)
-                asset_description = kwargs.get("Description", full_text)
-
-                p = complex_asset_package(preservation_files_list=content_objects, parent_folder=folder,
-                                          Title=asset_title, Description=asset_description, CustomType="Tweet",
-                                          Identifiers=identifiers, Asset_Metadata=asset_metadata,
-                                          SecurityTag=security_tag)
-                self.upload_zip_package(p, folder=folder, callback=callback)
-                for ob in content_objects:
-                    os.remove(ob)
-                os.remove("metadata.xml")
-                sleep(2)
 
     def ingest_web_video(self, url=None, parent_folder=None, **kwargs):
         """
         Ingest a web video such as YouTube etc based on the URL
 
-        :param str url: URL to the youtube video
+        :param str url: URL to the YouTube video
         :param Folder parent_folder: The folder to ingest the video into
         :param str Title: Optional asset title
         :param str Description: Optional asset description
@@ -1545,10 +1190,7 @@ class UploadAPI(AuthenticatedAPI):
             if d['status'] == 'finished':
                 logger.info('Download Complete. Uploading to Preservica ...')
 
-        ydl_opts = {
-            'outtmpl': '%(id)s.mp4',
-            'progress_hooks': [my_hook],
-        }
+        ydl_opts = {'outtmpl': '%(id)s.mp4', 'progress_hooks': [my_hook], }
 
         # if True:
         #     ydl_opts['writesubtitles'] = True
@@ -1622,6 +1264,52 @@
             logger.error(exception)
             raise exception
 
+    def clean_upload_bucket(self, bucket_name: str, older_than_days: int = 90):
+        """
+        Clean up objects in an upload bucket which are older than older_than_days.
+
+        """
+        from azure.storage.blob import ContainerClient
+
+        for location in self.upload_locations():
+            if location['containerName'] == bucket_name:
+
+                if location['type'] != 'AWS':
+                    credentials = self.upload_credentials(location['apiId'])
+                    account_key = credentials['key']
+                    session_token = credentials['sessionToken']
+                    sas_url = f"https://{account_key}.blob.core.windows.net/{bucket_name}"
+                    container = ContainerClient.from_container_url(container_url=sas_url, credential=session_token)
+                    now = datetime.now(timezone.utc)
+                    for blob in container.list_blobs():
+                        if abs((blob.last_modified - now).days) > older_than_days:
+                            logger.debug(f"Deleting expired object {blob.name}")
+                            container.delete_blob(blob.name)
+
+                if location['type'] == 'AWS':
+                    credentials = self.upload_credentials(location['apiId'])
+                    access_key = credentials['key']
+                    secret_key = credentials['secret']
+                    session_token = credentials['sessionToken']
+                    session = boto3.Session(aws_access_key_id=access_key, aws_secret_access_key=secret_key,
+                                            aws_session_token=session_token)
+                    s3_client = session.client("s3")
+                    paginator = s3_client.get_paginator('list_objects_v2')
+                    now = datetime.now(timezone.utc)
+                    for page in paginator.paginate(Bucket=bucket_name):
+                        if 'Contents' in page:
+                            for key in page['Contents']:
+                                last_modified = key['LastModified']
+                                if abs((last_modified - now).days) > older_than_days:
+                                    logger.debug(f"Deleting expired object {key['Key']}")
+                                    s3_client.delete_object(Bucket=bucket_name, Key=key['Key'])
+
+
+
+
+
+
+
     def upload_locations(self):
         """
         Upload locations are configured on the Sources page as 'SIP Upload'.
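
Note: clean_upload_bucket is new in this release; it looks the named bucket up in upload_locations() and deletes objects older than the cut-off from either the Azure or the S3 backend. A hedged usage sketch (credentials and names are placeholders):

    from pyPreservica import UploadAPI

    upload = UploadAPI(username="user@example.com", password="...",    # placeholders
                       tenant="TENANT", server="eu.preservica.com")
    upload.clean_upload_bucket("com.example.tenant.upload", older_than_days=30)
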
@@ -1650,36 +1338,60 @@
         """
         return self.upload_locations()
 
-    def crawl_filesystem(self, filesystem_path, bucket_name, preservica_parent, callback=None,
+    def crawl_filesystem(self, filesystem_path, bucket_name, preservica_parent, callback: bool = False,
                          security_tag: str = "open",
-                         delete_after_upload=True, max_MB_ingested: int = -1):
+                         delete_after_upload: bool = True, max_MB_ingested: int = -1):
+
+        from pyPreservica import EntityAPI
+
+        def entity_value(client: EntityAPI, identifier: str) -> Entity:
+            back_off: int = 5
+            while True:
+                try:
+                    entities = client.identifier("code", identifier)
+                    if bool(len(entities) > 0):
+                        return entities.pop()
+                    else:
+                        return None
+                except HTTPException as e:
+                    sleep(back_off)
+                    back_off = back_off * 2
+
+        def entity_exists(client: EntityAPI, identifier: str) -> bool:
+            back_off: int = 5
+            while True:
+                try:
+                    entities = client.identifier("code", identifier)
+                    return bool(len(entities) > 0)
+                except HTTPException as e:
+                    sleep(back_off)
+                    back_off = back_off * 2
 
         def get_parent(client, identifier, parent_reference):
-            id = str(os.path.dirname(identifier))
-            if not id:
-                id = identifier
-            entities = client.identifier("code", id)
-            if len(entities) > 0:
-                folder = entities.pop()
+            dirname_id: str = str(os.path.dirname(identifier))
+            if not dirname_id:
+                dirname_id = identifier
+            folder = entity_value(client, dirname_id)
+            if folder is not None:
                 folder = client.folder(folder.reference)
                 return folder.reference
             else:
                 return parent_reference
 
         def get_folder(client, name, tag, parent_reference, identifier):
-            entities = client.identifier("code", identifier)
-            if len(entities) == 0:
+            folder = entity_value(client, identifier)
+            if folder is None:
                 logger.info(f"Creating new folder with name {name}")
                 folder = client.create_folder(name, name, tag, parent_reference)
                 client.add_identifier(folder, "code", identifier)
             else:
                 logger.info(f"Found existing folder with name {name}")
-                folder = entities.pop()
             return folder
 
-        from pyPreservica import EntityAPI
         entity_client = EntityAPI(username=self.username, password=self.password, server=self.server,
-                                  tenant=self.tenant)
+                                  tenant=self.tenant,
+                                  two_fa_secret_key=self.two_fa_secret_key, use_shared_secret=self.shared_secret,
+                                  protocol=self.protocol)
 
         if preservica_parent:
             parent = entity_client.folder(preservica_parent)
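
Note: the new entity_value/entity_exists helpers retry failed identifier lookups indefinitely, doubling the wait each time (5 s, 10 s, 20 s, ...). The same pattern with a bounded number of attempts, as a standalone sketch (hypothetical helper, not part of the package; it assumes the client raises http.client.HTTPException):

    import time
    from http.client import HTTPException

    def retry_with_backoff(fn, attempts=5, initial_delay=5):
        delay = initial_delay
        for attempt in range(attempts):
            try:
                return fn()
            except HTTPException:
                if attempt == attempts - 1:
                    raise                 # give up after the last attempt
                time.sleep(delay)
                delay *= 2                # exponential backoff
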
@@ -1705,7 +1417,7 @@ class UploadAPI(AuthenticatedAPI):
                     files.remove(file)
                     continue
                 asset_code = os.path.join(code, file)
-                if len(entity_client.identifier("code", asset_code)) == 0:
+                if not entity_exists(entity_client, asset_code):
                     bytes_ingested = bytes_ingested + os.stat(full_path).st_size
                     logger.info(f"Adding new file: {file} to package ready for upload")
                     file_identifiers = {"code": asset_code}
@@ -1718,8 +1430,19 @@ class UploadAPI(AuthenticatedAPI):
             full_path_list = [os.path.join(dirname, file) for file in files]
             package = multi_asset_package(asset_file_list=full_path_list, parent_folder=f, SecurityTag=security_tag,
                                           Identifiers=identifiers)
-            self.upload_zip_package_to_S3(path_to_zip_package=package, bucket_name=bucket_name,
-                                          callback=callback, delete_after_upload=delete_after_upload)
+            if callback:
+                progress_display = UploadProgressConsoleCallback(package)
+            else:
+                progress_display = None
+
+            if bucket_name is None:
+                self.upload_zip_package(path_to_zip_package=package, callback=progress_display,
+                                        delete_after_upload=delete_after_upload)
+            else:
+                self.upload_zip_to_Source(path_to_zip_package=package, container_name=bucket_name,
+                                          show_progress=bool(progress_display is not None),
+                                          delete_after_upload=delete_after_upload)
+
             logger.info(f"Uploaded " + "{:.1f}".format(bytes_ingested / (1024 * 1024)) + " MB")
 
             if max_MB_ingested > 0:
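
Note: crawl_filesystem's callback parameter is now a bool that switches on a console progress bar, and a bucket_name of None routes packages through upload_zip_package rather than a named upload location. A hedged usage sketch (paths, references and names are placeholders):

    upload.crawl_filesystem(filesystem_path="/data/to-ingest",        # placeholder path
                            bucket_name=None,                         # None: use the default upload endpoint
                            preservica_parent="ae108c8f-...",         # placeholder folder reference
                            callback=True,                            # show a progress bar per package
                            security_tag="open")
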
@@ -1727,6 +1450,37 @@ class UploadAPI(AuthenticatedAPI):
  logger.info(f"Reached Max Upload Limit")
  break

+ def upload_zip_to_Source(self, path_to_zip_package, container_name, folder=None, delete_after_upload=False,
+ show_progress=False):
+
+ """
+ Uploads a zip file package to either an Azure container or S3 bucket
+ depending on the Preservica system deployment
+
+ :param str path_to_zip_package: Path to the package
+ :param str container_name: container connected to the ingest workflow
+ :param Folder folder: The folder to ingest the package into
+ :param bool delete_after_upload: Delete the local copy of the package after the upload has completed
+ :param bool show_progress: Show upload progress bar
+
+ """
+
+ locations = self.upload_locations()
+ for location in locations:
+ if location['containerName'] == container_name:
+ if location['type'] == 'AWS':
+ callback = None
+ if show_progress:
+ callback = UploadProgressConsoleCallback(path_to_zip_package)
+ self.upload_zip_package_to_S3(path_to_zip_package=path_to_zip_package, bucket_name=container_name,
+ folder=folder,
+ callback=callback, delete_after_upload=delete_after_upload)
+ else:
+ self.upload_zip_package_to_Azure(path_to_zip_package=path_to_zip_package,
+ container_name=container_name, folder=folder,
+ delete_after_upload=delete_after_upload,
+ show_progress=show_progress)
+
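The new upload_zip_to_Source() above keys its dispatch off the records returned by upload_locations(): locations typed 'AWS' go to S3, anything else to Azure blob storage. A hedged sketch of inspecting those records (only the 'containerName' and 'type' keys used by the method are assumed):

    # upload is assumed to be an authenticated UploadAPI instance
    for location in upload.upload_locations():
        target = "S3" if location['type'] == 'AWS' else "Azure blob storage"
        print(f"{location['containerName']} -> {target}")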
  def upload_zip_package_to_Azure(self, path_to_zip_package, container_name, folder=None, delete_after_upload=False,
  show_progress=False):

@@ -1746,97 +1500,99 @@ class UploadAPI(AuthenticatedAPI):

  from azure.storage.blob import ContainerClient

- if (self.major_version > 5) and (self.minor_version > 4):
- locations = self.upload_locations()
- for location in locations:
- if location['containerName'] == container_name:
- credentials = self.upload_credentials(location['apiId'])
- account_key = credentials['key']
- session_token = credentials['sessionToken']
+ locations = self.upload_locations()
+ for location in locations:
+ if location['containerName'] == container_name:
+ credentials = self.upload_credentials(location['apiId'])
+ account_key = credentials['key']
+ session_token = credentials['sessionToken']

- sas_url = f"https://{account_key}.blob.core.windows.net/{container_name}"
- container = ContainerClient.from_container_url(container_url=sas_url, credential=session_token)
+ sas_url = f"https://{account_key}.blob.core.windows.net/{container_name}"
+ container = ContainerClient.from_container_url(container_url=sas_url, credential=session_token)

- upload_key = str(uuid.uuid4())
- metadata = {'key': upload_key, 'name': upload_key + ".zip", 'bucket': container_name,
- 'status': 'ready'}
+ upload_key = str(uuid.uuid4())
+ metadata = {'key': upload_key, 'name': upload_key + ".zip", 'bucket': container_name, 'status': 'ready'}

- if hasattr(folder, "reference"):
- metadata['collectionreference'] = folder.reference
- elif isinstance(folder, str):
- metadata['collectionreference'] = folder
+ if hasattr(folder, "reference"):
+ metadata['collectionreference'] = folder.reference
+ elif isinstance(folder, str):
+ metadata['collectionreference'] = folder

- properties = None
+ properties = None

- len_bytes = Path(path_to_zip_package).stat().st_size
+ len_bytes = Path(path_to_zip_package).stat().st_size

- if show_progress:
- with tqdm.wrapattr(open(path_to_zip_package, 'rb'), "read", total=len_bytes) as data:
- blob_client = container.upload_blob(name=upload_key, data=data, metadata=metadata,
- length=len_bytes)
- properties = blob_client.get_blob_properties()
- else:
- with open(path_to_zip_package, "rb") as data:
- blob_client = container.upload_blob(name=upload_key, data=data, metadata=metadata,
- length=len_bytes)
- properties = blob_client.get_blob_properties()
+ if show_progress:
+ with tqdm.wrapattr(open(path_to_zip_package, 'rb'), "read", total=len_bytes) as data:
+ blob_client = container.upload_blob(name=upload_key, data=data, metadata=metadata,
+ length=len_bytes)
+ properties = blob_client.get_blob_properties()
+ else:
+ with open(path_to_zip_package, "rb") as data:
+ blob_client = container.upload_blob(name=upload_key, data=data, metadata=metadata,
+ length=len_bytes)
+ properties = blob_client.get_blob_properties()

- if delete_after_upload:
- os.remove(path_to_zip_package)
+ if delete_after_upload:
+ os.remove(path_to_zip_package)

- return properties
+ return properties

  def upload_zip_package_to_S3(self, path_to_zip_package, bucket_name, folder=None, callback=None,
  delete_after_upload=False):

  """
- Uploads a zip file package to an S3 bucket connected to a Preservica Cloud System
+ Uploads a zip file package to an S3 bucket connected to a Preservica Cloud System

- :param str path_to_zip_package: Path to the package
- :param str bucket_name: Bucket connected to an ingest workflow
- :param Folder folder: The folder to ingest the package into
- :param Callable callback: Optional callback to allow the callee to monitor the upload progress
- :param bool delete_after_upload: Delete the local copy of the package after the upload has completed
+ :param str path_to_zip_package: Path to the package
+ :param str bucket_name: Bucket connected to an ingest workflow
+ :param Folder folder: The folder to ingest the package into
+ :param Callable callback: Optional callback to allow the callee to monitor the upload progress
+ :param bool delete_after_upload: Delete the local copy of the package after the upload has completed

- """
+ """

  if (self.major_version < 7) and (self.minor_version < 5):
  raise RuntimeError("This call [upload_zip_package_to_S3] is only available against v6.5 systems and above")

- if (self.major_version > 5) and (self.minor_version > 4):
- locations = self.upload_locations()
- for location in locations:
- if location['containerName'] == bucket_name:
- credentials = self.upload_credentials(location['apiId'])
- access_key = credentials['key']
- secret_key = credentials['secret']
- session_token = credentials['sessionToken']
- endpoint = credentials['endpoint']
+ logger.debug("Finding Upload Locations")
+ self.token = self.__token__()
+ locations = self.upload_locations()
+ for location in locations:
+ if location['containerName'] == bucket_name:
+ logger.debug(f"Found Upload Location {location['containerName']}")
+ logger.debug(f"Fetching Upload Credentials for {location['containerName']}")
+ credentials = self.upload_credentials(location['apiId'])
+ access_key = credentials['key']
+ secret_key = credentials['secret']
+ session_token = credentials['sessionToken']
+ endpoint = credentials['endpoint']

- session = boto3.Session(aws_access_key_id=access_key, aws_secret_access_key=secret_key,
- aws_session_token=session_token)
- s3 = session.resource(service_name="s3")
+ session = boto3.Session(aws_access_key_id=access_key, aws_secret_access_key=secret_key,
+ aws_session_token=session_token)
+ s3 = session.resource(service_name="s3")
+
+ logger.debug(f"S3 Session: {s3}")

- upload_key = str(uuid.uuid4())
- s3_object = s3.Object(bucket_name, upload_key)
- metadata = {'key': upload_key, 'name': upload_key + ".zip", 'bucket': bucket_name,
- 'status': 'ready'}
+ upload_key = str(uuid.uuid4())
+ s3_object = s3.Object(bucket_name, upload_key)
+ metadata = {'key': upload_key, 'name': upload_key + ".zip", 'bucket': bucket_name, 'status': 'ready'}

- if hasattr(folder, "reference"):
- metadata['collectionreference'] = folder.reference
- elif isinstance(folder, str):
- metadata['collectionreference'] = folder
+ if hasattr(folder, "reference"):
+ metadata['collectionreference'] = folder.reference
+ elif isinstance(folder, str):
+ metadata['collectionreference'] = folder

- metadata['size'] = str(Path(path_to_zip_package).stat().st_size)
- metadata['createdby'] = self.username
+ metadata['size'] = str(Path(path_to_zip_package).stat().st_size)
+ metadata['createdby'] = self.username

- metadata_map = {'Metadata': metadata}
+ metadata_map = {'Metadata': metadata}

- s3_object.upload_file(path_to_zip_package, Callback=callback, ExtraArgs=metadata_map,
- Config=transfer_config)
+ s3_object.upload_file(path_to_zip_package, Callback=callback, ExtraArgs=metadata_map,
+ Config=transfer_config)

- if delete_after_upload:
- os.remove(path_to_zip_package)
+ if delete_after_upload:
+ os.remove(path_to_zip_package)
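Both transports fetch short-lived credentials from upload_credentials() and describe the package through object metadata rather than the object key alone. A condensed sketch of the S3 hand-off under those assumptions (credential values, bucket and file names are placeholders):

    import uuid

    import boto3

    # Placeholder values; the method reads these from upload_credentials(apiId)
    session = boto3.Session(aws_access_key_id="KEY",
                            aws_secret_access_key="SECRET",
                            aws_session_token="TOKEN")
    s3 = session.resource(service_name="s3")

    upload_key = str(uuid.uuid4())
    s3_object = s3.Object("example-ingest-bucket", upload_key)

    # Preservica drives the ingest from this metadata, including the optional
    # collectionreference that targets a destination folder.
    extra_args = {"Metadata": {"key": upload_key,
                               "name": upload_key + ".zip",
                               "bucket": "example-ingest-bucket",
                               "status": "ready"}}
    s3_object.upload_file("package.zip", ExtraArgs=extra_args)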

  def upload_zip_package(self, path_to_zip_package, folder=None, callback=None, delete_after_upload=False):
  """
@@ -1859,9 +1615,42 @@ class UploadAPI(AuthenticatedAPI):
  endpoint = f'{self.protocol}://{self.server}/api/s3/buckets'
  self.token = self.__token__()

- s3_client = boto3.client('s3', endpoint_url=endpoint, aws_access_key_id=self.token,
- aws_secret_access_key="NOT_USED",
- config=Config(s3={'addressing_style': 'path'}))
+ retries = {
+ 'max_attempts': 5,
+ 'mode': 'adaptive'
+ }
+
+ def new_credentials():
+ cred_metadata: dict = {}
+ cred_metadata['access_key'] = self.__token__()
+ cred_metadata['secret_key'] = "NOT_USED"
+ cred_metadata['token'] = ""
+ cred_metadata["expiry_time"] = (datetime.now(tzlocal()) + timedelta(minutes=12)).isoformat()
+ logger.info("Refreshing credentials at: " + str(datetime.now(tzlocal())))
+ return cred_metadata
+
+ session = get_session()
+
+ session_credentials = RefreshableCredentials.create_from_metadata(
+ metadata=new_credentials(),
+ refresh_using=new_credentials,
+ advisory_timeout=4 * 60,
+ mandatory_timeout=12 * 60,
+ method='Preservica'
+ )
+
+ autorefresh_session = boto3.Session(botocore_session=session)
+
+ session._credentials = session_credentials
+
+ config = Config(s3={'addressing_style': 'path'}, read_timeout=120, connect_timeout=120,
+ request_checksum_calculation="WHEN_REQUIRED",
+ response_checksum_validation="WHEN_REQUIRED",
+ retries=retries, tcp_keepalive=True)
+
+ s3_client = autorefresh_session.client('s3', endpoint_url=endpoint, config=config)

  metadata = {}
  if folder is not None:
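Preservica access tokens are short-lived, so the rewritten upload_zip_package() signs its S3 requests through botocore's RefreshableCredentials, fetching a fresh token before each 12-minute window lapses; without this, long multipart uploads would fail part-way through. A stripped-down sketch of the same pattern, with a hypothetical fetch_token() standing in for self.__token__() (the endpoint URL is also a placeholder):

    from datetime import datetime, timedelta

    import boto3
    from botocore.credentials import RefreshableCredentials
    from botocore.session import get_session
    from dateutil.tz import tzlocal

    def fetch_token() -> str:
        """Hypothetical stand-in for the API's __token__() call."""
        return "fresh-access-token"

    def new_credentials() -> dict:
        # botocore expects exactly these keys; expiry_time tells it when to
        # invoke refresh_using again (here, 12 minutes from now).
        return {
            "access_key": fetch_token(),
            "secret_key": "NOT_USED",
            "token": "",
            "expiry_time": (datetime.now(tzlocal()) + timedelta(minutes=12)).isoformat(),
        }

    credentials = RefreshableCredentials.create_from_metadata(
        metadata=new_credentials(),      # initial credentials
        refresh_using=new_credentials,   # called automatically near expiry
        method="custom",
    )

    botocore_session = get_session()
    botocore_session._credentials = credentials  # private attribute, mirroring the diff

    session = boto3.Session(botocore_session=botocore_session)
    s3_client = session.client("s3", endpoint_url="https://example.com/api/s3/buckets")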
@@ -1874,20 +1663,48 @@ class UploadAPI(AuthenticatedAPI):
  try:
  key_id = str(uuid.uuid4()) + ".zip"

+ # How big is the package?
+ package_size = os.path.getsize(path_to_zip_package)
+ if package_size > 1 * GB:
+ transfer_config.multipart_chunksize = 16 * MB  # min 64 chunks
+ if package_size > 8 * GB:
+ transfer_config.multipart_chunksize = 32 * MB  # min 256 chunks
+ if package_size > 24 * GB:
+ transfer_config.multipart_chunksize = 48 * MB  # min 512 chunks
+ if package_size > 48 * GB:
+ transfer_config.multipart_chunksize = 64 * MB
+
+ logger.info("Using Multipart Chunk Size: " + str(transfer_config.multipart_chunksize))
+
  transfer = S3Transfer(client=s3_client, config=transfer_config)

  transfer.PutObjectTask = PutObjectTask
  transfer.CompleteMultipartUploadTask = CompleteMultipartUploadTask
  transfer.upload_file = upload_file

- response = transfer.upload_file(self=transfer, filename=path_to_zip_package, bucket=bucket, key=key_id,
- extra_args=metadata, callback=callback)
+ response = transfer.upload_file(self=transfer, filename=path_to_zip_package, bucket=bucket,
+ key=key_id,
+ extra_args=metadata,
+ callback=callback)

  if delete_after_upload:
  os.remove(path_to_zip_package)

  return response['ResponseMetadata']['HTTPHeaders']['preservica-progress-token']

- except ClientError as e:
- logger.error(e)
- raise e
+ except (NoCredentialsError, PartialCredentialsError) as ex:
+ logger.error(ex)
+ raise ex
+
+ except ClientError as ex:
+ logger.error(ex)
+ raise ex
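The chunk-size ladder trades part count against per-part overhead: S3-style multipart uploads allow at most 10,000 parts, and each threshold keeps the count bounded (1 GB / 16 MB = 64 parts, 24 GB / 48 MB = 512, and 64 MB parts cover packages up to 625 GB). A small sketch reproducing the selection and the resulting part counts (the helper name is ours; the thresholds are from the diff, and 8 MB is boto3's TransferConfig default below the first threshold):

    MB = 1024 * 1024
    GB = 1024 ** 3

    def chunk_size_for(package_size: int) -> int:
        """Mirror the ladder above: bigger packages get bigger parts."""
        chunk = 8 * MB               # boto3's TransferConfig default
        if package_size > 1 * GB:
            chunk = 16 * MB          # at least 64 parts beyond 1 GB
        if package_size > 8 * GB:
            chunk = 32 * MB          # at least 256 parts beyond 8 GB
        if package_size > 24 * GB:
            chunk = 48 * MB          # at least 512 parts beyond 24 GB
        if package_size > 48 * GB:
            chunk = 64 * MB          # 10,000 x 64 MB parts = 625 GB ceiling
        return chunk

    for size in (2 * GB, 10 * GB, 30 * GB, 100 * GB):
        chunk = chunk_size_for(size)
        parts = -(-size // chunk)    # ceiling division
        print(f"{size // GB} GB -> {chunk // MB} MB parts, {parts} parts")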