pyPreservica 2.7.2-py3-none-any.whl → 3.3.4-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyPreservica/__init__.py +18 -6
- pyPreservica/adminAPI.py +29 -22
- pyPreservica/authorityAPI.py +6 -7
- pyPreservica/common.py +116 -19
- pyPreservica/contentAPI.py +179 -8
- pyPreservica/entityAPI.py +730 -214
- pyPreservica/mdformsAPI.py +501 -29
- pyPreservica/monitorAPI.py +2 -2
- pyPreservica/parAPI.py +1 -37
- pyPreservica/retentionAPI.py +58 -26
- pyPreservica/settingsAPI.py +295 -0
- pyPreservica/uploadAPI.py +298 -480
- pyPreservica/webHooksAPI.py +42 -1
- pyPreservica/workflowAPI.py +17 -13
- {pyPreservica-2.7.2.dist-info → pypreservica-3.3.4.dist-info}/METADATA +20 -9
- pypreservica-3.3.4.dist-info/RECORD +20 -0
- {pyPreservica-2.7.2.dist-info → pypreservica-3.3.4.dist-info}/WHEEL +1 -1
- pyPreservica/vocabularyAPI.py +0 -141
- pyPreservica-2.7.2.dist-info/RECORD +0 -20
- {pyPreservica-2.7.2.dist-info → pypreservica-3.3.4.dist-info/licenses}/LICENSE.txt +0 -0
- {pyPreservica-2.7.2.dist-info → pypreservica-3.3.4.dist-info}/top_level.txt +0 -0
pyPreservica/uploadAPI.py
CHANGED
@@ -13,7 +13,7 @@ import shutil
 import tempfile
 import uuid
 import xml
-from datetime import datetime, timedelta
+from datetime import datetime, timedelta, timezone
 from time import sleep
 from xml.dom import minidom
 from xml.etree import ElementTree
@@ -22,11 +22,12 @@ from xml.etree.ElementTree import Element, SubElement
 import boto3
 import s3transfer.tasks
 import s3transfer.upload
-
+from botocore.session import get_session
 from boto3.s3.transfer import TransferConfig, S3Transfer
 from botocore.config import Config
 from botocore.credentials import RefreshableCredentials
-from botocore.exceptions import ClientError
+from botocore.exceptions import ClientError, NoCredentialsError, PartialCredentialsError
+from dateutil.tz import tzlocal
 from s3transfer import S3UploadFailedError
 from tqdm import tqdm
 
@@ -37,7 +38,7 @@ logger = logging.getLogger(__name__)
 
 MB = 1024 * 1024
 GB = 1024 ** 3
-transfer_config = TransferConfig(multipart_threshold=int(
+transfer_config = TransferConfig(multipart_threshold=int(32 * MB))
 
 CONTENT_FOLDER = "content"
 PRESERVATION_CONTENT_FOLDER = "p1"
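Note: the previously truncated `transfer_config` line is now a complete expression with a fixed 32 MB threshold. A minimal sketch of what that setting means in boto3 (values mirror the diff):

```python
from boto3.s3.transfer import TransferConfig

MB = 1024 * 1024
# Packages of 32 MB or more are sent as S3 multipart uploads;
# anything smaller goes up in a single PUT request.
transfer_config = TransferConfig(multipart_threshold=int(32 * MB))
```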
@@ -80,7 +81,8 @@ class PutObjectTask(s3transfer.tasks.Task):
 class CompleteMultipartUploadTask(s3transfer.tasks.Task):
     # Copied from s3transfer/tasks.py, changed to return a result.
     def _main(self, client, bucket, key, upload_id, parts, extra_args):
-        return client.complete_multipart_upload(Bucket=bucket, Key=key, UploadId=upload_id,
+        return client.complete_multipart_upload(Bucket=bucket, Key=key, UploadId=upload_id,
+                                                MultipartUpload={"Parts": parts},
                                                 **extra_args, )
 
 
@@ -99,8 +101,8 @@ def prettify(elem):
 def __create_io__(xip=None, file_name=None, parent_folder=None, **kwargs):
     if xip is None:
         xip = Element('xip:XIP')
+        xip.set('xmlns:xip', 'http://preservica.com/XIP/v6.0')
     assert xip is not None
-    xip.set('xmlns:xip', 'http://preservica.com/XIP/v6.0')
     io = SubElement(xip, 'xip:InformationObject')
     ref = SubElement(io, 'xip:Ref')
 
@@ -224,7 +226,8 @@ def __make_representation_multiple_co__(xip, rep_name, rep_type, rep_files, io_r
     return refs_dict
 
 
-def cvs_to_cmis_xslt(csv_file, xml_namespace, root_element, title="Metadata Title", export_folder=None,
+def cvs_to_cmis_xslt(csv_file, xml_namespace, root_element, title="Metadata Title", export_folder=None,
+                     additional_namespaces=None):
     """
     Create a custom CMIS transform to display metadata within UA.
 
@@ -242,7 +245,8 @@ def cvs_to_cmis_xslt(csv_file, xml_namespace, root_element, title="Metadata Titl
 
    namespaces = {"version": "2.0", "xmlns:xsl": "http://www.w3.org/1999/XSL/Transform",
                  "xmlns:fn": "http://www.w3.org/2005/xpath-functions", "xmlns:xs": "http://www.w3.org/2001/XMLSchema",
-                 "xmlns:csv": xml_namespace, "xmlns": "http://www.tessella.com/sdb/cmis/metadata",
+                 "xmlns:csv": xml_namespace, "xmlns": "http://www.tessella.com/sdb/cmis/metadata",
+                 "exclude-result-prefixes": "csv"}
 
    if additional_namespaces is not None:
        for prefix, uri in additional_namespaces.items():
@@ -311,7 +315,8 @@ def cvs_to_xsd(csv_file, xml_namespace, root_element, export_folder=None, additi
                headers.add(xml_tag)
                break
 
-    namespaces = {"xmlns:xs": "http://www.w3.org/2001/XMLSchema", "attributeFormDefault": "unqualified",
+    namespaces = {"xmlns:xs": "http://www.w3.org/2001/XMLSchema", "attributeFormDefault": "unqualified",
+                  "elementFormDefault": "qualified",
                  "targetNamespace": xml_namespace}
 
    if additional_namespaces is not None:
@@ -333,7 +338,8 @@ def cvs_to_xsd(csv_file, xml_namespace, root_element, export_folder=None, additi
            prefix, sep, tag = header.partition(":")
            try:
                namespace = additional_namespaces[prefix]
-                xml.etree.ElementTree.SubElement(xml_sequence, "xs:element",
+                xml.etree.ElementTree.SubElement(xml_sequence, "xs:element",
+                                                 {"ref": header, "xmlns:" + prefix: namespace})
            except KeyError:
                xml.etree.ElementTree.SubElement(xml_sequence, "xs:element", {"type": "xs:string", "name": header})
        else:
@@ -350,7 +356,8 @@ def cvs_to_xsd(csv_file, xml_namespace, root_element, export_folder=None, additi
    return xsd_file
 
 
-def csv_to_search_xml(csv_file, xml_namespace, root_element, title="Metadata Title", export_folder=None,
+def csv_to_search_xml(csv_file, xml_namespace, root_element, title="Metadata Title", export_folder=None,
+                      additional_namespaces=None):
    """
    Create a custom Preservica search index based on the columns in a csv file
 
@@ -400,7 +407,8 @@ def csv_to_search_xml(csv_file, xml_namespace, root_element, title="Metadata Tit
    return search_xml
 
 
-def cvs_to_xml(csv_file, xml_namespace, root_element, file_name_column="filename", export_folder=None,
+def cvs_to_xml(csv_file, xml_namespace, root_element, file_name_column="filename", export_folder=None,
+               additional_namespaces=None):
    """
    Export the rows of a CSV file as XML metadata documents which can be added to Preservica assets
 
@@ -451,7 +459,8 @@ def cvs_to_xml(csv_file, xml_namespace, root_element, file_name_column="filename
        yield name
 
 
-def generic_asset_package(preservation_files_dict=None, access_files_dict=None, export_folder=None, parent_folder=None,
+def generic_asset_package(preservation_files_dict=None, access_files_dict=None, export_folder=None, parent_folder=None,
+                          compress=True,
                          **kwargs):
    # some basic validation
    if export_folder is None:
@@ -473,7 +482,7 @@ def generic_asset_package(preservation_files_dict=None, access_files_dict=None,
    content_type = kwargs.get('CustomType', "")
 
    if not compress:
-        shutil.register_archive_format("szip", _make_stored_zipfile, None, "UnCompressed ZIP file")
+        shutil.register_archive_format(name="szip", function=_make_stored_zipfile, extra_args=None, description="UnCompressed ZIP file")
 
    has_preservation_files = bool((preservation_files_dict is not None) and (len(preservation_files_dict) > 0))
    has_access_files = bool((access_files_dict is not None) and (len(access_files_dict) > 0))
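Note: the `register_archive_format` call now spells out the stdlib keyword arguments. A hedged sketch of the pattern it relies on — `_make_stored_zip` below is a hypothetical stand-in for pyPreservica's private `_make_stored_zipfile`, shown only to illustrate how a "stored" (uncompressed) zip maker plugs into `shutil`:

```python
import os
import shutil
import zipfile

def _make_stored_zip(base_name, base_dir, **kwargs):
    # Write every file under base_dir into a ZIP_STORED (no compression) archive.
    zip_filename = base_name + ".zip"
    with zipfile.ZipFile(zip_filename, "w", zipfile.ZIP_STORED) as zf:
        for root, _dirs, files in os.walk(base_dir):
            for name in files:
                path = os.path.join(root, name)
                zf.write(path, os.path.relpath(path, base_dir))
    return zip_filename

shutil.register_archive_format(name="szip", function=_make_stored_zip,
                               extra_args=None, description="UnCompressed ZIP file")
# shutil.make_archive("package", "szip", root_dir="my_package") now emits a stored zip.
```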
@@ -501,8 +510,10 @@ def generic_asset_package(preservation_files_dict=None, access_files_dict=None,
    if has_preservation_files:
        for representation_name in preservation_files_dict.keys():
            preservation_files_list = preservation_files_dict[representation_name]
-            preservation_refs_dict = __make_representation_multiple_co__(xip, rep_name=representation_name,
-
+            preservation_refs_dict = __make_representation_multiple_co__(xip, rep_name=representation_name,
+                                                                         rep_type="Preservation",
+                                                                         rep_files=preservation_files_list,
+                                                                         io_ref=io_ref)
            preservation_representation_refs_dict[representation_name] = preservation_refs_dict
 
    if has_access_files:
@@ -519,13 +530,16 @@ def generic_asset_package(preservation_files_dict=None, access_files_dict=None,
            default_content_objects_title = os.path.splitext(os.path.basename(filename))[0]
 
            preservation_content_title = kwargs.get('Preservation_Content_Title', default_content_objects_title)
-            preservation_content_description = kwargs.get('Preservation_Content_Description',
+            preservation_content_description = kwargs.get('Preservation_Content_Description',
+                                                          default_content_objects_title)
 
            if isinstance(preservation_content_title, dict):
-                preservation_content_title = preservation_content_title.get("filename",
+                preservation_content_title = preservation_content_title.get("filename",
+                                                                            default_content_objects_title)
 
            if isinstance(preservation_content_description, dict):
-                preservation_content_description = preservation_content_description.get("filename",
+                preservation_content_description = preservation_content_description.get("filename",
+                                                                                        default_content_objects_title)
 
            __make_content_objects__(xip, preservation_content_title, content_ref, io_ref, security_tag,
                                     preservation_content_description, content_type)
@@ -545,7 +559,8 @@ def generic_asset_package(preservation_files_dict=None, access_files_dict=None,
            if isinstance(access_content_description, dict):
                access_content_description = access_content_title.get("filename", default_content_objects_title)
 
-            __make_content_objects__(xip, access_content_title, content_ref, io_ref, security_tag,
+            __make_content_objects__(xip, access_content_title, content_ref, io_ref, security_tag,
+                                     access_content_description,
                                     content_type)
 
    if has_preservation_files:
@@ -598,12 +613,12 @@ def generic_asset_package(preservation_files_dict=None, access_files_dict=None,
        for identifier_key, identifier_value in identifier_map.items():
            if identifier_key:
                if identifier_value:
-                    identifier = SubElement(xip, 'Identifier')
-                    id_type = SubElement(identifier, "Type")
+                    identifier = SubElement(xip, 'xip:Identifier')
+                    id_type = SubElement(identifier, "xip:Type")
                    id_type.text = identifier_key
-                    id_value = SubElement(identifier, "Value")
+                    id_value = SubElement(identifier, "xip:Value")
                    id_value.text = identifier_value
-                    id_io = SubElement(identifier, "Entity")
+                    id_io = SubElement(identifier, "xip:Entity")
                    id_io.text = io_ref
 
    if 'Asset_Metadata' in kwargs:
@@ -613,22 +628,22 @@ def generic_asset_package(preservation_files_dict=None, access_files_dict=None,
            if metadata_path:
                if os.path.exists(metadata_path) and os.path.isfile(metadata_path):
                    descriptive_metadata = xml.etree.ElementTree.parse(source=metadata_path)
-                    metadata = SubElement(xip, 'Metadata', {'schemaUri': metadata_ns})
-                    metadata_ref = SubElement(metadata, 'Ref')
+                    metadata = SubElement(xip, 'xip:Metadata', {'schemaUri': metadata_ns})
+                    metadata_ref = SubElement(metadata, 'xip:Ref')
                    metadata_ref.text = str(uuid.uuid4())
-                    entity = SubElement(metadata, 'Entity')
+                    entity = SubElement(metadata, 'xip:Entity')
                    entity.text = io_ref
-                    content = SubElement(metadata, 'Content')
+                    content = SubElement(metadata, 'xip:Content')
                    content.append(descriptive_metadata.getroot())
                elif isinstance(metadata_path, str):
                    try:
                        descriptive_metadata = xml.etree.ElementTree.fromstring(metadata_path)
-                        metadata = SubElement(xip, 'Metadata', {'schemaUri': metadata_ns})
-                        metadata_ref = SubElement(metadata, 'Ref')
+                        metadata = SubElement(xip, 'xip:Metadata', {'schemaUri': metadata_ns})
+                        metadata_ref = SubElement(metadata, 'xip:Ref')
                        metadata_ref.text = str(uuid.uuid4())
-                        entity = SubElement(metadata, 'Entity')
+                        entity = SubElement(metadata, 'xip:Entity')
                        entity.text = io_ref
-                        content = SubElement(metadata, 'Content')
+                        content = SubElement(metadata, 'xip:Content')
                        content.append(descriptive_metadata)
                    except RuntimeError:
                        logging.info(f"Could not parse asset metadata in namespace {metadata_ns}")
@@ -712,71 +727,72 @@ def multi_asset_package(asset_file_list=None, export_folder=None, parent_folder=
        os.mkdir(os.path.join(inner_folder, CONTENT_FOLDER))
 
    asset_map = dict()
-    xip = Element('XIP')
+    xip = Element('xip:XIP')
+    xip.set('xmlns:xip', 'http://preservica.com/XIP/v6.0')
    for file in asset_file_list:
        default_asset_title = os.path.splitext(os.path.basename(file))[0]
        xip, io_ref = __create_io__(xip, file_name=default_asset_title, parent_folder=parent_folder, **kwargs)
        asset_map[file] = io_ref
-        representation = SubElement(xip, 'Representation')
-        io_link = SubElement(representation, 'InformationObject')
+        representation = SubElement(xip, 'xip:Representation')
+        io_link = SubElement(representation, 'xip:InformationObject')
        io_link.text = io_ref
-        access_name = SubElement(representation, 'Name')
+        access_name = SubElement(representation, 'xip:Name')
        access_name.text = "Preservation"
-        access_type = SubElement(representation, 'Type')
+        access_type = SubElement(representation, 'xip:Type')
        access_type.text = "Preservation"
-        content_objects = SubElement(representation, 'ContentObjects')
-        content_object = SubElement(content_objects, 'ContentObject')
+        content_objects = SubElement(representation, 'xip:ContentObjects')
+        content_object = SubElement(content_objects, 'xip:ContentObject')
        content_object_ref = str(uuid.uuid4())
        content_object.text = content_object_ref
 
        default_content_objects_title = os.path.splitext(os.path.basename(file))[0]
-        content_object = SubElement(xip, 'ContentObject')
-        ref_element = SubElement(content_object, "Ref")
+        content_object = SubElement(xip, 'xip:ContentObject')
+        ref_element = SubElement(content_object, "xip:Ref")
        ref_element.text = content_object_ref
-        title = SubElement(content_object, "Title")
+        title = SubElement(content_object, "xip:Title")
        title.text = default_content_objects_title
-        description = SubElement(content_object, "Description")
+        description = SubElement(content_object, "xip:Description")
        description.text = default_content_objects_title
-        security_tag_element = SubElement(content_object, "SecurityTag")
+        security_tag_element = SubElement(content_object, "xip:SecurityTag")
        security_tag_element.text = security_tag
-        custom_type = SubElement(content_object, "CustomType")
+        custom_type = SubElement(content_object, "xip:CustomType")
        custom_type.text = content_type
-        parent = SubElement(content_object, "Parent")
+        parent = SubElement(content_object, "xip:Parent")
        parent.text = io_ref
 
-        generation = SubElement(xip, 'Generation', {"original": "true", "active": "true"})
-        content_object = SubElement(generation, "ContentObject")
+        generation = SubElement(xip, 'xip:Generation', {"original": "true", "active": "true"})
+        content_object = SubElement(generation, "xip:ContentObject")
        content_object.text = content_object_ref
-        label = SubElement(generation, "Label")
+        label = SubElement(generation, "xip:Label")
        label.text = os.path.splitext(os.path.basename(file))[0]
-        effective_date = SubElement(generation, "EffectiveDate")
+        effective_date = SubElement(generation, "xip:EffectiveDate")
        effective_date.text = datetime.now().isoformat()
-        bitstreams = SubElement(generation, "Bitstreams")
-        bitstream = SubElement(bitstreams, "Bitstream")
+        bitstreams = SubElement(generation, "xip:Bitstreams")
+        bitstream = SubElement(bitstreams, "xip:Bitstream")
        bitstream.text = os.path.basename(file)
-        SubElement(generation, "Formats")
-        SubElement(generation, "Properties")
+        SubElement(generation, "xip:Formats")
+        SubElement(generation, "xip:Properties")
 
-        bitstream = SubElement(xip, 'Bitstream')
-        filename_element = SubElement(bitstream, "Filename")
+        bitstream = SubElement(xip, 'xip:Bitstream')
+        filename_element = SubElement(bitstream, "xip:Filename")
        filename_element.text = os.path.basename(file)
-        filesize = SubElement(bitstream, "FileSize")
+        filesize = SubElement(bitstream, "xip:FileSize")
        file_stats = os.stat(file)
        filesize.text = str(file_stats.st_size)
-        physical_location = SubElement(bitstream, "PhysicalLocation")
-        fixities = SubElement(bitstream, "Fixities")
+        physical_location = SubElement(bitstream, "xip:PhysicalLocation")
+        fixities = SubElement(bitstream, "xip:Fixities")
        fixity_result = fixity_callback(filename_element.text, file)
        if type(fixity_result) == tuple:
-            fixity = SubElement(fixities, "Fixity")
-            fixity_algorithm_ref = SubElement(fixity, "FixityAlgorithmRef")
-            fixity_value = SubElement(fixity, "FixityValue")
+            fixity = SubElement(fixities, "xip:Fixity")
+            fixity_algorithm_ref = SubElement(fixity, "xip:FixityAlgorithmRef")
+            fixity_value = SubElement(fixity, "xip:FixityValue")
            fixity_algorithm_ref.text = fixity_result[0]
            fixity_value.text = fixity_result[1]
        elif type(fixity_result) == dict:
            for key, val in fixity_result.items():
-                fixity = SubElement(fixities, "Fixity")
-                fixity_algorithm_ref = SubElement(fixity, "FixityAlgorithmRef")
-                fixity_value = SubElement(fixity, "FixityValue")
+                fixity = SubElement(fixities, "xip:Fixity")
+                fixity_algorithm_ref = SubElement(fixity, "xip:FixityAlgorithmRef")
+                fixity_value = SubElement(fixity, "xip:FixityValue")
                fixity_algorithm_ref.text = key
                fixity_value.text = val
        else:
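Note: the recurring change in this hunk is the `xip:` prefix on every element name, paired with one `xmlns:xip` declaration on the root. A minimal sketch of why that works with `xml.etree`: the prefix is written literally into each tag and resolved by the single namespace declaration when the tree is serialised (element values here are illustrative):

```python
from xml.etree.ElementTree import Element, SubElement, tostring

xip = Element('xip:XIP')
xip.set('xmlns:xip', 'http://preservica.com/XIP/v6.0')
identifier = SubElement(xip, 'xip:Identifier')
SubElement(identifier, 'xip:Type').text = 'code'
SubElement(identifier, 'xip:Value').text = 'example-001'

print(tostring(xip, encoding='unicode'))
# <xip:XIP xmlns:xip="http://preservica.com/XIP/v6.0"><xip:Identifier>...</xip:Identifier></xip:XIP>
```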
@@ -790,12 +806,12 @@ def multi_asset_package(asset_file_list=None, export_folder=None, parent_folder=
        for identifier_key, identifier_value in identifier_map_values.items():
            if identifier_key:
                if identifier_value:
-                    identifier = SubElement(xip, 'Identifier')
-                    id_type = SubElement(identifier, "Type")
+                    identifier = SubElement(xip, 'xip:Identifier')
+                    id_type = SubElement(identifier, "xip:Type")
                    id_type.text = identifier_key
-                    id_value = SubElement(identifier, "Value")
+                    id_value = SubElement(identifier, "xip:Value")
                    id_value.text = identifier_value
-                    id_io = SubElement(identifier, "Entity")
+                    id_io = SubElement(identifier, "xip:Entity")
                    id_io.text = io_ref
 
        src_file = file
@@ -815,7 +831,8 @@ def multi_asset_package(asset_file_list=None, export_folder=None, parent_folder=
    return top_level_folder + ".zip"
 
 
-def complex_asset_package(preservation_files_list=None, access_files_list=None, export_folder=None, parent_folder=None,
+def complex_asset_package(preservation_files_list=None, access_files_list=None, export_folder=None, parent_folder=None,
+                          compress=True,
                          **kwargs):
    """
 
@@ -894,27 +911,34 @@ def complex_asset_package(preservation_files_list=None, access_files_list=None,
    if has_preservation_files:
        if default_asset_title is None:
            default_asset_title = os.path.splitext(os.path.basename(preservation_files_list[0]))[0]
-
        # create the asset
-
+        if io_ref is None:
+            xip, io_ref = __create_io__(file_name=default_asset_title, parent_folder=parent_folder, **kwargs)
 
    if has_access_files:
        if default_asset_title is None:
            default_asset_title = os.path.splitext(os.path.basename(access_files_list[0]))[0]
-
        if io_ref is None:
            xip, io_ref = __create_io__(file_name=default_asset_title, parent_folder=parent_folder, **kwargs)
 
+    if io_ref is None:
+        default_asset_title = kwargs.get('Title', None)
+        if default_asset_title is None:
+            default_asset_title = "New Asset"
+        xip, io_ref = __create_io__(file_name=default_asset_title, parent_folder=parent_folder, **kwargs)
+
    if has_preservation_files:
        # add the content objects
        representation_name = kwargs.get('Preservation_Representation_Name', "Preservation")
-        preservation_refs_dict = __make_representation_multiple_co__(xip, rep_name=representation_name,
+        preservation_refs_dict = __make_representation_multiple_co__(xip, rep_name=representation_name,
+                                                                     rep_type="Preservation",
                                                                     rep_files=preservation_files_list, io_ref=io_ref)
 
    if has_access_files:
        # add the content objects
        access_name = kwargs.get('Access_Representation_Name', "Access")
-        access_refs_dict = __make_representation_multiple_co__(xip, rep_name=access_name, rep_type="Access",
+        access_refs_dict = __make_representation_multiple_co__(xip, rep_name=access_name, rep_type="Access",
+                                                               rep_files=access_files_list,
                                                               io_ref=io_ref)
 
    if has_preservation_files:
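Note: the new final `if io_ref is None:` branch means `complex_asset_package` no longer requires any content files: with neither preservation nor access files supplied it still creates the asset, titled from the `Title` kwarg or falling back to "New Asset". A hedged usage sketch (the export folder path is a placeholder, and `folder` stands for a Folder previously fetched via the EntityAPI):

```python
from pyPreservica import complex_asset_package

# Builds a package containing a metadata-only asset under the given parent folder.
package = complex_asset_package(preservation_files_list=None, access_files_list=None,
                                export_folder="/tmp/packages",   # placeholder
                                parent_folder=folder,            # Folder from EntityAPI
                                Title="Metadata Only Asset")
```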
@@ -922,7 +946,8 @@ def complex_asset_package(preservation_files_list=None, access_files_list=None,
        for content_ref, filename in preservation_refs_dict.items():
            default_content_objects_title = os.path.splitext(os.path.basename(filename))[0]
            preservation_content_title = kwargs.get('Preservation_Content_Title', default_content_objects_title)
-            preservation_content_description = kwargs.get('Preservation_Content_Description',
+            preservation_content_description = kwargs.get('Preservation_Content_Description',
+                                                          default_content_objects_title)
 
            if isinstance(preservation_content_title, dict):
                preservation_content_title = preservation_content_title[filename]
@@ -930,7 +955,8 @@ def complex_asset_package(preservation_files_list=None, access_files_list=None,
            if isinstance(preservation_content_description, dict):
                preservation_content_description = preservation_content_description[filename]
 
-            __make_content_objects__(xip, preservation_content_title, content_ref, io_ref, security_tag,
+            __make_content_objects__(xip, preservation_content_title, content_ref, io_ref, security_tag,
+                                     preservation_content_description,
                                     content_type)
 
    if has_access_files:
@@ -947,7 +973,8 @@ def complex_asset_package(preservation_files_list=None, access_files_list=None,
            if isinstance(access_content_description, dict):
                access_content_title = access_content_title[filename]
 
-            __make_content_objects__(xip, access_content_title, content_ref, io_ref, security_tag,
+            __make_content_objects__(xip, access_content_title, content_ref, io_ref, security_tag,
+                                     access_content_description, content_type)
 
    if has_preservation_files:
 
@@ -955,7 +982,8 @@ def complex_asset_package(preservation_files_list=None, access_files_list=None,
 
        for content_ref, filename in preservation_refs_dict.items():
            preservation_file_name = os.path.basename(filename)
-            __make_generation__(xip, preservation_file_name, content_ref, preservation_generation_label,
+            __make_generation__(xip, preservation_file_name, content_ref, preservation_generation_label,
+                                PRESERVATION_CONTENT_FOLDER)
 
    if has_access_files:
 
@@ -1070,7 +1098,8 @@ def complex_asset_package(preservation_files_list=None, access_files_list=None,
    return top_level_folder + ".zip"
 
 
-def simple_asset_package(preservation_file=None, access_file=None, export_folder=None, parent_folder=None,
+def simple_asset_package(preservation_file=None, access_file=None, export_folder=None, parent_folder=None,
+                         compress=True, **kwargs):
    """
    Create a Preservica package containing a single Asset from a single preservation file
    and an optional access file.
@@ -1130,378 +1159,14 @@ def _unpad(s):
 
 class UploadAPI(AuthenticatedAPI):
 
-    def ingest_tweet(self, twitter_user=None, tweet_id: int = 0, twitter_consumer_key=None, twitter_secret_key=None, folder=None,
-                     callback=None, **kwargs):
-
-        """
-        Ingest tweets from a twitter stream by twitter username
-
-        :param tweet_id:
-        :param str twitter_user: Twitter Username
-        :param str twitter_consumer_key: Optional asset title
-        :param str twitter_secret_key: Optional asset description
-        :param str folder: Folder to ingest into
-        :param callback callback: Optional upload progress callback
-        :raises RuntimeError:
-
-
-        """
-
-        def get_image(m, has_video_element):
-            media_url_https_ = m["media_url_https"]
-            if media_url_https_:
-                req = requests.get(media_url_https_)
-                if req.status_code == requests.codes.ok:
-                    if has_video_element:
-                        image_name_ = f"{{{media_id_str}}}_[{twitter_user}]_thumb.jpg"
-                    else:
-                        image_name_ = f"{{{media_id_str}}}_[{twitter_user}].jpg"
-                    image_name_document_ = open(image_name_, "wb")
-                    image_name_document_.write(req.content)
-                    image_name_document_.close()
-                    return image_name_
-
-        def get_video(m):
-            video_info_ = m["video_info"]
-            variants_ = video_info_["variants"]
-            for v_ in variants_:
-                video_url_ = v_["url"]
-                req = requests.get(video_url_)
-                if req.status_code == requests.codes.ok:
-                    video_name_ = f"{{{media_id_str}}}_[{twitter_user}].mp4"
-                    video_name_document_ = open(video_name_, "wb")
-                    video_name_document_.write(req.content)
-                    video_name_document_.close()
-                    return video_name_, True
-
-        entity_client = pyPreservica.EntityAPI(username=self.username, password=self.password, server=self.server, tenant=self.tenant)
-        if hasattr(folder, "reference"):
-            folder = entity_client.folder(folder.reference)
-        else:
-            folder = entity_client.folder(folder)
-        try:
-            import tweepy
-        except ImportError:
-            logger.error("Package tweepy is required for twitter harvesting. pip install --upgrade tweepy")
-            raise RuntimeError("Package tweepy is required for twitter harvesting. pip install --upgrade tweepy")
-        config = configparser.ConfigParser()
-        config.read('credentials.properties')
-        if twitter_consumer_key is None:
-            twitter_consumer_key = os.environ.get('TWITTER_CONSUMER_KEY')
-        if twitter_consumer_key is None:
-            try:
-                twitter_consumer_key = config['credentials']['TWITTER_CONSUMER_KEY']
-            except KeyError:
-                logger.error("No valid TWITTER_CONSUMER_KEY found in method arguments, "
-                             "environment variables or credentials.properties file")
-                raise RuntimeError("No valid TWITTER_CONSUMER_KEY found in method arguments, "
-                                   "environment variables or credentials.properties file")
-        if twitter_secret_key is None:
-            twitter_secret_key = os.environ.get('TWITTER_SECRET_KEY')
-        if twitter_secret_key is None:
-            try:
-                twitter_secret_key = config['credentials']['TWITTER_SECRET_KEY']
-            except KeyError:
-                logger.error("No valid TWITTER_SECRET_KEY found in method arguments, "
-                             "environment variables or credentials.properties file")
-                raise RuntimeError("No valid TWITTER_SECRET_KEY found in method arguments, "
-                                   "environment variables or credentials.properties file")
-
-        api = None
-        try:
-            auth = tweepy.AppAuthHandler(twitter_consumer_key, twitter_secret_key)
-            api = tweepy.API(auth, wait_on_rate_limit=True)
-        except TweepError:
-            logger.error("No valid Twitter API keys. Could not authenticate")
-            raise RuntimeError("No valid Twitter API keys. Could not authenticate")
-        if api is not None:
-            logger.debug(api)
-            tweet = api.get_status(tweet_id, tweet_mode="extended", include_entities=True)
-            created_at = tweet.created_at
-            id_str = tweet.id_str
-            author = tweet.author.name
-            tweet_entities = tweet.entities
-            hashtags = dict()
-            if 'hashtags' in tweet_entities:
-                hashtags = tweet.entities['hashtags']
-            entities = entity_client.identifier("tweet_id", id_str.strip())
-            if len(entities) > 0:
-                logger.warning("Tweet already exists, skipping....")
-                return
-            logger.info(f"Processing tweet {id_str} ...")
-            tid = tweet.id
-            content_objects = list()
-            full_tweet = api.get_status(tid, tweet_mode="extended", include_entities=True)
-            text = tweet.full_text
-            full_text = full_tweet.full_text
-            file_name = f"{{{id_str}}}_[{twitter_user}].json"
-            json_doc = json.dumps(full_tweet._json)
-            json_file = open(file_name, "wt", encoding="utf-8")
-            json_file.write(json_doc)
-            json_file.close()
-            content_objects.append(file_name)
-            if hasattr(full_tweet, "extended_entities"):
-                extended_entities = full_tweet.extended_entities
-                if "media" in extended_entities:
-                    media = extended_entities["media"]
-                    for med in media:
-                        media_id_str = med["id_str"]
-                        has_video = False
-                        if "video_info" in med:
-                            co, has_video = get_video(med)
-                            content_objects.append(co)
-                            if has_video:
-                                co = get_image(med, has_video)
-                                content_objects.append(co)
-                            continue
-                        if "media_url_https" in med:
-                            co = get_image(med, has_video)
-                            content_objects.append(co)
-            identifiers = dict()
-            asset_metadata = dict()
-            identifiers["tweet_id"] = id_str
-
-            user = full_tweet._json['user']
-
-            if full_tweet._json.get('retweeted_status'):
-                retweeted_status = full_tweet._json['retweeted_status']
-                if retweeted_status.get("extended_entities"):
-                    extended_entities = retweeted_status["extended_entities"]
-                    if "media" in extended_entities:
-                        media = extended_entities["media"]
-                        for med in media:
-                            media_id_str = med["id_str"]
-                            has_video = False
-                            if "video_info" in med:
-                                co, has_video = get_video(med)
-                                content_objects.append(co)
-                                continue
-                            if "media_url_https" in med:
-                                co = get_image(med, has_video)
-                                content_objects.append(co)
-
-            xml_object = xml.etree.ElementTree.Element('tweet', {"xmlns": "http://www.preservica.com/tweets/v1"})
-            xml.etree.ElementTree.SubElement(xml_object, "id").text = id_str
-            xml.etree.ElementTree.SubElement(xml_object, "full_text").text = full_text
-            xml.etree.ElementTree.SubElement(xml_object, "created_at").text = str(created_at)
-            xml.etree.ElementTree.SubElement(xml_object, "screen_name_sender").text = user.get('screen_name')
-            for h in hashtags:
-                xml.etree.ElementTree.SubElement(xml_object, "hashtag").text = str(h['text'])
-
-            xml.etree.ElementTree.SubElement(xml_object, "name").text = author
-            xml.etree.ElementTree.SubElement(xml_object, "retweet").text = str(full_tweet._json['retweet_count'])
-            xml.etree.ElementTree.SubElement(xml_object, "likes").text = str(full_tweet._json['favorite_count'])
-
-            xml_request = xml.etree.ElementTree.tostring(xml_object, encoding='utf-8')
-
-            metadata_document = open("metadata.xml", "wt", encoding="utf-8")
-            metadata_document.write(xml_request.decode("utf-8"))
-            metadata_document.close()
-
-            asset_metadata["http://www.preservica.com/tweets/v1"] = "metadata.xml"
-
-            security_tag = kwargs.get("SecurityTag", "open")
-            asset_title = kwargs.get("Title", text)
-            asset_description = kwargs.get("Description", full_text)
-
-            p = complex_asset_package(preservation_files_list=content_objects, parent_folder=folder, Title=asset_title,
-                                      Description=asset_description, CustomType="Tweet", Identifiers=identifiers,
-                                      Asset_Metadata=asset_metadata, SecurityTag=security_tag)
-            self.upload_zip_package(p, folder=folder, callback=callback)
-            for ob in content_objects:
-                os.remove(ob)
-            os.remove("metadata.xml")
-
-    def ingest_twitter_feed(self, twitter_user=None, num_tweets: int = 25, twitter_consumer_key=None, twitter_secret_key=None, folder=None,
-                            callback=None, **kwargs):
-
-        """
-        Ingest tweets from a twitter stream by twitter username
-
-        :param str twitter_user: Twitter Username
-        :param int num_tweets: The number of tweets from the stream
-        :param str twitter_consumer_key: Optional asset title
-        :param str twitter_secret_key: Optional asset description
-        :param str folder: Folder to ingest into
-        :param callback callback: Optional upload progress callback
-        :raises RuntimeError:
 
 
-        """
-
-        def get_image(m, has_video_element):
-            media_url_https_ = m["media_url_https"]
-            if media_url_https_:
-                req = requests.get(media_url_https_)
-                if req.status_code == requests.codes.ok:
-                    if has_video_element:
-                        image_name_ = f"{{{media_id_str}}}_[{twitter_user}]_thumb.jpg"
-                    else:
-                        image_name_ = f"{{{media_id_str}}}_[{twitter_user}].jpg"
-                    image_name_document_ = open(image_name_, "wb")
-                    image_name_document_.write(req.content)
-                    image_name_document_.close()
-                    return image_name_
-
-        def get_video(m):
-            video_info_ = m["video_info"]
-            variants_ = video_info_["variants"]
-            for v_ in variants_:
-                if v_['content_type'] == 'video/mp4':
-                    video_url_ = v_["url"]
-                    with requests.get(video_url_, stream=True) as req:
-                        video_name_ = f"{{{media_id_str}}}_[{twitter_user}].mp4"
-                        with open(video_name_, 'wb') as video_name_document_:
-                            for chunk in req.iter_content(chunk_size=1024):
-                                video_name_document_.write(chunk)
-                            video_name_document_.flush()
-                    return video_name_, True
-
-        entity_client = pyPreservica.EntityAPI(username=self.username, password=self.password, server=self.server, tenant=self.tenant)
-        if hasattr(folder, "reference"):
-            folder = entity_client.folder(folder.reference)
-        else:
-            folder = entity_client.folder(folder)
-        try:
-            import tweepy
-        except ImportError:
-            logger.error("Package tweepy is required for twitter harvesting. pip install --upgrade tweepy")
-            raise RuntimeError("Package tweepy is required for twitter harvesting. pip install --upgrade tweepy")
-        config = configparser.ConfigParser()
-        config.read('credentials.properties')
-        if twitter_consumer_key is None:
-            twitter_consumer_key = os.environ.get('TWITTER_CONSUMER_KEY')
-        if twitter_consumer_key is None:
-            try:
-                twitter_consumer_key = config['credentials']['TWITTER_CONSUMER_KEY']
-            except KeyError:
-                logger.error("No valid TWITTER_CONSUMER_KEY found in method arguments, "
-                             "environment variables or credentials.properties file")
-                raise RuntimeError("No valid TWITTER_CONSUMER_KEY found in method arguments, "
-                                   "environment variables or credentials.properties file")
-        if twitter_secret_key is None:
-            twitter_secret_key = os.environ.get('TWITTER_SECRET_KEY')
-        if twitter_secret_key is None:
-            try:
-                twitter_secret_key = config['credentials']['TWITTER_SECRET_KEY']
-            except KeyError:
-                logger.error("No valid TWITTER_SECRET_KEY found in method arguments, "
-                             "environment variables or credentials.properties file")
-                raise RuntimeError("No valid TWITTER_SECRET_KEY found in method arguments, "
-                                   "environment variables or credentials.properties file")
-
-        api = None
-        try:
-            auth = tweepy.AppAuthHandler(twitter_consumer_key, twitter_secret_key)
-            api = tweepy.API(auth, wait_on_rate_limit=True)
-        except TweepError:
-            logger.error("No valid Twitter API keys. Could not authenticate")
-            raise RuntimeError("No valid Twitter API keys. Could not authenticate")
-        if api is not None:
-            logger.debug(api)
-            for tweet in tweepy.Cursor(api.user_timeline, id=twitter_user).items(int(num_tweets)):
-                created_at = tweet.created_at
-                id_str = tweet.id_str
-                author = tweet.author.name
-                tweet_entities = tweet.entities
-                hashtags = dict()
-                if 'hashtags' in tweet_entities:
-                    hashtags = tweet.entities['hashtags']
-                entities = entity_client.identifier("tweet_id", id_str.strip())
-                if len(entities) > 0:
-                    logger.warning("Tweet already exists, skipping....")
-                    continue
-                logger.info(f"Processing tweet {id_str} ...")
-                tid = tweet.id
-                content_objects = list()
-                full_tweet = api.get_status(tid, tweet_mode="extended", include_entities=True)
-                text = tweet.text
-                logger.debug(text)
-                full_text = full_tweet.full_text
-                file_name = f"{{{id_str}}}_[{twitter_user}].json"
-                json_doc = json.dumps(full_tweet._json)
-                json_file = open(file_name, "wt", encoding="utf-8")
-                json_file.write(json_doc)
-                json_file.close()
-                content_objects.append(file_name)
-                if hasattr(full_tweet, "extended_entities"):
-                    extended_entities = full_tweet.extended_entities
-                    if "media" in extended_entities:
-                        media = extended_entities["media"]
-                        for med in media:
-                            media_id_str = med["id_str"]
-                            has_video = False
-                            if "video_info" in med:
-                                co, has_video = get_video(med)
-                                content_objects.append(co)
-                                if has_video:
-                                    co = get_image(med, has_video)
-                                    content_objects.append(co)
-                                continue
-                            if "media_url_https" in med:
-                                co = get_image(med, has_video)
-                                content_objects.append(co)
-                identifiers = {}
-                asset_metadata = {}
-                identifiers["tweet_id"] = id_str
-
-                user = full_tweet._json['user']
-
-                if full_tweet._json.get('retweeted_status'):
-                    retweeted_status = full_tweet._json['retweeted_status']
-                    if retweeted_status.get("extended_entities"):
-                        extended_entities = retweeted_status["extended_entities"]
-                        if "media" in extended_entities:
-                            media = extended_entities["media"]
-                            for med in media:
-                                media_id_str = med["id_str"]
-                                has_video = False
-                                if "video_info" in med:
-                                    co, has_video = get_video(med)
-                                    content_objects.append(co)
-                                    continue
-                                if "media_url_https" in med:
-                                    co = get_image(med, has_video)
-                                    content_objects.append(co)
-
-                xml_object = xml.etree.ElementTree.Element('tweet', {"xmlns": "http://www.preservica.com/tweets/v1"})
-                xml.etree.ElementTree.SubElement(xml_object, "id").text = id_str
-                xml.etree.ElementTree.SubElement(xml_object, "full_text").text = full_text
-                xml.etree.ElementTree.SubElement(xml_object, "created_at").text = str(created_at)
-                xml.etree.ElementTree.SubElement(xml_object, "screen_name_sender").text = user.get('screen_name')
-                for h in hashtags:
-                    xml.etree.ElementTree.SubElement(xml_object, "hashtag").text = str(h['text'])
-
-                xml.etree.ElementTree.SubElement(xml_object, "name").text = author
-                xml.etree.ElementTree.SubElement(xml_object, "retweet").text = str(full_tweet._json['retweet_count'])
-                xml.etree.ElementTree.SubElement(xml_object, "likes").text = str(full_tweet._json['favorite_count'])
-
-                xml_request = xml.etree.ElementTree.tostring(xml_object, encoding='utf-8')
-
-                metadata_document = open("metadata.xml", "wt", encoding="utf-8")
-                metadata_document.write(xml_request.decode("utf-8"))
-                metadata_document.close()
-
-                asset_metadata["http://www.preservica.com/tweets/v1"] = "metadata.xml"
-
-                security_tag = kwargs.get("SecurityTag", "open")
-                asset_title = kwargs.get("Title", text)
-                asset_description = kwargs.get("Description", full_text)
-
-                p = complex_asset_package(preservation_files_list=content_objects, parent_folder=folder, Title=asset_title,
-                                          Description=asset_description, CustomType="Tweet", Identifiers=identifiers,
-                                          Asset_Metadata=asset_metadata, SecurityTag=security_tag)
-                self.upload_zip_package(p, folder=folder, callback=callback)
-                for ob in content_objects:
-                    os.remove(ob)
-                os.remove("metadata.xml")
-                sleep(2)
 
    def ingest_web_video(self, url=None, parent_folder=None, **kwargs):
        """
        Ingest a web video such as YouTube etc based on the URL
 
-        :param str url: URL to the
+        :param str url: URL to the YouTube video
        :param Folder parent_folder: The folder to ingest the video into
        :param str Title: Optional asset title
        :param str Description: Optional asset description
@@ -1572,7 +1237,8 @@ class UploadAPI(AuthenticatedAPI):
            duration = meta.get('duration')
 
            package = simple_asset_package(preservation_file=f"{vid_id}.mp4", parent_folder=parent_folder, Title=title,
-                                           Description=description, Identifiers=identifier_map,
+                                           Description=description, Identifiers=identifier_map,
+                                           Asset_Metadata=descriptive_metadata,
                                           Preservation_Content_Title=title, SecurityTag=security_tag)
 
            self.upload_zip_package(path_to_zip_package=package, folder=parent_folder, callback=callback)
@@ -1593,10 +1259,57 @@ class UploadAPI(AuthenticatedAPI):
            self.token = self.__token__()
            return self.upload_credentials(location_id)
        else:
-            exception = HTTPException(location_id, request.status_code, request.url, "upload_credentials",
+            exception = HTTPException(location_id, request.status_code, request.url, "upload_credentials",
+                                      request.content.decode('utf-8'))
            logger.error(exception)
            raise exception
 
+    def clean_upload_bucket(self, bucket_name: str, older_than_days: int = 90):
+        """
+        Clean up objects in an upload bucket which are older than older_than_days.
+
+        """
+        from azure.storage.blob import ContainerClient
+
+        for location in self.upload_locations():
+            if location['containerName'] == bucket_name:
+
+                if location['type'] != 'AWS':
+                    credentials = self.upload_credentials(location['apiId'])
+                    account_key = credentials['key']
+                    session_token = credentials['sessionToken']
+                    sas_url = f"https://{account_key}.blob.core.windows.net/{bucket_name}"
+                    container = ContainerClient.from_container_url(container_url=sas_url, credential=session_token)
+                    now = datetime.now(timezone.utc)
+                    for blob in container.list_blobs():
+                        if abs((blob.last_modified - now).days) > older_than_days:
+                            logger.debug(f"Deleting expired object {blob.name}")
+                            container.delete_blob(blob.name)
+
+                if location['type'] == 'AWS':
+                    credentials = self.upload_credentials(location['apiId'])
+                    access_key = credentials['key']
+                    secret_key = credentials['secret']
+                    session_token = credentials['sessionToken']
+                    session = boto3.Session(aws_access_key_id=access_key, aws_secret_access_key=secret_key,
+                                            aws_session_token=session_token)
+                    s3_client = session.client("s3")
+                    paginator = s3_client.get_paginator('list_objects_v2')
+                    now = datetime.now(timezone.utc)
+                    for page in paginator.paginate(Bucket=bucket_name):
+                        if 'Contents' in page:
+                            for key in page['Contents']:
+                                last_modified = key['LastModified']
+                                if abs((last_modified - now).days) > older_than_days:
+                                    logger.debug(f"Deleting expired object {key['Key']}")
+                                    s3_client.delete_object(Bucket=bucket_name, Key=key['Key'])
+
+
    def upload_locations(self):
        """
        Upload locations are configured on the Sources page as 'SIP Upload'.
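Note: the new `clean_upload_bucket` housekeeping call handles both backends behind one name: it resolves the named upload location, then deletes Azure blobs or S3 objects whose last-modified age exceeds `older_than_days`. A hedged usage sketch — credentials, server, and bucket name are placeholders:

```python
from pyPreservica import UploadAPI

client = UploadAPI(username="user@example.com", password="secret",
                   tenant="TENANT", server="eu.preservica.com")

# Remove staged transfer objects older than 30 days from the upload location.
client.clean_upload_bucket("com.preservica.tenant.upload", older_than_days=30)
```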
@@ -1612,7 +1325,8 @@ class UploadAPI(AuthenticatedAPI):
            self.token = self.__token__()
            return self.upload_locations()
        else:
-            exception = HTTPException("", request.status_code, request.url, "upload_locations",
+            exception = HTTPException("", request.status_code, request.url, "upload_locations",
+                                      request.content.decode('utf-8'))
            logger.error(exception)
            raise exception
 
@@ -1624,35 +1338,60 @@ class UploadAPI(AuthenticatedAPI):
        """
        return self.upload_locations()
 
-    def crawl_filesystem(self, filesystem_path, bucket_name, preservica_parent, callback: bool = False,
+    def crawl_filesystem(self, filesystem_path, bucket_name, preservica_parent, callback: bool = False,
+                         security_tag: str = "open",
                         delete_after_upload: bool = True, max_MB_ingested: int = -1):
 
+        from pyPreservica import EntityAPI
+
+        def entity_value(client: EntityAPI, identifier: str) -> Entity:
+            back_off: int = 5
+            while True:
+                try:
+                    entities = client.identifier("code", identifier)
+                    if bool(len(entities) > 0):
+                        return entities.pop()
+                    else:
+                        return None
+                except HTTPException as e:
+                    sleep(back_off)
+                    back_off = back_off * 2
+
+        def entity_exists(client: EntityAPI, identifier: str) -> bool:
+            back_off: int = 5
+            while True:
+                try:
+                    entities = client.identifier("code", identifier)
+                    return bool(len(entities) > 0)
+                except HTTPException as e:
+                    sleep(back_off)
+                    back_off = back_off * 2
+
        def get_parent(client, identifier, parent_reference):
-
-            if not
-
-
-            if
-                folder = entities.pop()
+            dirname_id: str = str(os.path.dirname(identifier))
+            if not dirname_id:
+                dirname_id = identifier
+            folder = entity_value(client, dirname_id)
+            if folder is not None:
                folder = client.folder(folder.reference)
                return folder.reference
            else:
                return parent_reference
 
        def get_folder(client, name, tag, parent_reference, identifier):
-
-            if
+            folder = entity_value(client, identifier)
+            if folder is None:
                logger.info(f"Creating new folder with name {name}")
                folder = client.create_folder(name, name, tag, parent_reference)
                client.add_identifier(folder, "code", identifier)
            else:
                logger.info(f"Found existing folder with name {name}")
-                folder = entities.pop()
            return folder
 
-
-
-        two_fa_secret_key=self.two_fa_secret_key, use_shared_secret=self.shared_secret,
+        entity_client = EntityAPI(username=self.username, password=self.password, server=self.server,
+                                  tenant=self.tenant,
+                                  two_fa_secret_key=self.two_fa_secret_key, use_shared_secret=self.shared_secret,
+                                  protocol=self.protocol)
 
        if preservica_parent:
            parent = entity_client.folder(preservica_parent)
|
|
|
1678
1417
|
files.remove(file)
|
|
1679
1418
|
continue
|
|
1680
1419
|
asset_code = os.path.join(code, file)
|
|
1681
|
-
if
|
|
1420
|
+
if not entity_exists(entity_client, asset_code):
|
|
1682
1421
|
bytes_ingested = bytes_ingested + os.stat(full_path).st_size
|
|
1683
1422
|
logger.info(f"Adding new file: {file} to package ready for upload")
|
|
1684
1423
|
file_identifiers = {"code": asset_code}
|
|
@@ -1696,8 +1435,14 @@ class UploadAPI(AuthenticatedAPI):
|
|
|
1696
1435
|
else:
|
|
1697
1436
|
progress_display = None
|
|
1698
1437
|
|
|
1699
|
-
|
|
1438
|
+
if bucket_name is None:
|
|
1439
|
+
self.upload_zip_package(path_to_zip_package=package, callback=progress_display,
|
|
1440
|
+
delete_after_upload=delete_after_upload)
|
|
1441
|
+
else:
|
|
1442
|
+
self.upload_zip_to_Source(path_to_zip_package=package, container_name=bucket_name,
|
|
1443
|
+
show_progress=bool(progress_display is not None),
|
|
1700
1444
|
delete_after_upload=delete_after_upload)
|
|
1445
|
+
|
|
1701
1446
|
logger.info(f"Uploaded " + "{:.1f}".format(bytes_ingested / (1024 * 1024)) + " MB")
|
|
1702
1447
|
|
|
1703
1448
|
if max_MB_ingested > 0:
|
|
@@ -1705,7 +1450,8 @@ class UploadAPI(AuthenticatedAPI):
|
|
|
1705
1450
|
logger.info(f"Reached Max Upload Limit")
|
|
1706
1451
|
break
|
|
1707
1452
|
|
|
1708
|
-
def upload_zip_to_Source(self, path_to_zip_package, container_name, folder=None, delete_after_upload=False,
|
|
1453
|
+
def upload_zip_to_Source(self, path_to_zip_package, container_name, folder=None, delete_after_upload=False,
|
|
1454
|
+
show_progress=False):
|
|
1709
1455
|
|
|
1710
1456
|
"""
|
|
1711
1457
|
Uploads a zip file package to either an Azure container or S3 bucket
|
|
@@ -1726,13 +1472,17 @@ class UploadAPI(AuthenticatedAPI):
|
|
|
1726
1472
|
callback = None
|
|
1727
1473
|
if show_progress:
|
|
1728
1474
|
callback = UploadProgressConsoleCallback(path_to_zip_package)
|
|
1729
|
-
self.upload_zip_package_to_S3(path_to_zip_package=path_to_zip_package, bucket_name=container_name,
|
|
1475
|
+
self.upload_zip_package_to_S3(path_to_zip_package=path_to_zip_package, bucket_name=container_name,
|
|
1476
|
+
folder=folder,
|
|
1730
1477
|
callback=callback, delete_after_upload=delete_after_upload)
|
|
1731
1478
|
else:
|
|
1732
|
-
self.upload_zip_package_to_Azure(path_to_zip_package=path_to_zip_package,
|
|
1733
|
-
|
|
1479
|
+
self.upload_zip_package_to_Azure(path_to_zip_package=path_to_zip_package,
|
|
1480
|
+
container_name=container_name, folder=folder,
|
|
1481
|
+
delete_after_upload=delete_after_upload,
|
|
1482
|
+
show_progress=show_progress)
|
|
1734
1483
|
|
|
1735
|
-
def upload_zip_package_to_Azure(self, path_to_zip_package, container_name, folder=None, delete_after_upload=False,
|
|
1484
|
+
def upload_zip_package_to_Azure(self, path_to_zip_package, container_name, folder=None, delete_after_upload=False,
|
|
1485
|
+
show_progress=False):
|
|
1736
1486
|
|
|
1737
1487
|
"""
|
|
1738
1488
|
Uploads a zip file package to an Azure container connected to a Preservica Cloud System
|
|
@@ -1745,7 +1495,8 @@ class UploadAPI(AuthenticatedAPI):
|
|
|
1745
1495
|
"""
|
|
1746
1496
|
|
|
1747
1497
|
if (self.major_version < 7) and (self.minor_version < 5):
|
|
1748
|
-
raise RuntimeError(
|
|
1498
|
+
raise RuntimeError(
|
|
1499
|
+
"This call [upload_zip_package_to_Azure] is only available against v6.5 systems and above")
|
|
1749
1500
|
|
|
1750
1501
|
from azure.storage.blob import ContainerClient
|
|
1751
1502
|
|
|
@@ -1773,11 +1524,13 @@ class UploadAPI(AuthenticatedAPI):
 
        if show_progress:
            with tqdm.wrapattr(open(path_to_zip_package, 'rb'), "read", total=len_bytes) as data:
-                blob_client = container.upload_blob(name=upload_key, data=data, metadata=metadata,
+                blob_client = container.upload_blob(name=upload_key, data=data, metadata=metadata,
+                                                    length=len_bytes)
                properties = blob_client.get_blob_properties()
        else:
            with open(path_to_zip_package, "rb") as data:
-                blob_client = container.upload_blob(name=upload_key, data=data, metadata=metadata,
+                blob_client = container.upload_blob(name=upload_key, data=data, metadata=metadata,
+                                                    length=len_bytes)
                properties = blob_client.get_blob_properties()
 
        if delete_after_upload:
@@ -1785,7 +1538,8 @@ class UploadAPI(AuthenticatedAPI):
 
        return properties
 
-    def upload_zip_package_to_S3(self, path_to_zip_package, bucket_name, folder=None, callback=None,
+    def upload_zip_package_to_S3(self, path_to_zip_package, bucket_name, folder=None, callback=None,
+                                 delete_after_upload=False):
 
        """
        Uploads a zip file package to an S3 bucket connected to a Preservica Cloud System
@@ -1814,7 +1568,8 @@ class UploadAPI(AuthenticatedAPI):
        session_token = credentials['sessionToken']
        endpoint = credentials['endpoint']
 
-        session = boto3.Session(aws_access_key_id=access_key, aws_secret_access_key=secret_key,
+        session = boto3.Session(aws_access_key_id=access_key, aws_secret_access_key=secret_key,
+                                aws_session_token=session_token)
        s3 = session.resource(service_name="s3")
 
        logger.debug(f"S3 Session: {s3}")
@@ -1833,7 +1588,8 @@ class UploadAPI(AuthenticatedAPI):
 
        metadata_map = {'Metadata': metadata}
 
-        s3_object.upload_file(path_to_zip_package, Callback=callback, ExtraArgs=metadata_map,
+        s3_object.upload_file(path_to_zip_package, Callback=callback, ExtraArgs=metadata_map,
+                              Config=transfer_config)
 
        if delete_after_upload:
            os.remove(path_to_zip_package)
@@ -1859,8 +1615,42 @@ class UploadAPI(AuthenticatedAPI):
        endpoint = f'{self.protocol}://{self.server}/api/s3/buckets'
        self.token = self.__token__()
 
-
-
+
+        retries = {
+            'max_attempts': 5,
+            'mode': 'adaptive'
+        }
+
+        def new_credentials():
+            cred_metadata: dict = {}
+            cred_metadata['access_key'] = self.__token__()
+            cred_metadata['secret_key'] = "NOT_USED"
+            cred_metadata['token'] = ""
+            cred_metadata["expiry_time"] = (datetime.now(tzlocal()) + timedelta(minutes=12)).isoformat()
+            logger.info("Refreshing credentials at: " + str(datetime.now(tzlocal())))
+            return cred_metadata
+
+        session = get_session()
+
+        session_credentials = RefreshableCredentials.create_from_metadata(
+            metadata=new_credentials(),
+            refresh_using=new_credentials,
+            advisory_timeout=4 * 60,
+            mandatory_timeout=12 * 60,
+            method='Preservica'
+        )
+
+        autorefresh_session = boto3.Session(botocore_session=session)
+
+        session._credentials = session_credentials
+
+        config = Config(s3={'addressing_style': 'path'}, read_timeout=120, connect_timeout=120,
+                        request_checksum_calculation="WHEN_REQUIRED",
+                        response_checksum_validation="WHEN_REQUIRED",
+                        retries=retries, tcp_keepalive=True)
+
+
+        s3_client = autorefresh_session.client('s3', endpoint_url=endpoint, config=config)
 
        metadata = {}
        if folder is not None:
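Note: the block above replaces fixed session credentials with botocore's refreshable kind, so a long multipart upload never outlives its Preservica access token. A condensed sketch of the same pattern — `fetch_token` is a hypothetical stand-in for the client's private `__token__()` call:

```python
from datetime import datetime, timedelta

import boto3
from botocore.credentials import RefreshableCredentials
from botocore.session import get_session
from dateutil.tz import tzlocal

def fetch_token() -> str:
    return "fresh-preservica-access-token"  # placeholder for self.__token__()

def new_credentials() -> dict:
    # botocore calls this again shortly before expiry_time is reached.
    return {"access_key": fetch_token(), "secret_key": "NOT_USED", "token": "",
            "expiry_time": (datetime.now(tzlocal()) + timedelta(minutes=12)).isoformat()}

botocore_session = get_session()
botocore_session._credentials = RefreshableCredentials.create_from_metadata(
    metadata=new_credentials(), refresh_using=new_credentials, method="Preservica")
session = boto3.Session(botocore_session=botocore_session)
```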
@@ -1873,20 +1663,48 @@ class UploadAPI(AuthenticatedAPI):
        try:
            key_id = str(uuid.uuid4()) + ".zip"
 
+            # how big is the package
+            package_size = os.path.getsize(path_to_zip_package)
+            if package_size > 1 * GB:
+                transfer_config.multipart_chunksize = 16 * MB  ## Min 64 Chunks
+            if package_size > 8 * GB:
+                transfer_config.multipart_chunksize = 32 * MB  ## Min 256 Chunks
+            if package_size > 24 * GB:
+                transfer_config.multipart_chunksize = 48 * MB  ## Min 512 Chunks
+            if package_size > 48 * GB:
+                transfer_config.multipart_chunksize = 64 * MB
+
+            logger.info("Using Multipart Chunk Size: " + str(transfer_config.multipart_chunksize))
+
            transfer = S3Transfer(client=s3_client, config=transfer_config)
 
            transfer.PutObjectTask = PutObjectTask
            transfer.CompleteMultipartUploadTask = CompleteMultipartUploadTask
            transfer.upload_file = upload_file
 
-
+            response = transfer.upload_file(self=transfer, filename=path_to_zip_package, bucket=bucket,
+                                            key=key_id,
+                                            extra_args=metadata,
                                            callback=callback)
+
            if delete_after_upload:
                os.remove(path_to_zip_package)
 
            return response['ResponseMetadata']['HTTPHeaders']['preservica-progress-token']
 
-        except
-            logger.error(
-            raise
+        except (NoCredentialsError, PartialCredentialsError) as ex:
+            logger.error(ex)
+            raise ex
+
+        except ClientError as ex:
+            logger.error(ex)
+            raise ex