pyPreservica 2.0.3__py3-none-any.whl → 3.3.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pyPreservica might be problematic. Click here for more details.

pyPreservica/common.py CHANGED
@@ -7,12 +7,13 @@ author: James Carr
7
7
  licence: Apache License 2.0
8
8
 
9
9
  """
10
-
11
10
  import configparser
11
+ import functools
12
12
  import hashlib
13
13
  import json
14
14
  import logging
15
15
  import os
16
+ import platform
16
17
  import re
17
18
  import sys
18
19
  import threading
@@ -22,15 +23,18 @@ import xml.etree.ElementTree
22
23
  from enum import Enum
23
24
  from pathlib import Path
24
25
  import pyotp
25
-
26
+ from requests import Session
27
+ from urllib3.util import Retry
26
28
  import requests
29
+ from requests.adapters import HTTPAdapter
30
+ from typing import TypeVar
31
+ from datetime import datetime
32
+ import dateutil
27
33
 
28
34
  import pyPreservica
29
35
 
30
36
  logger = logging.getLogger(__name__)
31
37
 
32
- CHUNK_SIZE = 1024 * 2
33
-
34
38
  NS_XIP_ROOT = "http://preservica.com/XIP/"
35
39
  NS_ENTITY_ROOT = "http://preservica.com/EntityAPI/"
36
40
  NS_RM_ROOT = "http://preservica.com/RetentionManagement/"
@@ -50,6 +54,8 @@ SO_PATH = "structural-objects"
50
54
  CO_PATH = "content-objects"
51
55
 
52
56
  HASH_BLOCK_SIZE = 65536
57
+ TIME_OUT = 62
58
+ CHUNK_SIZE = 1024 * 4
53
59
 
54
60
 
55
61
  class FileHash:
@@ -76,9 +82,9 @@ class FileHash:
76
82
 
77
83
  def identifiersToDict(identifiers: set) -> dict:
78
84
  """
79
- Convert a set of tuples to a dict
80
- :param identifiers:
81
- :return:
85
+ Convert a set of tuples to a dict
86
+ :param identifiers:
87
+ :return:
82
88
  """
83
89
  result = {}
84
90
  for identifier_tuple in identifiers:
@@ -124,7 +130,7 @@ def _make_stored_zipfile(base_name, base_dir, owner, group, verbose=0, dry_run=0
124
130
 
125
131
  if logger is not None:
126
132
  logger.info("creating '%s' and adding '%s' to it",
127
- zip_filename, base_dir)
133
+ zip_filename, base_dir)
128
134
 
129
135
  if not dry_run:
130
136
  with zipfile.ZipFile(zip_filename, "w", compression=zipfile.ZIP_STORED) as zf:
@@ -402,21 +408,45 @@ class Bitstream:
402
408
  self.length = int(length)
403
409
  self.fixity = fixity
404
410
  self.content_url = content_url
411
+ self.bs_index = None
412
+ self.gen_index = None
413
+ self.co_ref = None
405
414
 
406
415
  def __str__(self):
407
- return f"Filename:\t\t\t{self.filename}\n" \
408
- f"FileSize:\t\t\t{self.length}\n" \
409
- f"Content:\t{self.content_url}\n" \
410
- f"Fixity:\t{self.fixity}"
416
+ return f"""
417
+ Filename: {self.filename}
418
+ File Length: {self.length}
419
+ Fixity: {self.fixity}
420
+ """
411
421
 
412
422
  def __repr__(self):
413
423
  return self.__str__()
414
424
 
415
425
 
426
+ class ExternIdentifier:
427
+ """
428
+ Class to represent the External Identifier Object in the Preservica data model
429
+ """
430
+
431
+ def __init__(self, identifier_type: str, identifier_value: str):
432
+ self.type = identifier_type
433
+ self.value = identifier_value
434
+ self.id = None
435
+
436
+ def __str__(self):
437
+ return f"""
438
+ Identifier: {self.id}
439
+ Identifier Type: {self.type}
440
+ Identifier Value: {self.value}
441
+ """
442
+
443
+ def __repr__(self):
444
+ return self.__str__()
445
+
416
446
  class Generation:
417
447
  """
418
448
  Class to represent the Generation Object in the Preservica data model
419
- """
449
+ """
420
450
 
421
451
  def __init__(self, original: bool, active: bool, format_group: str, effective_date: str, bitstreams: list):
422
452
  self.original = bool(original)
@@ -425,11 +455,18 @@ class Generation:
425
455
  self.format_group = format_group
426
456
  self.effective_date = effective_date
427
457
  self.bitstreams = bitstreams
458
+ self.properties = list()
459
+ self.formats = list()
428
460
 
429
461
  def __str__(self):
430
- return f"Active:\t\t\t{self.active}\n" \
431
- f"Original:\t\t\t{self.original}\n" \
432
- f"Format_group:\t{self.format_group}"
462
+ return f"""
463
+ Active: {self.active}
464
+ Original: {self.original}
465
+ Format Group: {self.format_group}
466
+ Effective Date: {self.effective_date}
467
+ Formats: {self.formats}
468
+ Properties: {self.properties}
469
+ """
433
470
 
434
471
  def __repr__(self):
435
472
  return self.__str__()
@@ -453,27 +490,23 @@ class Entity:
453
490
  self.custom_type = None
454
491
 
455
492
  def __str__(self):
456
- if self.custom_type is None:
457
- return f"Ref:\t\t\t{self.reference}\n" \
458
- f"Title:\t\t\t{self.title}\n" \
459
- f"Description:\t{self.description}\n" \
460
- f"Security Tag:\t{self.security_tag}\n" \
461
- f"Parent:\t\t\t{self.parent}\n\n"
462
- else:
463
- return f"Ref:\t\t\t{self.reference}\n" \
464
- f"Title:\t\t\t{self.title}\n" \
465
- f"Description:\t{self.description}\n" \
466
- f"Security Tag:\t{self.security_tag}\n" \
467
- f"Parent:\t\t\t{self.parent}\n" \
468
- f"Type:\t\t\t{self.custom_type}\n\n"
493
+ return f"""
494
+ Entity: {self.entity_type}
495
+ Entity Ref: {self.reference}
496
+ Title: {self.title}
497
+ Description: {self.description}
498
+ Security Tag: {self.security_tag}
499
+ Parent: {self.parent}
500
+ Custom Type: {self.custom_type}
501
+ """
469
502
 
470
503
  def __repr__(self):
471
504
  return self.__str__()
472
505
 
473
- def has_metadata(self):
506
+ def has_metadata(self) -> bool:
474
507
  return bool(self.metadata)
475
508
 
476
- def metadata_namespaces(self):
509
+ def metadata_namespaces(self) -> list:
477
510
  return list(self.metadata.values())
478
511
 
479
512
 
@@ -518,6 +551,9 @@ class ContentObject(Entity):
518
551
  self.tag = "ContentObject"
519
552
 
520
553
 
554
+ EntityT = TypeVar("EntityT", Folder, Asset, ContentObject, None)
555
+
556
+
521
557
  class Representation:
522
558
  """
523
559
  Class to represent the Representation Object in the Preservica data model
@@ -558,14 +594,30 @@ class Thumbnail(Enum):
558
594
  LARGE = "large"
559
595
 
560
596
 
597
class AsyncProgress(Enum):
    """
    States that an asynchronous process can report.

    Every member's value is the upper-case status string of the same name,
    so a status string can be converted with ``AsyncProgress(status)``.
    """
    ABORTED = "ABORTED"
    ACTIVE = "ACTIVE"
    COMPLETED = "COMPLETED"
    PENDING = "PENDING"
    SUSPENDING = "SUSPENDING"
    SUSPENDED = "SUSPENDED"
    UNKNOWN = "UNKNOWN"
    FAILED = "FAILED"
    FINISHED_MIXED_OUTCOME = "FINISHED_MIXED_OUTCOME"
    CANCELLED = "CANCELLED"
611
+
612
+
561
613
  def sanitize(filename) -> str:
562
614
  """
563
615
  Return a fairly safe version of the filename.
564
616
 
565
617
  We don't limit ourselves to ascii, because we want to keep municipality
566
- names, etc, but we do want to get rid of anything potentially harmful,
618
+ names, etc., but we do want to get rid of anything potentially harmful,
567
619
  and make sure we do not exceed Windows filename length limits.
568
- Hence a less safe blacklist, rather than a whitelist.
620
+ Hence, a less safe blacklist, rather than a whitelist.
569
621
  """
570
622
  blacklist = ["\\", "/", ":", "*", "?", "\"", "<", ">", "|", "\0"]
571
623
  reserved = [
@@ -608,27 +660,39 @@ def sanitize(filename) -> str:
608
660
 
609
661
  class AuthenticatedAPI:
610
662
  """
611
- Base class for authenticated calls which need access token
663
+ Base class for authenticated calls which need an access token
664
+ Authenticated calls include a "Preservica-Access-Token" header in the request
612
665
  """
613
666
 
614
667
  def _check_if_user_has_manager_role(self):
668
+ """
669
+ Check if the current user has a least a manager role
670
+ :return: None
671
+
672
+ Throws RuntimeError if the user does not have required roles
673
+ """
615
674
  if ('ROLE_SDB_MANAGER_USER' not in self.roles) and ('ROLE_SDB_ADMIN_USER' not in self.roles):
616
675
  logger.error(f"The AdminAPI requires the user to have ROLE_SDB_MANAGER_USER")
617
- raise RuntimeError(f"The AdminAPI requires the user to have ROLE_SDB_MANAGER_USER")
676
+ raise RuntimeError(f"The API requires the user to have at least the ROLE_SDB_MANAGER_USER")
618
677
 
619
- def _find_user_roles_(self) -> list:
678
+ def _find_user_roles_(self) -> list[str]:
620
679
  """
621
- Get a list of roles for the user
622
- :return list of roles:
680
+ Get a list of roles for the user
681
+ :return list of roles:
623
682
  """
624
- headers = {HEADER_TOKEN: self.token, 'Content-Type': 'application/xml;charset=UTF-8'}
683
+ headers = {HEADER_TOKEN: self.token, 'Content-Type': 'application/json'}
625
684
  request = self.session.get(f"{self.protocol}://{self.server}/api/user/details", headers=headers)
685
+ logger.debug(request.headers)
626
686
  if request.status_code == requests.codes.ok:
627
- roles = json.loads(str(request.content.decode('utf-8')))['roles']
687
+ json_document = str(request.content.decode('utf-8'))
688
+ logger.debug(json_document)
689
+ roles: list[str] = json.loads(json_document)['roles']
628
690
  return roles
629
691
  elif request.status_code == requests.codes.unauthorized:
630
692
  self.token = self.__token__()
631
693
  return self._find_user_roles_()
694
+ return []
695
+
632
696
 
633
697
  def security_tags_base(self, with_permissions: bool = False) -> dict:
634
698
  """
@@ -651,10 +715,10 @@ class AuthenticatedAPI:
651
715
  security_tags = {}
652
716
  tags = entity_response.findall(f'.//{{{self.sec_ns}}}Tag')
653
717
  for tag in tags:
654
- permissions = []
655
- for p in tag.findall(f'.//{{{self.sec_ns}}}Permission'):
656
- permissions.append(p.text)
657
718
  if with_permissions:
719
+ permissions = []
720
+ for p in tag.findall(f'.//{{{self.sec_ns}}}Permission'):
721
+ permissions.append(p.text)
658
722
  security_tags[tag.attrib['name']] = permissions
659
723
  else:
660
724
  security_tags[tag.attrib['name']] = tag.attrib['name']
@@ -667,6 +731,12 @@ class AuthenticatedAPI:
667
731
  raise RuntimeError(request.status_code, "security_tags failed")
668
732
 
669
733
  def entity_from_string(self, xml_data: str) -> dict:
734
+ """
735
+ Create a basic entity from XML data
736
+
737
+ :param xml_data:
738
+ :return: dict
739
+ """
670
740
  entity_response = xml.etree.ElementTree.fromstring(xml_data)
671
741
  reference = entity_response.find(f'.//{{{self.xip_ns}}}Ref')
672
742
  title = entity_response.find(f'.//{{{self.xip_ns}}}Title')
@@ -694,10 +764,39 @@ class AuthenticatedAPI:
694
764
 
695
765
  return entity_dict
696
766
 
767
def edition(self) -> str:
    """
    Return the edition of this tenancy

    Sends a GET request to ``/api/entitlement/edition`` on the connected
    server. If the server answers 401 a new access token is requested and
    the call is retried.

    :return: the ``edition`` field of the JSON response
    :raises RuntimeError: if the connected server is older than v7.3
    :raises HTTPException: if the server answers with any status other than 200 or 401
    """
    # Compare (major, minor) as a pair. Testing each component on its own
    # ("major < 8 and minor < 3") lets versions such as 6.9 through even
    # though they are older than 7.3.
    if (self.major_version, self.minor_version) < (7, 3):
        raise RuntimeError("Entitlement API is only available when connected to a v7.3 System")

    headers = {HEADER_TOKEN: self.token, 'Content-Type': 'application/json'}

    response = self.session.get(f'{self.protocol}://{self.server}/api/entitlement/edition', headers=headers)

    if response.status_code == requests.codes.ok:
        return response.json()['edition']
    if response.status_code == requests.codes.unauthorized:
        # The token was rejected: fetch a fresh one and retry the call.
        self.token = self.__token__()
        return self.edition()

    exception = HTTPException("", response.status_code, response.url,
                              "edition", response.content.decode('utf-8'))
    logger.error(exception)
    raise exception
788
+
697
789
  def __version_namespace__(self):
698
790
  """
699
791
  Generate version specific namespaces from the server version
700
792
  """
793
+ if self.major_version > 6:
794
+ self.xip_ns = f"{NS_XIP_ROOT}v{self.major_version}.{self.minor_version}"
795
+ self.entity_ns = f"{NS_ENTITY_ROOT}v{self.major_version}.{self.minor_version}"
796
+ self.rm_ns = f"{NS_RM_ROOT}v{6}.{2}"
797
+ self.sec_ns = f"{NS_SEC_ROOT}/v{self.major_version}.{self.minor_version}"
798
+ self.admin_ns = f"{NS_ADMIN}/v{self.major_version}.{self.minor_version}"
799
+
701
800
  if self.major_version == 6:
702
801
  if self.minor_version < 2:
703
802
  self.xip_ns = NS_XIP_V6
@@ -709,6 +808,8 @@ class AuthenticatedAPI:
709
808
  self.sec_ns = f"{NS_SEC_ROOT}/v{self.major_version}.{self.minor_version}"
710
809
  self.admin_ns = f"{NS_ADMIN}/v{self.major_version}.{self.minor_version}"
711
810
 
811
+ xml.etree.ElementTree.register_namespace("xip", f"{self.xip_ns}")
812
+
712
813
  def __version_number__(self):
713
814
  """
714
815
  Determine the version number of the server
@@ -723,6 +824,7 @@ class AuthenticatedAPI:
723
824
  self.major_version = int(version_numbers[0])
724
825
  self.minor_version = int(version_numbers[1])
725
826
  self.patch_version = int(version_numbers[2])
827
+
726
828
  return version
727
829
  elif request.status_code == requests.codes.unauthorized:
728
830
  self.token = self.__token__()
@@ -731,9 +833,12 @@ class AuthenticatedAPI:
731
833
  logger.error(f"version number failed with http response {request.status_code}")
732
834
  logger.error(str(request.content))
733
835
  RuntimeError(request.status_code, "version number failed")
836
+ return None
837
+
838
+
734
839
 
735
840
  def __str__(self):
736
- return f"pyPreservica version: {pyPreservica.__version__} (Preservica 6.9 Compatible) " \
841
+ return f"pyPreservica version: {pyPreservica.__version__} (Preservica 8.0 Compatible) " \
737
842
  f"Connected to: {self.server} Preservica version: {self.version} as {self.username} " \
738
843
  f"in tenancy {self.tenant}"
739
844
 
@@ -750,7 +855,7 @@ class AuthenticatedAPI:
750
855
  with open('credentials.properties', 'wt', encoding="utf-8") as configfile:
751
856
  config.write(configfile)
752
857
 
753
- def manager_token(self, username: str, password: str):
858
+ def manager_token(self, username: str, password: str) -> str:
754
859
  data = {'username': username, 'password': password, 'tenant': self.tenant}
755
860
  response = self.session.post(f'{self.protocol}://{self.server}/api/accesstoken/login', data=data)
756
861
  if response.status_code == requests.codes.ok:
@@ -760,9 +865,13 @@ class AuthenticatedAPI:
760
865
  logger.error(msg)
761
866
  logger.error(response.status_code)
762
867
  logger.error(str(response.content))
763
- RuntimeError(response.status_code, "Could not generate valid manager approval password")
868
+ RuntimeError(response.status_code, "Could not generate valid manager approval token")
764
869
 
765
- def __token__(self):
870
+ def __token__(self) -> str:
871
+ """
872
+ Generate am API token to use to authenticate calls
873
+ :return: API Token
874
+ """
766
875
  logger.debug("Token Expired Requesting New Token")
767
876
  if self.shared_secret is False:
768
877
  if self.tenant is None:
@@ -781,20 +890,23 @@ class AuthenticatedAPI:
781
890
  if self.tenant is None:
782
891
  self.tenant = response.json()['tenant']
783
892
  if self.two_fa_secret_key:
893
+ logger.debug("Found Two Factor Token")
784
894
  totp = pyotp.TOTP(self.two_fa_secret_key)
785
895
  data = {'username': self.username,
786
896
  'continuationToken': response.json()['continuationToken'],
787
897
  'tenant': self.tenant, 'twoFactorToken': totp.now()}
898
+
899
+ header = {'Content-Type': 'application/x-www-form-urlencoded'}
788
900
  response_2fa = self.session.post(
789
901
  f'{self.protocol}://{self.server}/api/accesstoken/complete-2fa',
790
- data=data)
902
+ data=data, headers=header)
791
903
  if response_2fa.status_code == requests.codes.ok:
792
904
  return response_2fa.json()['token']
793
905
  else:
794
906
  msg = "Failed to create a 2FA authentication token. Check your credentials are correct"
795
907
  logger.error(msg)
796
- logger.error(str(response.content))
797
- raise RuntimeError(response.status_code, msg)
908
+ logger.error(str(response_2fa.content))
909
+ raise RuntimeError(response_2fa.status_code, msg)
798
910
  else:
799
911
  msg = "2FA twoFactorToken required to authenticate against this account using 2FA"
800
912
  logger.error(msg)
@@ -826,13 +938,30 @@ class AuthenticatedAPI:
826
938
  raise RuntimeError(response.status_code, msg)
827
939
 
828
940
  def __init__(self, username: str = None, password: str = None, tenant: str = None, server: str = None,
829
- use_shared_secret: bool = False, two_fa_secret_key: str = None, protocol: str = "https"):
941
+ use_shared_secret: bool = False, two_fa_secret_key: str = None,
942
+ protocol: str = "https", request_hook=None, credentials_path: str = 'credentials.properties'):
830
943
 
831
944
  config = configparser.ConfigParser(interpolation=configparser.Interpolation())
832
- config.read('credentials.properties', encoding='utf-8')
833
- self.session = requests.Session()
834
- self.shared_secret = bool(use_shared_secret)
945
+ config.read(os.path.relpath(credentials_path), encoding='utf-8')
946
+ self.session: Session = requests.Session()
947
+
948
+ if request_hook is not None:
949
+ self.session.hooks['response'].append(request_hook)
950
+
951
+ retries = Retry(
952
+ total=3,
953
+ backoff_factor=0.1,
954
+ status_forcelist=[502, 503, 504],
955
+ allowed_methods=Retry.DEFAULT_ALLOWED_METHODS
956
+ )
957
+
958
+ self.shared_secret: bool = bool(use_shared_secret)
835
959
  self.protocol = protocol
960
+ self.two_fa_secret_key = two_fa_secret_key
961
+
962
+ self.session.mount(f'{self.protocol}://', HTTPAdapter(max_retries=retries))
963
+
964
+ self.session.request = functools.partial(self.session.request, timeout=TIME_OUT)
836
965
 
837
966
  if not two_fa_secret_key:
838
967
  two_fa_secret_key = os.environ.get('PRESERVICA_2FA_TOKEN')
@@ -901,5 +1030,20 @@ class AuthenticatedAPI:
901
1030
  self.__version_namespace__()
902
1031
  self.roles = self._find_user_roles_()
903
1032
 
1033
+ self.session.headers.update({'User-Agent': f'pyPreservica SDK/({pyPreservica.__version__}) '
1034
+ f' ({platform.platform()}/{os.name}/{sys.platform})'})
1035
+
904
1036
  logger.debug(self.xip_ns)
905
1037
  logger.debug(self.entity_ns)
1038
+
1039
def parse_date_to_iso(date):
    """
    Normalise a date/time string to a timestamp with an explicit UTC offset.

    The string is parsed with ``datetime.fromisoformat``; if that raises
    ``ValueError`` it is passed to ``dateutil.parser.parse`` instead. A value
    that carries no timezone information is treated as UTC.

    :param date: the date/time string to normalise
    :return: the timestamp formatted as ``%Y-%m-%dT%H:%M:%S.%f%z``

    Errors raised by dateutil for unparseable input propagate to the caller.
    """
    # Imported locally under private names so the function works whether the
    # enclosing module imported the ``datetime`` module or the ``datetime`` class.
    from datetime import datetime as _datetime, timezone as _timezone

    try:
        # fromisoformat before Python 3.11 only accepts the "+HH:MM" offset
        # spelling, so a trailing "Z" is rewritten as "+00:00".
        parsed = _datetime.fromisoformat(date.replace('Z', '+00:00'))
    except ValueError:
        # ``parser`` is a submodule and is not guaranteed to be loaded by a
        # bare ``import dateutil``; import it explicitly where it is needed.
        import dateutil.parser
        parsed = dateutil.parser.parse(date)

    if parsed.tzinfo is None or parsed.tzinfo.utcoffset(parsed) is None:
        parsed = parsed.replace(tzinfo=_timezone.utc)

    return parsed.strftime('%Y-%m-%dT%H:%M:%S.%f%z')