pyPreservica 2.0.3__py3-none-any.whl → 3.3.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pyPreservica might be problematic. Click here for more details.

@@ -10,17 +10,43 @@ licence: Apache License 2.0
10
10
  """
11
11
 
12
12
  import csv
13
- from typing import Generator
13
+ from io import BytesIO
14
+ from typing import Generator, Callable, Optional, Union
14
15
  from pyPreservica.common import *
15
16
 
16
17
  logger = logging.getLogger(__name__)
17
18
 
19
class SortOrder(Enum):
    """Direction in which search results are ordered on a sort field.

    The member *name* (``asc`` / ``desc``) is what ``_search_fields`` writes
    into the ``sortOrder`` entry of the search request.
    """

    # Ascending order.
    asc = 1
    # Descending order.
    desc = 2
22
+
23
class Field:
    """One index field taking part in a ``search_fields`` query.

    A field can filter the results (``value``, optionally negated through
    ``operator``), order them (``sort_order``), or do both. Its ``name`` is
    also requested as a metadata column of the search results.
    """

    # Name of the search index, e.g. "xip.title".
    name: str
    # Term the index is filtered on; None or "" means "do not filter".
    value: Optional[str]
    # Only "NOT" is treated specially by _search_fields; anything else is ignored.
    operator: Optional[str]
    # Requested ordering on this field, or None when it is not a sort key.
    sort_order: Optional["SortOrder"]

    def __init__(self, name: str, value: Optional[str] = None, operator: Optional[str] = None,
                 sort_order: Optional["SortOrder"] = None):
        """
        Create a search field.

        :param name: The name of the search index
        :param value: The value to filter on, None (the default) to only return or sort on the field
        :param operator: Optional operator, "NOT" excludes results matching the value
        :param sort_order: Optional sort direction for this field
        """
        self.name = name
        self.value = value
        self.operator = operator
        self.sort_order = sort_order

    def __repr__(self) -> str:
        return (f"{type(self).__name__}(name={self.name!r}, value={self.value!r}, "
                f"operator={self.operator!r}, sort_order={self.sort_order!r})")
34
+
35
+
18
36
 
19
37
  class ContentAPI(AuthenticatedAPI):
38
+ """
39
+ The ContentAPI class provides the search interface to the Preservica repository.
40
+
41
+ """
42
+
43
+
44
def __init__(
        self,
        username: Optional[str] = None,
        password: Optional[str] = None,
        tenant: Optional[str] = None,
        server: Optional[str] = None,
        use_shared_secret: bool = False,
        two_fa_secret_key: Optional[str] = None,
        protocol: str = "https",
        request_hook: Optional[Callable] = None,
        credentials_path: str = 'credentials.properties',
):
    """
    Create a content API client.

    Every argument is handed, unchanged and in the same order, to the
    ``AuthenticatedAPI`` constructor; see that class for their meaning.

    :param username: Forwarded to AuthenticatedAPI
    :param password: Forwarded to AuthenticatedAPI
    :param tenant: Forwarded to AuthenticatedAPI
    :param server: Forwarded to AuthenticatedAPI
    :param use_shared_secret: Forwarded to AuthenticatedAPI
    :param two_fa_secret_key: Forwarded to AuthenticatedAPI
    :param protocol: Forwarded to AuthenticatedAPI
    :param request_hook: Forwarded to AuthenticatedAPI
    :param credentials_path: Forwarded to AuthenticatedAPI
    """
    super().__init__(
        username, password, tenant, server, use_shared_secret, two_fa_secret_key,
        protocol, request_hook, credentials_path,
    )
    # Optional progress hook: when set, _search_fields calls it with a
    # "<results so far>:<total hits>" string after every page.
    self.callback = None
25
51
 
26
52
  class SearchResult:
@@ -56,7 +82,8 @@ class ContentAPI(AuthenticatedAPI):
56
82
  params = {'id': f'sdb:{entity_type.value}|{reference}'}
57
83
  else:
58
84
  params = {'id': f'sdb:{entity_type}|{reference}'}
59
- request = self.session.get(f'{self.protocol}://{self.server}/api/content/object-details', params=params, headers=headers)
85
+ request = self.session.get(f'{self.protocol}://{self.server}/api/content/object-details', params=params,
86
+ headers=headers)
60
87
  if request.status_code == requests.codes.ok:
61
88
  return request.json()["value"]
62
89
  elif request.status_code == requests.codes.not_found:
@@ -69,6 +96,29 @@ class ContentAPI(AuthenticatedAPI):
69
96
  logger.error(f"object_details failed with error code: {request.status_code}")
70
97
  raise RuntimeError(request.status_code, f"object_details failed with error code: {request.status_code}")
71
98
 
99
+
100
def download_bytes(self, reference):
    """
    Download content from the repository into memory instead of a file.

    The request goes to ``/api/content/download`` with the id
    ``sdb:IO|<reference>`` and the response body is streamed into a buffer.

    :param reference: The reference of the object whose content is wanted
    :return: A BytesIO holding the downloaded bytes, positioned at the start
    :raises RuntimeError: If the reference is not found or the request fails
    """
    request_headers = {HEADER_TOKEN: self.token, 'Content-Type': 'application/octet-stream'}
    query = {'id': f'sdb:IO|{reference}'}
    url = f'{self.protocol}://{self.server}/api/content/download'

    with self.session.get(url, params=query, headers=request_headers, stream=True) as response:
        status = response.status_code

        if status == requests.codes.ok:
            buffer = BytesIO()
            for block in response.iter_content(chunk_size=CHUNK_SIZE):
                buffer.write(block)
            # Rewind so the caller can read from the first byte.
            buffer.seek(0)
            return buffer

        if status == requests.codes.unauthorized:
            # The token was rejected: fetch a new one and repeat the call.
            self.token = self.__token__()
            return self.download_bytes(reference)

        if status == requests.codes.not_found:
            logger.error(f"The requested asset reference is not found in the repository: {reference}")
            raise RuntimeError(reference, "The requested reference is not found in the repository")

        logger.error(f"download failed with error code: {status}")
        raise RuntimeError(status, f"download failed with error code: {status}")
120
+
121
+
72
122
  def download(self, reference, filename):
73
123
  headers = {HEADER_TOKEN: self.token, 'Content-Type': 'application/octet-stream'}
74
124
  params = {'id': f'sdb:IO|{reference}'}
@@ -91,6 +141,27 @@ class ContentAPI(AuthenticatedAPI):
91
141
  logger.error(f"download failed with error code: {req.status_code}")
92
142
  raise RuntimeError(req.status_code, f"download failed with error code: {req.status_code}")
93
143
 
144
def thumbnail_bytes(self, entity_type, reference: str, size: Thumbnail = Thumbnail.LARGE) -> Union[BytesIO, None]:
    """
    Fetch the thumbnail image of an entity into memory instead of a file.

    The request goes to ``/api/content/thumbnail`` asking for ``image/png``.
    Every path through this method either returns a buffer or raises, so
    None is never actually returned despite the annotation.

    :param entity_type: The entity type, formatted as-is into the ``sdb:<type>|<reference>`` id
    :param reference: The reference of the entity
    :param size: The thumbnail size; its ``value`` is sent as the ``size`` parameter
    :return: A BytesIO holding the image bytes, positioned at the start
    :raises RuntimeError: If the reference is not found or the request fails
    """
    request_headers = {HEADER_TOKEN: self.token, 'accept': 'image/png'}
    query = {'id': f'sdb:{entity_type}|{reference}', 'size': f'{size.value}'}
    url = f'{self.protocol}://{self.server}/api/content/thumbnail'

    with self.session.get(url, params=query, headers=request_headers, stream=True) as response:
        status = response.status_code

        if status == requests.codes.ok:
            buffer = BytesIO()
            for block in response.iter_content(chunk_size=CHUNK_SIZE):
                buffer.write(block)
            # Rewind so the caller can read from the first byte.
            buffer.seek(0)
            return buffer

        if status == requests.codes.unauthorized:
            # The token was rejected: fetch a new one and repeat the call.
            self.token = self.__token__()
            return self.thumbnail_bytes(entity_type, reference, size)

        if status == requests.codes.not_found:
            # Log the server's own explanation before the generic message.
            logger.error(response.content.decode("utf-8"))
            logger.error(f"The requested reference is not found in the repository: {reference}")
            raise RuntimeError(reference, "The requested reference is not found in the repository")

        logger.error(f"thumbnail failed with error code: {status}")
        raise RuntimeError(status, f"thumbnail failed with error code: {status}")
164
+
94
165
  def thumbnail(self, entity_type, reference, filename, size=Thumbnail.LARGE):
95
166
  headers = {HEADER_TOKEN: self.token, 'accept': 'image/png'}
96
167
  params = {'id': f'sdb:{entity_type}|{reference}', 'size': f'{size.value}'}
@@ -129,8 +200,8 @@ class ContentAPI(AuthenticatedAPI):
129
200
  logger.error(f"indexed_fields failed with error code: {results.status_code}")
130
201
  raise RuntimeError(results.status_code, f"indexed_fields failed with error code: {results.status_code}")
131
202
 
132
- def simple_search_csv(self, query: str = "%", csv_file="search.csv", list_indexes: list = None):
133
- page_size = 50
203
+ def simple_search_csv(self, query: str = "%", page_size: int = 50, csv_file="search.csv",
204
+ list_indexes: list = None):
134
205
  if list_indexes is None or len(list_indexes) == 0:
135
206
  metadata_fields = ["xip.reference", "xip.title", "xip.description", "xip.document_type",
136
207
  "xip.parent_ref", "xip.security_descriptor"]
@@ -163,7 +234,8 @@ class ContentAPI(AuthenticatedAPI):
163
234
  else:
164
235
  metadata_fields = ','.join(list_indexes)
165
236
  payload = {'start': start_from, 'max': str(page_size), 'metadata': metadata_fields, 'q': query_term}
166
- results = self.session.post(f'{self.protocol}://{self.server}/api/content/search', data=payload, headers=headers)
237
+ results = self.session.post(f'{self.protocol}://{self.server}/api/content/search', data=payload,
238
+ headers=headers)
167
239
  results_list = []
168
240
  if results.status_code == requests.codes.ok:
169
241
  json_doc = results.json()
@@ -192,8 +264,9 @@ class ContentAPI(AuthenticatedAPI):
192
264
  logger.error(f"search failed with error code: {results.status_code}")
193
265
  raise RuntimeError(results.status_code, f"simple_search failed with error code: {results.status_code}")
194
266
 
195
- def search_index_filter_csv(self, query: str = "%", csv_file="search.csv", filter_values: dict = None):
196
- page_size = 50
267
+ def search_index_filter_csv(self, query: str = "%", csv_file="search.csv", page_size: int = 50,
268
+ filter_values: dict = None,
269
+ sort_values: dict = None):
197
270
  if filter_values is None:
198
271
  filter_values = {}
199
272
  if "xip.reference" not in filter_values:
@@ -205,23 +278,116 @@ class ContentAPI(AuthenticatedAPI):
205
278
  with open(csv_file, newline='', mode="wt", encoding="utf-8") as csv_file:
206
279
  writer = csv.DictWriter(csv_file, fieldnames=header_fields)
207
280
  writer.writeheader()
208
- writer.writerows(self.search_index_filter_list(query, page_size, filter_values))
281
+ writer.writerows(self.search_index_filter_list(query, page_size, filter_values, sort_values))
209
282
 
210
- def search_index_filter_list(self, query: str = "%", page_size: int = 25, filter_values: dict = None) -> Generator:
283
def search_fields(self, query: str = "%", fields: Optional[list["Field"]] = None, page_size: int = 25) -> Generator:
    """
    Run a search query with multiple fields

    Results are fetched one page at a time and yielded one by one. As this
    is a generator, nothing (including the version check) runs until the
    first item is requested.

    :param query: The main search query.
    :param fields: List of search fields
    :param page_size: The default search page size
    :return: search result
    :raises RuntimeError: If the connected system is older than v7.5
    """

    # Compare (major, minor) as a pair: testing the two numbers separately
    # with "and" would let e.g. 6.9 through because 9 is not below 5.
    if (self.major_version, self.minor_version) < (7, 5):
        raise RuntimeError("search_fields API call is only available when connected to a v7.5 or later System")

    found = 0
    while True:
        search_result = self._search_fields(query=query, fields=fields, start_index=found, page_size=page_size)
        page = search_result.results_list
        yield from page
        found += len(page)
        # An empty page means no progress can be made; stop instead of
        # requesting the same start index forever when hits is overstated.
        if not page or found >= search_result.hits:
            break
305
+
306
def _search_fields(self, query: str = "%", fields: Optional[list["Field"]] = None, start_index: int = 0,
                   page_size: int = 25):
    """
    Fetch a single page of results for a multi-field search.

    :param query: The main search query.
    :param fields: List of search fields; each name is also requested as result metadata
    :param start_index: Index of the first result to return
    :param page_size: Maximum number of results in the page
    :return: A SearchResult holding the raw metadata, the references, the total
             number of hits, the per-result dictionaries and the next start index
    :raises RuntimeError: If the search request fails
    """
    # Imported locally so the block does not depend on what the module's
    # star-import happens to expose.
    import json

    start_from = str(start_index)
    headers = {'Content-Type': 'application/x-www-form-urlencoded', HEADER_TOKEN: self.token}

    if fields is None:
        fields = []

    field_terms = []
    sort_terms = []
    metadata_elements = []
    for field in fields:
        metadata_elements.append(field.name)

        term = {"name": field.name, "values": []}
        # A field without a value only selects/sorts a column; the operator
        # is meaningful only when there is a value to negate.
        if field.value is not None and field.value != "":
            # str() keeps non-string values serialised as JSON strings.
            term["values"].append(str(field.value))
            if field.operator == "NOT":
                term["operator"] = "NOT"
        field_terms.append(term)

        if field.sort_order is not None:
            sort_terms.append({"sortFields": [field.name], "sortOrder": field.sort_order.name})

    query_doc = {"q": query, "fields": field_terms}
    if sort_terms:
        query_doc["sort"] = sort_terms
    # Serialise with json.dumps so quotes or backslashes in the query,
    # field names or values cannot break or alter the JSON document.
    query_term = json.dumps(query_doc, ensure_ascii=False)

    if len(metadata_elements) == 0:
        metadata_elements.append("xip.title")

    payload = {'start': start_from, 'max': str(page_size), 'metadata': list(metadata_elements), 'q': query_term}
    logger.debug(payload)
    results = self.session.post(f'{self.protocol}://{self.server}/api/content/search', data=payload,
                                headers=headers)

    if results.status_code == requests.codes.ok:
        json_doc = results.json()
        metadata = json_doc['value']['metadata']
        refs = [content_api_identifier_to_type(x) for x in json_doc['value']['objectIds']]
        hits = int(json_doc['value']['totalHits'])

        results_list = []
        for m_row, r_row in zip(metadata, refs):
            # Start from the reference, then add every returned metadata column.
            results_map = {'xip.reference': r_row[1]}
            for li in m_row:
                results_map[li['name']] = li['value']
            results_list.append(results_map)
        next_start = start_index + page_size

        if self.callback is not None:
            # Report progress as "<results so far>:<total hits>".
            value = str(f'{len(results_list) + start_index}:{hits}')
            self.callback(value)

        return self.SearchResult(metadata, refs, hits, results_list, next_start)
    elif results.status_code == requests.codes.unauthorized:
        # The token was rejected: fetch a new one and repeat the call.
        self.token = self.__token__()
        return self._search_fields(query, fields, start_index, page_size)
    else:
        logger.error(f"search failed with error code: {results.status_code}")
        raise RuntimeError(results.status_code,
                           f"_search_fields failed with error code: {results.status_code}")
373
+
374
def search_index_filter_list(self, query: str = "%", page_size: int = 25, filter_values: dict = None,
                             sort_values: dict = None) -> Generator:
    """
    Run a search query with optional filters

    Results are fetched one page at a time and yielded one by one.

    :param query: The main search query.
    :param page_size: The default search page size
    :param filter_values: Dictionary of index names and values
    :param sort_values: Dictionary of sort index names and values
    :return: search result
    """
    found = 0
    while True:
        search_result = self._search_index_filter(query, found, page_size, filter_values, sort_values)
        page = search_result.results_list
        yield from page
        found += len(page)
        # An empty page means no progress can be made; stop instead of
        # requesting the same start index forever when hits is overstated.
        if not page or found >= search_result.hits:
            break
@@ -249,7 +415,8 @@ class ContentAPI(AuthenticatedAPI):
249
415
  query_term = ('{ "q": "%s", "fields": [ %s ] }' % (query, filter_terms))
250
416
 
251
417
  payload = {'start': start_from, 'max': str(10), 'metadata': list(filter_values.keys()), 'q': query_term}
252
- results = self.session.post(f'{self.protocol}://{self.server}/api/content/search', data=payload, headers=headers)
418
+ results = self.session.post(f'{self.protocol}://{self.server}/api/content/search', data=payload,
419
+ headers=headers)
253
420
  if results.status_code == requests.codes.ok:
254
421
  json_doc = results.json()
255
422
  return int(json_doc['value']['totalHits'])
@@ -261,10 +428,13 @@ class ContentAPI(AuthenticatedAPI):
261
428
  raise RuntimeError(results.status_code, f"_search_index_filter_hits failed")
262
429
 
263
430
  def _search_index_filter(self, query: str = "%", start_index: int = 0, page_size: int = 25,
264
- filter_values: dict = None):
431
+ filter_values: dict = None, sort_values: dict = None):
265
432
  start_from = str(start_index)
266
433
  headers = {'Content-Type': 'application/x-www-form-urlencoded', HEADER_TOKEN: self.token}
267
434
 
435
+ if filter_values is None:
436
+ filter_values = {}
437
+
268
438
  field_list = []
269
439
  for key, value in filter_values.items():
270
440
  if value == "":
@@ -274,11 +444,22 @@ class ContentAPI(AuthenticatedAPI):
274
444
 
275
445
  filter_terms = ','.join(field_list)
276
446
 
277
- query_term = ('{ "q": "%s", "fields": [ %s ] }' % (query, filter_terms))
447
+ if sort_values is None:
448
+ query_term = ('{ "q": "%s", "fields": [ %s ] }' % (query, filter_terms))
449
+ else:
450
+ sort_list = []
451
+ for key, value in sort_values.items():
452
+ direction = "asc"
453
+ if str(value).lower().startswith("d"):
454
+ direction = "desc"
455
+ sort_list.append(f'{{"sortFields": ["{key}"], "sortOrder": "{direction}"}}')
456
+ sort_terms = ','.join(sort_list)
457
+ query_term = ('{ "q": "%s", "fields": [ %s ], "sort": [ %s ]}' % (query, filter_terms, sort_terms))
278
458
 
279
459
  payload = {'start': start_from, 'max': str(page_size), 'metadata': list(filter_values.keys()), 'q': query_term}
280
460
  logger.debug(payload)
281
- results = self.session.post(f'{self.protocol}://{self.server}/api/content/search', data=payload, headers=headers)
461
+ results = self.session.post(f'{self.protocol}://{self.server}/api/content/search', data=payload,
462
+ headers=headers)
282
463
  results_list = []
283
464
  if results.status_code == requests.codes.ok:
284
465
  json_doc = results.json()