pyxecm 1.4__py3-none-any.whl → 1.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pyxecm might be problematic. Click here for more details.

pyxecm/otmm.py ADDED
@@ -0,0 +1,928 @@
1
+ """
2
+ OTMM Module to interact with the OpenText Media Management API
3
+ See:
4
+
5
+ Class: OTMM
6
+ Methods:
7
+
8
+ __init__ : class initializer
9
+ config : Returns config data set
10
+ get_data: Get the Data object that holds all processed Media Management base Assets
11
+ credentials: Returns the token data
12
+ request_header: Returns the request header for OTMM API calls
13
+ parse_request_response: Parse the REST API responses and convert
14
+ them to Python dict in a safe way
15
+ exist_result_item: Check if an dict item is in the response
16
+ of the OTMM API call
17
+ get_result_value: Check if a defined value (based on a key) is in the OTMM API response
18
+
19
+ authenticate : Authenticates at the OpenText Media Management (OTMM) API
20
+ """
21
+
22
+ __author__ = "Dr. Marc Diefenbruch"
23
+ __copyright__ = "Copyright 2024, OpenText"
24
+ __credits__ = ["Kai-Philip Gatzweiler"]
25
+ __maintainer__ = "Dr. Marc Diefenbruch"
26
+ __email__ = "mdiefenb@opentext.com"
27
+
28
+ from json import JSONDecodeError
29
+ import os
30
+ import logging
31
+ import urllib.parse
32
+ import threading
33
+ import traceback
34
+
35
+ import requests
36
+ from requests.exceptions import HTTPError, RequestException
37
+
38
+ from pyxecm.helper.data import Data
39
+
40
+ logger = logging.getLogger("pyxecm.otmm")  # module-level logger registered under the name "pyxecm.otmm"
41
+
42
+ REQUEST_HEADERS = {"Accept": "application/json", "Content-Type": "application/json"}  # header set declaring JSON for both the accepted response and the request body
43
+
44
+ REQUEST_TIMEOUT = 60  # NOTE(review): presumably an HTTP request timeout in seconds - confirm where it is applied
45
+
46
+ ASSET_BASE_PATH = "/tmp/mediaassets"  # NOTE(review): presumably a default local directory for downloaded media assets - confirm usage
47
+
48
+
49
class OTMM:
    """Used to retrieve and automate data extraction from OTMM.

    The class wraps a single ``requests.Session`` that is authenticated via
    ``authenticate()`` and then used for all REST calls (domain lookups,
    asset search, metadata retrieval and content download). Processed assets
    are collected in a ``Data`` object that can be fetched with ``get_data()``.
    """

    _config: dict
    _access_token = None
    _data: Data = None
    _thread_number = 3
    _download_dir = ""
    _business_unit_exclusions = None
    _product_exclusions = None

    def __init__(
        self,
        base_url: str,
        username: str,
        password: str,
        client_id: str,
        client_secret: str,
        thread_number: int,
        download_dir: str,
        business_unit_exclusions: list | None = None,
        product_exclusions: list | None = None,
    ):
        """Initialize the OTMM object.

        Args:
            base_url (str): Base URL of the OTMM server (without the REST path).
            username (str): User name used for the OAuth2 password grant.
            password (str): Password of the user.
            client_id (str): OAuth2 client ID.
            client_secret (str): OAuth2 client secret.
            thread_number (int): Number of worker threads used by load_assets().
            download_dir (str): Local directory the asset contents are downloaded to.
            business_unit_exclusions (list | None, optional): Business unit names
                that should not be loaded. Defaults to None.
            product_exclusions (list | None, optional): Product names that should
                not be loaded. Defaults to None.
        """

        # Initialize otmm_config as an empty dictionary
        otmm_config = {}

        # Store the credentials and parameters in a config dictionary:
        otmm_config["baseUrl"] = base_url
        otmm_config["username"] = username
        otmm_config["password"] = password
        otmm_config["clientId"] = client_id
        otmm_config["clientSecret"] = client_secret

        # All REST endpoints are derived from the base URL:
        otmm_config["restUrl"] = otmm_config["baseUrl"] + "/otmmapi/v6"
        otmm_config["tokenUrl"] = otmm_config["restUrl"] + "/sessions/oauth2/token"
        otmm_config["domainUrl"] = otmm_config["restUrl"] + "/lookupdomains"
        otmm_config["assetsUrl"] = otmm_config["restUrl"] + "/assets"
        otmm_config["searchUrl"] = otmm_config["restUrl"] + "/search/text"

        self._config = otmm_config

        self._session = requests.Session()

        self._data = Data()

        self._thread_number = thread_number

        self._download_dir = download_dir

        self._business_unit_exclusions = business_unit_exclusions
        self._product_exclusions = product_exclusions

    # end method definition

    def thread_wrapper(self, target, *args, **kwargs):
        """Run a thread target and log (instead of propagate) any exception.

        Args:
            target (callable): The function to execute inside the thread.
            *args: Positional arguments passed on to the target.
            **kwargs: Keyword arguments passed on to the target.
        """
        try:
            target(*args, **kwargs)
        except Exception as e:
            # Deliberately broad: an exception must not silently kill a worker thread.
            thread_name = threading.current_thread().name
            logger.error("Thread '%s': failed with exception -> %s", thread_name, e)
            logger.error(traceback.format_exc())

    # end method definition

    def config(self) -> dict:
        """Returns the configuration dictionary

        Returns:
            dict: Configuration dictionary
        """
        return self._config

    # end method definition

    def get_data(self) -> Data:
        """Get the Data object that holds all processed Media Management base Assets

        Returns:
            Data: Datastructure with all processed assets.
        """

        return self._data

    # end method definition

    def _send_request(
        self, method: str, url: str, **kwargs
    ) -> requests.Response | None:
        """Send an HTTP request via the shared session and log any failure.

        A timeout of REQUEST_TIMEOUT is applied unless the caller passes its
        own ``timeout`` - without it a stalled server would block forever.

        Args:
            method (str): HTTP method, e.g. "GET" or "POST".
            url (str): Request URL.
            **kwargs: Additional arguments passed to ``Session.request()``.

        Returns:
            requests.Response | None: The response if the request succeeded
                with a non-error status code, None otherwise.
        """

        kwargs.setdefault("timeout", REQUEST_TIMEOUT)

        try:
            response = self._session.request(method, url, **kwargs)
            response.raise_for_status()
        except requests.exceptions.HTTPError as http_err:
            logger.error("HTTP error occurred: %s", http_err)
        except requests.exceptions.ConnectionError as conn_err:
            logger.error("Connection error occurred: %s", conn_err)
        except requests.exceptions.Timeout as timeout_err:
            logger.error("Timeout error occurred: %s", timeout_err)
        except requests.exceptions.RequestException as req_err:
            logger.error("Request error occurred: %s", req_err)
        except Exception as e:
            # Best-effort semantics: callers get None rather than an exception.
            logger.error("An unexpected error occurred: %s", e)
        else:
            return response

        return None

    # end method definition

    @staticmethod
    def _parse_json(response: requests.Response, request_url: str):
        """Decode the JSON body of a response in a safe way.

        Args:
            response (requests.Response): The HTTP response to decode.
            request_url (str): URL of the request (used for logging only).

        Returns:
            The decoded JSON data or None if the body is not valid JSON.
        """

        try:
            return response.json()
        except ValueError as json_err:
            # All JSON decode errors raised by requests derive from ValueError.
            logger.error(
                "Cannot decode JSON response of request -> %s; error -> %s",
                request_url,
                json_err,
            )
            return None

    # end method definition

    def authenticate(self) -> str | None:
        """Authenticate at OTMM with an OAuth2 password grant.

        Username, password, client ID and client secret are taken from the
        configuration. On success the access token is stored and added as
        bearer token to the session headers.

        Returns:
            str | None: The access token or None if the authentication failed.
        """

        request_url = self.config()["tokenUrl"]
        headers = {"Content-Type": "application/x-www-form-urlencoded"}
        payload = {
            "username": self.config()["username"],
            "password": self.config()["password"],
            "grant_type": "password",
            "client_id": self.config()["clientId"],
            "client_secret": self.config()["clientSecret"],
        }

        response = self._send_request(
            "POST",
            request_url,
            headers=headers,
            data=urllib.parse.urlencode(payload),
        )
        if response is None:
            return None

        try:
            access_token = response.json()["token_info"]["oauth_token"]["accessToken"]
        except (ValueError, KeyError, TypeError) as e:
            logger.error("An unexpected error occurred: %s", e)
            return None

        if not access_token:
            # Do not install a useless "Bearer None" header.
            logger.error("An unexpected error occurred: %s", "empty access token")
            return None

        self._access_token = access_token
        self._session.headers.update({"Authorization": f"Bearer {self._access_token}"})

        return self._access_token

    # end method definition

    def _get_domain_value_map(self, domain: str) -> dict:
        """Build a dictionary of display names (keys) and IDs (values) of a domain.

        Args:
            domain (str): name / identifier of the domain.

        Returns:
            dict: Display value -> field value. Empty if the lookup failed.
        """

        result = {}
        for entry in self.lookup_domains(domain) or []:
            name = entry.get("display_value")
            value = (entry.get("field_value") or {}).get("value")
            if name is None or value is None:
                logger.warning(
                    "Skipping incomplete value in domain -> '%s': %s", domain, entry
                )
                continue
            result[name] = value

        return result

    # end method definition

    def get_products(self, domain: str = "OTMM.DOMAIN.OTM_PRODUCT") -> dict:
        """Get a dictionary with product names (keys) and IDs (values)

        Args:
            domain (str, optional): Domain. Defaults to "OTMM.DOMAIN.OTM_PRODUCT".
        Returns:
            dict: Dictionary of all known products (empty if the lookup failed).
        """

        return self._get_domain_value_map(domain)

    # end method definition

    def get_business_units(
        self, domain: str = "OTMM.DOMAIN.OTM_BUSINESS_UNIT.LU"
    ) -> dict:
        """Get a dictionary with business unit names (keys) and IDs (values)

        Args:
            domain (str, optional): Domain. Defaults to "OTMM.DOMAIN.OTM_BUSINESS_UNIT.LU".

        Returns:
            dict: Dictionary of all known business units (empty if the lookup failed).
        """

        return self._get_domain_value_map(domain)

    # end method definition

    def lookup_domains(self, domain: str) -> list | None:
        """Lookup domain values in a given OTMM domain

        Args:
            domain (str): name / identifier of the domain.

        Returns:
            list | None: The "domainValues" of the domain or None if the
                request failed or the response has an unexpected structure.
        """

        request_url = self.config()["domainUrl"] + "/" + domain

        response = self._send_request("GET", request_url)
        if response is None:
            return None

        content = self._parse_json(response, request_url)

        try:
            return content["lookup_domain_resource"]["lookup_domain"]["domainValues"]
        except (KeyError, TypeError):
            logger.error("Unexpected response structure for domain -> '%s'", domain)
            return None

    # end method definition

    def get_asset(self, asset_id: str) -> dict | None:
        """Get an asset based on its ID

        Args:
            asset_id (str): Asset ID

        Returns:
            dict | None: dictionary with asset data or None if the request failed.
        """

        request_url = self.config()["assetsUrl"] + "/" + asset_id

        headers = {"Content-Type": "application/x-www-form-urlencoded"}

        response = self._send_request("GET", request_url, headers=headers)
        if response is None:
            return None

        return self._parse_json(response, request_url)

    # end method definition

    def _search_all_assets(self, payload: dict) -> list | None:
        """Run an asset search and follow the result pagination.

        Args:
            payload (dict): Search parameters. List values are joined with ","
                before they are sent. Must include an integer "after" entry
                (the offset) that is advanced from page to page.

        Returns:
            list | None: All assets found (possibly an empty list) or None if
                the initial search request failed.
        """

        request_data = {
            key: ",".join(value) if isinstance(value, list) else value
            for key, value in payload.items()
        }

        search_result = self.search_assets(request_data)

        if not search_result or "search_result_resource" not in search_result:
            logger.error("No assets found via search!")
            return None
        resource = search_result["search_result_resource"]

        summary = resource.get("search_result") or {}
        hits = summary.get("hit_count", 0)
        hits_total = summary.get("total_hit_count", 0)

        # A missing "asset_list" must not break the "+=" in the loop below.
        asset_list = resource.get("asset_list") or []

        hits_remaining = hits_total - hits

        # "hits > 0" protects against an endless loop if a page reports no hits.
        while hits_remaining > 0 and hits > 0:
            request_data["after"] += hits
            search_result = self.search_assets(request_data)

            if not search_result or "search_result_resource" not in search_result:
                break

            resource = search_result["search_result_resource"]

            hits = (resource.get("search_result") or {}).get("hit_count", 0)
            hits_remaining -= hits

            asset_list += resource.get("asset_list") or []

        return asset_list

    # end method definition

    def get_business_unit_assets(
        self, bu_id: int, offset: int = 0, limit: int = 200
    ) -> list | None:
        """Get all Media Assets for a given Business Unit (ID) that are NOT related to a product.

        Args:
            bu_id (int): Identifier of the Business Unit.
            offset (int, optional): Result pagination. Starting ID. Defaults to 0.
            limit (int, optional): Result pagination. Page length. Defaults to 200.

        Returns:
            list | None: List of assets or None if the search failed.
        """

        payload = {
            "load_type": ["metadata"],
            "load_multilingual_values": ["true"],
            "level_of_detail": ["full"],
            "after": offset,
            "limit": limit,
            "multilingual_language_code": ["en_US"],
            "search_config_id": ["3"],
            "preference_id": ["ARTESIA.PREFERENCE.GALLERYVIEW.DISPLAYED_FIELDS"],
            "metadata_to_return": ["ARTESIA.FIELD.TAG"],
            "facet_restriction_list": '{"facet_restriction_list":{"facet_field_restriction":[{"type":"com.artesia.search.facet.FacetSimpleFieldRestriction","facet_generation_behavior":"EXCLUDE","field_id":"PRODUCT_CHAR_ID","value_list":[null]}]}}',
            "search_condition_list": [
                '{"search_condition_list":{"search_condition":[{"type":"com.artesia.search.SearchTabularCondition","metadata_table_id":"OTMM.FIELD.BUSINESS_UNIT.TAB","tabular_field_list":[{"type":"com.artesia.search.SearchTabularFieldCondition","metadata_field_id":"OTMM.COLUMN.BUSINESS_UNIT.TAB","relational_operator_id":"ARTESIA.OPERATOR.CHAR.CONTAINS","value":"'
                + str(bu_id)
                + '","left_paren":"(","right_paren":")"}]}]}}'
            ],
        }

        return self._search_all_assets(payload)

    # end method definition

    def get_product_assets(
        self, product_id: int, offset: int = 0, limit: int = 200
    ) -> list | None:
        """Get all Media Assets for a given product (ID).

        Args:
            product_id (int): Identifier of the product.
            offset (int, optional): Result pagination. Starting ID. Defaults to 0.
            limit (int, optional): Result pagination. Page length. Defaults to 200.

        Returns:
            list | None: List of assets or None if the search failed.
        """

        payload = {
            "load_type": ["metadata"],
            "load_multilingual_values": ["true"],
            "level_of_detail": ["full"],
            "after": offset,
            "limit": limit,
            "multilingual_language_code": ["en_US"],
            "search_config_id": ["3"],
            "preference_id": ["ARTESIA.PREFERENCE.GALLERYVIEW.DISPLAYED_FIELDS"],
            "metadata_to_return": ["ARTESIA.FIELD.TAG"],
            "search_condition_list": [
                '{"search_condition_list":{"search_condition":[{"type":"com.artesia.search.SearchTabularCondition","metadata_table_id":"OTM.TABLE.PRODUCT_TABLE_FIELD","tabular_field_list":[{"type":"com.artesia.search.SearchTabularFieldCondition","metadata_field_id":"PRODUCT_CHAR_ID","relational_operator_id":"ARTESIA.OPERATOR.CHAR.CONTAINS","value":"'
                + str(product_id)
                + '","left_paren":"(","right_paren":")"}]}]}}'
            ],
        }

        return self._search_all_assets(payload)

    # end method definition

    def download_asset(
        self,
        asset_id: str,
        asset_name: str,
        download_url: str = "",
        skip_existing: bool = True,
    ) -> bool:
        """Download a given Media Asset

        The content is stored in the download directory in a file named after
        the asset ID. It is first written to a temporary file and only moved
        into place once complete, so an aborted download never leaves a
        truncated file that a later run would skip as "already downloaded".

        Args:
            asset_id (str): ID of the asset to download - becomes the file name.
            asset_name (str): Name of the asset (used for logging only).
            download_url (str, optional): URL to download the asset (optional).
                If empty the "contents" REST endpoint of the asset is used.
            skip_existing (bool, optional): If True an already existing file is
                kept and no download happens. If False the file is replaced.
                Defaults to True.

        Returns:
            bool: True = success, False = failure
        """

        if download_url:
            request_url = download_url
        else:
            request_url = self.config()["assetsUrl"] + "/" + asset_id + "/contents"

        file_name = os.path.join(self._download_dir, asset_id)

        if os.path.exists(file_name):
            if skip_existing:
                logger.debug(
                    "OpenText Media Management asset has been downloaded before skipping download -> '%s' (%s) to -> %s...",
                    asset_name,
                    asset_id,
                    file_name,
                )
                return True
            # The existing file is replaced atomically after a successful
            # download; on failure the previous version is kept.
            logger.debug(
                "OpenText Media Management asset has been downloaded before. Update download -> '%s' (%s) to -> %s...",
                asset_name,
                asset_id,
                file_name,
            )

        # Thread-specific name: the same asset may be processed by two workers.
        temp_file_name = f"{file_name}.{threading.get_ident()}.part"

        try:
            if self._download_dir:
                # exist_ok avoids a race between worker threads creating the directory.
                os.makedirs(self._download_dir, exist_ok=True)

            logger.info(
                "Downloading OpenText Media Management asset -> '%s' (%s) to -> %s...",
                asset_name,
                asset_id,
                file_name,
            )
            with self._session.get(
                request_url, stream=True, timeout=REQUEST_TIMEOUT
            ) as response:
                response.raise_for_status()
                with open(temp_file_name, "wb") as f:
                    for chunk in response.iter_content(chunk_size=8192):
                        f.write(chunk)
            os.replace(temp_file_name, file_name)
            return True
        except HTTPError as http_err:
            logger.error("HTTP error occurred -> %s!", str(http_err))
        except RequestException as req_err:
            logger.error("Request error occurred -> %s!", str(req_err))
        except Exception as err:
            logger.error("An error occurred -> %s!", str(err))

        # Clean up a partially written file of the failed download.
        if os.path.exists(temp_file_name):
            try:
                os.remove(temp_file_name)
            except OSError as os_err:
                logger.warning(
                    "Cannot remove partial download -> %s; error -> %s",
                    temp_file_name,
                    os_err,
                )

        return False

    # end method definition

    def search_assets(self, payload: dict) -> dict | None:
        """Search an asset based on the given parameters / criterias.

        Args:
            payload (dict): in the format of:
                            payload = {
                                "PRODUCT_CHAR_ID": "Extended ECM for Engineering",
                                "BUSINESS_AREA_CHAR_ID": "Content",
                                "keyword_query": "*",
                                "limit": "5",
                            }

        Returns:
            dict | None: JSON search results or None if the request failed.
        """

        request_url = self.config()["searchUrl"]

        headers = {"Content-Type": "application/x-www-form-urlencoded"}

        encoded_payload = urllib.parse.urlencode(payload, safe="/:")

        response = self._send_request(
            "POST",
            request_url,
            headers=headers,
            data=encoded_payload,
        )
        if response is None:
            return None

        return self._parse_json(response, request_url)

    # end method definition

    def get_asset_metadata(self, asset_id: str) -> dict:
        """Retrieve metadata of an asset based on the given parameters / criterias.

        Args:
            asset_id (str): asset_id of the asset to query

        Returns:
            dict: Metadata information. The keys are the metadata field IDs
                with blanks removed and "." replaced by "_". Scalar fields
                have their value, tabular fields a list of display values.
                An empty dict is returned if the metadata cannot be retrieved.

            example:
            {
                'OTMM_CUSTOM_FIELD_TITLE': [],
                'OTMM_CUSTOM_FIELD_DESCRIPTION': [],
                'OTMM_CUSTOM_FIELD_KEYWORDS': [],
                'CONTENT_TYPE_COMBO_CHAR_ID': [],
                'OTM_TABLE_APPROVED_USAGE_FIELD': [],
                'OTMM_FIELD_RESOURCE_LIBRARY_TAB': [],
                'LANGUAGE_COMBO_CHAR_ID': [],
                'OTMM_CUSTOM_FIELD_PART_NUMBER': [],
                'OTMM_FIELD_BUSINESS_UNIT_TAB': ['Content'],
                'OTM_TABLE_PRODUCT_TABLE_FIELD': ['Vendor Invoice Management for SAP'],
                'OTM_TABLE_INDUSTRY_TABLE_FIELD': [],
                'OTMM_CUSTOM_FIELD_URL': [],
                'OTMM_CUSTOM_FIELD_PREVIOUS_URL': [],
                'OTMM_CUSTOM_FIELD_CONTENT_OWNER': [],
                'OTMM_CUSTOM_FIELD_EMAIL': [],
                'OTMM_CUSTOM_FIELD_JOB_NUMBER': [],
                'OTM_TABLE_BUSINESS_AREA_TABLE_FIELD': [],
                'OTM_TABLE_JOURNEY_TABLE_FIELD': ['Buy', 'Try', 'Learn'],
                'OTMM_FIELD_PERSONA_TAB': [],
                'OTMM_FIELD_SERVICES_TAB': [],
                'OTMM_FIELD_REGION_TAB': [],
                'OTMM_FIELD_PURPOSE_TAB': [],
                'AODA_CHAR_ID': [],
                'REVIEW_CADENCE_CHAR_ID': [],
                'CONTENT_CREATED_DATE_ID': [],
                'ARTESIA_FIELD_EXPIRATIONDATE': [],
                'OTMM_CUSTOM_FIELD_REAL_COMMENTS': []
            }
        """

        request_url = self.config()["assetsUrl"] + f"/{asset_id}"
        headers = {"Content-Type": "application/x-www-form-urlencoded"}

        params = {
            "load_type": "custom",
            "level_of_detail": "slim",
            "data_load_request": '{"data_load_request":{"load_multilingual_field_values":"true","load_subscribed_to":"true","load_asset_content_info":"true","load_metadata":"true","load_inherited_metadata":"true","load_thumbnail_info":"true","load_preview_info":"true", "load_pdf_preview_info":"true", "load_3d_preview_info" : "true","load_destination_links":"true", "load_security_policies":"true","load_path":"true","load_deep_zoom_info":"true"}}',
        }

        response = self._send_request(
            "GET", request_url, headers=headers, params=params
        )
        if response is None:
            return {}

        try:
            content = response.json()
        except ValueError:
            logger.error("Cannot decode JSON response for assset_id -> %s", asset_id)
            return {}

        # Read Metadata from nested structure
        try:
            metadata = (
                content.get("asset_resource", {})
                .get("asset", {})
                .get("metadata", {})
                .get("metadata_element_list", [])[0]
                .get("metadata_element_list", [])
            )
        except (AttributeError, IndexError, TypeError):
            logger.warning("No metadata found for asset_id -> %s", asset_id)
            return {}

        # Generate empty result dict
        result = {}

        # Extract Metadata fields with values as list
        for data in metadata or []:
            field_id = data.get("id")
            if not field_id:
                continue
            index = field_id.replace(" ", "").replace(".", "_")

            try:
                # Scalar fields carry their value in a nested "value" structure
                result[index] = data.get("value").get("value").get("value")
            except AttributeError:
                # Tabular fields: collect the display values of all rows
                infos = []
                for element in data.get("metadata_element_list", []):
                    for value in element.get("values", []):
                        infos.append((value.get("value") or {}).get("display_value"))

                result[index] = infos

        return result

    # end method definition

    def load_assets(
        self,
        load_products: bool = True,
        load_business_units: bool = True,
        download_assets: bool = True,
    ) -> bool:
        """Load all Media Assets for Products and Business Units

        The assets are collected first and then processed (download and
        metadata retrieval) in parallel by worker threads. Each worker adds
        its partition of the asset list to the Data object.

        Args:
            load_products (bool, optional): If true load assets on Product level. Defaults to True.
            load_business_units (bool, optional): If true load assets on Business Unit level. Defaults to True.
            download_assets (bool, optional): Should assets been downloaded. Defaults to True.

        Returns:
            bool: True = Success, False = Failure

        Example Asset:
        {
            'access_control_descriptor': {
                'permissions_map': {...}
            },
            'asset_content_info': {
                'master_content': {...}
            },
            'asset_id': '68fe5a6423fd317fdf87e83bc8cde736d4df27bf',
            'asset_lock_state_last_update_date': '2024-09-09T22:02:53Z',
            'asset_lock_state_user_id': '202',
            'asset_state': 'NORMAL',
            'asset_state_last_update_date': '2024-09-09T22:02:53Z',
            'asset_state_user_id': '202',
            'checked_out': False,
            'content_editable': True,
            'content_lock_state_last_update_date': '2024-08-14T00:33:27Z',
            'content_lock_state_user_id': '202',
            'content_lock_state_user_name': 'ajohnson3',
            'content_size': 18474085,
            'content_state': 'NORMAL',
            'content_state_last_update_date': '2024-08-14T00:33:27Z',
            'content_state_user_id': '202',
            'content_state_user_name': 'Amanda Johnson',
            'content_type': 'ACROBAT',
            'creator_id': '202',
            'date_imported': '2024-08-14T00:33:26Z',
            'date_last_updated': '2024-09-09T22:02:53Z',
            'deleted': False,
            'delivery_service_url': 'https://assets.opentext.com/adaptivemedia/rendition?id=68fe5a6423fd317fdf87e83bc8cde736d4df27bf',
            'expired': False,
            'import_job_id': 7764,
            'import_user_name': 'ajohnson3',
            'latest_version': True,
            'legacy_model_id': 104,
            'locked': False,
            'master_content_info': {
                'content_checksum': '45f42d19542af5b6146cbb3927a5490f',
                'content_data': {...},
                'content_kind': 'MASTER',
                'content_manager_id': 'ARTESIA.CONTENT.GOOGLE.CLOUD',
                'content_path': 'data/repository/original/generative-ai-governance-essentials-wp-en_56cbbfe270593ba1a5ab6551d2c8b373469cc1a9.pdf',
                'content_size': 18474085,
                'height': -1,
                'id': '56cbbfe270593ba1a5ab6551d2c8b373469cc1a9',
                'mime_type': 'application/pdf',
                'name': 'generative-ai-governance-essentials-wp-en.pdf',
                'unit_of_size': 'BYTES',
                'url': '/otmmapi/v6/renditions/56cbbfe270593ba1a5ab6551d2c8b373469cc1a9',
                'width': -1
            },
            'metadata_lock_state_user_name': 'ajohnson3',
            'metadata_model_id': 'OTM.MARKETING.MODEL',
            'metadata_state_user_name': 'Amanda Johnson',
            'mime_type': 'application/pdf',
            'name': 'generative-ai-governance-essentials-wp-en.pdf',
            'original_asset_id': '68fe5a6423fd317fdf87e83bc8cde736d4df27bf',
            'product_associations': False,
            'rendition_content': {
                'thumbnail_content': {...},
                'preview_content': {...},
                'pdf_preview_content': {...}
            },
            'subscribed_to': False,
            'thumbnail_content_id': '70aef1a5b5e480337bc115e47443884432c355ff',
            'version': 1
        }
        """

        asset_list = []

        if load_products:

            products = self.get_products()  # dictionary with key = name and value = ID

            if self._product_exclusions:
                logger.info("Excluding products -> %s", str(self._product_exclusions))
                for key in self._product_exclusions:
                    # pop(key, None) removes the key if it exists and does nothing otherwise
                    products.pop(key, None)

            for product_name, product_id in products.items():
                if "DO NOT USE" in product_name:
                    continue

                logger.info("Processing product -> '%s'...", product_name)

                assets = self.get_product_assets(product_id)

                if not assets:
                    logger.info("Found no assets for product -> '%s'", product_name)
                    continue

                for asset in assets:
                    asset["workspace_type"] = "Product"
                    asset["workspace_name"] = product_name

                asset_list += [asset for asset in assets if "content_size" in asset]

        if load_business_units:

            business_units = self.get_business_units()

            if self._business_unit_exclusions:
                logger.info(
                    "Excluding business units -> %s",
                    str(self._business_unit_exclusions),
                )
                for key in self._business_unit_exclusions:
                    # pop(key, None) removes the key if it exists and does nothing otherwise
                    business_units.pop(key, None)

            for bu_name, bu_id in business_units.items():
                logger.debug(bu_name)
                assets = self.get_business_unit_assets(bu_id)

                if not assets:
                    logger.info("Found no assets for business unit -> '%s'", bu_name)
                    continue

                for asset in assets:
                    asset["workspace_type"] = "Business Unit"
                    asset["workspace_name"] = bu_name

                asset_list += [asset for asset in assets if "content_size" in asset]
            # end for bu_name...
        # end if load_business_units

        # Deleted and expired assets are deliberately NOT filtered out here.
        # This is now done in the bulk document processing
        # using conditions_delete and conditions_create.

        total_count = len(asset_list)

        # Guard against 0 / None / negative thread numbers (division by zero below).
        number = max(1, int(self._thread_number or 1))

        if total_count >= number:
            partition_size, remainder = divmod(total_count, number)
        else:
            partition_size = total_count
            number = 1
            remainder = 0

        logger.info(
            "Processing -> %s Media Assets, thread number -> %s, partition size -> %s",
            str(total_count),
            number,
            partition_size,
        )

        threads = []

        start = 0
        for index in range(number):
            # The first "remainder" partitions get one additional asset each.
            extra = 1 if remainder > 0 else 0
            end = start + partition_size + extra
            if remainder > 0:
                remainder -= 1

            thread = threading.Thread(
                name=f"load_assets_{index + 1:02}",
                target=self.thread_wrapper,
                args=(
                    self.load_assets_worker,
                    asset_list,
                    partition_size + extra,
                    start,
                    download_assets,
                ),
            )
            thread.start()
            threads.append(thread)
            start = end

        for thread in threads:
            thread.join()

        return True

    # end method definition

    def load_assets_worker(
        self,
        asset_list: list,
        partition_size: int,
        offset: int = 0,
        download_assets: bool = True,
    ):
        """Worker Method for multi-threading

        Processes the slice [offset, offset + partition_size) of the asset
        list: downloads the content (optional), enriches each asset with its
        metadata and finally appends the slice to the Data object.
        Deleted or expired assets are neither downloaded nor enriched but
        remain part of the slice that is appended.

        Args:
            asset_list (list): List of assets to process
            partition_size (int): Number of assets this worker has to process.
            offset (int, optional): Index of the first asset to process. Defaults to 0.
            download_assets (bool, optional): Should the asset content be
                downloaded. Defaults to True.
        """

        logger.info(
            "Processing Media Assets in range from -> %s to -> %s...",
            offset,
            offset + partition_size,
        )

        worker_asset_list = asset_list[offset : offset + partition_size]

        for asset in worker_asset_list:
            asset_id = asset.get("asset_id")
            asset_name = asset.get("name")
            # Store name as asset_name
            asset["asset_name"] = asset_name
            asset_download_url = asset.get("delivery_service_url")
            asset_deleted = asset.get("deleted", False)
            asset_expired = asset.get("expired", False)
            if asset_deleted or asset_expired:
                logger.info(
                    "Asset -> '%s' is deleted or expired. Skipping...",
                    asset_name,
                )
                continue

            # "or 0" also covers an explicit None value of "content_size"
            if download_assets and (asset.get("content_size") or 0) > 0:
                success = self.download_asset(
                    asset_id=asset_id,
                    asset_name=asset_name,
                    download_url=asset_download_url,
                )
                if not success:
                    logger.error(
                        "Failed to download asset -> '%s' (%s) to '%s'",
                        asset_name,
                        asset_id,
                        self._download_dir,
                    )
                else:
                    logger.info(
                        "Successfully downloaded asset -> '%s' (%s) to '%s'",
                        asset_name,
                        asset_id,
                        self._download_dir,
                    )

            # Add the metadata fields to the asset
            asset.update(self.get_asset_metadata(asset_id))

        # Now we add the assets to the Data class (guarded by its lock
        # as several workers append concurrently):
        with self._data.lock():
            self._data.append(worker_asset_list)

    # end method definition