pyxecm 1.4__py3-none-any.whl → 1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pyxecm might be problematic. Click here for more details.

pyxecm/otmm.py ADDED
@@ -0,0 +1,808 @@
1
+ """
2
+ OTMM Module to interact with the OpenText Media Management API
3
+ See:
4
+
5
+ Class: OTMM
6
+ Methods:
7
+
8
+ __init__ : class initializer
9
+ config : Returns config data set
10
+ get_data: Get the Data object that holds all processed Media Management base Assets
11
+ credentials: Returns the token data
12
+ request_header: Returns the request header for OTMM API calls
13
+ parse_request_response: Parse the REST API responses and convert
14
+ them to Python dict in a safe way
15
+ exist_result_item: Check if an dict item is in the response
16
+ of the OTMM API call
17
+ get_result_value: Check if a defined value (based on a key) is in the OTMM API response
18
+
19
+ authenticate : Authenticates at the OTMM API
20
+ """
21
+
22
+ __author__ = "Dr. Marc Diefenbruch"
23
+ __copyright__ = "Copyright 2024, OpenText"
24
+ __credits__ = ["Kai-Philip Gatzweiler"]
25
+ __maintainer__ = "Dr. Marc Diefenbruch"
26
+ __email__ = "mdiefenb@opentext.com"
27
+
28
+ from json import JSONDecodeError
29
+ import os
30
+ import logging
31
+ import urllib.parse
32
+ import threading
33
+ import traceback
34
+
35
+ import requests
36
+ from requests.exceptions import HTTPError, RequestException
37
+
38
+ from pyxecm.helper.data import Data
39
+
40
+ logger = logging.getLogger("pyxecm.otmm")
41
+
42
+ REQUEST_HEADERS = {"Accept": "application/json", "Content-Type": "application/json"}
43
+
44
+ REQUEST_TIMEOUT = 60
45
+
46
+ ASSET_BASE_PATH = "/tmp/mediaassets"
47
+
48
+
49
+ class OTMM:
50
+ """Used to retrieve and automate data extraction from OTMM."""
51
+
52
+ _config: dict
53
+ _access_token = None
54
+ _data: Data = None
55
+ _thread_number = 3
56
+ _download_dir = ""
57
+ _business_unit_exclusions = None
58
+ _product_exclusions = None
59
+
60
def __init__(
    self,
    base_url: str,
    username: str,
    password: str,
    client_id: str,
    client_secret: str,
    thread_number: int,
    download_dir: str,
    business_unit_exclusions: list | None = None,
    product_exclusions: list | None = None,
):
    """Initialize the OTMM object.

    Args:
        base_url (str): Base URL of the OTMM server. The REST endpoints
                        are derived from it.
        username (str): User name sent with the token request.
        password (str): Password sent with the token request.
        client_id (str): OAuth client ID sent with the token request.
        client_secret (str): OAuth client secret sent with the token request.
        thread_number (int): Number of worker threads used by load_assets().
        download_dir (str): Directory the downloaded assets are written to.
        business_unit_exclusions (list | None, optional): Business unit names
            that load_assets() removes before processing. Defaults to None.
        product_exclusions (list | None, optional): Product names that
            load_assets() removes before processing. Defaults to None.
    """

    # All endpoint URLs hang off the versioned REST root:
    rest_url = base_url + "/otmmapi/v6"

    # Store the credentials and the derived URLs in the config dictionary:
    self._config = {
        "baseUrl": base_url,
        "username": username,
        "password": password,
        "clientId": client_id,
        "clientSecret": client_secret,
        "restUrl": rest_url,
        "tokenUrl": rest_url + "/sessions/oauth2/token",
        "domainUrl": rest_url + "/lookupdomains",
        "assetsUrl": rest_url + "/assets",
        "searchUrl": rest_url + "/search/text",
    }

    # One HTTP session is shared by all requests of this object:
    self._session = requests.Session()

    # Container the worker threads append their processed assets to:
    self._data = Data()

    self._thread_number = thread_number
    self._download_dir = download_dir

    self._business_unit_exclusions = business_unit_exclusions
    self._product_exclusions = product_exclusions
101
+
102
+ # end method definition
103
+
104
def thread_wrapper(self, target, *args, **kwargs):
    """Run a thread target and log any exception instead of propagating it.

    Args:
        target: Callable to execute in the current thread.
        *args: Positional arguments passed on to the target.
        **kwargs: Keyword arguments passed on to the target.
    """
    try:
        target(*args, **kwargs)
    except Exception as exc:
        # Log the failing thread's name and the full traceback.
        failed_thread_name = threading.current_thread().name
        logger.error("Thread %s: failed with exception %s", failed_thread_name, exc)
        logger.error(traceback.format_exc())
112
+
113
+ # end method definition
114
+
115
def config(self) -> dict:
    """Return the configuration dictionary built by the initializer.

    Returns:
        dict: Credentials and REST endpoint URLs of this OTMM object.
    """
    configuration = self._config
    return configuration
122
+
123
+ # end method definition
124
+
125
def get_data(self) -> Data:
    """Return the Data object the processed Media Management assets are stored in.

    Returns:
        Data: Data structure holding all processed assets.
    """
    processed_assets = self._data
    return processed_assets
133
+
134
+ # end method definition
135
+
136
def authenticate(self) -> str | None:
    """Authenticate at OTMM using the OAuth2 password grant.

    User name, password, client ID and client secret are taken from the
    configuration. On success the access token is remembered and installed
    as a Bearer "Authorization" header on the shared session.

    Returns:
        str | None: The access token, or None if the request failed or the
                    response did not contain a token.
    """

    config = self.config()
    request_url = config["tokenUrl"]
    headers = {"Content-Type": "application/x-www-form-urlencoded"}
    payload = {
        "username": config["username"],
        "password": config["password"],
        "grant_type": "password",
        "client_id": config["clientId"],
        "client_secret": config["clientSecret"],
    }

    try:
        response = self._session.post(
            request_url,
            headers=headers,
            data=urllib.parse.urlencode(payload),
            # Without a timeout a stalled server blocks the caller forever.
            timeout=REQUEST_TIMEOUT,
        )
        response.raise_for_status()
        response_data = response.json()
    except requests.exceptions.HTTPError as http_err:
        logger.error("HTTP error occurred: %s", http_err)
        return None
    except requests.exceptions.ConnectionError as conn_err:
        logger.error("Connection error occurred: %s", conn_err)
        return None
    except requests.exceptions.Timeout as timeout_err:
        logger.error("Timeout error occurred: %s", timeout_err)
        return None
    except requests.exceptions.RequestException as req_err:
        logger.error("Request error occurred: %s", req_err)
        return None
    except Exception as e:
        # Keep the "never raises" contract of this method (e.g. invalid JSON).
        logger.error("An unexpected error occurred: %s", e)
        return None

    # Walk the nested structure defensively - any level may be missing.
    if not isinstance(response_data, dict):
        response_data = {}
    token_info = response_data.get("token_info") or {}
    oauth_token = token_info.get("oauth_token") or {}
    access_token = oauth_token.get("accessToken")

    if not access_token:
        # Do not install a "Bearer None" header on the session.
        logger.error("Authentication response does not include an access token!")
        return None

    self._access_token = access_token
    self._session.headers.update({"Authorization": f"Bearer {access_token}"})

    return access_token
178
+
179
+ # end method definition
180
+
181
def get_products(self, domain: str = "OTMM.DOMAIN.OTM_PRODUCT") -> dict:
    """Get a dictionary with product names (keys) and IDs (values).

    Args:
        domain (str, optional): Lookup domain to read the products from.
                                Defaults to "OTMM.DOMAIN.OTM_PRODUCT".
    Returns:
        dict: Dictionary of all known products.
    """

    # Map each domain value's display name to its field value.
    return {
        entry.get("display_value"): entry.get("field_value").get("value")
        for entry in self.lookup_domains(domain)
    }
199
+
200
+ # end method definition
201
+
202
def get_business_units(
    self, domain: str = "OTMM.DOMAIN.OTM_BUSINESS_UNIT.LU"
) -> dict:
    """Get a dictionary with business unit names (keys) and IDs (values).

    Args:
        domain (str, optional): Lookup domain to read the business units from.
                                Defaults to "OTMM.DOMAIN.OTM_BUSINESS_UNIT.LU".

    Returns:
        dict: Dictionary of all known business units.
    """

    # Map each domain value's display name to its field value.
    return {
        entry.get("display_value"): entry.get("field_value").get("value")
        for entry in self.lookup_domains(domain)
    }
220
+
221
+ # end method definition
222
+
223
def lookup_domains(self, domain: str) -> list:
    """Lookup the values of a given OTMM domain.

    Args:
        domain (str): Name / identifier of the domain.

    Returns:
        list: The "domainValues" entries of the lookup domain. An empty
              list is returned if the request fails or the response does
              not contain domain values.
    """

    request_url = self.config()["domainUrl"] + "/" + domain

    try:
        response = self._session.get(
            request_url,
            # Without a timeout a stalled server blocks the caller forever.
            timeout=REQUEST_TIMEOUT,
        )
        response.raise_for_status()
        response_data = response.json()
    except requests.exceptions.HTTPError as http_err:
        logger.error("HTTP error occurred: %s", http_err)
        return []
    except requests.exceptions.ConnectionError as conn_err:
        logger.error("Connection error occurred: %s", conn_err)
        return []
    except requests.exceptions.Timeout as timeout_err:
        logger.error("Timeout error occurred: %s", timeout_err)
        return []
    except requests.exceptions.RequestException as req_err:
        logger.error("Request error occurred: %s", req_err)
        return []
    except Exception as e:
        logger.error("An unexpected error occurred: %s", e)
        return []

    # Every level of the nested response may be missing or null.
    if not isinstance(response_data, dict):
        return []
    lookup_resource = response_data.get("lookup_domain_resource") or {}
    lookup_domain = lookup_resource.get("lookup_domain") or {}

    return lookup_domain.get("domainValues") or []
261
+
262
+ # end method definition
263
+
264
+ def get_business_unit_assets(
265
+ self, bu_id: int, offset: int = 0, limit: int = 200
266
+ ) -> list | None:
267
+ """Get all Media Assets for a given Business Unit (ID) that are NOT related to a product.
268
+
269
+ Args:
270
+ bu_id (int): Identifier of the Business Unit.
271
+ offset (int, optional): Result pagination. Starting ID. Defaults to 0.
272
+ limit (int, optional): Result pagination. Page length. Defaults to 200.
273
+
274
+ Returns:
275
+ dict: Search Results
276
+ """
277
+
278
+ payload = {
279
+ "load_type": ["metadata"],
280
+ "load_multilingual_values": ["true"],
281
+ "level_of_detail": ["full"],
282
+ "after": offset,
283
+ "limit": limit,
284
+ "multilingual_language_code": ["en_US"],
285
+ "search_config_id": ["3"],
286
+ "preference_id": ["ARTESIA.PREFERENCE.GALLERYVIEW.DISPLAYED_FIELDS"],
287
+ "metadata_to_return": ["ARTESIA.FIELD.TAG"],
288
+ "facet_restriction_list": '{"facet_restriction_list":{"facet_field_restriction":[{"type":"com.artesia.search.facet.FacetSimpleFieldRestriction","facet_generation_behavior":"EXCLUDE","field_id":"PRODUCT_CHAR_ID","value_list":[null]}]}}',
289
+ "search_condition_list": [
290
+ '{"search_condition_list":{"search_condition":[{"type":"com.artesia.search.SearchTabularCondition","metadata_table_id":"OTMM.FIELD.BUSINESS_UNIT.TAB","tabular_field_list":[{"type":"com.artesia.search.SearchTabularFieldCondition","metadata_field_id":"OTMM.COLUMN.BUSINESS_UNIT.TAB","relational_operator_id":"ARTESIA.OPERATOR.CHAR.CONTAINS","value":"'
291
+ + str(bu_id)
292
+ + '","left_paren":"(","right_paren":")"}]}]}}'
293
+ ],
294
+ }
295
+
296
+ flattened_data = {
297
+ k: v if not isinstance(v, list) else ",".join(v) for k, v in payload.items()
298
+ }
299
+
300
+ search_result = self.search_assets(flattened_data)
301
+
302
+ if not search_result or not "search_result_resource" in search_result:
303
+ logger.error("No assets found via search!")
304
+ return None
305
+ search_result = search_result.get("search_result_resource")
306
+
307
+ hits = search_result["search_result"]["hit_count"]
308
+ hits_total = search_result["search_result"]["total_hit_count"]
309
+
310
+ asset_list = search_result.get("asset_list", None)
311
+
312
+ hits_remaining = hits_total - hits
313
+
314
+ while hits_remaining > 0:
315
+ flattened_data["after"] += hits
316
+ search_result = self.search_assets(flattened_data)
317
+
318
+ if not search_result or not "search_result_resource" in search_result:
319
+ break
320
+
321
+ search_result = search_result.get("search_result_resource")
322
+
323
+ hits = search_result["search_result"]["hit_count"]
324
+ hits_remaining = hits_remaining - hits
325
+
326
+ asset_list += search_result.get("asset_list", [])
327
+
328
+ return asset_list
329
+
330
+ # end method definition
331
+
332
+ def get_product_assets(
333
+ self, product_id: int, offset: int = 0, limit: int = 200
334
+ ) -> list | None:
335
+ """Get all Media Assets for a given product (ID).
336
+
337
+ Args:
338
+ product_id (int): Identifier of the product.
339
+ offset (int, optional): Result pagination. Starting ID. Defaults to 0.
340
+ limit (int, optional): Result pagination. Page length. Defaults to 200.
341
+
342
+ Returns:
343
+ dict: Search Results
344
+ """
345
+
346
+ payload = {
347
+ "load_type": ["metadata"],
348
+ "load_multilingual_values": ["true"],
349
+ "level_of_detail": ["full"],
350
+ "after": offset,
351
+ "limit": limit,
352
+ "multilingual_language_code": ["en_US"],
353
+ "search_config_id": ["3"],
354
+ "preference_id": ["ARTESIA.PREFERENCE.GALLERYVIEW.DISPLAYED_FIELDS"],
355
+ "metadata_to_return": ["ARTESIA.FIELD.TAG"],
356
+ "search_condition_list": [
357
+ '{"search_condition_list":{"search_condition":[{"type":"com.artesia.search.SearchTabularCondition","metadata_table_id":"OTM.TABLE.PRODUCT_TABLE_FIELD","tabular_field_list":[{"type":"com.artesia.search.SearchTabularFieldCondition","metadata_field_id":"PRODUCT_CHAR_ID","relational_operator_id":"ARTESIA.OPERATOR.CHAR.CONTAINS","value":"'
358
+ + str(product_id)
359
+ + '","left_paren":"(","right_paren":")"}]}]}}'
360
+ ],
361
+ }
362
+
363
+ flattened_data = {
364
+ k: v if not isinstance(v, list) else ",".join(v) for k, v in payload.items()
365
+ }
366
+
367
+ search_result = self.search_assets(flattened_data)
368
+
369
+ if not search_result or not "search_result_resource" in search_result:
370
+ logger.error("No assets found via search!")
371
+ return None
372
+ search_result = search_result.get("search_result_resource")
373
+
374
+ hits = search_result["search_result"]["hit_count"]
375
+ hits_total = search_result["search_result"]["total_hit_count"]
376
+
377
+ asset_list = search_result.get("asset_list", None)
378
+
379
+ hits_remaining = hits_total - hits
380
+
381
+ while hits_remaining > 0:
382
+ flattened_data["after"] += hits
383
+ search_result = self.search_assets(flattened_data)
384
+
385
+ if not search_result or not "search_result_resource" in search_result:
386
+ break
387
+
388
+ search_result = search_result.get("search_result_resource")
389
+
390
+ hits = search_result["search_result"]["hit_count"]
391
+ hits_remaining = hits_remaining - hits
392
+
393
+ asset_list += search_result.get("asset_list", [])
394
+
395
+ return asset_list
396
+
397
+ # end method definition
398
+
399
def download_asset(
    self,
    asset_id: str,
    asset_name: str,
    download_url: str = "",
    skip_existing: bool = True,
) -> bool:
    """Download a given Media Asset into the download directory.

    The content is streamed into a temporary file which is renamed to the
    final file name (the asset ID) only after the download has completed.
    A failed download therefore never leaves a partial file behind that a
    later call would mistake for an already downloaded asset.

    Args:
        asset_id (str): ID of the asset to download. Used as the file name.
        asset_name (str): Name of the asset. Only used for log messages.
        download_url (str, optional): URL to download the asset from. If
            empty, the URL is derived from the assets endpoint.
        skip_existing (bool, optional): If True, an existing file is kept
            and no download happens. If False, an existing file is replaced
            once the new download has succeeded. Defaults to True.

    Returns:
        bool: True = success, False = failure
    """

    if download_url:
        request_url = download_url
    else:
        request_url = self.config()["assetsUrl"] + "/" + asset_id + "/contents"

    file_name = os.path.join(self._download_dir, asset_id)

    if os.path.exists(file_name):
        if skip_existing:
            logger.debug(
                "OpenText Media Management asset has been downloaded before skipping download -> '%s' (%s) to -> %s...",
                asset_name,
                asset_id,
                file_name,
            )
            return True
        # The old file stays in place until the new download is complete.
        logger.debug(
            "OpenText Media Management asset has been downloaded before. Update download -> '%s' (%s) to -> %s...",
            asset_name,
            asset_id,
            file_name,
        )

    # The thread ID keeps concurrent downloads of the same asset apart.
    temp_file_name = f"{file_name}.{threading.get_ident()}.part"

    try:
        if self._download_dir:
            # exist_ok avoids a race between worker threads creating the
            # directory at the same time.
            os.makedirs(self._download_dir, exist_ok=True)

        logger.info(
            "Downloading OpenText Media Management asset -> '%s' (%s) to -> %s...",
            asset_name,
            asset_id,
            file_name,
        )
        # The context manager releases the streamed connection in all cases.
        with self._session.get(
            request_url, stream=True, timeout=REQUEST_TIMEOUT
        ) as response:
            response.raise_for_status()
            with open(temp_file_name, "wb") as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)
        # Atomically move the complete download to its final name.
        os.replace(temp_file_name, file_name)
        return True
    except HTTPError as http_err:
        logger.error("HTTP error occurred -> %s!", str(http_err))
    except RequestException as req_err:
        logger.error("Request error occurred -> %s!", str(req_err))
    except Exception as err:
        logger.error("An error occurred -> %s!", str(err))

    # Failure: remove whatever part of the download was written.
    try:
        os.remove(temp_file_name)
    except OSError:
        pass

    return False
468
+
469
+ # end method definition
470
+
471
def search_assets(self, payload: dict) -> dict | None:
    """Search assets based on the given parameters / criteria.

    Args:
        payload (dict): Search parameters that are sent form-encoded,
                        in the format of:
                        payload = {
                            "PRODUCT_CHAR_ID": "Extended ECM for Engineering",
                            "BUSINESS_AREA_CHAR_ID": "Content",
                            "keyword_query": "*",
                            "limit": "5",
                        }

    Returns:
        dict | None: Decoded JSON search result, or None if the request
                     failed or the response could not be decoded.
    """

    request_url = self.config()["searchUrl"]

    headers = {"Content-Type": "application/x-www-form-urlencoded"}

    encoded_payload = urllib.parse.urlencode(payload, safe="/:")

    try:
        response = self._session.post(
            request_url,
            headers=headers,
            data=encoded_payload,
            # Without a timeout a stalled server blocks the caller forever.
            timeout=REQUEST_TIMEOUT,
        )
        response.raise_for_status()
        # Decode inside the try block: only a successful response is
        # parsed, and an undecodable body is reported like any other error.
        return response.json()
    except requests.exceptions.HTTPError as http_err:
        logger.error("HTTP error occurred: %s", http_err)
    except requests.exceptions.ConnectionError as conn_err:
        logger.error("Connection error occurred: %s", conn_err)
    except requests.exceptions.Timeout as timeout_err:
        logger.error("Timeout error occurred: %s", timeout_err)
    except requests.exceptions.RequestException as req_err:
        logger.error("Request error occurred: %s", req_err)
    except Exception as e:
        logger.error("An unexpected error occurred: %s", e)

    # Callers treat a falsy result as "nothing found".
    return None
514
+
515
+ # end method definition
516
+
517
def get_asset_metadata(self, asset_id: str) -> dict:
    """Retrieve the metadata of an asset.

    The keys of the result are the metadata field IDs with blanks removed
    and dots replaced by underscores. Fields with a scalar value deliver
    that value. Fields without one (tabular fields) deliver the list of
    display values of their elements.

    Args:
        asset_id (str): asset_id of the asset to query

    Returns:
        dict: Metadata information. An empty dictionary is returned if the
              request fails or the response contains no metadata.

        example:
        {
            'OTMM_CUSTOM_FIELD_TITLE': [],
            'OTMM_CUSTOM_FIELD_DESCRIPTION': [],
            'CONTENT_TYPE_COMBO_CHAR_ID': [],
            'OTMM_FIELD_BUSINESS_UNIT_TAB': ['Content'],
            'OTM_TABLE_PRODUCT_TABLE_FIELD': ['Vendor Invoice Management for SAP'],
            'OTM_TABLE_JOURNEY_TABLE_FIELD': ['Buy', 'Try', 'Learn'],
            'ARTESIA_FIELD_EXPIRATIONDATE': [],
            'OTMM_CUSTOM_FIELD_REAL_COMMENTS': []
        }
    """

    request_url = self.config()["assetsUrl"] + f"/{asset_id}"
    headers = {"Content-Type": "application/x-www-form-urlencoded"}

    params = {
        "load_type": "custom",
        "level_of_detail": "slim",
        "data_load_request": '{"data_load_request":{"load_multilingual_field_values":"true","load_subscribed_to":"true","load_asset_content_info":"true","load_metadata":"true","load_inherited_metadata":"true","load_thumbnail_info":"true","load_preview_info":"true", "load_pdf_preview_info":"true", "load_3d_preview_info" : "true","load_destination_links":"true", "load_security_policies":"true","load_path":"true","load_deep_zoom_info":"true"}}',
    }

    try:
        response = self._session.get(
            request_url,
            headers=headers,
            params=params,
            # Without a timeout a stalled server blocks the caller forever.
            timeout=REQUEST_TIMEOUT,
        )
        response.raise_for_status()
    except requests.exceptions.HTTPError as http_err:
        logger.error("HTTP error occurred: %s", http_err)
        return {}
    except requests.exceptions.ConnectionError as conn_err:
        logger.error("Connection error occurred: %s", conn_err)
        return {}
    except requests.exceptions.Timeout as timeout_err:
        logger.error("Timeout error occurred: %s", timeout_err)
        return {}
    except requests.exceptions.RequestException as req_err:
        logger.error("Request error occurred: %s", req_err)
        return {}
    except Exception as e:
        logger.error("An unexpected error occurred: %s", e)
        return {}

    try:
        response_data = response.json()
    except ValueError:
        # JSON decode errors are ValueError subclasses.
        logger.error("Cannot decode JSON response for assset_id -> %s", asset_id)
        return {}

    # Read Metadata from nested structure - every level may be missing:
    if not isinstance(response_data, dict):
        response_data = {}
    asset_resource = response_data.get("asset_resource") or {}
    asset = asset_resource.get("asset") or {}
    asset_metadata = asset.get("metadata") or {}
    element_groups = asset_metadata.get("metadata_element_list") or []

    if not element_groups:
        # Nothing to index into - avoids an IndexError on "[0]".
        return {}

    metadata = element_groups[0].get("metadata_element_list") or []

    # Generate empty result dict
    result = {}

    # Extract Metadata fields with values as list
    for data in metadata:
        field_id = data.get("id")
        if not field_id:
            # Without an ID there is no key to store the value under.
            continue
        index = field_id.replace(" ", "").replace(".", "_")

        try:
            result[index] = data.get("value").get("value").get("value")
        except AttributeError:
            # No scalar value: collect the display values of the elements.
            result[index] = [
                (value.get("value") or {}).get("display_value")
                for element in data.get("metadata_element_list") or []
                for value in element.get("values") or []
            ]

    return result
615
+
616
+ # end method definition
617
+
618
def load_assets(
    self,
    load_products: bool = True,
    load_business_units: bool = True,
    download_assets: bool = True,
) -> bool:
    """Load all Media Assets for Products and Business Units.

    The assets are collected first, deleted and expired assets are dropped,
    and the remaining list is split into contiguous partitions that are
    processed by worker threads (see load_assets_worker()).

    Args:
        load_products (bool, optional): Collect the assets of all products
            (minus the product exclusions). Defaults to True.
        load_business_units (bool, optional): Collect the assets of all
            business units (minus the business unit exclusions).
            Defaults to True.
        download_assets (bool, optional): Passed on to the workers. If True
            the asset content is downloaded. Defaults to True.

    Returns:
        bool: Always True. Failures inside the worker threads are logged
              by thread_wrapper() and not reported back.
    """

    asset_list = []

    if load_products:

        products = self.get_products()  # dictionary with key = name and value = ID

        if self._product_exclusions:
            logger.info("Excluding products -> %s", str(self._product_exclusions))
            for key in self._product_exclusions:
                # pop(key, None) removes the key if it exists and does
                # nothing if it doesn't
                products.pop(key, None)

        for product_name, product_id in products.items():
            if "DO NOT USE" in product_name:
                continue

            logger.info("Processing product -> '%s'...", product_name)

            assets = self.get_product_assets(product_id)

            if not assets:
                logger.info("Found no assets for product -> '%s'", product_name)
                continue

            for asset in assets:
                asset["workspace_type"] = "Product"
                asset["workspace_name"] = product_name

            asset_list += assets

    if load_business_units:

        business_units = self.get_business_units()

        if self._business_unit_exclusions:
            logger.info(
                "Excluding business units -> %s",
                str(self._business_unit_exclusions),
            )
            for key in self._business_unit_exclusions:
                # pop(key, None) removes the key if it exists and does
                # nothing if it doesn't
                business_units.pop(key, None)

        for bu_name, bu_id in business_units.items():
            logger.debug(bu_name)
            assets = self.get_business_unit_assets(bu_id)

            if not assets:
                logger.info("Found no assets for business unit -> '%s'", bu_name)
                continue

            for asset in assets:
                asset["workspace_type"] = "Business Unit"
                asset["workspace_name"] = bu_name

            asset_list += assets

    asset_list = [
        item
        for item in asset_list
        if not item.get("deleted", False) and not item.get("expired", False)
    ]

    # Count AFTER filtering - the partitions must match the list that is
    # actually handed to the workers.
    total_count = len(asset_list)

    number = self._thread_number

    # A single thread handles everything if there are fewer assets than
    # threads. "number < 1" also protects the division below.
    if number < 1 or total_count < number:
        number = 1

    # The first "remainder" partitions get one extra asset each.
    partition_size, remainder = divmod(total_count, number)

    logger.info(
        "Processing -> %s Media Assets, thread number -> %s, partition size -> %s",
        str(total_count),
        number,
        partition_size,
    )

    threads = []

    start = 0
    for index in range(number):
        size = partition_size + (1 if index < remainder else 0)

        thread = threading.Thread(
            name=f"load_assets_{index + 1:02}",
            target=self.thread_wrapper,
            args=(
                self.load_assets_worker,
                asset_list,
                size,
                start,
                download_assets,
            ),
        )
        thread.start()
        threads.append(thread)
        start += size

    for thread in threads:
        thread.join()

    return True
742
+
743
+ # end method definition
744
+
745
def load_assets_worker(
    self,
    asset_list: list,
    partition_size: int,
    offset: int = 0,
    download_assets: bool = True,
):
    """Worker method for multi-threading.

    Processes the slice asset_list[offset : offset + partition_size]:
    optionally downloads each asset, enriches it with its metadata and
    finally appends all processed assets to the shared Data object.

    Args:
        asset_list (list): Complete list of assets (shared by all workers).
        partition_size (int): Number of assets this worker processes.
        offset (int, optional): Index of the first asset of this worker.
                                Defaults to 0.
        download_assets (bool, optional): Download the asset content.
                                          Defaults to True.
    """

    logger.info(
        "Processing Media Assets in range from -> %s to -> %s...",
        offset,
        offset + partition_size,
    )

    worker_asset_list = asset_list[offset : offset + partition_size]

    # Only assets that were actually processed end up in the Data object.
    processed_assets = []

    for asset in worker_asset_list:
        asset_id = asset.get("asset_id")
        asset_name = asset.get("name")
        # Store name as asset_name
        asset["asset_name"] = asset_name
        asset_download_url = asset.get("delivery_service_url")

        if asset.get("deleted", False) or asset.get("expired", False):
            logger.info(
                "Asset -> '%s' is deleted or expired. Skipping...",
                asset_name,
            )
            continue

        if download_assets:
            success = self.download_asset(
                asset_id=asset_id,
                asset_name=asset_name,
                download_url=asset_download_url,
            )
            if not success:
                logger.error(
                    "Failed to download asset -> '%s' (%s) to '%s'",
                    asset_name,
                    asset_id,
                    self._download_dir,
                )
            else:
                logger.info(
                    "Successfully downloaded asset -> '%s' (%s) to '%s'",
                    asset_name,
                    asset_id,
                    self._download_dir,
                )

        # Add metadata to the asset - also if the download failed:
        asset.update(self.get_asset_metadata(asset_id))
        processed_assets.append(asset)

    # The Data object is shared by all worker threads, so appending is
    # done while holding its lock:
    with self._data.lock():
        self._data.append(processed_assets)