pyxecm 1.5__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pyxecm might be problematic. Click here for more details.

Files changed (56) hide show
  1. pyxecm/__init__.py +6 -2
  2. pyxecm/avts.py +1492 -0
  3. pyxecm/coreshare.py +1075 -960
  4. pyxecm/customizer/__init__.py +16 -4
  5. pyxecm/customizer/__main__.py +58 -0
  6. pyxecm/customizer/api/__init__.py +5 -0
  7. pyxecm/customizer/api/__main__.py +6 -0
  8. pyxecm/customizer/api/app.py +914 -0
  9. pyxecm/customizer/api/auth.py +154 -0
  10. pyxecm/customizer/api/metrics.py +92 -0
  11. pyxecm/customizer/api/models.py +13 -0
  12. pyxecm/customizer/api/payload_list.py +865 -0
  13. pyxecm/customizer/api/settings.py +103 -0
  14. pyxecm/customizer/browser_automation.py +332 -139
  15. pyxecm/customizer/customizer.py +1075 -1057
  16. pyxecm/customizer/exceptions.py +35 -0
  17. pyxecm/customizer/guidewire.py +322 -0
  18. pyxecm/customizer/k8s.py +787 -338
  19. pyxecm/customizer/log.py +107 -0
  20. pyxecm/customizer/m365.py +3424 -2270
  21. pyxecm/customizer/nhc.py +1169 -0
  22. pyxecm/customizer/openapi.py +258 -0
  23. pyxecm/customizer/payload.py +18201 -7030
  24. pyxecm/customizer/pht.py +1047 -210
  25. pyxecm/customizer/salesforce.py +836 -727
  26. pyxecm/customizer/sap.py +58 -41
  27. pyxecm/customizer/servicenow.py +851 -383
  28. pyxecm/customizer/settings.py +442 -0
  29. pyxecm/customizer/successfactors.py +408 -346
  30. pyxecm/customizer/translate.py +83 -48
  31. pyxecm/helper/__init__.py +5 -2
  32. pyxecm/helper/assoc.py +98 -38
  33. pyxecm/helper/data.py +2482 -742
  34. pyxecm/helper/logadapter.py +27 -0
  35. pyxecm/helper/web.py +229 -101
  36. pyxecm/helper/xml.py +528 -172
  37. pyxecm/maintenance_page/__init__.py +5 -0
  38. pyxecm/maintenance_page/__main__.py +6 -0
  39. pyxecm/maintenance_page/app.py +51 -0
  40. pyxecm/maintenance_page/settings.py +28 -0
  41. pyxecm/maintenance_page/static/favicon.avif +0 -0
  42. pyxecm/maintenance_page/templates/maintenance.html +165 -0
  43. pyxecm/otac.py +234 -140
  44. pyxecm/otawp.py +2689 -0
  45. pyxecm/otcs.py +12344 -7547
  46. pyxecm/otds.py +3166 -2219
  47. pyxecm/otiv.py +36 -21
  48. pyxecm/otmm.py +1363 -296
  49. pyxecm/otpd.py +231 -127
  50. pyxecm-2.0.0.dist-info/METADATA +145 -0
  51. pyxecm-2.0.0.dist-info/RECORD +54 -0
  52. {pyxecm-1.5.dist-info → pyxecm-2.0.0.dist-info}/WHEEL +1 -1
  53. pyxecm-1.5.dist-info/METADATA +0 -51
  54. pyxecm-1.5.dist-info/RECORD +0 -30
  55. {pyxecm-1.5.dist-info → pyxecm-2.0.0.dist-info/licenses}/LICENSE +0 -0
  56. {pyxecm-1.5.dist-info → pyxecm-2.0.0.dist-info}/top_level.txt +0 -0
pyxecm/otmm.py CHANGED
@@ -1,53 +1,61 @@
1
- """
2
- OTMM Module to interact with the OpenText Media Management API
3
- See:
4
-
5
- Class: OTMM
6
- Methods:
7
-
8
- __init__ : class initializer
9
- config : Returns config data set
10
- get_data: Get the Data object that holds all processed Media Management base Assets
11
- credentials: Returns the token data
12
- request_header: Returns the request header for ServiceNow API calls
13
- parse_request_response: Parse the REST API responses and convert
14
- them to Python dict in a safe way
15
- exist_result_item: Check if an dict item is in the response
16
- of the ServiceNow API call
17
- get_result_value: Check if a defined value (based on a key) is in the ServiceNow API response
18
-
19
- authenticate : Authenticates at ServiceNow API
1
+ """OTMM Module to interact with the OpenText Media Management API.
2
+
3
+ The documentation for the used REST APIs can be found here:
4
+ - [https://developer.opentext.com](https://developer.opentext.com/ce/products/media-management)
20
5
  """
21
6
 
22
7
  __author__ = "Dr. Marc Diefenbruch"
23
- __copyright__ = "Copyright 2024, OpenText"
8
+ __copyright__ = "Copyright (C) 2024-2025, OpenText"
24
9
  __credits__ = ["Kai-Philip Gatzweiler"]
25
10
  __maintainer__ = "Dr. Marc Diefenbruch"
26
11
  __email__ = "mdiefenb@opentext.com"
27
12
 
28
- from json import JSONDecodeError
29
- import os
13
+ import json
30
14
  import logging
31
- import urllib.parse
15
+ import os
16
+ import platform
17
+ import sys
32
18
  import threading
33
19
  import traceback
20
+ import urllib.parse
21
+ from collections.abc import Callable
22
+ from datetime import datetime, timezone
23
+ from importlib.metadata import version
24
+ from json import JSONDecodeError
34
25
 
35
26
  import requests
27
+ from requests.adapters import HTTPAdapter
36
28
  from requests.exceptions import HTTPError, RequestException
37
29
 
38
- from pyxecm.helper.data import Data
30
+ from pyxecm.helper import Data
39
31
 
40
- logger = logging.getLogger("pyxecm.otmm")
32
+ APP_NAME = "pyxecm"
33
+ APP_VERSION = version("pyxecm")
34
+ MODULE_NAME = APP_NAME + ".otmm"
41
35
 
42
- REQUEST_HEADERS = {"Accept": "application/json", "Content-Type": "application/json"}
36
+ PYTHON_VERSION = f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}"
37
+ OS_INFO = f"{platform.system()} {platform.release()}"
38
+ ARCH_INFO = platform.machine()
39
+ REQUESTS_VERSION = requests.__version__
43
40
 
41
+ USER_AGENT = (
42
+ f"{APP_NAME}/{APP_VERSION} ({MODULE_NAME}/{APP_VERSION}; "
43
+ f"Python/{PYTHON_VERSION}; {OS_INFO}; {ARCH_INFO}; Requests/{REQUESTS_VERSION})"
44
+ )
45
+
46
+ REQUEST_HEADERS = {
47
+ "User-Agent": USER_AGENT,
48
+ "Content-Type": "application/x-www-form-urlencoded",
49
+ }
44
50
  REQUEST_TIMEOUT = 60
45
51
 
46
- ASSET_BASE_PATH = "/tmp/mediaassets"
52
+ default_logger = logging.getLogger(MODULE_NAME)
47
53
 
48
54
 
49
55
  class OTMM:
50
- """Used to retrieve and automate data extraction from OTMM."""
56
+ """Class OTMM is used to automate data extraction from OTMM."""
57
+
58
+ logger: logging.Logger = default_logger
51
59
 
52
60
  _config: dict
53
61
  _access_token = None
@@ -55,7 +63,11 @@ class OTMM:
55
63
  _thread_number = 3
56
64
  _download_dir = ""
57
65
  _business_unit_exclusions = None
66
+ _business_unit_inclusions = None
58
67
  _product_exclusions = None
68
+ _product_inclusions = None
69
+ _asset_exclusions = None
70
+ _asset_inclusions = None
59
71
 
60
72
  def __init__(
61
73
  self,
@@ -67,8 +79,51 @@ class OTMM:
67
79
  thread_number: int,
68
80
  download_dir: str,
69
81
  business_unit_exclusions: list | None = None,
82
+ business_unit_inclusions: list | None = None,
70
83
  product_exclusions: list | None = None,
71
- ):
84
+ product_inclusions: list | None = None,
85
+ asset_exclusions: list | None = None,
86
+ asset_inclusions: list | None = None,
87
+ logger: logging.Logger = default_logger,
88
+ ) -> None:
89
+ """Initialize the OTMM object.
90
+
91
+ Args:
92
+ base_url (str):
93
+ The base URL for accessing OTMM.
94
+ username (str):
95
+ The name of the user.
96
+ password (str):
97
+ The password of the user.
98
+ client_id (str):
99
+ The client ID for the credentials.
100
+ client_secret (str):
101
+ The client secret for the credentials.
102
+ thread_number (int):
103
+ The number of threads for parallel processing for data loads.
104
+ download_dir (str):
105
+ The filesystem directory to download the OTMM assets to.
106
+ business_unit_exclusions (list | None, optional):
107
+ An optional list of business units to exclude. Defaults to None.
108
+ business_unit_inclusions (list | None, optional):
109
+ An optional list of business units to include. Defaults to None.
110
+ product_exclusions (list | None, optional):
111
+ An optional list of products to exclude. Defaults to None.
112
+ product_inclusions (list | None, optional):
113
+ An optional list of products to include. Defaults to None.
114
+ asset_exclusions (list | None, optional):
115
+ An optional list of asset (IDs) to exclude. Defaults to None.
116
+ asset_inclusions (list | None, optional):
117
+ An optional list of asset (IDs) to include. Defaults to None.
118
+ logger (logging.Logger, optional):
119
+ The logging object to use for all log messages. Defaults to default_logger.
120
+
121
+ """
122
+
123
+ if logger != default_logger:
124
+ self.logger = logger.getChild("otmm")
125
+ for logfilter in logger.filters:
126
+ self.logger.addFilter(logfilter)
72
127
 
73
128
  # Initialize otmm_config as an empty dictionary
74
129
  otmm_config = {}
@@ -80,7 +135,8 @@ class OTMM:
80
135
  otmm_config["clientId"] = client_id
81
136
  otmm_config["clientSecret"] = client_secret
82
137
 
83
- otmm_config["restUrl"] = otmm_config["baseUrl"] + "/otmmapi/v6"
138
+ # Make sure we don't have double-slashes if base_url comes with a trailing slash:
139
+ otmm_config["restUrl"] = urllib.parse.urljoin(base_url, "/otmmapi/v6")
84
140
  otmm_config["tokenUrl"] = otmm_config["restUrl"] + "/sessions/oauth2/token"
85
141
  otmm_config["domainUrl"] = otmm_config["restUrl"] + "/lookupdomains"
86
142
  otmm_config["assetsUrl"] = otmm_config["restUrl"] + "/assets"
@@ -89,44 +145,75 @@ class OTMM:
89
145
  self._config = otmm_config
90
146
 
91
147
  self._session = requests.Session()
148
+ self._session.headers.update({"User-Agent": USER_AGENT})
149
+
150
+ self._adapter = HTTPAdapter(
151
+ pool_connections=thread_number,
152
+ pool_maxsize=thread_number,
153
+ )
154
+ self._session.mount("http://", self._adapter)
155
+ self._session.mount("https://", self._adapter)
92
156
 
93
- self._data = Data()
157
+ self._data = Data(logger=self.logger)
94
158
 
95
159
  self._thread_number = thread_number
96
160
 
97
161
  self._download_dir = download_dir
98
162
 
99
163
  self._business_unit_exclusions = business_unit_exclusions
164
+ self._business_unit_inclusions = business_unit_inclusions
100
165
  self._product_exclusions = product_exclusions
166
+ self._product_inclusions = product_inclusions
167
+ self._asset_exclusions = asset_exclusions
168
+ self._asset_inclusions = asset_inclusions
101
169
 
102
170
  # end method definition
103
171
 
104
- def thread_wrapper(self, target, *args, **kwargs):
105
- """Function to wrap around threads to catch exceptions during exection"""
172
+ def thread_wrapper(self, target: Callable, *args: tuple, **kwargs: dict) -> None:
173
+ """Wrap around threads to catch exceptions during execution.
174
+
175
+ Args:
176
+ target (Callable):
177
+ The method (callable) the Thread should run.
178
+ args (tuple):
179
+ The arguments for the method.
180
+ kwargs (dict):
181
+ Keyword arguments for the method.
182
+
183
+ """
184
+
106
185
  try:
107
186
  target(*args, **kwargs)
108
- except Exception as e:
187
+ except Exception:
109
188
  thread_name = threading.current_thread().name
110
- logger.error("Thread %s: failed with exception %s", thread_name, e)
111
- logger.error(traceback.format_exc())
189
+ self.logger.error(
190
+ "Thread '%s' failed!",
191
+ thread_name,
192
+ )
193
+ self.logger.error(traceback.format_exc())
112
194
 
113
195
  # end method definition
114
196
 
115
197
  def config(self) -> dict:
116
- """Returns the configuration dictionary
198
+ """Return the configuration dictionary.
117
199
 
118
200
  Returns:
119
- dict: Configuration dictionary
201
+ dict:
202
+ The configuration dictionary.
203
+
120
204
  """
205
+
121
206
  return self._config
122
207
 
123
208
  # end method definition
124
209
 
125
210
  def get_data(self) -> Data:
126
- """Get the Data object that holds all processed Media Management base Assets
211
+ """Get the data frame that holds all processed Media Management assets.
127
212
 
128
213
  Returns:
129
- Data: Datastructure with all processed assets.
214
+ Data:
215
+ Data frame with all processed assets.
216
+
130
217
  """
131
218
 
132
219
  return self._data
@@ -134,10 +221,18 @@ class OTMM:
134
221
  # end method definition
135
222
 
136
223
  def authenticate(self) -> str | None:
137
- """Authenticate at OTMM with client ID and client secret or with basic authentication."""
224
+ """Authenticate at OTMM.
225
+
226
+ Supports authentication with client ID and client secret
227
+ or with basic authentication.
228
+
229
+ Returns:
230
+ str | None:
231
+ The access token for OTMM.
232
+
233
+ """
138
234
 
139
235
  request_url = self.config()["tokenUrl"]
140
- headers = {"Content-Type": "application/x-www-form-urlencoded"}
141
236
  payload = {
142
237
  "username": self.config()["username"],
143
238
  "password": self.config()["password"],
@@ -149,130 +244,396 @@ class OTMM:
149
244
  try:
150
245
  response = self._session.post(
151
246
  request_url,
152
- headers=headers,
247
+ headers=REQUEST_HEADERS,
153
248
  data=urllib.parse.urlencode(payload),
154
249
  )
155
250
  response.raise_for_status()
156
251
 
157
- self._access_token = (
158
- response.json().get("token_info").get("oauth_token").get("accessToken")
159
- )
252
+ self._access_token = response.json().get("token_info").get("oauth_token").get("accessToken")
160
253
  self._session.headers.update(
161
- {"Authorization": f"Bearer {self._access_token}"}
254
+ {"Authorization": f"Bearer {self._access_token}"},
162
255
  )
163
256
 
164
- return self._access_token
257
+ except requests.exceptions.HTTPError as http_error:
258
+ self.logger.error("HTTP error requesting -> %s; error -> %s", request_url, str(http_error))
259
+ self.logger.debug("HTTP request header -> %s", str(REQUEST_HEADERS))
260
+ return None
261
+ except requests.exceptions.ConnectionError:
262
+ self.logger.error("Connection error requesting -> %s", request_url)
263
+ return None
264
+ except requests.exceptions.Timeout:
265
+ self.logger.error("Timeout error requesting -> %s", request_url)
266
+ return None
267
+ except requests.exceptions.RequestException:
268
+ self.logger.error("Request error requesting -> %s", request_url)
269
+ return None
270
+ except Exception:
271
+ self.logger.error("Unexpected error requesting -> %s", request_url)
272
+ return None
273
+
274
+ return self._access_token
275
+
276
+ # end method definition
277
+
278
+ def get_lookup_domains(self) -> dict | None:
279
+ """Get all OTMM lookup domains.
280
+
281
+ Args:
282
+ None
283
+
284
+ Returns:
285
+ dict | None:
286
+ All OTMM lookup domains.
287
+
288
+ Example:
289
+ {
290
+ 'lookup_domains_resource': {
291
+ 'lookup_domains': [
292
+ {
293
+ 'cacheable': True,
294
+ 'datatype': 'CHAR',
295
+ 'domainId': 'ARTESIA.DOMAIN.MEDIA_ANALYSIS.SOURCE.LANGUAGE',
296
+ 'domainValues': [
297
+ {
298
+ 'display_value': 'Hausa (Ghana)',
299
+ 'expired_value': False,
300
+ 'field_value': {...}
301
+ },
302
+ ...
303
+ ]
304
+ },
305
+ ...
306
+ ]
307
+ }
308
+ }
309
+
310
+ """
311
+
312
+ request_url = self.config()["domainUrl"]
165
313
 
166
- except requests.exceptions.HTTPError as http_err:
167
- logger.error("HTTP error occurred: %s", http_err)
168
- except requests.exceptions.ConnectionError as conn_err:
169
- logger.error("Connection error occurred: %s", conn_err)
170
- except requests.exceptions.Timeout as timeout_err:
171
- logger.error("Timeout error occurred: %s", timeout_err)
172
- except requests.exceptions.RequestException as req_err:
173
- logger.error("Request error occurred: %s", req_err)
174
- except Exception as e:
175
- logger.error("An unexpected error occurred: %s", e)
314
+ try:
315
+ response = self._session.get(
316
+ request_url,
317
+ )
318
+
319
+ response.raise_for_status()
320
+
321
+ except requests.exceptions.HTTPError as http_error:
322
+ self.logger.error("HTTP error requesting -> %s; error -> %s", request_url, str(http_error))
323
+ return None
324
+ except requests.exceptions.ConnectionError:
325
+ self.logger.error("Connection error requesting -> %s", request_url)
326
+ return None
327
+ except requests.exceptions.Timeout:
328
+ self.logger.error("Timeout error requesting -> %s", request_url)
329
+ return None
330
+ except requests.exceptions.RequestException:
331
+ self.logger.error("Request error requesting -> %s", request_url)
332
+ return None
333
+ except Exception:
334
+ self.logger.error("Unexpected error requesting -> %s", request_url)
335
+ return None
176
336
 
177
- return None
337
+ return response.json()
178
338
 
179
339
  # end method definition
180
340
 
181
- def get_products(self, domain: str = "OTMM.DOMAIN.OTM_PRODUCT") -> dict:
182
- """Get a dictionary with product names (keys) and IDs (values)
341
+ def get_lookup_domain(self, domain: str) -> dict | None:
342
+ """Get OTMM lookup domain with a given name.
183
343
 
184
344
  Args:
185
- domain (str, optional): Domain. Defaults to "OTMM.DOMAIN.OTM_PRODUCT".
345
+ domain (str):
346
+ The name / identifier of the domain.
347
+
186
348
  Returns:
187
- dict: Dictionary of all known products.
349
+ dict | None:
350
+ The response includes data for the given lookup domain
351
+ or None if the request fails.
352
+
353
+ Example:
354
+ {
355
+ 'lookup_domain_resource': {
356
+ 'lookup_domain': {
357
+ 'cacheable': True,
358
+ 'datatype': 'CHAR',
359
+ 'domainId': 'OTMM.DOMAIN.OTM_PRODUCT',
360
+ 'domainValues': [
361
+ {
362
+ 'active_from': '',
363
+ 'active_to': '',
364
+ 'description': 'Active Access',
365
+ 'display_value': 'Active Access',
366
+ 'expired_value': False,
367
+ 'field_value': {
368
+ 'type': 'string',
369
+ 'value': '213'
370
+ }
371
+ },
372
+ ...
373
+ ]
374
+ }
375
+ }
376
+ }
377
+
188
378
  """
189
379
 
190
- lookup_products = self.lookup_domains(domain)
380
+ request_url = self.config()["domainUrl"] + "/" + domain
191
381
 
192
- result = {}
193
- for product in lookup_products:
194
- result[product.get("display_value")] = product.get("field_value").get(
195
- "value"
382
+ try:
383
+ response = self._session.get(
384
+ request_url,
196
385
  )
197
386
 
198
- return result
387
+ response.raise_for_status()
388
+
389
+ except requests.exceptions.HTTPError as http_error:
390
+ self.logger.error("HTTP error requesting -> %s; error -> %s", request_url, str(http_error))
391
+ return None
392
+ except requests.exceptions.ConnectionError:
393
+ self.logger.error("Connection error requesting -> %s", request_url)
394
+ return None
395
+ except requests.exceptions.Timeout:
396
+ self.logger.error("Timeout error requesting -> %s", request_url)
397
+ return None
398
+ except requests.exceptions.RequestException:
399
+ self.logger.error("Request error requesting -> %s", request_url)
400
+ return None
401
+ except Exception:
402
+ self.logger.error("Unexpected error requesting -> %s", request_url)
403
+ return None
404
+
405
+ return response.json()
406
+
407
+ # end method definition
408
+
409
+ def get_lookup_domain_values(self, domain: str) -> list | None:
410
+ """Get values of an OTMM lookup domain with a given name.
411
+
412
+ Args:
413
+ domain (str):
414
+ The name / identifier of the domain.
415
+
416
+ Returns:
417
+ list | None:
418
+ The list of domain values or None if the request fails.
419
+
420
+ """
421
+
422
+ lookup_domain = self.get_lookup_domain(domain=domain)
423
+ if not lookup_domain:
424
+ self.logger.error(
425
+ "Cannot get lookup domain values for domain -> '%s'",
426
+ domain,
427
+ )
428
+ return None
429
+
430
+ values = lookup_domain.get("lookup_domain_resource").get("lookup_domain").get("domainValues")
431
+
432
+ return values
433
+
434
+ # end method definition
435
+
436
+ def get_products(self, domain: str = "OTMM.DOMAIN.OTM_PRODUCT") -> dict:
437
+ """Get a dictionary with product names (keys) and IDs (values).
438
+
439
+ Args:
440
+ domain (str, optional):
441
+ The identifier of the Domain. Defaults to "OTMM.DOMAIN.OTM_PRODUCT".
442
+
443
+ Returns:
444
+ dict:
445
+ Dictionary of all known products.
446
+
447
+ """
448
+
449
+ lookup_products = self.get_lookup_domain_values(domain) or []
450
+
451
+ # Comprehension to create a dictionary.
452
+ # Keys are the product names, values the product IDs.
453
+ # We remove leading and trailing spaces -
454
+ # OTMM data seems to have this in some places.
455
+ return {
456
+ product.get("display_value").strip(): product.get("field_value").get(
457
+ "value",
458
+ )
459
+ for product in lookup_products
460
+ }
199
461
 
200
462
  # end method definition
201
463
 
202
464
  def get_business_units(
203
- self, domain: str = "OTMM.DOMAIN.OTM_BUSINESS_UNIT.LU"
465
+ self,
466
+ domain: str = "OTMM.DOMAIN.OTM_BUSINESS_UNIT.LU",
204
467
  ) -> dict:
205
- """Get a dictionary with product names (keys) and IDs (values)
468
+ """Get a dictionary with business unit names (keys) and business unit IDs (values).
206
469
 
207
470
  Args:
208
- domain (str, optional): Domain. Defaults to "OTMM.DOMAIN.OTM_BUSINESS_UNIT.LU".
471
+ domain (str, optional):
472
+ The domain. Defaults to "OTMM.DOMAIN.OTM_BUSINESS_UNIT.LU".
209
473
 
210
474
  Returns:
211
- dict: Dictionary of all known business units.
475
+ dict:
476
+ Dictionary of all known business units.
477
+
212
478
  """
213
479
 
214
- lookup_bus = self.lookup_domains(domain)
215
- result = {}
216
- for bu in lookup_bus:
217
- result[bu.get("display_value")] = bu.get("field_value").get("value")
480
+ lookup_bus = self.get_lookup_domain_values(domain) or []
218
481
 
219
- return result
482
+ # Comprehension to create a dictionary.
483
+ # Keys are the business unit names, values the business unit IDs:
484
+ return {bu.get("display_value").strip(): bu.get("field_value").get("value") for bu in lookup_bus}
220
485
 
221
486
  # end method definition
222
487
 
223
- def lookup_domains(self, domain: str):
224
- """Lookup domain values in a given OTMM domain
488
+ def get_asset(self, asset_id: str) -> dict | None:
489
+ """Get an asset based on its ID.
225
490
 
226
491
  Args:
227
- domain (str): name / identifier of the domain.
492
+ asset_id (str):
493
+ The ID of the asset.
228
494
 
229
495
  Returns:
230
- _type_: _description_
496
+ dict | None:
497
+ A dictionary with asset data or None if the asset is not found.
498
+
499
+ Example:
500
+ {
501
+ 'asset_resource': {
502
+ 'asset': {
503
+ 'access_control_descriptor': {
504
+ 'permissions_map': {...}
505
+ },
506
+ 'asset_content_info': {
507
+ 'master_content': {...}
508
+ },
509
+ 'asset_id': 'e064571da79c926ee14b0850734b49edf42d9ba5',
510
+ 'asset_lock_state_last_update_date': '2024-04-16T15:03:48Z',
511
+ 'asset_lock_state_user_id': '153',
512
+ 'asset_state': 'NORMAL',
513
+ 'asset_state_last_update_date': '2024-04-16T15:03:48Z',
514
+ 'asset_state_user_id': '153',
515
+ 'checked_out': False,
516
+ 'content_editable': True,
517
+ 'content_lock_state_last_update_date': '2023-12-11T20:56:26Z',
518
+ 'content_lock_state_user_id': '202',
519
+ 'content_lock_state_user_name': 'ajohnson3',
520
+ 'content_size': 95873,
521
+ 'content_state': 'NORMAL',
522
+ 'content_state_last_update_date': '2023-12-11T20:56:26Z',
523
+ 'content_state_user_id': '202',
524
+ 'content_state_user_name': 'Amanda Johnson',
525
+ 'content_type': 'ACROBAT',
526
+ 'creator_id': '202',
527
+ 'date_imported': '2023-12-11T20:56:26Z',
528
+ 'date_last_updated': '2024-04-16T15:03:48Z',
529
+ 'deleted': False,
530
+ 'delivery_service_url': 'https://assets.opentext.com/adaptivemedia/rendition?id=726d14f14bb1ae93c3efda5a870399a20c991770',
531
+ 'expired': False,
532
+ 'import_job_id': 5776,
533
+ 'import_user_name': 'ajohnson3',
534
+ 'latest_version': True,
535
+ 'legacy_model_id': 104,
536
+ 'locked': False,
537
+ 'master_content_info': {
538
+ 'content_checksum': '2a31defcf7ad2feb7c557acb068a5c22',
539
+ 'content_data': {...},
540
+ 'content_kind': 'MASTER',
541
+ 'content_manager_id': 'ARTESIA.CONTENT.GOOGLE.CLOUD',
542
+ 'content_size': 95873,
543
+ 'height': -1,
544
+ 'id': 'b563035e050a89e58a921df8a4047a0673ad9691',
545
+ 'mime_type': 'application/pdf',
546
+ 'name': 'a-business-case-for-arcsight-soar-wp.pdf',
547
+ 'unit_of_size': 'BYTES',
548
+ 'url': '/otmmapi/v6/renditions/b563035e050a89e58a921df8a4047a0673ad9691',
549
+ 'width': -1
550
+ },
551
+ 'metadata_lock_state_user_name': 'ababigian',
552
+ 'metadata_model_id': 'OTM.MARKETING.MODEL',
553
+ 'metadata_state_user_name': 'Andra Babigian',
554
+ 'mime_type': 'application/pdf',
555
+ 'name': 'a-business-case-for-arcsight-soar-pp-en.pdf',
556
+ 'original_asset_id': '726d14f14bb1ae93c3efda5a870399a20c991770',
557
+ 'product_associations': False,
558
+ 'rendition_content': {
559
+ 'pdf_preview_content': {
560
+ 'content_checksum': '2a31defcf7ad2feb7c557acb068a5c22',
561
+ 'content_data': {
562
+ 'data_source': 'NO_CONTENT',
563
+ 'temp_file': False
564
+ },
565
+ 'content_kind': 'MASTER',
566
+ 'content_manager_id': 'ARTESIA.CONTENT.GOOGLE.CLOUD',
567
+ 'content_size': 95873,
568
+ 'height': -1,
569
+ 'id': 'b563035e050a89e58a921df8a4047a0673ad9691',
570
+ 'mime_type': 'application/pdf',
571
+ 'name': 'a-business-case-for-arcsight-soar-wp.pdf',
572
+ 'unit_of_size': 'BYTES',
573
+ 'url': '/otmmapi/v6/renditions/b563035e050a89e58a921df8a4047a0673ad9691',
574
+ 'width': -1
575
+ }
576
+ },
577
+ 'subscribed_to': False,
578
+ 'version': 3
579
+ }
580
+ }
581
+ }
582
+
231
583
  """
232
584
 
233
- request_url = self.config()["domainUrl"] + "/" + domain
585
+ request_url = self.config()["assetsUrl"] + "/" + asset_id
234
586
 
235
587
  try:
236
588
  response = self._session.get(
237
589
  request_url,
590
+ headers=REQUEST_HEADERS,
238
591
  )
239
592
 
240
593
  response.raise_for_status()
241
594
 
242
- except requests.exceptions.HTTPError as http_err:
243
- logger.error("HTTP error occurred: %s", http_err)
244
- except requests.exceptions.ConnectionError as conn_err:
245
- logger.error("Connection error occurred: %s", conn_err)
246
- except requests.exceptions.Timeout as timeout_err:
247
- logger.error("Timeout error occurred: %s", timeout_err)
248
- except requests.exceptions.RequestException as req_err:
249
- logger.error("Request error occurred: %s", req_err)
250
- except Exception as e:
251
- logger.error("An unexpected error occurred: %s", e)
252
-
253
- response = (
254
- response.json()
255
- .get("lookup_domain_resource")
256
- .get("lookup_domain")
257
- .get("domainValues")
258
- )
595
+ except requests.exceptions.HTTPError as http_error:
596
+ self.logger.error("HTTP error requesting -> %s; error -> %s", request_url, str(http_error))
597
+ self.logger.debug("HTTP request header -> %s", str(REQUEST_HEADERS))
598
+ return None
599
+ except requests.exceptions.ConnectionError:
600
+ self.logger.error("Connection error requesting -> %s", request_url)
601
+ return None
602
+ except requests.exceptions.Timeout:
603
+ self.logger.error("Timeout error requesting -> %s", request_url)
604
+ return None
605
+ except requests.exceptions.RequestException:
606
+ self.logger.error("Request error requesting -> %s", request_url)
607
+ return None
608
+ except Exception:
609
+ self.logger.error("Unexpected error requesting -> %s", request_url)
610
+ return None
259
611
 
260
- return response
612
+ return response.json()
261
613
 
262
614
  # end method definition
263
615
 
264
616
  def get_business_unit_assets(
265
- self, bu_id: int, offset: int = 0, limit: int = 200
617
+ self,
618
+ bu_id: str,
619
+ offset: int = 0,
620
+ limit: int = 200,
266
621
  ) -> list | None:
267
622
  """Get all Media Assets for a given Business Unit (ID) that are NOT related to a product.
268
623
 
269
624
  Args:
270
- bu_id (int): Identifier of the Business Unit.
271
- offset (int, optional): Result pagination. Starting ID. Defaults to 0.
272
- limit (int, optional): Result pagination. Page length. Defaults to 200.
625
+ bu_id (str):
626
+ Identifier of the Business Unit. DON'T USE INT HERE! OTMM delivers
627
+ strings for get_business_units()
628
+ offset (int, optional):
629
+ Result pagination. Starting ID. Defaults to 0.
630
+ limit (int, optional):
631
+ Result pagination. Page length. Defaults to 200.
273
632
 
274
633
  Returns:
275
- dict: Search Results
634
+ list | None:
635
+ Search Results
636
+
276
637
  """
277
638
 
278
639
  payload = {
@@ -285,22 +646,57 @@ class OTMM:
285
646
  "search_config_id": ["3"],
286
647
  "preference_id": ["ARTESIA.PREFERENCE.GALLERYVIEW.DISPLAYED_FIELDS"],
287
648
  "metadata_to_return": ["ARTESIA.FIELD.TAG"],
288
- "facet_restriction_list": '{"facet_restriction_list":{"facet_field_restriction":[{"type":"com.artesia.search.facet.FacetSimpleFieldRestriction","facet_generation_behavior":"EXCLUDE","field_id":"PRODUCT_CHAR_ID","value_list":[null]}]}}',
649
+ "facet_restriction_list": json.dumps(
650
+ {
651
+ "facet_restriction_list": {
652
+ "facet_field_restriction": [
653
+ {
654
+ "type": "com.artesia.search.facet.FacetSimpleFieldRestriction",
655
+ "facet_generation_behavior": "EXCLUDE",
656
+ "field_id": "PRODUCT_CHAR_ID",
657
+ "value_list": [None],
658
+ },
659
+ ],
660
+ },
661
+ },
662
+ ),
289
663
  "search_condition_list": [
290
- '{"search_condition_list":{"search_condition":[{"type":"com.artesia.search.SearchTabularCondition","metadata_table_id":"OTMM.FIELD.BUSINESS_UNIT.TAB","tabular_field_list":[{"type":"com.artesia.search.SearchTabularFieldCondition","metadata_field_id":"OTMM.COLUMN.BUSINESS_UNIT.TAB","relational_operator_id":"ARTESIA.OPERATOR.CHAR.CONTAINS","value":"'
291
- + str(bu_id)
292
- + '","left_paren":"(","right_paren":")"}]}]}}'
664
+ json.dumps(
665
+ {
666
+ "search_condition_list": {
667
+ "search_condition": [
668
+ {
669
+ "type": "com.artesia.search.SearchTabularCondition",
670
+ "metadata_table_id": "OTMM.FIELD.BUSINESS_UNIT.TAB",
671
+ "tabular_field_list": [
672
+ {
673
+ "type": "com.artesia.search.SearchTabularFieldCondition",
674
+ "metadata_field_id": "OTMM.COLUMN.BUSINESS_UNIT.TAB",
675
+ "relational_operator_id": "ARTESIA.OPERATOR.CHAR.CONTAINS",
676
+ "value": str(bu_id),
677
+ "left_paren": "(",
678
+ "right_paren": ")",
679
+ },
680
+ ],
681
+ },
682
+ ],
683
+ },
684
+ },
685
+ ),
293
686
  ],
294
687
  }
295
688
 
296
- flattened_data = {
297
- k: v if not isinstance(v, list) else ",".join(v) for k, v in payload.items()
298
- }
689
+ # Convert list values into comma-separated strings:
690
+ flattened_data = {k: v if not isinstance(v, list) else ",".join(v) for k, v in payload.items()}
299
691
 
692
+ # Use OTMM's search to find the assets for the business unit:
300
693
  search_result = self.search_assets(flattened_data)
301
694
 
302
- if not search_result or not "search_result_resource" in search_result:
303
- logger.error("No assets found via search!")
695
+ if not search_result or "search_result_resource" not in search_result:
696
+ self.logger.error(
697
+ "No assets found via search for business unit with ID -> '%s'!",
698
+ bu_id,
699
+ )
304
700
  return None
305
701
  search_result = search_result.get("search_result_resource")
306
702
 
@@ -315,7 +711,7 @@ class OTMM:
315
711
  flattened_data["after"] += hits
316
712
  search_result = self.search_assets(flattened_data)
317
713
 
318
- if not search_result or not "search_result_resource" in search_result:
714
+ if not search_result or "search_result_resource" not in search_result:
319
715
  break
320
716
 
321
717
  search_result = search_result.get("search_result_resource")
@@ -330,17 +726,29 @@ class OTMM:
330
726
  # end method definition
331
727
 
332
728
  def get_product_assets(
333
- self, product_id: int, offset: int = 0, limit: int = 200
729
+ self,
730
+ product_id: str,
731
+ offset: int = 0,
732
+ limit: int = 200,
334
733
  ) -> list | None:
335
734
  """Get all Media Assets for a given product (ID).
336
735
 
736
+ This does currently NOT include the asset metadata even though lead type
737
+ is set to "metadata" below as "metadata_to_return" is set to a single field.
738
+
337
739
  Args:
338
- product_id (int): Identifier of the product.
339
- offset (int, optional): Result pagination. Starting ID. Defaults to 0.
340
- limit (int, optional): Result pagination. Page length. Defaults to 200.
740
+ product_id (str):
741
+ Identifier of the product. DON'T USE `int` HERE!
742
+ OTMM delivers strings for get_products()
743
+ offset (int, optional):
744
+ Result pagination. Starting ID. Defaults to 0.
745
+ limit (int, optional):
746
+ Result pagination. Page length. Defaults to 200.
341
747
 
342
748
  Returns:
343
- dict: Search Results
749
+ list | None:
750
+ Search Results
751
+
344
752
  """
345
753
 
346
754
  payload = {
@@ -354,20 +762,39 @@ class OTMM:
354
762
  "preference_id": ["ARTESIA.PREFERENCE.GALLERYVIEW.DISPLAYED_FIELDS"],
355
763
  "metadata_to_return": ["ARTESIA.FIELD.TAG"],
356
764
  "search_condition_list": [
357
- '{"search_condition_list":{"search_condition":[{"type":"com.artesia.search.SearchTabularCondition","metadata_table_id":"OTM.TABLE.PRODUCT_TABLE_FIELD","tabular_field_list":[{"type":"com.artesia.search.SearchTabularFieldCondition","metadata_field_id":"PRODUCT_CHAR_ID","relational_operator_id":"ARTESIA.OPERATOR.CHAR.CONTAINS","value":"'
358
- + str(product_id)
359
- + '","left_paren":"(","right_paren":")"}]}]}}'
765
+ json.dumps(
766
+ {
767
+ "search_condition_list": {
768
+ "search_condition": [
769
+ {
770
+ "type": "com.artesia.search.SearchTabularCondition",
771
+ "metadata_table_id": "OTM.TABLE.PRODUCT_TABLE_FIELD",
772
+ "tabular_field_list": [
773
+ {
774
+ "type": "com.artesia.search.SearchTabularFieldCondition",
775
+ "metadata_field_id": "PRODUCT_CHAR_ID",
776
+ "relational_operator_id": "ARTESIA.OPERATOR.CHAR.CONTAINS",
777
+ "value": str(product_id),
778
+ "left_paren": "(",
779
+ "right_paren": ")",
780
+ },
781
+ ],
782
+ },
783
+ ],
784
+ },
785
+ },
786
+ ),
360
787
  ],
361
788
  }
362
789
 
363
- flattened_data = {
364
- k: v if not isinstance(v, list) else ",".join(v) for k, v in payload.items()
365
- }
790
+ # Convert list values into comma-separated strings:
791
+ flattened_data = {k: v if not isinstance(v, list) else ",".join(v) for k, v in payload.items()}
366
792
 
367
- search_result = self.search_assets(flattened_data)
793
+ # Use OTMM's search to find the assets for the product:
794
+ search_result = self.search_assets(payload=flattened_data)
368
795
 
369
- if not search_result or not "search_result_resource" in search_result:
370
- logger.error("No assets found via search!")
796
+ if not search_result or "search_result_resource" not in search_result:
797
+ self.logger.error("No assets found via search!")
371
798
  return None
372
799
  search_result = search_result.get("search_result_resource")
373
800
 
@@ -378,11 +805,12 @@ class OTMM:
378
805
 
379
806
  hits_remaining = hits_total - hits
380
807
 
808
+ # Iterate through all result pages:
381
809
  while hits_remaining > 0:
382
810
  flattened_data["after"] += hits
383
- search_result = self.search_assets(flattened_data)
811
+ search_result = self.search_assets(payload=flattened_data)
384
812
 
385
- if not search_result or not "search_result_resource" in search_result:
813
+ if not search_result or "search_result_resource" not in search_result:
386
814
  break
387
815
 
388
816
  search_result = search_result.get("search_result_resource")
@@ -401,39 +829,53 @@ class OTMM:
401
829
  asset_id: str,
402
830
  asset_name: str,
403
831
  download_url: str = "",
404
- skip_existing: bool = True,
832
+ asset_modification_date: str | None = None,
405
833
  ) -> bool:
406
- """Download a given Media Asset
834
+ """Download a given media asset.
407
835
 
408
836
  Args:
409
- asset_id (str): ID of the asset to download
410
- asset_name (str): Name of the assets - becomes the file name.
411
- download_url (str, optiona): URL to download the asset (optional).
837
+ asset_id (str):
838
+ ID of the asset to download. This becomes the file name.
839
+ asset_name (str):
840
+ The name of the asset.
841
+ download_url (str, optional):
842
+ URL to download the asset (optional).
843
+ asset_modification_date (str | None, optional):
844
+ The last asset modification in OpenText Media Management.
412
845
 
413
846
  Returns:
414
- bool: True = success, False = failure
847
+ bool:
848
+ True = success, False = failure
849
+
415
850
  """
416
- # url = f"{self.base_url}/assets/v1/{asset_id}/download"
417
851
 
418
- if download_url:
419
- request_url = download_url
420
- else:
421
- request_url = self.config()["assetsUrl"] + "/" + asset_id + "/contens"
852
+ request_url = download_url if download_url else self.config()["assetsUrl"] + "/" + asset_id + "/contents"
422
853
 
854
+ # We use the Asset ID as the filename to avoid name collisions:
423
855
  file_name = os.path.join(self._download_dir, asset_id)
424
856
 
425
857
  if os.path.exists(file_name):
426
- if skip_existing:
427
- logger.debug(
428
- "OpenText Media Management asset has been downloaded before skipping download -> '%s' (%s) to -> %s...",
858
+ if asset_modification_date:
859
+ file_mod_time = datetime.fromtimestamp(os.path.getmtime(file_name), tz=timezone.utc)
860
+ date_last_updated = datetime.strptime(
861
+ asset_modification_date,
862
+ "%Y-%m-%dT%H:%M:%SZ",
863
+ ).replace(tzinfo=timezone.utc)
864
+ download_up_to_date: bool = file_mod_time >= date_last_updated
865
+ else:
866
+ download_up_to_date = True
867
+
868
+ if download_up_to_date:
869
+ self.logger.debug(
870
+ "Asset -> '%s' (%s) has been downloaded before and is up to date. Skipping download to -> %s...",
429
871
  asset_name,
430
872
  asset_id,
431
873
  file_name,
432
874
  )
433
875
  return True
434
876
  else:
435
- logger.debug(
436
- "OpenText Media Management asset has been downloaded before. Update download -> '%s' (%s) to -> %s...",
877
+ self.logger.debug(
878
+ "Asset -> '%s' (%s) has been downloaded before, but it is outdated. Updating download to -> %s...",
437
879
  asset_name,
438
880
  asset_id,
439
881
  file_name,
@@ -445,8 +887,8 @@ class OTMM:
445
887
  # Create the directory
446
888
  os.makedirs(self._download_dir)
447
889
 
448
- logger.info(
449
- "Downloading OpenText Media Management asset -> '%s' (%s) to -> %s...",
890
+ self.logger.info(
891
+ "Downloading asset -> '%s' (%s) to -> %s...",
450
892
  asset_name,
451
893
  asset_id,
452
894
  file_name,
@@ -456,161 +898,516 @@ class OTMM:
456
898
  with open(file_name, "wb") as f:
457
899
  for chunk in response.iter_content(chunk_size=8192):
458
900
  f.write(chunk)
901
+ except HTTPError as http_error:
902
+ self.logger.error("HTTP error requesting -> %s; error -> %s", request_url, str(http_error))
903
+ return False
904
+ except RequestException:
905
+ self.logger.error("Request error requesting -> %s!", request_url)
906
+ return False
907
+ except Exception:
908
+ self.logger.error("Unexpected error requesting -> %s!", request_url)
909
+ return False
910
+
911
+ return True
912
+
913
+ # end method definition
914
+
915
+ def remove_stale_download(
916
+ self,
917
+ asset_id: str,
918
+ asset_name: str = "",
919
+ ) -> bool:
920
+ """Remove stale download file for an expired or deleted asset.
921
+
922
+ Args:
923
+ asset_id (str):
924
+ The ID of the asset to delete in the file system.
925
+ asset_name (str, optional):
926
+ The name of the asset. Just used for logging.
927
+
928
+ Returns:
929
+ bool: True = success, False = failure
930
+
931
+ """
932
+
933
+ file_name = os.path.join(self._download_dir, asset_id)
934
+
935
+ if os.path.exists(file_name):
936
+ self.logger.debug(
937
+ "Deleting stale download file -> '%s' for asset %s...",
938
+ file_name,
939
+ "-> '{}' ({})".format(asset_name, asset_id) if asset_name else "-> {}".format(asset_id),
940
+ )
941
+ os.remove(file_name)
459
942
  return True
460
- except HTTPError as http_err:
461
- logger.error("HTTP error occurred -> %s!", str(http_err))
462
- except RequestException as req_err:
463
- logger.error("Request error occurred -> %s!", str(req_err))
464
- except Exception as err:
465
- logger.error("An error occurred -> %s!", str(err))
466
943
 
467
944
  return False
468
945
 
469
946
  # end method definition
470
947
 
471
- def search_assets(self, payload: dict):
948
+ def search_assets(self, payload: dict) -> dict | None:
472
949
  """Search an asset based on the given parameters / criterias.
473
950
 
474
951
  Args:
475
- payload (dict): in the format of:
476
- payload = {
477
- "PRODUCT_CHAR_ID": "Extended ECM for Engineering",
478
- "BUSINESS_AREA_CHAR_ID": "Content",
479
- "keyword_query": "*",
480
- "limit": "5",
481
- }
952
+ payload (dict):
953
+ In the format of:
954
+ payload = {
955
+ "PRODUCT_CHAR_ID": "Extended ECM for Engineering",
956
+ "BUSINESS_AREA_CHAR_ID": "Content",
957
+ "keyword_query": "*",
958
+ "limit": "5",
959
+ }
482
960
 
483
961
  Returns:
484
- _type_: JSON search results
962
+ dict | None:
963
+ The search results.
964
+
965
+ Example:
966
+ {
967
+ 'search_result_resource': {
968
+ 'search_result': {
969
+ 'asset_group_count': {
970
+ 'entry': [...]
971
+ },
972
+ 'asset_id_list': [
973
+ '00084f808d1331bca1f24134bde9cd8e742fe24a',
974
+ '000af201d7130d1bb2778af672f3bfb554ea965a',
975
+ '000f9594985b766ee495c27172446d5c9c4e0ebf',
976
+ '0012d344dc39d4d23aaeb04fbe9db3b21daee6e0',
977
+ '00135d36232d66b6f11e0020f317244d08a613d1'
978
+ ],
979
+ 'contains_invalid_conditions': False,
980
+ 'facet_field_response_list': [
981
+ {...},
982
+ {...},
983
+ ...
984
+ ],
985
+ 'hit_count': 5,
986
+ 'offset': 0,
987
+ 'total_hit_count': 11886
988
+ },
989
+ 'asset_list': [
990
+ {
991
+ 'access_control_descriptor': {...},
992
+ 'asset_content_info': {...},
993
+ 'asset_id': '00084f808d1331bca1f24134bde9cd8e742fe24a',
994
+ 'asset_lock_state_last_update_date': '2024-01-03T16:47:22Z',
995
+ 'asset_lock_state_user_id': '166',
996
+ 'asset_state': 'NORMAL',
997
+ 'asset_state_last_update_date': '2024-01-03T16:47:22Z',
998
+ 'asset_state_user_id': '166',
999
+ 'checked_out': False,
1000
+ 'content_editable': True,
1001
+ 'content_lock_state_last_update_date': '2021-11-22T16:32:59Z',
1002
+ 'content_lock_state_user_id': '49',
1003
+ 'content_lock_state_user_name': 'sspasik',
1004
+ 'content_size': 3103,
1005
+ 'content_state': 'NORMAL',
1006
+ 'content_state_last_update_date': '2021-11-22T16:32:57Z',
1007
+ 'content_state_user_id': '49',
1008
+ 'content_state_user_name': 'Srgjan Spasik',
1009
+ 'content_type': 'BITMAP',
1010
+ ...
1011
+ },
1012
+ ...
1013
+ ]
1014
+ }
1015
+ }
1016
+
485
1017
  """
486
1018
 
487
1019
  request_url = self.config()["searchUrl"]
488
1020
 
489
- headers = {"Content-Type": "application/x-www-form-urlencoded"}
490
-
491
1021
  encoded_payload = urllib.parse.urlencode(payload, safe="/:")
492
1022
 
493
1023
  try:
494
1024
  response = self._session.post(
495
1025
  request_url,
496
- headers=headers,
1026
+ headers=REQUEST_HEADERS,
497
1027
  data=encoded_payload,
498
1028
  )
499
1029
 
500
1030
  response.raise_for_status()
501
1031
 
502
- except requests.exceptions.HTTPError as http_err:
503
- logger.error("HTTP error occurred: %s", http_err)
504
- except requests.exceptions.ConnectionError as conn_err:
505
- logger.error("Connection error occurred: %s", conn_err)
506
- except requests.exceptions.Timeout as timeout_err:
507
- logger.error("Timeout error occurred: %s", timeout_err)
508
- except requests.exceptions.RequestException as req_err:
509
- logger.error("Request error occurred: %s", req_err)
510
- except Exception as e:
511
- logger.error("An unexpected error occurred: %s", e)
1032
+ except requests.exceptions.HTTPError as http_error:
1033
+ self.logger.error("HTTP error requesting -> %s; error -> %s", request_url, str(http_error))
1034
+ self.logger.debug("HTTP request header -> %s", str(REQUEST_HEADERS))
1035
+ return None
1036
+ except requests.exceptions.ConnectionError:
1037
+ self.logger.error("Connection error requesting -> %s", request_url)
1038
+ return None
1039
+ except requests.exceptions.Timeout:
1040
+ self.logger.error("Timeout error requesting -> %s", request_url)
1041
+ return None
1042
+ except requests.exceptions.RequestException:
1043
+ self.logger.error("Request error requesting -> %s", request_url)
1044
+ return None
1045
+ except Exception:
1046
+ self.logger.error("Unexpected error requesting -> %s", request_url)
1047
+ return None
512
1048
 
513
1049
  return response.json()
514
1050
 
515
1051
  # end method definition
516
1052
 
517
- def get_asset_metadata(self, asset_id: str) -> dict:
518
- """Retrieve metadata of an asset based on the given parameters / criterias.
1053
+ def get_asset_details(
1054
+ self,
1055
+ asset_id: str,
1056
+ level_of_detail: str = "slim",
1057
+ load_multilingual_field_values: bool = True,
1058
+ load_subscribed_to: bool = True,
1059
+ load_asset_content_info: bool = True,
1060
+ load_metadata: bool = True,
1061
+ load_inherited_metadata: bool = True,
1062
+ load_thumbnail_info: bool = True,
1063
+ load_preview_info: bool = True,
1064
+ load_pdf_preview_info: bool = True,
1065
+ load_3d_preview_info: bool = True,
1066
+ load_destination_links: bool = True,
1067
+ load_security_policies: bool = True,
1068
+ load_path: bool = True,
1069
+ load_deep_zoom_info: bool = True,
1070
+ ) -> dict | None:
1071
+ """Retrieve details of an asset based on the given parameters / criteria.
519
1072
 
520
1073
  Args:
521
- asset_id (str): asset_id of the asset to query
1074
+ asset_id (str):
1075
+ The ID of the asset to query.
1076
+ level_of_detail (str, optional):
1077
+ Can either be "slim" or "full". "slim" is the default.
1078
+ load_multilingual_field_values (bool, optional):
1079
+ If True, load multilingual fields, default = True.
1080
+ load_subscribed_to (bool, optional):
1081
+ If True, load subscriber information, default = True.
1082
+ load_asset_content_info (bool, optional):
1083
+ If True, load content information, default = True.
1084
+ load_metadata (bool, optional):
1085
+ If True, load metadata, default = True.
1086
+ load_inherited_metadata (bool, optional):
1087
+ If True, load inherited metadata, default = True.
1088
+ load_thumbnail_info (bool, optional):
1089
+ If True, load thumbnail information, default = True.
1090
+ load_preview_info (bool, optional):
1091
+ If True, load preview information, default = True.
1092
+ load_pdf_preview_info (bool, optional):
1093
+ If True, load PDF preview information, default = True.
1094
+ load_3d_preview_info (bool, optional):
1095
+ If True, load 3D preview information, default = True.
1096
+ load_destination_links (bool, optional):
1097
+ If True, load destination links, default = True.
1098
+ load_security_policies (bool, optional):
1099
+ If True, load security policies, default = True.
1100
+ load_path (bool, optional):
1101
+ If True, load path, default = True.
1102
+ load_deep_zoom_info (bool, optional):
1103
+ If True, load deep zoom information, default = True.
522
1104
 
523
1105
  Returns:
524
- dict: Metadata information as dict with values as list
525
-
526
- example:
527
- {
528
- 'OTMM.CUSTOM.FIELD_TITLE': [],
529
- 'OTMM.CUSTOM.FIELD_DESCRIPTION': [],
530
- 'OTMM.CUSTOM.FIELD_KEYWORDS': [],
531
- 'CONTENT_TYPE_COMBO_CHAR_ID': [],
532
- 'OTM.TABLE.APPROVED_USAGE_FIELD': [],
533
- 'OTMM.FIELD.RESOURCE_LIBRARY.TAB': [],
534
- 'LANGUAGE_COMBO_CHAR_ID': [],
535
- 'OTMM.CUSTOM.FIELD_PART_NUMBER': [],
536
- 'OTMM.FIELD.BUSINESS_UNIT.TAB': ['Content'],
537
- 'OTM.TABLE.PRODUCT_TABLE_FIELD': ['Vendor Invoice Management for SAP'],
538
- 'OTM.TABLE.INDUSTRY_TABLE_FIELD': [],
539
- 'OTMM.CUSTOM.FIELD_URL': [],
540
- 'OTMM.CUSTOM.FIELD_PREVIOUS_URL': [],
541
- 'OTMM.CUSTOM.FIELD_CONTENT_OWNER': [],
542
- 'OTMM.CUSTOM.FIELD_EMAIL': [],
543
- 'OTMM.CUSTOM.FIELD_JOB_NUMBER': [],
544
- 'OTM.TABLE.BUSINESS_AREA_TABLE_FIELD': [],
545
- 'OTM.TABLE.JOURNEY_TABLE_FIELD': ['Buy', 'Try', 'Learn'],
546
- 'OTMM.FIELD.PERSONA.TAB': [],
547
- 'OTMM.FIELD.SERVICES.TAB': [],
548
- 'OTMM.FIELD.REGION.TAB': [],
549
- 'OTMM.FIELD.PURPOSE.TAB': [],
550
- 'AODA_CHAR_ID': [],
551
- 'REVIEW_CADENCE_CHAR_ID': [],
552
- 'CONTENT_CREATED_DATE_ID': [],
553
- 'ARTESIA.FIELD.EXPIRATION DATE': [],
554
- 'OTMM.CUSTOM.FIELD_REAL_COMMENTS': []
555
- }
1106
+ dict | None:
1107
+ Metadata information as dict with values as list
1108
+
1109
+ Example:
1110
+ {
1111
+ 'asset_resource': {
1112
+ 'asset': {
1113
+ 'access_control_descriptor': {
1114
+ 'permissions_map': {...}
1115
+ },
1116
+ 'asset_content_info': {
1117
+ 'master_content': {...}
1118
+ },
1119
+ 'asset_id': 'e064571da79c926ee14b0850734b49edf42d9ba5',
1120
+ 'asset_lock_state_last_update_date': '2024-04-16T15:03:48Z',
1121
+ 'asset_lock_state_user_id': '153',
1122
+ 'asset_state': 'NORMAL',
1123
+ 'asset_state_last_update_date': '2024-04-16T15:03:48Z',
1124
+ 'asset_state_user_id': '153',
1125
+ 'checked_out': False,
1126
+ 'content_editable': True,
1127
+ 'content_lock_state_last_update_date': '2023-12-11T20:56:26Z',
1128
+ 'content_lock_state_user_id': '202',
1129
+ 'content_lock_state_user_name': 'ajohnson3',
1130
+ 'content_size': 95873,
1131
+ 'content_state': 'NORMAL',
1132
+ 'content_state_last_update_date': '2023-12-11T20:56:26Z',
1133
+ 'content_state_user_id': '202',
1134
+ 'content_state_user_name': 'Amanda Johnson',
1135
+ 'content_type': 'ACROBAT',
1136
+ 'creator_id': '202',
1137
+ 'date_imported': '2023-12-11T20:56:26Z',
1138
+ 'date_last_updated': '2024-04-16T15:03:48Z',
1139
+ 'deleted': False,
1140
+ 'delivery_service_url': 'https://assets.opentext.com/adaptivemedia/rendition?id=726d14f14bb1ae93c3efda5a870399a20c991770',
1141
+ 'expired': False,
1142
+ 'import_job_id': 5776,
1143
+ 'import_user_name': 'ajohnson3',
1144
+ 'latest_version': True,
1145
+ 'legacy_model_id': 104,
1146
+ 'links': {
1147
+ 'links': [...],
1148
+ 'source_id': 'e064571da79c926ee14b0850734b49edf42d9ba5'
1149
+ },
1150
+ 'locked': False,
1151
+ 'master_content_info': {
1152
+ 'content_checksum': '2a31defcf7ad2feb7c557acb068a5c22',
1153
+ 'content_data': {...},
1154
+ 'content_kind': 'MASTER',
1155
+ 'content_manager_id': 'ARTESIA.CONTENT.GOOGLE.CLOUD',
1156
+ 'content_size': 95873,
1157
+ 'height': -1,
1158
+ 'id': 'b563035e050a89e58a921df8a4047a0673ad9691',
1159
+ 'mime_type': 'application/pdf',
1160
+ 'name': 'a-business-case-for-arcsight-soar-wp.pdf',
1161
+ 'unit_of_size': 'BYTES',
1162
+ 'url': '/otmmapi/v6/renditions/b563035e050a89e58a921df8a4047a0673ad9691',
1163
+ 'width': -1
1164
+ },
1165
+ 'metadata': {
1166
+ 'type': 'com.artesia.metadata.MetadataModel',
1167
+ 'id': 'OTM.MARKETING.MODEL',
1168
+ 'name': 'OTM Marketing Tags',
1169
+ 'metadata_element_list': [...],
1170
+ 'has_multilingual_fields': False,
1171
+ 'legacy_id': 104
1172
+ },
1173
+ 'metadata_lock_state_user_name': 'ababigian',
1174
+ 'metadata_model_id': 'OTM.MARKETING.MODEL',
1175
+ 'metadata_state_user_name': 'Andra Babigian',
1176
+ 'mime_type': 'application/pdf',
1177
+ 'name': 'a-business-case-for-arcsight-soar-pp-en.pdf',
1178
+ 'original_asset_id': '726d14f14bb1ae93c3efda5a870399a20c991770',
1179
+ 'path_list': [
1180
+ {...}
1181
+ ],
1182
+ 'product_associations': False,
1183
+ 'rendition_content': {
1184
+ 'pdf_preview_content': {...}
1185
+ },
1186
+ 'security_policy_list': [
1187
+ {...}
1188
+ ],
1189
+ 'subscribed_to': False,
1190
+ 'version': 3
1191
+ }
1192
+ }
1193
+ }
1194
+
556
1195
  """
557
1196
 
558
- request_url = self.config()["assetsUrl"] + f"/{asset_id}"
559
- headers = {"Content-Type": "application/x-www-form-urlencoded"}
1197
+ request_url = self.config()["assetsUrl"] + "/" + asset_id
560
1198
 
561
1199
  params = {
562
1200
  "load_type": "custom",
563
- "level_of_detail": "slim",
564
- "data_load_request": '{"data_load_request":{"load_multilingual_field_values":"true","load_subscribed_to":"true","load_asset_content_info":"true","load_metadata":"true","load_inherited_metadata":"true","load_thumbnail_info":"true","load_preview_info":"true", "load_pdf_preview_info":"true", "load_3d_preview_info" : "true","load_destination_links":"true", "load_security_policies":"true","load_path":"true","load_deep_zoom_info":"true"}}',
1201
+ "level_of_detail": level_of_detail,
1202
+ "data_load_request": json.dumps(
1203
+ {
1204
+ "data_load_request": {
1205
+ "load_multilingual_field_values": load_multilingual_field_values,
1206
+ "load_subscribed_to": load_subscribed_to,
1207
+ "load_asset_content_info": load_asset_content_info,
1208
+ "load_metadata": load_metadata,
1209
+ "load_inherited_metadata": load_inherited_metadata,
1210
+ "load_thumbnail_info": load_thumbnail_info,
1211
+ "load_preview_info": load_preview_info,
1212
+ "load_pdf_preview_info": load_pdf_preview_info,
1213
+ "load_3d_preview_info": load_3d_preview_info,
1214
+ "load_destination_links": load_destination_links,
1215
+ "load_security_policies": load_security_policies,
1216
+ "load_path": load_path,
1217
+ "load_deep_zoom_info": load_deep_zoom_info,
1218
+ },
1219
+ },
1220
+ ),
565
1221
  }
566
1222
 
567
1223
  try:
568
- response = self._session.get(request_url, headers=headers, params=params)
1224
+ response = self._session.get(
1225
+ request_url,
1226
+ headers=REQUEST_HEADERS,
1227
+ params=params,
1228
+ )
569
1229
 
570
1230
  response.raise_for_status()
571
1231
 
572
- except requests.exceptions.HTTPError as http_err:
573
- logger.error("HTTP error occurred: %s", http_err)
574
- except requests.exceptions.ConnectionError as conn_err:
575
- logger.error("Connection error occurred: %s", conn_err)
576
- except requests.exceptions.Timeout as timeout_err:
577
- logger.error("Timeout error occurred: %s", timeout_err)
578
- except requests.exceptions.RequestException as req_err:
579
- logger.error("Request error occurred: %s", req_err)
580
- except Exception as e:
581
- logger.error("An unexpected error occurred: %s", e)
1232
+ except requests.exceptions.HTTPError as http_error:
1233
+ self.logger.error("HTTP error requesting -> %s; error -> %s", request_url, str(http_error))
1234
+ self.logger.debug("HTTP request header -> %s", str(REQUEST_HEADERS))
1235
+ return None
1236
+ except requests.exceptions.ConnectionError:
1237
+ self.logger.error("Connection error requesting -> %s", request_url)
1238
+ return None
1239
+ except requests.exceptions.Timeout:
1240
+ self.logger.error("Timeout error requesting -> %s", request_url)
1241
+ return None
1242
+ except requests.exceptions.RequestException:
1243
+ self.logger.error("Request error requesting -> %s", request_url)
1244
+ return None
1245
+ except Exception:
1246
+ self.logger.error("Unexpected error requesting -> %s", request_url)
1247
+ return None
1248
+
1249
+ return response.json()
1250
+
1251
+ # end method definition
1252
+
1253
+ def prepare_asset_data(self, asset_id: str, asset: dict | None = None) -> dict:
1254
+ """Prepare the asset data for the Pandas Data frame.
1255
+
1256
+ The asset data is either provided with the asset parameter or
1257
+ retrieved by the method.
1258
+
1259
+ Args:
1260
+ asset_id (str):
1261
+ The ID of the asset.
1262
+ asset (dict | None, optional):
1263
+ If the asset data structure is already available pass it
1264
+ with this parameter. Make sure the asset data was retrieved
1265
+ to include the metadata. If None is provided then the method
1266
+ will retrieve the asset data (including metadata) on the fly.
1267
+
1268
+ Returns:
1269
+ dict:
1270
+ The simplified / flat structure for the Pandas data frame.
1271
+
1272
+ Example:
1273
+ {
1274
+ 'OTMM_CUSTOM_FIELD_TITLE': 'A Business Case for ArcSight SOAR',
1275
+ 'OTMM_CUSTOM_FIELD_DESCRIPTION': 'Cybersecurity is a complex problem.',
1276
+ 'OTMM_CUSTOM_FIELD_KEYWORDS': 'SOAR, SIEM, cybersecurity, SecOps, SOC, cybersecurity automation',
1277
+ 'CONTENT_TYPE_COMBO_CHAR_ID': None,
1278
+ 'OTMM_FIELD_IMAGE_TYPE': None,
1279
+ 'OTM_TABLE_APPROVED_USAGE_FIELD': None,
1280
+ 'OTMM_FIELD_RESOURCE_LIBRARY_TAB': ['Resource Library'],
1281
+ 'LANGUAGE_COMBO_CHAR_ID': 'English',
1282
+ 'OTMM_CUSTOM_FIELD_PART_NUMBER': '762-000033-003',
1283
+ 'OTMM_FIELD_AVIATOR': None,
1284
+ 'OTMM_FIELD_BUSINESS_UNIT_TAB': ['Cybersecurity'],
1285
+ 'OTM_TABLE_PRODUCT_TABLE_FIELD': ['ArcSight Enterprise Security Manager', 'Arcsight Intelligence'],
1286
+ 'OTMM_FIELD_PRODUCT_NEW_TAB': [],
1287
+ 'OTMM_FIELD_MARKET_SEGMENT_TAB': [],
1288
+ 'OTM_TABLE_INDUSTRY_TABLE_FIELD': [],
1289
+ 'OTMM_CUSTOM_FIELD_URL': None,
1290
+ 'OTMM_CUSTOM_FIELD_PREVIOUS_URL': 'https://www.microfocus.com/media/white-paper/a-business-case-for-arcsight-soar-wp.pdf',
1291
+ 'OTMM_CUSTOM_FIELD_CONTENT_OWNER': 'Steve Jones',
1292
+ 'OTMM_CUSTOM_FIELD_EMAIL': 'sjones2@opentext.com',
1293
+ 'OTMM_CUSTOM_FIELD_JOB_NUMBER': [],
1294
+ 'OTM_TABLE_BUSINESS_AREA_TABLE_FIELD': [],
1295
+ 'OTM_TABLE_JOURNEY_TABLE_FIELD': [],
1296
+ 'OTMM_FIELD_PERSONA_TAB': [],
1297
+ 'OTMM_FIELD_SERVICES_TAB': [],
1298
+ 'OTMM_FIELD_REGION_TAB': [],
1299
+ 'OTMM_FIELD_PURPOSE_TAB': ['Marketing'],
1300
+ 'AODA_CHAR_ID': 'Yes',
1301
+ 'REVIEW_CADENCE_CHAR_ID': 'Quarterly',
1302
+ 'CONTENT_CREATED_DATE_ID': '2023-10-18T07:00:00Z',
1303
+ 'ARTESIA_FIELD_EXPIRATIONDATE': None,
1304
+ 'OTMM_CUSTOM_FIELD_REAL_COMMENTS': None
1305
+ }
1306
+
1307
+ """
1308
+
1309
+ # If the asset dictionary is not already provided
1310
+ # we retrieve it here:
1311
+ if not asset:
1312
+ asset = self.get_asset_details(asset_id=asset_id)
1313
+ if asset is None:
1314
+ self.logger.error(
1315
+ "Cannot get asset details for asset with ID -> %s",
1316
+ asset_id,
1317
+ )
1318
+ return {}
1319
+
1320
+ # We drill down to the actual asset data:
1321
+ if "asset_resource" in asset:
1322
+ asset = asset["asset_resource"]
1323
+ if "asset" in asset:
1324
+ asset = asset["asset"]
1325
+
1326
+ if "metadata" not in asset:
1327
+ self.logger.error(
1328
+ "The provided data for asset with ID -> '%s' was retrieved without metadata - cannot prepare metadata fields.",
1329
+ asset_id,
1330
+ )
1331
+ return {}
582
1332
 
583
1333
  # Read Metadata from nested structure
584
1334
  try:
585
- metadata = (
586
- response.json()
587
- .get("asset_resource", {})
588
- .get("asset", {})
589
- .get("metadata", {})
590
- .get("metadata_element_list", [])[0]
591
- .get("metadata_element_list", [])
1335
+ """
1336
+ metadata is a list of dictionaries. Each item has these keys:
1337
+ * type (str)
1338
+ * id (str)
1339
+ * name (str)
1340
+ * value (dict)
1341
+ - cascading_domain_value (bool)
1342
+ - domain_value (bool)
1343
+ - is_locked (bool)
1344
+ - value (dict)
1345
+ + type (str)
1346
+ + value (str)
1347
+ * metadata_element_list (list)
1348
+ * display_value
1349
+ """
1350
+ metadata_list = (
1351
+ asset.get("metadata", {}).get("metadata_element_list", [])[0].get("metadata_element_list", [])
592
1352
  )
593
1353
  except JSONDecodeError:
594
- logger.error("Cannot decode JSON response for assset_id -> %s", asset_id)
1354
+ self.logger.error(
1355
+ "Cannot decode JSON response for asset with ID -> %s",
1356
+ asset_id,
1357
+ )
1358
+ return {}
1359
+ except IndexError:
1360
+ self.logger.error(
1361
+ "Cannot find metadata in asset with ID -> %s",
1362
+ asset_id,
1363
+ )
595
1364
  return {}
596
1365
 
597
- # Generate empty result dict
1366
+ # Initialize empty result dict
598
1367
  result = {}
599
1368
 
600
- # Extract Metadata fields with values as list
601
- for data in metadata:
602
- index = data.get("id").replace(" ", "").replace(".", "_")
603
-
604
- try:
605
- result[index] = data.get("value").get("value").get("value")
606
- except AttributeError:
1369
+ # Extract Metadata fields with values as list and build up
1370
+ # a dictionary:
1371
+ for metadata in metadata_list:
1372
+ # IDs may have dots and spaces that we don't want as dictionary keys.
1373
+ # We remove spaces and replace dots with underscores
1374
+ # (example: OTMM.CUSTOM.FIELD_ PART_NUMBER -> OTMM_CUSTOM_FIELD_PART_NUMBER):
1375
+ dict_key = metadata.get("id").replace(" ", "").replace(".", "_")
1376
+
1377
+ # OTMM has a variety of metadata field types.
1378
+ # This includes list values, drop-down lists and strings.
1379
+ # Each of these have a different representation in
1380
+ # the 'metadata' structure:
1381
+ if "value" in metadata and "value" in metadata["value"]: # do we have a scalar value (plain string)?
1382
+ value_dict = metadata.get("value").get("value")
1383
+ if "value" in value_dict:
1384
+ result[dict_key] = value_dict.get("value")
1385
+ elif "display_value" in value_dict: # is it a domain value?
1386
+ result[dict_key] = value_dict.get("display_value")
1387
+ else:
1388
+ result[dict_key] = None
1389
+ elif "metadata_element_list" in metadata: # do we have a list value?
1390
+ # Create list with a comprehension:
1391
+ value_list = [
1392
+ value.get("value").get("display_value")
1393
+ for element in metadata.get("metadata_element_list", []) # outer loop
1394
+ for value in element.get("values", []) # inner loop
1395
+ ]
1396
+ result[dict_key] = value_list
1397
+ else: # it may also be that there's no value:
1398
+ self.logger.debug(
1399
+ "No value field in metadata -> %s for key -> '%s'",
1400
+ str(metadata),
1401
+ dict_key,
1402
+ )
1403
+ result[dict_key] = None
607
1404
 
608
- infos = []
609
- for element in data.get("metadata_element_list", []):
610
- for value in element.get("values", []):
611
- infos.append(value.get("value").get("display_value"))
1405
+ self.logger.debug(
1406
+ "Retrieved asset details for asset with ID -> %s: %s",
1407
+ asset_id,
1408
+ str(result),
1409
+ )
612
1410
 
613
- result[index] = infos
614
1411
  return result
615
1412
 
616
1413
  # end method definition
@@ -621,78 +1418,195 @@ class OTMM:
621
1418
  load_business_units: bool = True,
622
1419
  download_assets: bool = True,
623
1420
  ) -> bool:
624
- """Load all Media Assets for Products and Business Units
1421
+ """Load all Media Assets for Products and Business Units into a Pandas data frame.
1422
+
1423
+ Args:
1424
+ load_products (bool, optional):
1425
+ If True, load assets on Product level.
1426
+ Defaults to True.
1427
+ load_business_units (bool, optional):
1428
+ If True, load assets on Business Unit level. Defaults to True.
1429
+ download_assets (bool, optional):
1430
+ Only if True assets will be downloaded. Defaults to True.
625
1431
 
626
1432
  Returns:
627
1433
  bool: True = Success, False = Failure
1434
+
1435
+ Example:
1436
+ {
1437
+ 'access_control_descriptor': {
1438
+ 'permissions_map': {...}
1439
+ },
1440
+ 'asset_content_info': {
1441
+ 'master_content': {...}
1442
+ },
1443
+ 'asset_id': '68fe5a6423fd317fdf87e83bc8cde736d4df27bf',
1444
+ 'asset_lock_state_last_update_date': '2024-09-09T22:02:53Z',
1445
+ 'asset_lock_state_user_id': '202',
1446
+ 'asset_state': 'NORMAL',
1447
+ 'asset_state_last_update_date': '2024-09-09T22:02:53Z',
1448
+ 'asset_state_user_id': '202',
1449
+ 'checked_out': False,
1450
+ 'content_editable': True,
1451
+ 'content_lock_state_last_update_date': '2024-08-14T00:33:27Z',
1452
+ 'content_lock_state_user_id': '202',
1453
+ 'content_lock_state_user_name': 'ajohnson3',
1454
+ 'content_size': 18474085,
1455
+ 'content_state': 'NORMAL',
1456
+ 'content_state_last_update_date': '2024-08-14T00:33:27Z',
1457
+ 'content_state_user_id': '202',
1458
+ 'content_state_user_name': 'Amanda Johnson',
1459
+ 'content_type': 'ACROBAT',
1460
+ 'creator_id': '202',
1461
+ 'date_imported': '2024-08-14T00:33:26Z',
1462
+ 'date_last_updated': '2024-09-09T22:02:53Z',
1463
+ 'deleted': False,
1464
+ 'delivery_service_url': 'https://assets.opentext.com/adaptivemedia/rendition?id=68fe5a6423fd317fdf87e83bc8cde736d4df27bf',
1465
+ 'expired': False,
1466
+ 'import_job_id': 7764,
1467
+ 'import_user_name': 'ajohnson3',
1468
+ 'latest_version': True,
1469
+ 'legacy_model_id': 104,
1470
+ 'locked': False,
1471
+ 'master_content_info': {
1472
+ 'content_checksum': '45f42d19542af5b6146cbb3927a5490f',
1473
+ 'content_data': {...},
1474
+ 'content_kind': 'MASTER',
1475
+ 'content_manager_id': 'ARTESIA.CONTENT.GOOGLE.CLOUD',
1476
+ 'content_path': 'data/repository/original/generative-ai-governance-essentials-wp-en_56cbbfe.pdf',
1477
+ 'content_size': 18474085,
1478
+ 'height': -1,
1479
+ 'id': '56cbbfe270593ba1a5ab6551d2c8b373469cc1a9',
1480
+ 'mime_type': 'application/pdf',
1481
+ 'name': 'generative-ai-governance-essentials-wp-en.pdf',
1482
+ 'unit_of_size': 'BYTES',
1483
+ 'url': '/otmmapi/v6/renditions/56cbbfe270593ba1a5ab6551d2c8b373469cc1a9',
1484
+ 'width': -1
1485
+ },
1486
+ 'metadata_lock_state_user_name': 'ajohnson3',
1487
+ 'metadata_model_id': 'OTM.MARKETING.MODEL',
1488
+ 'metadata_state_user_name': 'Amanda Johnson',
1489
+ 'mime_type': 'application/pdf',
1490
+ 'name': 'generative-ai-governance-essentials-wp-en.pdf',
1491
+ 'original_asset_id': '68fe5a6423fd317fdf87e83bc8cde736d4df27bf',
1492
+ 'product_associations': False,
1493
+ 'rendition_content': {
1494
+ 'thumbnail_content': {...},
1495
+ 'preview_content': {...},
1496
+ 'pdf_preview_content': {...}
1497
+ },
1498
+ 'subscribed_to': False,
1499
+ 'thumbnail_content_id': '70aef1a5b5e480337bc115e47443884432c355ff',
1500
+ 'version': 1
1501
+ }
1502
+
628
1503
  """
629
1504
 
630
1505
  asset_list = []
631
1506
 
632
1507
  if load_products:
633
-
634
1508
  products = self.get_products() # dictionary with key = name and value = ID
635
1509
 
1510
+ if self._product_inclusions is not None:
1511
+ products_filtered = {}
1512
+ self.logger.info(
1513
+ "Apply include filter on products -> %s",
1514
+ str(self._product_inclusions),
1515
+ )
1516
+ for key in self._product_inclusions:
1517
+ if key in products:
1518
+ products_filtered[key] = products[key]
1519
+
1520
+ products = products_filtered
1521
+
636
1522
  if self._product_exclusions:
637
- logger.info("Excluding products -> %s", str(self._product_exclusions))
1523
+ self.logger.info(
1524
+ "Excluding products -> %s",
1525
+ str(self._product_exclusions),
1526
+ )
638
1527
  for key in self._product_exclusions:
639
- products.pop(
640
- key, None
641
- ) # pop(key, None) will remove the key if it exists, and do nothing if it doesn't
1528
+ # pop(key, None) will remove the key if it exists,
1529
+ # and do nothing if it doesn't:
1530
+ products.pop(key, None)
642
1531
 
643
1532
  for product_name, product_id in products.items():
644
1533
  if "DO NOT USE" in product_name:
645
1534
  continue
646
1535
 
647
- logger.info("Processing product -> '%s'...", product_name)
1536
+ self.logger.info(
1537
+ "Processing assets for product -> '%s'...",
1538
+ product_name,
1539
+ )
648
1540
 
649
1541
  assets = self.get_product_assets(product_id)
650
1542
 
651
1543
  if not assets:
652
- logger.info("Found no assets for product -> '%s'", product_name)
1544
+ self.logger.info(
1545
+ "Found no assets for product -> '%s'. Skipping it...",
1546
+ product_name,
1547
+ )
653
1548
  continue
654
1549
 
1550
+ # We enrich the dictionary with tags for workspace type and
1551
+ # workspace name for later bulk processing:
655
1552
  for asset in assets:
656
1553
  asset["workspace_type"] = "Product"
657
1554
  asset["workspace_name"] = product_name
658
1555
 
659
- asset_list += assets
1556
+ # Filter out assets that are not files - we use the content size
1557
+ # attribute for this:
1558
+ asset_list += [asset for asset in assets if "content_size" in asset]
660
1559
 
661
1560
  if load_business_units:
662
-
663
1561
  business_units = self.get_business_units()
664
1562
 
1563
+ if self._business_unit_inclusions is not None:
1564
+ business_units_filtered = {}
1565
+ self.logger.info(
1566
+ "Apply include filter on business units -> %s",
1567
+ str(self._business_unit_inclusions),
1568
+ )
1569
+ for key in self._business_unit_inclusions:
1570
+ if key in business_units:
1571
+ business_units_filtered[key] = business_units[key]
1572
+
1573
+ business_units = business_units_filtered
1574
+
665
1575
  if self._business_unit_exclusions:
666
- logger.info(
1576
+ self.logger.info(
667
1577
  "Excluding business units -> %s",
668
1578
  str(self._business_unit_exclusions),
669
1579
  )
670
1580
  for key in self._business_unit_exclusions:
671
- business_units.pop(
672
- key, None
673
- ) # pop(key, None) will remove the key if it exists, and do nothing if it doesn't
1581
+ # pop(key, None) will remove the key if it exists,
1582
+ # and do nothing if it doesn't:
1583
+ business_units.pop(key, None)
674
1584
 
675
1585
  for bu_name, bu_id in business_units.items():
676
- logger.debug(bu_name)
1586
+ self.logger.info("Processing assets for business unit -> '%s'", bu_name)
677
1587
  assets = self.get_business_unit_assets(bu_id)
678
1588
 
679
1589
  if not assets:
680
- logger.info("Found no assets for business unit -> '%s'", bu_name)
1590
+ self.logger.info(
1591
+ "Found no assets for business unit -> '%s'. Skipping it...",
1592
+ bu_name,
1593
+ )
681
1594
  continue
682
1595
 
1596
+ # We enrich the dictionary with tags for workspace type and name for
1597
+ # later bulk processing:
683
1598
  for asset in assets:
684
1599
  asset["workspace_type"] = "Business Unit"
685
1600
  asset["workspace_name"] = bu_name
686
1601
 
687
- asset_list += assets
1602
+ # Filter out assets that are not files - we use the content size
1603
+ # attribute for this:
1604
+ asset_list += [asset for asset in assets if "content_size" in asset]
688
1605
 
689
- total_count = len(asset_list)
1606
+ # end for bu_name...
1607
+ # end if load_business_units
690
1608
 
691
- asset_list = [
692
- item
693
- for item in asset_list
694
- if not item.get("deleted", False) and not item.get("expired", False)
695
- ]
1609
+ total_count = len(asset_list)
696
1610
 
697
1611
  number = self._thread_number
698
1612
 
@@ -704,11 +1618,11 @@ class OTMM:
704
1618
  number = 1
705
1619
  remainder = 0
706
1620
 
707
- logger.info(
708
- "Processing -> %s Media Assets, thread number -> %s, partition size -> %s",
1621
+ self.logger.info(
1622
+ "Processing -> %s media assets, thread number -> %s, partition size -> %s",
709
1623
  str(total_count),
710
- number,
711
- partition_size,
1624
+ str(number),
1625
+ str(partition_size),
712
1626
  )
713
1627
 
714
1628
  threads = []
@@ -748,16 +1662,114 @@ class OTMM:
748
1662
  partition_size: int,
749
1663
  offset: int = 0,
750
1664
  download_assets: bool = True,
751
- ):
752
- """Worker Method for multi-threading
1665
+ ) -> None:
1666
+ """Worker Method for multi-threading.
753
1667
 
754
1668
  Args:
755
- asset_list (list): List of assets to process
756
- business_unit (str, optional): Name of business unit. Defaults to "".
1669
+ asset_list (list):
1670
+ Complete list of assets. The thread uses offset and partition size
1671
+ to pick its working subset of it.
1672
+ partition_size (int):
1673
+ The size of the partition.
1674
+ offset (int, optional):
1675
+ The starting offset for the worker. The default is 0.
1676
+ download_assets (bool, optional):
1677
+ Whether the thread should download the assets. Default is True.
1678
+
1679
+ Example asset that gets added to the Data Frame:
1680
+ {
1681
+ 'access_control_descriptor': {
1682
+ 'permissions_map': {...}
1683
+ },
1684
+ 'asset_content_info': {'master_content': {...}},
1685
+ 'asset_id': '3eefc89705f53f0540d409cf866f1bc8119f65c0',
1686
+ 'asset_lock_state_last_update_date': '2024-06-26T22:15:00Z',
1687
+ 'asset_lock_state_user_id': '153',
1688
+ 'asset_state': 'NORMAL',
1689
+ 'asset_state_last_update_date': '2024-06-26T22:15:00Z',
1690
+ 'asset_state_user_id': '153',
1691
+ 'checked_out': False,
1692
+ 'content_editable': True,
1693
+ 'content_lock_state_last_update_date': '2021-11-22T05:33:46Z',
1694
+ 'content_lock_state_user_id': '76',
1695
+ 'content_lock_state_user_name': 'dgoyal',
1696
+ 'content_size': 25986,
1697
+ 'content_state': 'NORMAL',
1698
+ 'content_state_last_update_date': '2021-11-22T05:33:45Z',
1699
+ 'content_state_user_id': '76',
1700
+ 'content_state_user_name': 'Dignesh Goyal',
1701
+ 'content_type': 'BITMAP',
1702
+ 'creator_id': '76',
1703
+ 'date_imported': '2021-11-22T05:33:44Z',
1704
+ 'date_last_updated': '2024-06-26T22:15:00Z',
1705
+ 'deleted': False,
1706
+ 'delivery_service_url': 'https://assets.opentext.com/adaptivemedia/rendition?id=3eefc89705f53f0540d409cf866f1bc8119f65c0',
1707
+ 'expired': False,
1708
+ 'import_job_id': 381,
1709
+ 'import_user_name': 'dgoyal',
1710
+ 'latest_version': True,
1711
+ 'legacy_model_id': 104,
1712
+ 'locked': False,
1713
+ 'master_content_info': {
1714
+ 'content_checksum': '2cf0db34b37b2af71c516259c6b8287e',
1715
+ 'content_data': {...},
1716
+ 'content_kind': 'MASTER',
1717
+ 'content_manager_id': 'ARTESIA.CONTENT.GOOGLE.CLOUD',
1718
+ 'content_path': 'data/repository/original/co-op-food-logo-ss (1)_21d529dea732.jpg',
1719
+ 'content_size': 25986,
1720
+ 'height': 192,
1721
+ 'id': '21d529dea7324e54b2c00df8573951fcb3f4ebb2',
1722
+ 'mime_type': 'image/jpeg',
1723
+ 'name': 'co-op-food-logo-ss (1).jpg',
1724
+ 'unit_of_size': 'BYTES',
1725
+ 'url': '/otmmapi/v6/renditions/21d529dea7324e54b2c00df8573951fcb3f4ebb2',
1726
+ 'width': 192
1727
+ },
1728
+ 'metadata_lock_state_user_name': 'ababigian',
1729
+ 'metadata_model_id': 'OTM.MARKETING.MODEL',
1730
+ 'metadata_state_user_name': 'Andra Babigian',
1731
+ 'mime_type': 'image/jpeg',
1732
+ 'name': 'co-op-food-logo-ss (1).jpg',
1733
+ 'original_asset_id': '3eefc89705f53f0540d409cf866f1bc8119f65c0',
1734
+ 'product_associations': False,
1735
+ 'rendition_content': {
1736
+ 'thumbnail_content': {...},
1737
+ 'preview_content': {...}
1738
+ },
1739
+ 'subscribed_to': False,
1740
+ 'thumbnail_content_id': '94d71e6ac14890e89931f2bbfc2da74ffab8db5f',
1741
+ 'version': 1,
1742
+ 'workspace_type': 'Product',
1743
+ 'workspace_name': 'Trading Grid',
1744
+ 'asset_name': 'co-op-food-logo-ss (1).jpg',
1745
+ 'OTMM_CUSTOM_FIELD_TITLE': None,
1746
+ 'OTMM_CUSTOM_FIELD_DESCRIPTION': None,
1747
+ 'OTMM_CUSTOM_FIELD_KEYWORDS': None,
1748
+ 'CONTENT_TYPE_COMBO_CHAR_ID': 'Image',
1749
+ 'OTMM_FIELD_IMAGE_TYPE': None,
1750
+ 'OTM_TABLE_APPROVED_USAGE_FIELD': 'Internal',
1751
+ 'OTMM_FIELD_RESOURCE_LIBRARY_TAB': [],
1752
+ 'LANGUAGE_COMBO_CHAR_ID': 'English',
1753
+ 'OTMM_CUSTOM_FIELD_PART_NUMBER': None,
1754
+ 'OTMM_FIELD_AVIATOR': None,
1755
+ 'OTMM_FIELD_BUSINESS_UNIT_TAB': ['Business Network'],
1756
+ 'OTM_TABLE_PRODUCT_TABLE_FIELD': ['Trading Grid'],
1757
+ 'OTMM_FIELD_PRODUCT_NEW_TAB': ['Trading Grid'],
1758
+ 'OTMM_FIELD_MARKET_SEGMENT_TAB': [],
1759
+ 'OTM_TABLE_INDUSTRY_TABLE_FIELD': ['Retail'],
1760
+ 'OTMM_CUSTOM_FIELD_URL': None,
1761
+ ...,
1762
+ 'OTM_TABLE_JOURNEY_TABLE_FIELD': ['Buy', 'Try', 'Learn'],
1763
+ ...,
1764
+ 'REVIEW_CADENCE_CHAR_ID': 'Quarterly',
1765
+ 'CONTENT_CREATED_DATE_ID': '2021-11-08T00:00:00Z',
1766
+ ...
1767
+ }
1768
+
757
1769
  """
758
1770
 
759
- logger.info(
760
- "Processing Media Assets in range from -> %s to -> %s...",
1771
+ self.logger.info(
1772
+ "Processing media assets in range from -> %s to -> %s...",
761
1773
  offset,
762
1774
  offset + partition_size,
763
1775
  )
@@ -766,43 +1778,98 @@ class OTMM:
766
1778
 
767
1779
  for asset in worker_asset_list:
768
1780
  asset_id = asset.get("asset_id")
1781
+ if self._asset_exclusions and asset_id in self._asset_exclusions:
1782
+ self.logger.info(
1783
+ "Asset with ID -> %s is in exclusion list. Skipping it...",
1784
+ asset_id,
1785
+ )
1786
+ asset["included"] = False
1787
+ continue
1788
+ if self._asset_inclusions and asset_id not in self._asset_inclusions:
1789
+ self.logger.info(
1790
+ "Asset with ID -> %s is not in inclusion list. Skipping it...",
1791
+ asset_id,
1792
+ )
1793
+ asset["included"] = False
1794
+ continue
1795
+ if self._asset_exclusions or self._asset_inclusions:
1796
+ asset["included"] = True
769
1797
  asset_name = asset.get("name")
770
1798
  # Store name as asset_name
771
1799
  asset["asset_name"] = asset_name
772
- asset_download_url = asset.get("delivery_service_url")
1800
+ # We cannot fully trust the delivery_service_url -
1801
+ # instead we construct a URL that should always work:
1802
+ asset_download_url = self.config()["assetsUrl"] + "/" + asset_id + "/contents"
1803
+ # We also store the correct download URL to make it available
1804
+ # for the data frame and in bulkDocuments:
1805
+ asset["download_url"] = asset_download_url
773
1806
  asset_deleted = asset.get("deleted", False)
774
1807
  asset_expired = asset.get("expired", False)
775
- if asset_deleted or asset_expired:
776
- logger.info(
777
- "Asset -> '%s' is deleted or expired. Skipping...",
778
- asset_name,
779
- )
780
- continue
781
1808
 
782
- if download_assets:
1809
+ # We can skip the _download_ of deleted or expired assets,
1810
+ # but we still want to have them in the Data Frame for
1811
+ # bulk processing (to remove them from OTCS)
1812
+ if download_assets and asset.get("content_size", 0) > 0 and not asset_deleted and not asset_expired:
783
1813
  success = self.download_asset(
784
1814
  asset_id=asset_id,
785
1815
  asset_name=asset_name,
786
1816
  download_url=asset_download_url,
1817
+ asset_modification_date=asset.get("date_last_updated"),
787
1818
  )
788
1819
  if not success:
789
- logger.error(
1820
+ self.logger.error(
790
1821
  "Failed to download asset -> '%s' (%s) to '%s'",
791
1822
  asset_name,
792
1823
  asset_id,
793
1824
  self._download_dir,
794
1825
  )
795
1826
  else:
796
- logger.info(
1827
+ self.logger.info(
797
1828
  "Successfully downloaded asset -> '%s' (%s) to '%s'",
798
1829
  asset_name,
799
1830
  asset_id,
800
1831
  self._download_dir,
801
1832
  )
1833
+ elif asset_deleted or asset_expired:
1834
+ success = self.remove_stale_download(
1835
+ asset_id=asset_id,
1836
+ asset_name=asset_name,
1837
+ )
1838
+ if not success:
1839
+ self.logger.info(
1840
+ "No stale download for asset -> '%s' (%s) in directory -> '%s'. Nothing to clean up.",
1841
+ asset_name,
1842
+ asset_id,
1843
+ self._download_dir,
1844
+ )
1845
+ else:
1846
+ self.logger.info(
1847
+ "Deleted stale download for asset -> '%s' (%s) in directory -> '%s'",
1848
+ asset_name,
1849
+ asset_id,
1850
+ self._download_dir,
1851
+ )
802
1852
 
803
- ## Add metadata to asset and add to new list
804
- asset.update(self.get_asset_metadata(asset_id))
1853
+ # Add additional metadata to asset and add to new list
1854
+ asset.update(self.prepare_asset_data(asset_id=asset_id))
805
1855
 
806
- # Now we add the article to the Pandas Data Frame in the Data class:
1856
+ # Now we add the assets processed by the worker
1857
+ # to the Pandas Data Frame in the Data class:
807
1858
  with self._data.lock():
808
- self._data.append(worker_asset_list)
1859
+ # Check if we have added the temporary key "included"
1860
+ # to handle inclusions or exclusions. Then we want to
1861
+ # a) remove the excluded items
1862
+ # b) remove the "included" key to avoid polluting the
1863
+ # data frame with an additional temp column
1864
+ if self._asset_exclusions or self._asset_inclusions:
1865
+ self._data.append(
1866
+ [
1867
+ {k: v for k, v in item.items() if k != "included"}
1868
+ for item in worker_asset_list
1869
+ if item.get("included")
1870
+ ],
1871
+ )
1872
+ else:
1873
+ self._data.append(worker_asset_list)
1874
+
1875
+ # end method definition