pyxecm 1.6__py3-none-any.whl → 2.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pyxecm might be problematic. Click here for more details.

Files changed (78)
  1. pyxecm/__init__.py +7 -4
  2. pyxecm/avts.py +727 -254
  3. pyxecm/coreshare.py +686 -467
  4. pyxecm/customizer/__init__.py +16 -4
  5. pyxecm/customizer/__main__.py +58 -0
  6. pyxecm/customizer/api/__init__.py +5 -0
  7. pyxecm/customizer/api/__main__.py +6 -0
  8. pyxecm/customizer/api/app.py +163 -0
  9. pyxecm/customizer/api/auth/__init__.py +1 -0
  10. pyxecm/customizer/api/auth/functions.py +92 -0
  11. pyxecm/customizer/api/auth/models.py +13 -0
  12. pyxecm/customizer/api/auth/router.py +78 -0
  13. pyxecm/customizer/api/common/__init__.py +1 -0
  14. pyxecm/customizer/api/common/functions.py +47 -0
  15. pyxecm/customizer/api/common/metrics.py +92 -0
  16. pyxecm/customizer/api/common/models.py +21 -0
  17. pyxecm/customizer/api/common/payload_list.py +870 -0
  18. pyxecm/customizer/api/common/router.py +72 -0
  19. pyxecm/customizer/api/settings.py +128 -0
  20. pyxecm/customizer/api/terminal/__init__.py +1 -0
  21. pyxecm/customizer/api/terminal/router.py +87 -0
  22. pyxecm/customizer/api/v1_csai/__init__.py +1 -0
  23. pyxecm/customizer/api/v1_csai/router.py +87 -0
  24. pyxecm/customizer/api/v1_maintenance/__init__.py +1 -0
  25. pyxecm/customizer/api/v1_maintenance/functions.py +100 -0
  26. pyxecm/customizer/api/v1_maintenance/models.py +12 -0
  27. pyxecm/customizer/api/v1_maintenance/router.py +76 -0
  28. pyxecm/customizer/api/v1_otcs/__init__.py +1 -0
  29. pyxecm/customizer/api/v1_otcs/functions.py +61 -0
  30. pyxecm/customizer/api/v1_otcs/router.py +179 -0
  31. pyxecm/customizer/api/v1_payload/__init__.py +1 -0
  32. pyxecm/customizer/api/v1_payload/functions.py +179 -0
  33. pyxecm/customizer/api/v1_payload/models.py +51 -0
  34. pyxecm/customizer/api/v1_payload/router.py +499 -0
  35. pyxecm/customizer/browser_automation.py +721 -286
  36. pyxecm/customizer/customizer.py +1076 -1425
  37. pyxecm/customizer/exceptions.py +35 -0
  38. pyxecm/customizer/guidewire.py +1186 -0
  39. pyxecm/customizer/k8s.py +901 -379
  40. pyxecm/customizer/log.py +107 -0
  41. pyxecm/customizer/m365.py +2967 -920
  42. pyxecm/customizer/nhc.py +1169 -0
  43. pyxecm/customizer/openapi.py +258 -0
  44. pyxecm/customizer/payload.py +18228 -7820
  45. pyxecm/customizer/pht.py +717 -286
  46. pyxecm/customizer/salesforce.py +516 -342
  47. pyxecm/customizer/sap.py +58 -41
  48. pyxecm/customizer/servicenow.py +611 -372
  49. pyxecm/customizer/settings.py +445 -0
  50. pyxecm/customizer/successfactors.py +408 -346
  51. pyxecm/customizer/translate.py +83 -48
  52. pyxecm/helper/__init__.py +5 -2
  53. pyxecm/helper/assoc.py +83 -43
  54. pyxecm/helper/data.py +2406 -870
  55. pyxecm/helper/logadapter.py +27 -0
  56. pyxecm/helper/web.py +229 -101
  57. pyxecm/helper/xml.py +596 -171
  58. pyxecm/maintenance_page/__init__.py +5 -0
  59. pyxecm/maintenance_page/__main__.py +6 -0
  60. pyxecm/maintenance_page/app.py +51 -0
  61. pyxecm/maintenance_page/settings.py +28 -0
  62. pyxecm/maintenance_page/static/favicon.avif +0 -0
  63. pyxecm/maintenance_page/templates/maintenance.html +165 -0
  64. pyxecm/otac.py +235 -141
  65. pyxecm/otawp.py +2668 -1220
  66. pyxecm/otca.py +569 -0
  67. pyxecm/otcs.py +7956 -3237
  68. pyxecm/otds.py +2178 -925
  69. pyxecm/otiv.py +36 -21
  70. pyxecm/otmm.py +1272 -325
  71. pyxecm/otpd.py +231 -127
  72. pyxecm-2.0.1.dist-info/METADATA +122 -0
  73. pyxecm-2.0.1.dist-info/RECORD +76 -0
  74. {pyxecm-1.6.dist-info → pyxecm-2.0.1.dist-info}/WHEEL +1 -1
  75. pyxecm-1.6.dist-info/METADATA +0 -53
  76. pyxecm-1.6.dist-info/RECORD +0 -32
  77. {pyxecm-1.6.dist-info → pyxecm-2.0.1.dist-info/licenses}/LICENSE +0 -0
  78. {pyxecm-1.6.dist-info → pyxecm-2.0.1.dist-info}/top_level.txt +0 -0
pyxecm/otmm.py CHANGED
@@ -1,53 +1,61 @@
1
- """
2
- OTMM Module to interact with the OpenText Media Management API
3
- See:
4
-
5
- Class: OTMM
6
- Methods:
7
-
8
- __init__ : class initializer
9
- config : Returns config data set
10
- get_data: Get the Data object that holds all processed Media Management base Assets
11
- credentials: Returns the token data
12
- request_header: Returns the request header for ServiceNow API calls
13
- parse_request_response: Parse the REST API responses and convert
14
- them to Python dict in a safe way
15
- exist_result_item: Check if an dict item is in the response
16
- of the ServiceNow API call
17
- get_result_value: Check if a defined value (based on a key) is in the ServiceNow API response
18
-
19
- authenticate : Authenticates at ServiceNow API
1
+ """OTMM Module to interact with the OpenText Media Management API.
2
+
3
+ The documentation for the used REST APIs can be found here:
4
+ - [https://developer.opentext.com](https://developer.opentext.com/ce/products/media-management)
20
5
  """
21
6
 
22
7
  __author__ = "Dr. Marc Diefenbruch"
23
- __copyright__ = "Copyright 2024, OpenText"
8
+ __copyright__ = "Copyright (C) 2024-2025, OpenText"
24
9
  __credits__ = ["Kai-Philip Gatzweiler"]
25
10
  __maintainer__ = "Dr. Marc Diefenbruch"
26
11
  __email__ = "mdiefenb@opentext.com"
27
12
 
28
- from json import JSONDecodeError
29
- import os
13
+ import json
30
14
  import logging
31
- import urllib.parse
15
+ import os
16
+ import platform
17
+ import sys
32
18
  import threading
33
19
  import traceback
20
+ import urllib.parse
21
+ from collections.abc import Callable
22
+ from datetime import datetime, timezone
23
+ from importlib.metadata import version
24
+ from json import JSONDecodeError
34
25
 
35
26
  import requests
27
+ from requests.adapters import HTTPAdapter
36
28
  from requests.exceptions import HTTPError, RequestException
37
29
 
38
- from pyxecm.helper.data import Data
30
+ from pyxecm.helper import Data
39
31
 
40
- logger = logging.getLogger("pyxecm.otmm")
32
+ APP_NAME = "pyxecm"
33
+ APP_VERSION = version("pyxecm")
34
+ MODULE_NAME = APP_NAME + ".otmm"
41
35
 
42
- REQUEST_HEADERS = {"Accept": "application/json", "Content-Type": "application/json"}
36
+ PYTHON_VERSION = f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}"
37
+ OS_INFO = f"{platform.system()} {platform.release()}"
38
+ ARCH_INFO = platform.machine()
39
+ REQUESTS_VERSION = requests.__version__
43
40
 
41
+ USER_AGENT = (
42
+ f"{APP_NAME}/{APP_VERSION} ({MODULE_NAME}/{APP_VERSION}; "
43
+ f"Python/{PYTHON_VERSION}; {OS_INFO}; {ARCH_INFO}; Requests/{REQUESTS_VERSION})"
44
+ )
45
+
46
+ REQUEST_HEADERS = {
47
+ "User-Agent": USER_AGENT,
48
+ "Content-Type": "application/x-www-form-urlencoded",
49
+ }
44
50
  REQUEST_TIMEOUT = 60
45
51
 
46
- ASSET_BASE_PATH = "/tmp/mediaassets"
52
+ default_logger = logging.getLogger(MODULE_NAME)
47
53
 
48
54
 
49
55
  class OTMM:
50
- """Used to retrieve and automate data extraction from OTMM."""
56
+ """Class OTMM is used to automate data extraction from OTMM."""
57
+
58
+ logger: logging.Logger = default_logger
51
59
 
52
60
  _config: dict
53
61
  _access_token = None
@@ -55,7 +63,11 @@ class OTMM:
55
63
  _thread_number = 3
56
64
  _download_dir = ""
57
65
  _business_unit_exclusions = None
66
+ _business_unit_inclusions = None
58
67
  _product_exclusions = None
68
+ _product_inclusions = None
69
+ _asset_exclusions = None
70
+ _asset_inclusions = None
59
71
 
60
72
  def __init__(
61
73
  self,
@@ -67,8 +79,51 @@ class OTMM:
67
79
  thread_number: int,
68
80
  download_dir: str,
69
81
  business_unit_exclusions: list | None = None,
82
+ business_unit_inclusions: list | None = None,
70
83
  product_exclusions: list | None = None,
71
- ):
84
+ product_inclusions: list | None = None,
85
+ asset_exclusions: list | None = None,
86
+ asset_inclusions: list | None = None,
87
+ logger: logging.Logger = default_logger,
88
+ ) -> None:
89
+ """Initialize the OTMM object.
90
+
91
+ Args:
92
+ base_url (str):
93
+ The base URL for accessing OTMM.
94
+ username (str):
95
+ The name of the user.
96
+ password (str):
97
+ The password of the user.
98
+ client_id (str):
99
+ The client ID for the credentials.
100
+ client_secret (str):
101
+ The client secret for the credentials.
102
+ thread_number (int):
103
+ The number of threads for parallel processing for data loads.
104
+ download_dir (str):
105
+ The filesystem directory to download the OTMM assets to.
106
+ business_unit_exclusions (list | None, optional):
107
+ An optional list of business units to exclude. Defaults to None.
108
+ business_unit_inclusions (list | None, optional):
109
+ An optional list of business units to include. Defaults to None.
110
+ product_exclusions (list | None, optional):
111
+ An optional list of products to exclude. Defaults to None.
112
+ product_inclusions (list | None, optional):
113
+ An optional list of products to include. Defaults to None.
114
+ asset_exclusions (list | None, optional):
115
+ An optional list of asset (IDs) to exclude. Defaults to None.
116
+ asset_inclusions (list | None, optional):
117
+ An optional list of asset (IDs) to include. Defaults to None.
118
+ logger (logging.Logger, optional):
119
+ The logging object to use for all log messages. Defaults to default_logger.
120
+
121
+ """
122
+
123
+ if logger != default_logger:
124
+ self.logger = logger.getChild("otmm")
125
+ for logfilter in logger.filters:
126
+ self.logger.addFilter(logfilter)
72
127
 
73
128
  # Initialize otmm_config as an empty dictionary
74
129
  otmm_config = {}
@@ -80,7 +135,8 @@ class OTMM:
80
135
  otmm_config["clientId"] = client_id
81
136
  otmm_config["clientSecret"] = client_secret
82
137
 
83
- otmm_config["restUrl"] = otmm_config["baseUrl"] + "/otmmapi/v6"
138
+ # Make sure we don't have double-slashes if base_url comes with a trailing slash:
139
+ otmm_config["restUrl"] = urllib.parse.urljoin(base_url, "/otmmapi/v6")
84
140
  otmm_config["tokenUrl"] = otmm_config["restUrl"] + "/sessions/oauth2/token"
85
141
  otmm_config["domainUrl"] = otmm_config["restUrl"] + "/lookupdomains"
86
142
  otmm_config["assetsUrl"] = otmm_config["restUrl"] + "/assets"
@@ -89,44 +145,75 @@ class OTMM:
89
145
  self._config = otmm_config
90
146
 
91
147
  self._session = requests.Session()
148
+ self._session.headers.update({"User-Agent": USER_AGENT})
92
149
 
93
- self._data = Data()
150
+ self._adapter = HTTPAdapter(
151
+ pool_connections=thread_number,
152
+ pool_maxsize=thread_number,
153
+ )
154
+ self._session.mount("http://", self._adapter)
155
+ self._session.mount("https://", self._adapter)
156
+
157
+ self._data = Data(logger=self.logger)
94
158
 
95
159
  self._thread_number = thread_number
96
160
 
97
161
  self._download_dir = download_dir
98
162
 
99
163
  self._business_unit_exclusions = business_unit_exclusions
164
+ self._business_unit_inclusions = business_unit_inclusions
100
165
  self._product_exclusions = product_exclusions
166
+ self._product_inclusions = product_inclusions
167
+ self._asset_exclusions = asset_exclusions
168
+ self._asset_inclusions = asset_inclusions
101
169
 
102
170
  # end method definition
103
171
 
104
- def thread_wrapper(self, target, *args, **kwargs):
105
- """Function to wrap around threads to catch exceptions during exection"""
172
+ def thread_wrapper(self, target: Callable, *args: tuple, **kwargs: dict) -> None:
173
+ """Wrap around threads to catch exceptions during execution.
174
+
175
+ Args:
176
+ target (Callable):
177
+ The method (callable) the Thread should run.
178
+ args (tuple):
179
+ The arguments for the method.
180
+ kwargs (dict):
181
+ Keyword arguments for the method.
182
+
183
+ """
184
+
106
185
  try:
107
186
  target(*args, **kwargs)
108
- except Exception as e:
187
+ except Exception:
109
188
  thread_name = threading.current_thread().name
110
- logger.error("Thread '%s': failed with exception -> %s", thread_name, e)
111
- logger.error(traceback.format_exc())
189
+ self.logger.error(
190
+ "Thread '%s' failed!",
191
+ thread_name,
192
+ )
193
+ self.logger.error(traceback.format_exc())
112
194
 
113
195
  # end method definition
114
196
 
115
197
  def config(self) -> dict:
116
- """Returns the configuration dictionary
198
+ """Return the configuration dictionary.
117
199
 
118
200
  Returns:
119
- dict: Configuration dictionary
201
+ dict:
202
+ The configuration dictionary.
203
+
120
204
  """
205
+
121
206
  return self._config
122
207
 
123
208
  # end method definition
124
209
 
125
210
  def get_data(self) -> Data:
126
- """Get the Data object that holds all processed Media Management base Assets
211
+ """Get the data frame that holds all processed Media Management assets.
127
212
 
128
213
  Returns:
129
- Data: Datastructure with all processed assets.
214
+ Data:
215
+ Data frame with all processed assets.
216
+
130
217
  """
131
218
 
132
219
  return self._data
@@ -134,10 +221,18 @@ class OTMM:
134
221
  # end method definition
135
222
 
136
223
  def authenticate(self) -> str | None:
137
- """Authenticate at OTMM with client ID and client secret or with basic authentication."""
224
+ """Authenticate at OTMM.
225
+
226
+ Supports authentication with client ID and client secret
227
+ or with basic authentication.
228
+
229
+ Returns:
230
+ str | None:
231
+ The access token for OTMM.
232
+
233
+ """
138
234
 
139
235
  request_url = self.config()["tokenUrl"]
140
- headers = {"Content-Type": "application/x-www-form-urlencoded"}
141
236
  payload = {
142
237
  "username": self.config()["username"],
143
238
  "password": self.config()["password"],
@@ -149,154 +244,369 @@ class OTMM:
149
244
  try:
150
245
  response = self._session.post(
151
246
  request_url,
152
- headers=headers,
247
+ headers=REQUEST_HEADERS,
153
248
  data=urllib.parse.urlencode(payload),
154
249
  )
155
250
  response.raise_for_status()
156
251
 
157
- self._access_token = (
158
- response.json().get("token_info").get("oauth_token").get("accessToken")
159
- )
252
+ self._access_token = response.json().get("token_info").get("oauth_token").get("accessToken")
160
253
  self._session.headers.update(
161
- {"Authorization": f"Bearer {self._access_token}"}
254
+ {"Authorization": f"Bearer {self._access_token}"},
255
+ )
256
+
257
+ except requests.exceptions.HTTPError as http_error:
258
+ self.logger.error("HTTP error requesting -> %s; error -> %s", request_url, str(http_error))
259
+ self.logger.debug("HTTP request header -> %s", str(REQUEST_HEADERS))
260
+ return None
261
+ except requests.exceptions.ConnectionError:
262
+ self.logger.error("Connection error requesting -> %s", request_url)
263
+ return None
264
+ except requests.exceptions.Timeout:
265
+ self.logger.error("Timeout error requesting -> %s", request_url)
266
+ return None
267
+ except requests.exceptions.RequestException:
268
+ self.logger.error("Request error requesting -> %s", request_url)
269
+ return None
270
+ except Exception:
271
+ self.logger.error("Unexpected error requesting -> %s", request_url)
272
+ return None
273
+
274
+ return self._access_token
275
+
276
+ # end method definition
277
+
278
+ def get_lookup_domains(self) -> dict | None:
279
+ """Get all OTMM lookup domains.
280
+
281
+ Args:
282
+ None
283
+
284
+ Returns:
285
+ dict | None:
286
+ All OTMM lookup domains.
287
+
288
+ Example:
289
+ {
290
+ 'lookup_domains_resource': {
291
+ 'lookup_domains': [
292
+ {
293
+ 'cacheable': True,
294
+ 'datatype': 'CHAR',
295
+ 'domainId': 'ARTESIA.DOMAIN.MEDIA_ANALYSIS.SOURCE.LANGUAGE',
296
+ 'domainValues': [
297
+ {
298
+ 'display_value': 'Hausa (Ghana)',
299
+ 'expired_value': False,
300
+ 'field_value': {...}
301
+ },
302
+ ...
303
+ ]
304
+ },
305
+ ...
306
+ ]
307
+ }
308
+ }
309
+
310
+ """
311
+
312
+ request_url = self.config()["domainUrl"]
313
+
314
+ try:
315
+ response = self._session.get(
316
+ request_url,
162
317
  )
163
318
 
164
- return self._access_token
319
+ response.raise_for_status()
165
320
 
166
- except requests.exceptions.HTTPError as http_err:
167
- logger.error("HTTP error occurred: %s", http_err)
168
- except requests.exceptions.ConnectionError as conn_err:
169
- logger.error("Connection error occurred: %s", conn_err)
170
- except requests.exceptions.Timeout as timeout_err:
171
- logger.error("Timeout error occurred: %s", timeout_err)
172
- except requests.exceptions.RequestException as req_err:
173
- logger.error("Request error occurred: %s", req_err)
174
- except Exception as e:
175
- logger.error("An unexpected error occurred: %s", e)
321
+ except requests.exceptions.HTTPError as http_error:
322
+ self.logger.error("HTTP error requesting -> %s; error -> %s", request_url, str(http_error))
323
+ return None
324
+ except requests.exceptions.ConnectionError:
325
+ self.logger.error("Connection error requesting -> %s", request_url)
326
+ return None
327
+ except requests.exceptions.Timeout:
328
+ self.logger.error("Timeout error requesting -> %s", request_url)
329
+ return None
330
+ except requests.exceptions.RequestException:
331
+ self.logger.error("Request error requesting -> %s", request_url)
332
+ return None
333
+ except Exception:
334
+ self.logger.error("Unexpected error requesting -> %s", request_url)
335
+ return None
176
336
 
177
- return None
337
+ return response.json()
178
338
 
179
339
  # end method definition
180
340
 
181
- def get_products(self, domain: str = "OTMM.DOMAIN.OTM_PRODUCT") -> dict:
182
- """Get a dictionary with product names (keys) and IDs (values)
341
+ def get_lookup_domain(self, domain: str) -> dict | None:
342
+ """Get OTMM lookup domain with a given name.
183
343
 
184
344
  Args:
185
- domain (str, optional): Domain. Defaults to "OTMM.DOMAIN.OTM_PRODUCT".
345
+ domain (str):
346
+ The name / identifier of the domain.
347
+
186
348
  Returns:
187
- dict: Dictionary of all known products.
349
+ dict | None:
350
+ The response includes data for the given lookup domain
351
+ or None if the request fails.
352
+
353
+ Example:
354
+ {
355
+ 'lookup_domain_resource': {
356
+ 'lookup_domain': {
357
+ 'cacheable': True,
358
+ 'datatype': 'CHAR',
359
+ 'domainId': 'OTMM.DOMAIN.OTM_PRODUCT',
360
+ 'domainValues': [
361
+ {
362
+ 'active_from': '',
363
+ 'active_to': '',
364
+ 'description': 'Active Access',
365
+ 'display_value': 'Active Access',
366
+ 'expired_value': False,
367
+ 'field_value': {
368
+ 'type': 'string',
369
+ 'value': '213'
370
+ }
371
+ },
372
+ ...
373
+ ]
374
+ }
375
+ }
376
+ }
377
+
188
378
  """
189
379
 
190
- lookup_products = self.lookup_domains(domain)
380
+ request_url = self.config()["domainUrl"] + "/" + domain
191
381
 
192
- result = {}
193
- for product in lookup_products:
194
- result[product.get("display_value")] = product.get("field_value").get(
195
- "value"
382
+ try:
383
+ response = self._session.get(
384
+ request_url,
196
385
  )
197
386
 
198
- return result
387
+ response.raise_for_status()
388
+
389
+ except requests.exceptions.HTTPError as http_error:
390
+ self.logger.error("HTTP error requesting -> %s; error -> %s", request_url, str(http_error))
391
+ return None
392
+ except requests.exceptions.ConnectionError:
393
+ self.logger.error("Connection error requesting -> %s", request_url)
394
+ return None
395
+ except requests.exceptions.Timeout:
396
+ self.logger.error("Timeout error requesting -> %s", request_url)
397
+ return None
398
+ except requests.exceptions.RequestException:
399
+ self.logger.error("Request error requesting -> %s", request_url)
400
+ return None
401
+ except Exception:
402
+ self.logger.error("Unexpected error requesting -> %s", request_url)
403
+ return None
404
+
405
+ return response.json()
199
406
 
200
407
  # end method definition
201
408
 
202
- def get_business_units(
203
- self, domain: str = "OTMM.DOMAIN.OTM_BUSINESS_UNIT.LU"
204
- ) -> dict:
205
- """Get a dictionary with product names (keys) and IDs (values)
409
+ def get_lookup_domain_values(self, domain: str) -> list | None:
410
+ """Get values of an OTMM lookup domain with a given name.
206
411
 
207
412
  Args:
208
- domain (str, optional): Domain. Defaults to "OTMM.DOMAIN.OTM_BUSINESS_UNIT.LU".
413
+ domain (str):
414
+ The name / identifier of the domain.
209
415
 
210
416
  Returns:
211
- dict: Dictionary of all known business units.
417
+ list | None:
418
+ The list of domain values or None if the request fails.
419
+
212
420
  """
213
421
 
214
- lookup_bus = self.lookup_domains(domain)
215
- result = {}
216
- for bu in lookup_bus:
217
- result[bu.get("display_value")] = bu.get("field_value").get("value")
422
+ lookup_domain = self.get_lookup_domain(domain=domain)
423
+ if not lookup_domain:
424
+ self.logger.error(
425
+ "Cannot get lookup domain values for domain -> '%s'",
426
+ domain,
427
+ )
428
+ return None
218
429
 
219
- return result
430
+ values = lookup_domain.get("lookup_domain_resource").get("lookup_domain").get("domainValues")
431
+
432
+ return values
220
433
 
221
434
  # end method definition
222
435
 
223
- def lookup_domains(self, domain: str):
224
- """Lookup domain values in a given OTMM domain
436
+ def get_products(self, domain: str = "OTMM.DOMAIN.OTM_PRODUCT") -> dict:
437
+ """Get a dictionary with product names (keys) and IDs (values).
225
438
 
226
439
  Args:
227
- domain (str): name / identifier of the domain.
440
+ domain (str, optional):
441
+ The identifier of the Domain. Defaults to "OTMM.DOMAIN.OTM_PRODUCT".
228
442
 
229
443
  Returns:
230
- _type_: _description_
444
+ dict:
445
+ Dictionary of all known products.
446
+
231
447
  """
232
448
 
233
- request_url = self.config()["domainUrl"] + "/" + domain
449
+ lookup_products = self.get_lookup_domain_values(domain) or []
234
450
 
235
- try:
236
- response = self._session.get(
237
- request_url,
451
+ # Comprehension to create a dictionary.
452
+ # Keys are the product names, values the product IDs.
453
+ # We remove leading and trailing spaces -
454
+ # OTMM data seems to have this in some places.
455
+ return {
456
+ product.get("display_value").strip(): product.get("field_value").get(
457
+ "value",
238
458
  )
459
+ for product in lookup_products
460
+ }
239
461
 
240
- response.raise_for_status()
462
+ # end method definition
241
463
 
242
- except requests.exceptions.HTTPError as http_err:
243
- logger.error("HTTP error occurred: %s", http_err)
244
- except requests.exceptions.ConnectionError as conn_err:
245
- logger.error("Connection error occurred: %s", conn_err)
246
- except requests.exceptions.Timeout as timeout_err:
247
- logger.error("Timeout error occurred: %s", timeout_err)
248
- except requests.exceptions.RequestException as req_err:
249
- logger.error("Request error occurred: %s", req_err)
250
- except Exception as e:
251
- logger.error("An unexpected error occurred: %s", e)
252
-
253
- response = (
254
- response.json()
255
- .get("lookup_domain_resource")
256
- .get("lookup_domain")
257
- .get("domainValues")
258
- )
464
+ def get_business_units(
465
+ self,
466
+ domain: str = "OTMM.DOMAIN.OTM_BUSINESS_UNIT.LU",
467
+ ) -> dict:
468
+ """Get a dictionary with business unit names (keys) and business unit IDs (values).
469
+
470
+ Args:
471
+ domain (str, optional):
472
+ The domain. Defaults to "OTMM.DOMAIN.OTM_BUSINESS_UNIT.LU".
473
+
474
+ Returns:
475
+ dict:
476
+ Dictionary of all known business units.
477
+
478
+ """
259
479
 
260
- return response
480
+ lookup_bus = self.get_lookup_domain_values(domain) or []
481
+
482
+ # Comprehension to create a dictionary.
483
+ # Keys are the business unit names, values the business unit IDs:
484
+ return {bu.get("display_value").strip(): bu.get("field_value").get("value") for bu in lookup_bus}
261
485
 
262
486
  # end method definition
263
487
 
264
- def get_asset(self, asset_id: str) -> dict:
265
- """Get an asset based on its ID
488
+ def get_asset(self, asset_id: str) -> dict | None:
489
+ """Get an asset based on its ID.
266
490
 
267
491
  Args:
268
- asset_id (str): Asset ID
492
+ asset_id (str):
493
+ The ID of the asset.
269
494
 
270
495
  Returns:
271
- dict: dictionary with asset data
496
+ dict | None:
497
+ A dictionary with asset data or None if the asset is not found.
498
+
499
+ Example:
500
+ {
501
+ 'asset_resource': {
502
+ 'asset': {
503
+ 'access_control_descriptor': {
504
+ 'permissions_map': {...}
505
+ },
506
+ 'asset_content_info': {
507
+ 'master_content': {...}
508
+ },
509
+ 'asset_id': 'e064571da79c926ee14b0850734b49edf42d9ba5',
510
+ 'asset_lock_state_last_update_date': '2024-04-16T15:03:48Z',
511
+ 'asset_lock_state_user_id': '153',
512
+ 'asset_state': 'NORMAL',
513
+ 'asset_state_last_update_date': '2024-04-16T15:03:48Z',
514
+ 'asset_state_user_id': '153',
515
+ 'checked_out': False,
516
+ 'content_editable': True,
517
+ 'content_lock_state_last_update_date': '2023-12-11T20:56:26Z',
518
+ 'content_lock_state_user_id': '202',
519
+ 'content_lock_state_user_name': 'ajohnson3',
520
+ 'content_size': 95873,
521
+ 'content_state': 'NORMAL',
522
+ 'content_state_last_update_date': '2023-12-11T20:56:26Z',
523
+ 'content_state_user_id': '202',
524
+ 'content_state_user_name': 'Amanda Johnson',
525
+ 'content_type': 'ACROBAT',
526
+ 'creator_id': '202',
527
+ 'date_imported': '2023-12-11T20:56:26Z',
528
+ 'date_last_updated': '2024-04-16T15:03:48Z',
529
+ 'deleted': False,
530
+ 'delivery_service_url': 'https://assets.opentext.com/adaptivemedia/rendition?id=726d14f14bb1ae93c3efda5a870399a20c991770',
531
+ 'expired': False,
532
+ 'import_job_id': 5776,
533
+ 'import_user_name': 'ajohnson3',
534
+ 'latest_version': True,
535
+ 'legacy_model_id': 104,
536
+ 'locked': False,
537
+ 'master_content_info': {
538
+ 'content_checksum': '2a31defcf7ad2feb7c557acb068a5c22',
539
+ 'content_data': {...},
540
+ 'content_kind': 'MASTER',
541
+ 'content_manager_id': 'ARTESIA.CONTENT.GOOGLE.CLOUD',
542
+ 'content_size': 95873,
543
+ 'height': -1,
544
+ 'id': 'b563035e050a89e58a921df8a4047a0673ad9691',
545
+ 'mime_type': 'application/pdf',
546
+ 'name': 'a-business-case-for-arcsight-soar-wp.pdf',
547
+ 'unit_of_size': 'BYTES',
548
+ 'url': '/otmmapi/v6/renditions/b563035e050a89e58a921df8a4047a0673ad9691',
549
+ 'width': -1
550
+ },
551
+ 'metadata_lock_state_user_name': 'ababigian',
552
+ 'metadata_model_id': 'OTM.MARKETING.MODEL',
553
+ 'metadata_state_user_name': 'Andra Babigian',
554
+ 'mime_type': 'application/pdf',
555
+ 'name': 'a-business-case-for-arcsight-soar-pp-en.pdf',
556
+ 'original_asset_id': '726d14f14bb1ae93c3efda5a870399a20c991770',
557
+ 'product_associations': False,
558
+ 'rendition_content': {
559
+ 'pdf_preview_content': {
560
+ 'content_checksum': '2a31defcf7ad2feb7c557acb068a5c22',
561
+ 'content_data': {
562
+ 'data_source': 'NO_CONTENT',
563
+ 'temp_file': False
564
+ },
565
+ 'content_kind': 'MASTER',
566
+ 'content_manager_id': 'ARTESIA.CONTENT.GOOGLE.CLOUD',
567
+ 'content_size': 95873,
568
+ 'height': -1,
569
+ 'id': 'b563035e050a89e58a921df8a4047a0673ad9691',
570
+ 'mime_type': 'application/pdf',
571
+ 'name': 'a-business-case-for-arcsight-soar-wp.pdf',
572
+ 'unit_of_size': 'BYTES',
573
+ 'url': '/otmmapi/v6/renditions/b563035e050a89e58a921df8a4047a0673ad9691',
574
+ 'width': -1
575
+ }
576
+ },
577
+ 'subscribed_to': False,
578
+ 'version': 3
579
+ }
580
+ }
581
+ }
582
+
272
583
  """
273
584
 
274
585
  request_url = self.config()["assetsUrl"] + "/" + asset_id
275
586
 
276
- headers = {"Content-Type": "application/x-www-form-urlencoded"}
277
-
278
587
  try:
279
588
  response = self._session.get(
280
589
  request_url,
281
- headers=headers,
590
+ headers=REQUEST_HEADERS,
282
591
  )
283
592
 
284
593
  response.raise_for_status()
285
594
 
286
- except requests.exceptions.HTTPError as http_err:
287
- logger.error("HTTP error occurred: %s", http_err)
595
+ except requests.exceptions.HTTPError as http_error:
596
+ self.logger.error("HTTP error requesting -> %s; error -> %s", request_url, str(http_error))
597
+ self.logger.debug("HTTP request header -> %s", str(REQUEST_HEADERS))
288
598
  return None
289
- except requests.exceptions.ConnectionError as conn_err:
290
- logger.error("Connection error occurred: %s", conn_err)
599
+ except requests.exceptions.ConnectionError:
600
+ self.logger.error("Connection error requesting -> %s", request_url)
291
601
  return None
292
- except requests.exceptions.Timeout as timeout_err:
293
- logger.error("Timeout error occurred: %s", timeout_err)
602
+ except requests.exceptions.Timeout:
603
+ self.logger.error("Timeout error requesting -> %s", request_url)
294
604
  return None
295
- except requests.exceptions.RequestException as req_err:
296
- logger.error("Request error occurred: %s", req_err)
605
+ except requests.exceptions.RequestException:
606
+ self.logger.error("Request error requesting -> %s", request_url)
297
607
  return None
298
- except Exception as e:
299
- logger.error("An unexpected error occurred: %s", e)
608
+ except Exception:
609
+ self.logger.error("Unexpected error requesting -> %s", request_url)
300
610
  return None
301
611
 
302
612
  return response.json()
@@ -304,17 +614,26 @@ class OTMM:
304
614
  # end method definition
305
615
 
306
616
  def get_business_unit_assets(
307
- self, bu_id: int, offset: int = 0, limit: int = 200
617
+ self,
618
+ bu_id: str,
619
+ offset: int = 0,
620
+ limit: int = 200,
308
621
  ) -> list | None:
309
622
  """Get all Media Assets for a given Business Unit (ID) that are NOT related to a product.
310
623
 
311
624
  Args:
312
- bu_id (int): Identifier of the Business Unit.
313
- offset (int, optional): Result pagination. Starting ID. Defaults to 0.
314
- limit (int, optional): Result pagination. Page length. Defaults to 200.
625
+ bu_id (str):
626
+ Identifier of the Business Unit. DON'T USE INT HERE! OTMM delivers
627
+ strings for get_business_units()
628
+ offset (int, optional):
629
+ Result pagination. Starting ID. Defaults to 0.
630
+ limit (int, optional):
631
+ Result pagination. Page length. Defaults to 200.
315
632
 
316
633
  Returns:
317
- dict: Search Results
634
+ list | None:
635
+ Search Results
636
+
318
637
  """
319
638
 
320
639
  payload = {
@@ -327,22 +646,57 @@ class OTMM:
327
646
  "search_config_id": ["3"],
328
647
  "preference_id": ["ARTESIA.PREFERENCE.GALLERYVIEW.DISPLAYED_FIELDS"],
329
648
  "metadata_to_return": ["ARTESIA.FIELD.TAG"],
330
- "facet_restriction_list": '{"facet_restriction_list":{"facet_field_restriction":[{"type":"com.artesia.search.facet.FacetSimpleFieldRestriction","facet_generation_behavior":"EXCLUDE","field_id":"PRODUCT_CHAR_ID","value_list":[null]}]}}',
649
+ "facet_restriction_list": json.dumps(
650
+ {
651
+ "facet_restriction_list": {
652
+ "facet_field_restriction": [
653
+ {
654
+ "type": "com.artesia.search.facet.FacetSimpleFieldRestriction",
655
+ "facet_generation_behavior": "EXCLUDE",
656
+ "field_id": "PRODUCT_CHAR_ID",
657
+ "value_list": [None],
658
+ },
659
+ ],
660
+ },
661
+ },
662
+ ),
331
663
  "search_condition_list": [
332
- '{"search_condition_list":{"search_condition":[{"type":"com.artesia.search.SearchTabularCondition","metadata_table_id":"OTMM.FIELD.BUSINESS_UNIT.TAB","tabular_field_list":[{"type":"com.artesia.search.SearchTabularFieldCondition","metadata_field_id":"OTMM.COLUMN.BUSINESS_UNIT.TAB","relational_operator_id":"ARTESIA.OPERATOR.CHAR.CONTAINS","value":"'
333
- + str(bu_id)
334
- + '","left_paren":"(","right_paren":")"}]}]}}'
664
+ json.dumps(
665
+ {
666
+ "search_condition_list": {
667
+ "search_condition": [
668
+ {
669
+ "type": "com.artesia.search.SearchTabularCondition",
670
+ "metadata_table_id": "OTMM.FIELD.BUSINESS_UNIT.TAB",
671
+ "tabular_field_list": [
672
+ {
673
+ "type": "com.artesia.search.SearchTabularFieldCondition",
674
+ "metadata_field_id": "OTMM.COLUMN.BUSINESS_UNIT.TAB",
675
+ "relational_operator_id": "ARTESIA.OPERATOR.CHAR.CONTAINS",
676
+ "value": str(bu_id),
677
+ "left_paren": "(",
678
+ "right_paren": ")",
679
+ },
680
+ ],
681
+ },
682
+ ],
683
+ },
684
+ },
685
+ ),
335
686
  ],
336
687
  }
337
688
 
338
- flattened_data = {
339
- k: v if not isinstance(v, list) else ",".join(v) for k, v in payload.items()
340
- }
689
+ # Convert list values into comma-separated strings:
690
+ flattened_data = {k: v if not isinstance(v, list) else ",".join(v) for k, v in payload.items()}
341
691
 
692
+ # Use OTMM's search to find the assets for the business unit:
342
693
  search_result = self.search_assets(flattened_data)
343
694
 
344
- if not search_result or not "search_result_resource" in search_result:
345
- logger.error("No assets found via search!")
695
+ if not search_result or "search_result_resource" not in search_result:
696
+ self.logger.error(
697
+ "No assets found via search for business unit with ID -> '%s'!",
698
+ bu_id,
699
+ )
346
700
  return None
347
701
  search_result = search_result.get("search_result_resource")
348
702
 
@@ -357,7 +711,7 @@ class OTMM:
357
711
  flattened_data["after"] += hits
358
712
  search_result = self.search_assets(flattened_data)
359
713
 
360
- if not search_result or not "search_result_resource" in search_result:
714
+ if not search_result or "search_result_resource" not in search_result:
361
715
  break
362
716
 
363
717
  search_result = search_result.get("search_result_resource")
@@ -372,17 +726,29 @@ class OTMM:
372
726
  # end method definition
373
727
 
374
728
  def get_product_assets(
375
- self, product_id: int, offset: int = 0, limit: int = 200
729
+ self,
730
+ product_id: str,
731
+ offset: int = 0,
732
+ limit: int = 200,
376
733
  ) -> list | None:
377
734
  """Get all Media Assets for a given product (ID).
378
735
 
736
+ This does currently NOT include the asset metadata even though lead type
737
+ is set to "metadata" below as "metadata_to_return" is set to a single field.
738
+
379
739
  Args:
380
- product_id (int): Identifier of the product.
381
- offset (int, optional): Result pagination. Starting ID. Defaults to 0.
382
- limit (int, optional): Result pagination. Page length. Defaults to 200.
740
+ product_id (str):
741
+ Identifier of the product. DON'T USE `int` HERE!
742
+ OTMM delivers strings for get_products()
743
+ offset (int, optional):
744
+ Result pagination. Starting ID. Defaults to 0.
745
+ limit (int, optional):
746
+ Result pagination. Page length. Defaults to 200.
383
747
 
384
748
  Returns:
385
- dict: Search Results
749
+ list | None:
750
+ Search Results
751
+
386
752
  """
387
753
 
388
754
  payload = {
@@ -396,20 +762,39 @@ class OTMM:
396
762
  "preference_id": ["ARTESIA.PREFERENCE.GALLERYVIEW.DISPLAYED_FIELDS"],
397
763
  "metadata_to_return": ["ARTESIA.FIELD.TAG"],
398
764
  "search_condition_list": [
399
- '{"search_condition_list":{"search_condition":[{"type":"com.artesia.search.SearchTabularCondition","metadata_table_id":"OTM.TABLE.PRODUCT_TABLE_FIELD","tabular_field_list":[{"type":"com.artesia.search.SearchTabularFieldCondition","metadata_field_id":"PRODUCT_CHAR_ID","relational_operator_id":"ARTESIA.OPERATOR.CHAR.CONTAINS","value":"'
400
- + str(product_id)
401
- + '","left_paren":"(","right_paren":")"}]}]}}'
765
+ json.dumps(
766
+ {
767
+ "search_condition_list": {
768
+ "search_condition": [
769
+ {
770
+ "type": "com.artesia.search.SearchTabularCondition",
771
+ "metadata_table_id": "OTM.TABLE.PRODUCT_TABLE_FIELD",
772
+ "tabular_field_list": [
773
+ {
774
+ "type": "com.artesia.search.SearchTabularFieldCondition",
775
+ "metadata_field_id": "PRODUCT_CHAR_ID",
776
+ "relational_operator_id": "ARTESIA.OPERATOR.CHAR.CONTAINS",
777
+ "value": str(product_id),
778
+ "left_paren": "(",
779
+ "right_paren": ")",
780
+ },
781
+ ],
782
+ },
783
+ ],
784
+ },
785
+ },
786
+ ),
402
787
  ],
403
788
  }
404
789
 
405
- flattened_data = {
406
- k: v if not isinstance(v, list) else ",".join(v) for k, v in payload.items()
407
- }
790
+ # Convert list values into comma-separated strings:
791
+ flattened_data = {k: v if not isinstance(v, list) else ",".join(v) for k, v in payload.items()}
408
792
 
409
- search_result = self.search_assets(flattened_data)
793
+ # Use OTMM's search to find the assets for the product:
794
+ search_result = self.search_assets(payload=flattened_data)
410
795
 
411
- if not search_result or not "search_result_resource" in search_result:
412
- logger.error("No assets found via search!")
796
+ if not search_result or "search_result_resource" not in search_result:
797
+ self.logger.error("No assets found via search!")
413
798
  return None
414
799
  search_result = search_result.get("search_result_resource")
415
800
 
@@ -420,11 +805,12 @@ class OTMM:
420
805
 
421
806
  hits_remaining = hits_total - hits
422
807
 
808
+ # Iterate through all result pages:
423
809
  while hits_remaining > 0:
424
810
  flattened_data["after"] += hits
425
- search_result = self.search_assets(flattened_data)
811
+ search_result = self.search_assets(payload=flattened_data)
426
812
 
427
- if not search_result or not "search_result_resource" in search_result:
813
+ if not search_result or "search_result_resource" not in search_result:
428
814
  break
429
815
 
430
816
  search_result = search_result.get("search_result_resource")
@@ -443,39 +829,53 @@ class OTMM:
443
829
  asset_id: str,
444
830
  asset_name: str,
445
831
  download_url: str = "",
446
- skip_existing: bool = True,
832
+ asset_modification_date: str | None = None,
447
833
  ) -> bool:
448
- """Download a given Media Asset
834
+ """Download a given media asset.
449
835
 
450
836
  Args:
451
- asset_id (str): ID of the asset to download
452
- asset_name (str): Name of the assets - becomes the file name.
453
- download_url (str, optiona): URL to download the asset (optional).
837
+ asset_id (str):
838
+ ID of the asset to download. This becomes the file name.
839
+ asset_name (str):
840
+ The name of the asset.
841
+ download_url (str, optional):
842
+ URL to download the asset (optional).
843
+ asset_modification_date (str | None, optional):
844
+ The last asset modification in OpenText Media Management.
454
845
 
455
846
  Returns:
456
- bool: True = success, False = failure
847
+ bool:
848
+ True = success, False = failure
849
+
457
850
  """
458
- # url = f"{self.base_url}/assets/v1/{asset_id}/download"
459
851
 
460
- if download_url:
461
- request_url = download_url
462
- else:
463
- request_url = self.config()["assetsUrl"] + "/" + asset_id + "/contents"
852
+ request_url = download_url if download_url else self.config()["assetsUrl"] + "/" + asset_id + "/contents"
464
853
 
854
+ # We use the Asset ID as the filename to avoid name collisions:
465
855
  file_name = os.path.join(self._download_dir, asset_id)
466
856
 
467
857
  if os.path.exists(file_name):
468
- if skip_existing:
469
- logger.debug(
470
- "OpenText Media Management asset has been downloaded before skipping download -> '%s' (%s) to -> %s...",
858
+ if asset_modification_date:
859
+ file_mod_time = datetime.fromtimestamp(os.path.getmtime(file_name), tz=timezone.utc)
860
+ date_last_updated = datetime.strptime(
861
+ asset_modification_date,
862
+ "%Y-%m-%dT%H:%M:%SZ",
863
+ ).replace(tzinfo=timezone.utc)
864
+ download_up_to_date: bool = file_mod_time >= date_last_updated
865
+ else:
866
+ download_up_to_date = True
867
+
868
+ if download_up_to_date:
869
+ self.logger.debug(
870
+ "Asset -> '%s' (%s) has been downloaded before and is up to date. Skipping download to -> %s...",
471
871
  asset_name,
472
872
  asset_id,
473
873
  file_name,
474
874
  )
475
875
  return True
476
876
  else:
477
- logger.debug(
478
- "OpenText Media Management asset has been downloaded before. Update download -> '%s' (%s) to -> %s...",
877
+ self.logger.debug(
878
+ "Asset -> '%s' (%s) has been downloaded before, but it is outdated. Updating download to -> %s...",
479
879
  asset_name,
480
880
  asset_id,
481
881
  file_name,
@@ -487,8 +887,8 @@ class OTMM:
487
887
  # Create the directory
488
888
  os.makedirs(self._download_dir)
489
889
 
490
- logger.info(
491
- "Downloading OpenText Media Management asset -> '%s' (%s) to -> %s...",
890
+ self.logger.info(
891
+ "Downloading asset -> '%s' (%s) to -> %s...",
492
892
  asset_name,
493
893
  asset_id,
494
894
  file_name,
@@ -498,161 +898,516 @@ class OTMM:
498
898
  with open(file_name, "wb") as f:
499
899
  for chunk in response.iter_content(chunk_size=8192):
500
900
  f.write(chunk)
901
+ except HTTPError as http_error:
902
+ self.logger.error("HTTP error requesting -> %s; error -> %s", request_url, str(http_error))
903
+ return False
904
+ except RequestException:
905
+ self.logger.error("Request error requesting -> %s!", request_url)
906
+ return False
907
+ except Exception:
908
+ self.logger.error("Unexpected error requesting -> %s!", request_url)
909
+ return False
910
+
911
+ return True
912
+
913
+ # end method definition
914
+
915
+ def remove_stale_download(
916
+ self,
917
+ asset_id: str,
918
+ asset_name: str = "",
919
+ ) -> bool:
920
+ """Remove stale download file for an expired or deleted asset.
921
+
922
+ Args:
923
+ asset_id (str):
924
+ The ID of the asset to delete in the file system.
925
+ asset_name (str, optional):
926
+ The name of the asset. Just used for logging.
927
+
928
+ Returns:
929
+ bool: True = success, False = failure
930
+
931
+ """
932
+
933
+ file_name = os.path.join(self._download_dir, asset_id)
934
+
935
+ if os.path.exists(file_name):
936
+ self.logger.debug(
937
+ "Deleting stale download file -> '%s' for asset %s...",
938
+ file_name,
939
+ "-> '{}' ({})".format(asset_name, asset_id) if asset_name else "-> {}".format(asset_id),
940
+ )
941
+ os.remove(file_name)
501
942
  return True
502
- except HTTPError as http_err:
503
- logger.error("HTTP error occurred -> %s!", str(http_err))
504
- except RequestException as req_err:
505
- logger.error("Request error occurred -> %s!", str(req_err))
506
- except Exception as err:
507
- logger.error("An error occurred -> %s!", str(err))
508
943
 
509
944
  return False
510
945
 
511
946
  # end method definition
512
947
 
513
- def search_assets(self, payload: dict):
948
+ def search_assets(self, payload: dict) -> dict | None:
514
949
  """Search an asset based on the given parameters / criterias.
515
950
 
516
951
  Args:
517
- payload (dict): in the format of:
518
- payload = {
519
- "PRODUCT_CHAR_ID": "Extended ECM for Engineering",
520
- "BUSINESS_AREA_CHAR_ID": "Content",
521
- "keyword_query": "*",
522
- "limit": "5",
523
- }
952
+ payload (dict):
953
+ In the format of:
954
+ payload = {
955
+ "PRODUCT_CHAR_ID": "Extended ECM for Engineering",
956
+ "BUSINESS_AREA_CHAR_ID": "Content",
957
+ "keyword_query": "*",
958
+ "limit": "5",
959
+ }
524
960
 
525
961
  Returns:
526
- _type_: JSON search results
962
+ dict | None:
963
+ The search results.
964
+
965
+ Example:
966
+ {
967
+ 'search_result_resource': {
968
+ 'search_result': {
969
+ 'asset_group_count': {
970
+ 'entry': [...]
971
+ },
972
+ 'asset_id_list': [
973
+ '00084f808d1331bca1f24134bde9cd8e742fe24a',
974
+ '000af201d7130d1bb2778af672f3bfb554ea965a',
975
+ '000f9594985b766ee495c27172446d5c9c4e0ebf',
976
+ '0012d344dc39d4d23aaeb04fbe9db3b21daee6e0',
977
+ '00135d36232d66b6f11e0020f317244d08a613d1'
978
+ ],
979
+ 'contains_invalid_conditions': False,
980
+ 'facet_field_response_list': [
981
+ {...},
982
+ {...},
983
+ ...
984
+ ],
985
+ 'hit_count': 5,
986
+ 'offset': 0,
987
+ 'total_hit_count': 11886
988
+ },
989
+ 'asset_list': [
990
+ {
991
+ 'access_control_descriptor': {...},
992
+ 'asset_content_info': {...},
993
+ 'asset_id': '00084f808d1331bca1f24134bde9cd8e742fe24a',
994
+ 'asset_lock_state_last_update_date': '2024-01-03T16:47:22Z',
995
+ 'asset_lock_state_user_id': '166',
996
+ 'asset_state': 'NORMAL',
997
+ 'asset_state_last_update_date': '2024-01-03T16:47:22Z',
998
+ 'asset_state_user_id': '166',
999
+ 'checked_out': False,
1000
+ 'content_editable': True,
1001
+ 'content_lock_state_last_update_date': '2021-11-22T16:32:59Z',
1002
+ 'content_lock_state_user_id': '49',
1003
+ 'content_lock_state_user_name': 'sspasik',
1004
+ 'content_size': 3103,
1005
+ 'content_state': 'NORMAL',
1006
+ 'content_state_last_update_date': '2021-11-22T16:32:57Z',
1007
+ 'content_state_user_id': '49',
1008
+ 'content_state_user_name': 'Srgjan Spasik',
1009
+ 'content_type': 'BITMAP',
1010
+ ...
1011
+ },
1012
+ ...
1013
+ ]
1014
+ }
1015
+ }
1016
+
527
1017
  """
528
1018
 
529
1019
  request_url = self.config()["searchUrl"]
530
1020
 
531
- headers = {"Content-Type": "application/x-www-form-urlencoded"}
532
-
533
1021
  encoded_payload = urllib.parse.urlencode(payload, safe="/:")
534
1022
 
535
1023
  try:
536
1024
  response = self._session.post(
537
1025
  request_url,
538
- headers=headers,
1026
+ headers=REQUEST_HEADERS,
539
1027
  data=encoded_payload,
540
1028
  )
541
1029
 
542
1030
  response.raise_for_status()
543
1031
 
544
- except requests.exceptions.HTTPError as http_err:
545
- logger.error("HTTP error occurred: %s", http_err)
546
- except requests.exceptions.ConnectionError as conn_err:
547
- logger.error("Connection error occurred: %s", conn_err)
548
- except requests.exceptions.Timeout as timeout_err:
549
- logger.error("Timeout error occurred: %s", timeout_err)
550
- except requests.exceptions.RequestException as req_err:
551
- logger.error("Request error occurred: %s", req_err)
552
- except Exception as e:
553
- logger.error("An unexpected error occurred: %s", e)
1032
+ except requests.exceptions.HTTPError as http_error:
1033
+ self.logger.error("HTTP error requesting -> %s; error -> %s", request_url, str(http_error))
1034
+ self.logger.debug("HTTP request header -> %s", str(REQUEST_HEADERS))
1035
+ return None
1036
+ except requests.exceptions.ConnectionError:
1037
+ self.logger.error("Connection error requesting -> %s", request_url)
1038
+ return None
1039
+ except requests.exceptions.Timeout:
1040
+ self.logger.error("Timeout error requesting -> %s", request_url)
1041
+ return None
1042
+ except requests.exceptions.RequestException:
1043
+ self.logger.error("Request error requesting -> %s", request_url)
1044
+ return None
1045
+ except Exception:
1046
+ self.logger.error("Unexpected error requesting -> %s", request_url)
1047
+ return None
554
1048
 
555
1049
  return response.json()
556
1050
 
557
1051
  # end method definition
558
1052
 
559
- def get_asset_metadata(self, asset_id: str) -> dict:
560
- """Retrieve metadata of an asset based on the given parameters / criterias.
1053
+ def get_asset_details(
1054
+ self,
1055
+ asset_id: str,
1056
+ level_of_detail: str = "slim",
1057
+ load_multilingual_field_values: bool = True,
1058
+ load_subscribed_to: bool = True,
1059
+ load_asset_content_info: bool = True,
1060
+ load_metadata: bool = True,
1061
+ load_inherited_metadata: bool = True,
1062
+ load_thumbnail_info: bool = True,
1063
+ load_preview_info: bool = True,
1064
+ load_pdf_preview_info: bool = True,
1065
+ load_3d_preview_info: bool = True,
1066
+ load_destination_links: bool = True,
1067
+ load_security_policies: bool = True,
1068
+ load_path: bool = True,
1069
+ load_deep_zoom_info: bool = True,
1070
+ ) -> dict | None:
1071
+ """Retrieve details of an asset based on the given parameters / criterias.
561
1072
 
562
1073
  Args:
563
- asset_id (str): asset_id of the asset to query
1074
+ asset_id (str):
1075
+ The ID of the asset to query.
1076
+ level_of_detail (str, optional):
1077
+ Can either be "slim" or "full". "slim" is the default.
1078
+ load_multilingual_field_values (bool, optional):
1079
+ If True, load multilingual fields, default = True.
1080
+ load_subscribed_to (bool, optional):
1081
+ If True, load subscriber information, default = True.
1082
+ load_asset_content_info (bool, optional):
1083
+ If True, load content information, default = True.
1084
+ load_metadata (bool, optional):
1085
+ If True, load metadata, default = True.
1086
+ load_inherited_metadata (bool, optional):
1087
+ If True, load inherited metadata, default = True.
1088
+ load_thumbnail_info (bool, optional):
1089
+ If True, load thumbnail information, default = True.
1090
+ load_preview_info (bool, optional):
1091
+ If True, load preview information, default = True.
1092
+ load_pdf_preview_info (bool, optional):
1093
+ If true, load PDF preview information, default = True.
1094
+ load_3d_preview_info (bool, optional):
1095
+ If True, load 3D preview information, default = True.
1096
+ load_destination_links (bool, optional):
1097
+ If true, load destination links, default = True.
1098
+ load_security_policies (bool, optional):
1099
+ If True, load security policies, default = True.
1100
+ load_path (bool, optional):
1101
+ If True, load path, default = True.
1102
+ load_deep_zoom_info (bool, optional):
1103
+ If True, load deep zoom information, default = True.
564
1104
 
565
1105
  Returns:
566
- dict: Metadata information as dict with values as list
567
-
568
- example:
569
- {
570
- 'OTMM.CUSTOM.FIELD_TITLE': [],
571
- 'OTMM.CUSTOM.FIELD_DESCRIPTION': [],
572
- 'OTMM.CUSTOM.FIELD_KEYWORDS': [],
573
- 'CONTENT_TYPE_COMBO_CHAR_ID': [],
574
- 'OTM.TABLE.APPROVED_USAGE_FIELD': [],
575
- 'OTMM.FIELD.RESOURCE_LIBRARY.TAB': [],
576
- 'LANGUAGE_COMBO_CHAR_ID': [],
577
- 'OTMM.CUSTOM.FIELD_PART_NUMBER': [],
578
- 'OTMM.FIELD.BUSINESS_UNIT.TAB': ['Content'],
579
- 'OTM.TABLE.PRODUCT_TABLE_FIELD': ['Vendor Invoice Management for SAP'],
580
- 'OTM.TABLE.INDUSTRY_TABLE_FIELD': [],
581
- 'OTMM.CUSTOM.FIELD_URL': [],
582
- 'OTMM.CUSTOM.FIELD_PREVIOUS_URL': [],
583
- 'OTMM.CUSTOM.FIELD_CONTENT_OWNER': [],
584
- 'OTMM.CUSTOM.FIELD_EMAIL': [],
585
- 'OTMM.CUSTOM.FIELD_JOB_NUMBER': [],
586
- 'OTM.TABLE.BUSINESS_AREA_TABLE_FIELD': [],
587
- 'OTM.TABLE.JOURNEY_TABLE_FIELD': ['Buy', 'Try', 'Learn'],
588
- 'OTMM.FIELD.PERSONA.TAB': [],
589
- 'OTMM.FIELD.SERVICES.TAB': [],
590
- 'OTMM.FIELD.REGION.TAB': [],
591
- 'OTMM.FIELD.PURPOSE.TAB': [],
592
- 'AODA_CHAR_ID': [],
593
- 'REVIEW_CADENCE_CHAR_ID': [],
594
- 'CONTENT_CREATED_DATE_ID': [],
595
- 'ARTESIA.FIELD.EXPIRATION DATE': [],
596
- 'OTMM.CUSTOM.FIELD_REAL_COMMENTS': []
597
- }
1106
+ dict | None:
1107
+ Metadata information as dict with values as list
1108
+
1109
+ Example:
1110
+ {
1111
+ 'asset_resource': {
1112
+ 'asset': {
1113
+ 'access_control_descriptor': {
1114
+ 'permissions_map': {...}
1115
+ },
1116
+ 'asset_content_info': {
1117
+ 'master_content': {...}
1118
+ },
1119
+ 'asset_id': 'e064571da79c926ee14b0850734b49edf42d9ba5',
1120
+ 'asset_lock_state_last_update_date': '2024-04-16T15:03:48Z',
1121
+ 'asset_lock_state_user_id': '153',
1122
+ 'asset_state': 'NORMAL',
1123
+ 'asset_state_last_update_date': '2024-04-16T15:03:48Z',
1124
+ 'asset_state_user_id': '153',
1125
+ 'checked_out': False,
1126
+ 'content_editable': True,
1127
+ 'content_lock_state_last_update_date': '2023-12-11T20:56:26Z',
1128
+ 'content_lock_state_user_id': '202',
1129
+ 'content_lock_state_user_name': 'ajohnson3',
1130
+ 'content_size': 95873,
1131
+ 'content_state': 'NORMAL',
1132
+ 'content_state_last_update_date': '2023-12-11T20:56:26Z',
1133
+ 'content_state_user_id': '202',
1134
+ 'content_state_user_name': 'Amanda Johnson',
1135
+ 'content_type': 'ACROBAT',
1136
+ 'creator_id': '202',
1137
+ 'date_imported': '2023-12-11T20:56:26Z',
1138
+ 'date_last_updated': '2024-04-16T15:03:48Z',
1139
+ 'deleted': False,
1140
+ 'delivery_service_url': 'https://assets.opentext.com/adaptivemedia/rendition?id=726d14f14bb1ae93c3efda5a870399a20c991770',
1141
+ 'expired': False,
1142
+ 'import_job_id': 5776,
1143
+ 'import_user_name': 'ajohnson3',
1144
+ 'latest_version': True,
1145
+ 'legacy_model_id': 104,
1146
+ 'links': {
1147
+ 'links': [...],
1148
+ 'source_id': 'e064571da79c926ee14b0850734b49edf42d9ba5'
1149
+ },
1150
+ 'locked': False,
1151
+ 'master_content_info': {
1152
+ 'content_checksum': '2a31defcf7ad2feb7c557acb068a5c22',
1153
+ 'content_data': {...},
1154
+ 'content_kind': 'MASTER',
1155
+ 'content_manager_id': 'ARTESIA.CONTENT.GOOGLE.CLOUD',
1156
+ 'content_size': 95873,
1157
+ 'height': -1,
1158
+ 'id': 'b563035e050a89e58a921df8a4047a0673ad9691',
1159
+ 'mime_type': 'application/pdf',
1160
+ 'name': 'a-business-case-for-arcsight-soar-wp.pdf',
1161
+ 'unit_of_size': 'BYTES',
1162
+ 'url': '/otmmapi/v6/renditions/b563035e050a89e58a921df8a4047a0673ad9691',
1163
+ 'width': -1
1164
+ },
1165
+ 'metadata': {
1166
+ 'type': 'com.artesia.metadata.MetadataModel',
1167
+ 'id': 'OTM.MARKETING.MODEL',
1168
+ 'name': 'OTM Marketing Tags',
1169
+ 'metadata_element_list': [...],
1170
+ 'has_multilingual_fields': False,
1171
+ 'legacy_id': 104
1172
+ },
1173
+ 'metadata_lock_state_user_name': 'ababigian',
1174
+ 'metadata_model_id': 'OTM.MARKETING.MODEL',
1175
+ 'metadata_state_user_name': 'Andra Babigian',
1176
+ 'mime_type': 'application/pdf',
1177
+ 'name': 'a-business-case-for-arcsight-soar-pp-en.pdf',
1178
+ 'original_asset_id': '726d14f14bb1ae93c3efda5a870399a20c991770',
1179
+ 'path_list': [
1180
+ {...}
1181
+ ],
1182
+ 'product_associations': False,
1183
+ 'rendition_content': {
1184
+ 'pdf_preview_content': {...}
1185
+ },
1186
+ 'security_policy_list': [
1187
+ {...}
1188
+ ],
1189
+ 'subscribed_to': False,
1190
+ 'version': 3
1191
+ }
1192
+ }
1193
+ }
1194
+
598
1195
  """
599
1196
 
600
- request_url = self.config()["assetsUrl"] + f"/{asset_id}"
601
- headers = {"Content-Type": "application/x-www-form-urlencoded"}
1197
+ request_url = self.config()["assetsUrl"] + "/" + asset_id
602
1198
 
603
1199
  params = {
604
1200
  "load_type": "custom",
605
- "level_of_detail": "slim",
606
- "data_load_request": '{"data_load_request":{"load_multilingual_field_values":"true","load_subscribed_to":"true","load_asset_content_info":"true","load_metadata":"true","load_inherited_metadata":"true","load_thumbnail_info":"true","load_preview_info":"true", "load_pdf_preview_info":"true", "load_3d_preview_info" : "true","load_destination_links":"true", "load_security_policies":"true","load_path":"true","load_deep_zoom_info":"true"}}',
1201
+ "level_of_detail": level_of_detail,
1202
+ "data_load_request": json.dumps(
1203
+ {
1204
+ "data_load_request": {
1205
+ "load_multilingual_field_values": load_multilingual_field_values,
1206
+ "load_subscribed_to": load_subscribed_to,
1207
+ "load_asset_content_info": load_asset_content_info,
1208
+ "load_metadata": load_metadata,
1209
+ "load_inherited_metadata": load_inherited_metadata,
1210
+ "load_thumbnail_info": load_thumbnail_info,
1211
+ "load_preview_info": load_preview_info,
1212
+ "load_pdf_preview_info": load_pdf_preview_info,
1213
+ "load_3d_preview_info": load_3d_preview_info,
1214
+ "load_destination_links": load_destination_links,
1215
+ "load_security_policies": load_security_policies,
1216
+ "load_path": load_path,
1217
+ "load_deep_zoom_info": load_deep_zoom_info,
1218
+ },
1219
+ },
1220
+ ),
607
1221
  }
608
1222
 
609
1223
  try:
610
- response = self._session.get(request_url, headers=headers, params=params)
1224
+ response = self._session.get(
1225
+ request_url,
1226
+ headers=REQUEST_HEADERS,
1227
+ params=params,
1228
+ )
611
1229
 
612
1230
  response.raise_for_status()
613
1231
 
614
- except requests.exceptions.HTTPError as http_err:
615
- logger.error("HTTP error occurred: %s", http_err)
616
- except requests.exceptions.ConnectionError as conn_err:
617
- logger.error("Connection error occurred: %s", conn_err)
618
- except requests.exceptions.Timeout as timeout_err:
619
- logger.error("Timeout error occurred: %s", timeout_err)
620
- except requests.exceptions.RequestException as req_err:
621
- logger.error("Request error occurred: %s", req_err)
622
- except Exception as e:
623
- logger.error("An unexpected error occurred: %s", e)
1232
+ except requests.exceptions.HTTPError as http_error:
1233
+ self.logger.error("HTTP error requesting -> %s; error -> %s", request_url, str(http_error))
1234
+ self.logger.debug("HTTP request header -> %s", str(REQUEST_HEADERS))
1235
+ return None
1236
+ except requests.exceptions.ConnectionError:
1237
+ self.logger.error("Connection error requesting -> %s", request_url)
1238
+ return None
1239
+ except requests.exceptions.Timeout:
1240
+ self.logger.error("Timeout error requesting -> %s", request_url)
1241
+ return None
1242
+ except requests.exceptions.RequestException:
1243
+ self.logger.error("Request error requesting -> %s", request_url)
1244
+ return None
1245
+ except Exception:
1246
+ self.logger.error("Unexpected error requesting -> %s", request_url)
1247
+ return None
1248
+
1249
+ return response.json()
1250
+
1251
+ # end method definition
1252
+
1253
+ def prepare_asset_data(self, asset_id: str, asset: dict | None = None) -> dict:
1254
+ """Prepare the asset data for the Pandas Data frame.
1255
+
1256
+ The asset data is either provided with the asset parameter or
1257
+ retrieved by the method.
1258
+
1259
+ Args:
1260
+ asset_id (str):
1261
+ The ID of the asset.
1262
+ asset (dict | None, optional):
1263
+ If the asset data structure is already available pass it
1264
+ with this parameter. Make sure the asset data was retrieved
1265
+ to include the metadata. If None is provided then the method
1266
+ will retrieve the asset data (including metadata) on the fly.
1267
+
1268
+ Returns:
1269
+ dict:
1270
+ The simplified / flat structure for the Pandas data frame.
1271
+
1272
+ Example:
1273
+ {
1274
+ 'OTMM_CUSTOM_FIELD_TITLE': 'A Business Case for ArcSight SOAR',
1275
+ 'OTMM_CUSTOM_FIELD_DESCRIPTION': 'Cybersecurity is a complex problem.',
1276
+ 'OTMM_CUSTOM_FIELD_KEYWORDS': 'SOAR, SIEM, cybersecurity, SecOps, SOC, cybersecurity automation',
1277
+ 'CONTENT_TYPE_COMBO_CHAR_ID': None,
1278
+ 'OTMM_FIELD_IMAGE_TYPE': None,
1279
+ 'OTM_TABLE_APPROVED_USAGE_FIELD': None,
1280
+ 'OTMM_FIELD_RESOURCE_LIBRARY_TAB': ['Resource Library'],
1281
+ 'LANGUAGE_COMBO_CHAR_ID': 'English',
1282
+ 'OTMM_CUSTOM_FIELD_PART_NUMBER': '762-000033-003',
1283
+ 'OTMM_FIELD_AVIATOR': None,
1284
+ 'OTMM_FIELD_BUSINESS_UNIT_TAB': ['Cybersecurity'],
1285
+ 'OTM_TABLE_PRODUCT_TABLE_FIELD': ['ArcSight Enterprise Security Manager', 'Arcsight Intelligence'],
1286
+ 'OTMM_FIELD_PRODUCT_NEW_TAB': [],
1287
+ 'OTMM_FIELD_MARKET_SEGMENT_TAB': [],
1288
+ 'OTM_TABLE_INDUSTRY_TABLE_FIELD': [],
1289
+ 'OTMM_CUSTOM_FIELD_URL': None,
1290
+ 'OTMM_CUSTOM_FIELD_PREVIOUS_URL': 'https://www.microfocus.com/media/white-paper/a-business-case-for-arcsight-soar-wp.pdf',
1291
+ 'OTMM_CUSTOM_FIELD_CONTENT_OWNER': 'Steve Jones',
1292
+ 'OTMM_CUSTOM_FIELD_EMAIL': 'sjones2@opentext.com',
1293
+ 'OTMM_CUSTOM_FIELD_JOB_NUMBER': [],
1294
+ 'OTM_TABLE_BUSINESS_AREA_TABLE_FIELD': [],
1295
+ 'OTM_TABLE_JOURNEY_TABLE_FIELD': [],
1296
+ 'OTMM_FIELD_PERSONA_TAB': [],
1297
+ 'OTMM_FIELD_SERVICES_TAB': [],
1298
+ 'OTMM_FIELD_REGION_TAB': [],
1299
+ 'OTMM_FIELD_PURPOSE_TAB': ['Marketing'],
1300
+ 'AODA_CHAR_ID': 'Yes',
1301
+ 'REVIEW_CADENCE_CHAR_ID': 'Quarterly',
1302
+ 'CONTENT_CREATED_DATE_ID': '2023-10-18T07:00:00Z',
1303
+ 'ARTESIA_FIELD_EXPIRATIONDATE': None,
1304
+ 'OTMM_CUSTOM_FIELD_REAL_COMMENTS': None
1305
+ }
1306
+
1307
+ """
1308
+
1309
+ # If the asset dictionary is not already provided
1310
+ # we retrieve it here:
1311
+ if not asset:
1312
+ asset = self.get_asset_details(asset_id=asset_id)
1313
+ if asset is None:
1314
+ self.logger.error(
1315
+ "Cannot get asset details for asset with ID -> %s",
1316
+ asset_id,
1317
+ )
1318
+ return {}
1319
+
1320
+ # We drill down to the actual asset data:
1321
+ if "asset_resource" in asset:
1322
+ asset = asset["asset_resource"]
1323
+ if "asset" in asset:
1324
+ asset = asset["asset"]
1325
+
1326
+ if "metadata" not in asset:
1327
+ self.logger.error(
1328
+ "The provided data for asset with ID -> '%s' was retrieved without metadata - cannot prepare metadata fields.",
1329
+ asset_id,
1330
+ )
1331
+ return {}
624
1332
 
625
1333
  # Read Metadata from nested structure
626
1334
  try:
627
- metadata = (
628
- response.json()
629
- .get("asset_resource", {})
630
- .get("asset", {})
631
- .get("metadata", {})
632
- .get("metadata_element_list", [])[0]
633
- .get("metadata_element_list", [])
1335
+ """
1336
+ metadata is a list of dictionaries. Each item has these keys:
1337
+ * type (str)
1338
+ * id (str)
1339
+ * name (str)
1340
+ * value (dict)
1341
+ - cascading_domain_value (bool)
1342
+ - domain_value (bool)
1343
+ - is_locked (bool)
1344
+ - value (dict)
1345
+ + type (str)
1346
+ + value (str)
1347
+ * metadata_element_list (list)
1348
+ * display_value
1349
+ """
1350
+ metadata_list = (
1351
+ asset.get("metadata", {}).get("metadata_element_list", [])[0].get("metadata_element_list", [])
634
1352
  )
635
1353
  except JSONDecodeError:
636
- logger.error("Cannot decode JSON response for assset_id -> %s", asset_id)
1354
+ self.logger.error(
1355
+ "Cannot decode JSON response for asset with ID -> %s",
1356
+ asset_id,
1357
+ )
1358
+ return {}
1359
+ except IndexError:
1360
+ self.logger.error(
1361
+ "Cannot find metadata in asset with ID -> %s",
1362
+ asset_id,
1363
+ )
637
1364
  return {}
638
1365
 
639
- # Generate empty result dict
1366
+ # Initialize empty result dict
640
1367
  result = {}
641
1368
 
642
- # Extract Metadata fields with values as list
643
- for data in metadata:
644
- index = data.get("id").replace(" ", "").replace(".", "_")
645
-
646
- try:
647
- result[index] = data.get("value").get("value").get("value")
648
- except AttributeError:
1369
+ # Extract Metadata fields with values as list and build up
1370
+ # a dictionary:
1371
+ for metadata in metadata_list:
1372
+ # IDs may have dots and spaces that we don't want as dictionary keys.
1373
+ # We remove spaces and replace dots with underscores
1374
+ # (example: OTMM.CUSTOM.FIELD_ PART_NUMBER -> OTMM_CUSTOM_FIELD_PART_NUMBER):
1375
+ dict_key = metadata.get("id").replace(" ", "").replace(".", "_")
1376
+
1377
+ # OTMM has a variety of metadata field types.
1378
+ # This includes list values, drop-down lists and strings.
1379
+ # Each of these have a different representation in
1380
+ # the 'metadata' structure:
1381
+ if "value" in metadata and "value" in metadata["value"]: # do we have a scalar value (plain string)?
1382
+ value_dict = metadata.get("value").get("value")
1383
+ if "value" in value_dict:
1384
+ result[dict_key] = value_dict.get("value")
1385
+ elif "display_value" in value_dict: # is it a domain value?
1386
+ result[dict_key] = value_dict.get("display_value")
1387
+ else:
1388
+ result[dict_key] = None
1389
+ elif "metadata_element_list" in metadata: # do we have a list value?
1390
+ # Create list with a comprehension:
1391
+ value_list = [
1392
+ value.get("value").get("display_value")
1393
+ for element in metadata.get("metadata_element_list", []) # outer loop
1394
+ for value in element.get("values", []) # inner loop
1395
+ ]
1396
+ result[dict_key] = value_list
1397
+ else: # it may also be that there's no value:
1398
+ self.logger.debug(
1399
+ "No value field in metadata -> %s for key -> '%s'",
1400
+ str(metadata),
1401
+ dict_key,
1402
+ )
1403
+ result[dict_key] = None
649
1404
 
650
- infos = []
651
- for element in data.get("metadata_element_list", []):
652
- for value in element.get("values", []):
653
- infos.append(value.get("value").get("display_value"))
1405
+ self.logger.debug(
1406
+ "Retrieved asset details for asset with ID -> %s: %s",
1407
+ asset_id,
1408
+ str(result),
1409
+ )
654
1410
 
655
- result[index] = infos
656
1411
  return result
657
1412
 
658
1413
  # end method definition
@@ -663,17 +1418,21 @@ class OTMM:
663
1418
  load_business_units: bool = True,
664
1419
  download_assets: bool = True,
665
1420
  ) -> bool:
666
- """Load all Media Assets for Products and Business Units
1421
+ """Load all Media Assets for Products and Business Units into a Pandas data frame.
667
1422
 
668
1423
  Args:
669
- load_products (bool, optional): If true load assets on Business Unit level. Defaults to True.
670
- load_business_units (bool, optional): If true load assets on Product level. Defaults to True.
671
- download_assets (bool, optional): Should assets been downloaded. Defaults to True.
1424
+ load_products (bool, optional):
1425
+ If True, load assets on Product level.
1426
+ Defaults to True.
1427
+ load_business_units (bool, optional):
1428
+ If True, load assets on Business Unit level. Defaults to True.
1429
+ download_assets (bool, optional):
1430
+ Only if True assets will be downloaded. Defaults to True.
672
1431
 
673
1432
  Returns:
674
1433
  bool: True = Success, False = Failure
675
1434
 
676
- Example Asset:
1435
+ Example:
677
1436
  {
678
1437
  'access_control_descriptor': {
679
1438
  'permissions_map': {...}
@@ -714,7 +1473,7 @@ class OTMM:
714
1473
  'content_data': {...},
715
1474
  'content_kind': 'MASTER',
716
1475
  'content_manager_id': 'ARTESIA.CONTENT.GOOGLE.CLOUD',
717
- 'content_path': 'data/repository/original/generative-ai-governance-essentials-wp-en_56cbbfe270593ba1a5ab6551d2c8b373469cc1a9.pdf',
1476
+ 'content_path': 'data/repository/original/generative-ai-governance-essentials-wp-en_56cbbfe.pdf',
718
1477
  'content_size': 18474085,
719
1478
  'height': -1,
720
1479
  'id': '56cbbfe270593ba1a5ab6551d2c8b373469cc1a9',
@@ -740,78 +1499,113 @@ class OTMM:
740
1499
  'thumbnail_content_id': '70aef1a5b5e480337bc115e47443884432c355ff',
741
1500
  'version': 1
742
1501
  }
1502
+
743
1503
  """
744
1504
 
745
1505
  asset_list = []
746
1506
 
747
1507
  if load_products:
748
-
749
1508
  products = self.get_products() # dictionary with key = name and value = ID
750
1509
 
1510
+ if self._product_inclusions is not None:
1511
+ products_filtered = {}
1512
+ self.logger.info(
1513
+ "Apply include filter on products -> %s",
1514
+ str(self._product_inclusions),
1515
+ )
1516
+ for key in self._product_inclusions:
1517
+ if key in products:
1518
+ products_filtered[key] = products[key]
1519
+
1520
+ products = products_filtered
1521
+
751
1522
  if self._product_exclusions:
752
- logger.info("Excluding products -> %s", str(self._product_exclusions))
1523
+ self.logger.info(
1524
+ "Excluding products -> %s",
1525
+ str(self._product_exclusions),
1526
+ )
753
1527
  for key in self._product_exclusions:
754
- products.pop(
755
- key, None
756
- ) # pop(key, None) will remove the key if it exists, and do nothing if it doesn't
1528
+ # pop(key, None) will remove the key if it exists,
1529
+ # and do nothing if it doesn't:
1530
+ products.pop(key, None)
757
1531
 
758
1532
  for product_name, product_id in products.items():
759
1533
  if "DO NOT USE" in product_name:
760
1534
  continue
761
1535
 
762
- logger.info("Processing product -> '%s'...", product_name)
1536
+ self.logger.info(
1537
+ "Processing assets for product -> '%s'...",
1538
+ product_name,
1539
+ )
763
1540
 
764
1541
  assets = self.get_product_assets(product_id)
765
1542
 
766
1543
  if not assets:
767
- logger.info("Found no assets for product -> '%s'", product_name)
1544
+ self.logger.info(
1545
+ "Found no assets for product -> '%s'. Skipping it...",
1546
+ product_name,
1547
+ )
768
1548
  continue
769
1549
 
1550
+ # We enrich the dictionary with tags for workspace type and
1551
+ # workspace name for later bulk processing:
770
1552
  for asset in assets:
771
1553
  asset["workspace_type"] = "Product"
772
1554
  asset["workspace_name"] = product_name
773
1555
 
1556
+ # Filter out assets that are not files - we use the content size
1557
+ # attribute for this:
774
1558
  asset_list += [asset for asset in assets if "content_size" in asset]
775
1559
 
776
1560
  if load_business_units:
777
-
778
1561
  business_units = self.get_business_units()
779
1562
 
1563
+ if self._business_unit_inclusions is not None:
1564
+ business_units_filtered = {}
1565
+ self.logger.info(
1566
+ "Apply include filter on business units -> %s",
1567
+ str(self._business_unit_inclusions),
1568
+ )
1569
+ for key in self._business_unit_inclusions:
1570
+ if key in business_units:
1571
+ business_units_filtered[key] = business_units[key]
1572
+
1573
+ business_units = business_units_filtered
1574
+
780
1575
  if self._business_unit_exclusions:
781
- logger.info(
1576
+ self.logger.info(
782
1577
  "Excluding business units -> %s",
783
1578
  str(self._business_unit_exclusions),
784
1579
  )
785
1580
  for key in self._business_unit_exclusions:
786
- business_units.pop(
787
- key, None
788
- ) # pop(key, None) will remove the key if it exists, and do nothing if it doesn't
1581
+ # pop(key, None) will remove the key if it exists,
1582
+ # and do nothing if it doesn't:
1583
+ business_units.pop(key, None)
789
1584
 
790
1585
  for bu_name, bu_id in business_units.items():
791
- logger.debug(bu_name)
1586
+ self.logger.info("Processing assets for business unit -> '%s'", bu_name)
792
1587
  assets = self.get_business_unit_assets(bu_id)
793
1588
 
794
1589
  if not assets:
795
- logger.info("Found no assets for business unit -> '%s'", bu_name)
1590
+ self.logger.info(
1591
+ "Found no assets for business unit -> '%s'. Skipping it...",
1592
+ bu_name,
1593
+ )
796
1594
  continue
797
1595
 
1596
+ # We enrich the dictionary with tags for workspace type and name for
1597
+ # later bulk processing:
798
1598
  for asset in assets:
799
1599
  asset["workspace_type"] = "Business Unit"
800
1600
  asset["workspace_name"] = bu_name
801
1601
 
1602
+ # Filter out assets that are not files - we use the content size
1603
+ # attribute for this:
802
1604
  asset_list += [asset for asset in assets if "content_size" in asset]
1605
+
803
1606
  # end for bu_name...
804
1607
  # end if load_business_units
805
1608
 
806
- # WE DON'T WANT TO DO THIS HERE ANY MORE!
807
- # This is now done in the bulk document processing
808
- # using conditions_delete and conditions_create
809
- # asset_list = [
810
- # item
811
- # for item in asset_list
812
- # if not item.get("deleted", False) and not item.get("expired", False)
813
- # ]
814
-
815
1609
  total_count = len(asset_list)
816
1610
 
817
1611
  number = self._thread_number
@@ -824,11 +1618,11 @@ class OTMM:
824
1618
  number = 1
825
1619
  remainder = 0
826
1620
 
827
- logger.info(
828
- "Processing -> %s Media Assets, thread number -> %s, partition size -> %s",
1621
+ self.logger.info(
1622
+ "Processing -> %s media assets, thread number -> %s, partition size -> %s",
829
1623
  str(total_count),
830
- number,
831
- partition_size,
1624
+ str(number),
1625
+ str(partition_size),
832
1626
  )
833
1627
 
834
1628
  threads = []
@@ -868,16 +1662,114 @@ class OTMM:
868
1662
  partition_size: int,
869
1663
  offset: int = 0,
870
1664
  download_assets: bool = True,
871
- ):
872
- """Worker Method for multi-threading
1665
+ ) -> None:
1666
+ """Worker Method for multi-threading.
873
1667
 
874
1668
  Args:
875
- asset_list (list): List of assets to process
876
- business_unit (str, optional): Name of business unit. Defaults to "".
1669
+ asset_list (list):
1670
+ Complete list of assets. The thread uses offset and partition size
1671
+ to pick its working subset of it.
1672
+ partition_size (int):
1673
+ The size of the partition.
1674
+ offset (int, optional):
1675
+ The starting offset for the worker. The default is 0.
1676
+ download_assets (bool, optional):
1677
+ Whether the thread should download the assets. Default is True.
1678
+
1679
+ Example asset that gets added to the Data Frame:
1680
+ {
1681
+ 'access_control_descriptor': {
1682
+ 'permissions_map': {...}
1683
+ },
1684
+ 'asset_content_info': {'master_content': {...}},
1685
+ 'asset_id': '3eefc89705f53f0540d409cf866f1bc8119f65c0',
1686
+ 'asset_lock_state_last_update_date': '2024-06-26T22:15:00Z',
1687
+ 'asset_lock_state_user_id': '153',
1688
+ 'asset_state': 'NORMAL',
1689
+ 'asset_state_last_update_date': '2024-06-26T22:15:00Z',
1690
+ 'asset_state_user_id': '153',
1691
+ 'checked_out': False,
1692
+ 'content_editable': True,
1693
+ 'content_lock_state_last_update_date': '2021-11-22T05:33:46Z',
1694
+ 'content_lock_state_user_id': '76',
1695
+ 'content_lock_state_user_name': 'dgoyal',
1696
+ 'content_size': 25986,
1697
+ 'content_state': 'NORMAL',
1698
+ 'content_state_last_update_date': '2021-11-22T05:33:45Z',
1699
+ 'content_state_user_id': '76',
1700
+ 'content_state_user_name': 'Dignesh Goyal',
1701
+ 'content_type': 'BITMAP',
1702
+ 'creator_id': '76',
1703
+ 'date_imported': '2021-11-22T05:33:44Z',
1704
+ 'date_last_updated': '2024-06-26T22:15:00Z',
1705
+ 'deleted': False,
1706
+ 'delivery_service_url': 'https://assets.opentext.com/adaptivemedia/rendition?id=3eefc89705f53f0540d409cf866f1bc8119f65c0',
1707
+ 'expired': False,
1708
+ 'import_job_id': 381,
1709
+ 'import_user_name': 'dgoyal',
1710
+ 'latest_version': True,
1711
+ 'legacy_model_id': 104,
1712
+ 'locked': False,
1713
+ 'master_content_info': {
1714
+ 'content_checksum': '2cf0db34b37b2af71c516259c6b8287e',
1715
+ 'content_data': {...},
1716
+ 'content_kind': 'MASTER',
1717
+ 'content_manager_id': 'ARTESIA.CONTENT.GOOGLE.CLOUD',
1718
+ 'content_path': 'data/repository/original/co-op-food-logo-ss (1)_21d529dea732.jpg',
1719
+ 'content_size': 25986,
1720
+ 'height': 192,
1721
+ 'id': '21d529dea7324e54b2c00df8573951fcb3f4ebb2',
1722
+ 'mime_type': 'image/jpeg',
1723
+ 'name': 'co-op-food-logo-ss (1).jpg',
1724
+ 'unit_of_size': 'BYTES',
1725
+ 'url': '/otmmapi/v6/renditions/21d529dea7324e54b2c00df8573951fcb3f4ebb2',
1726
+ 'width': 192
1727
+ },
1728
+ 'metadata_lock_state_user_name': 'ababigian',
1729
+ 'metadata_model_id': 'OTM.MARKETING.MODEL',
1730
+ 'metadata_state_user_name': 'Andra Babigian',
1731
+ 'mime_type': 'image/jpeg',
1732
+ 'name': 'co-op-food-logo-ss (1).jpg',
1733
+ 'original_asset_id': '3eefc89705f53f0540d409cf866f1bc8119f65c0',
1734
+ 'product_associations': False,
1735
+ 'rendition_content': {
1736
+ 'thumbnail_content': {...},
1737
+ 'preview_content': {...}
1738
+ },
1739
+ 'subscribed_to': False,
1740
+ 'thumbnail_content_id': '94d71e6ac14890e89931f2bbfc2da74ffab8db5f',
1741
+ 'version': 1,
1742
+ 'workspace_type': 'Product',
1743
+ 'workspace_name': 'Trading Grid',
1744
+ 'asset_name': 'co-op-food-logo-ss (1).jpg',
1745
+ 'OTMM_CUSTOM_FIELD_TITLE': None,
1746
+ 'OTMM_CUSTOM_FIELD_DESCRIPTION': None,
1747
+ 'OTMM_CUSTOM_FIELD_KEYWORDS': None,
1748
+ 'CONTENT_TYPE_COMBO_CHAR_ID': 'Image',
1749
+ 'OTMM_FIELD_IMAGE_TYPE': None,
1750
+ 'OTM_TABLE_APPROVED_USAGE_FIELD': 'Internal',
1751
+ 'OTMM_FIELD_RESOURCE_LIBRARY_TAB': [],
1752
+ 'LANGUAGE_COMBO_CHAR_ID': 'English',
1753
+ 'OTMM_CUSTOM_FIELD_PART_NUMBER': None,
1754
+ 'OTMM_FIELD_AVIATOR': None,
1755
+ 'OTMM_FIELD_BUSINESS_UNIT_TAB': ['Business Network'],
1756
+ 'OTM_TABLE_PRODUCT_TABLE_FIELD': ['Trading Grid'],
1757
+ 'OTMM_FIELD_PRODUCT_NEW_TAB': ['Trading Grid'],
1758
+ 'OTMM_FIELD_MARKET_SEGMENT_TAB': [],
1759
+ 'OTM_TABLE_INDUSTRY_TABLE_FIELD': ['Retail'],
1760
+ 'OTMM_CUSTOM_FIELD_URL': None,
1761
+ ...,
1762
+ 'OTM_TABLE_JOURNEY_TABLE_FIELD': ['Buy', 'Try', 'Learn'],
1763
+ ...,
1764
+ 'REVIEW_CADENCE_CHAR_ID': 'Quarterly',
1765
+ 'CONTENT_CREATED_DATE_ID': '2021-11-08T00:00:00Z',
1766
+ ...
1767
+ }
1768
+
877
1769
  """
878
1770
 
879
- logger.info(
880
- "Processing Media Assets in range from -> %s to -> %s...",
1771
+ self.logger.info(
1772
+ "Processing media assets in range from -> %s to -> %s...",
881
1773
  offset,
882
1774
  offset + partition_size,
883
1775
  )
@@ -886,43 +1778,98 @@ class OTMM:
886
1778
 
887
1779
  for asset in worker_asset_list:
888
1780
  asset_id = asset.get("asset_id")
1781
+ if self._asset_exclusions and asset_id in self._asset_exclusions:
1782
+ self.logger.info(
1783
+ "Asset with ID -> %s is in exclusion list. Skipping it...",
1784
+ asset_id,
1785
+ )
1786
+ asset["included"] = False
1787
+ continue
1788
+ if self._asset_inclusions and asset_id not in self._asset_inclusions:
1789
+ self.logger.info(
1790
+ "Asset with ID -> %s is not in inclusion list. Skipping it...",
1791
+ asset_id,
1792
+ )
1793
+ asset["included"] = False
1794
+ continue
1795
+ if self._asset_exclusions or self._asset_inclusions:
1796
+ asset["included"] = True
889
1797
  asset_name = asset.get("name")
890
1798
  # Store name as asset_name
891
1799
  asset["asset_name"] = asset_name
892
- asset_download_url = asset.get("delivery_service_url")
1800
+ # We cannot fully trust the delivery_service_url -
1801
+ # instead we construct a URL that should always work:
1802
+ asset_download_url = self.config()["assetsUrl"] + "/" + asset_id + "/contents"
1803
+ # We also store the correct download URL to make it available
1804
+ # for the data frame and in bulkDocuments:
1805
+ asset["download_url"] = asset_download_url
893
1806
  asset_deleted = asset.get("deleted", False)
894
1807
  asset_expired = asset.get("expired", False)
895
- if asset_deleted or asset_expired:
896
- logger.info(
897
- "Asset -> '%s' is deleted or expired. Skipping...",
898
- asset_name,
899
- )
900
- continue
901
1808
 
902
- if download_assets and asset.get("content_size", 0) > 0:
1809
+ # We can skip the download of deleted or expired assets,
1810
+ # but we still want to have them in the Data Frame for
1811
+ # bulk processing (to remove them from OTCS)
1812
+ if download_assets and asset.get("content_size", 0) > 0 and not asset_deleted and not asset_expired:
903
1813
  success = self.download_asset(
904
1814
  asset_id=asset_id,
905
1815
  asset_name=asset_name,
906
1816
  download_url=asset_download_url,
1817
+ asset_modification_date=asset.get("date_last_updated"),
907
1818
  )
908
1819
  if not success:
909
- logger.error(
1820
+ self.logger.error(
910
1821
  "Failed to download asset -> '%s' (%s) to '%s'",
911
1822
  asset_name,
912
1823
  asset_id,
913
1824
  self._download_dir,
914
1825
  )
915
1826
  else:
916
- logger.info(
1827
+ self.logger.info(
917
1828
  "Successfully downloaded asset -> '%s' (%s) to '%s'",
918
1829
  asset_name,
919
1830
  asset_id,
920
1831
  self._download_dir,
921
1832
  )
1833
+ elif asset_deleted or asset_expired:
1834
+ success = self.remove_stale_download(
1835
+ asset_id=asset_id,
1836
+ asset_name=asset_name,
1837
+ )
1838
+ if not success:
1839
+ self.logger.info(
1840
+ "No stale download for asset -> '%s' (%s) in directory -> '%s'. Nothing to clean up.",
1841
+ asset_name,
1842
+ asset_id,
1843
+ self._download_dir,
1844
+ )
1845
+ else:
1846
+ self.logger.info(
1847
+ "Deleted stale download for asset -> '%s' (%s) in directory -> '%s'",
1848
+ asset_name,
1849
+ asset_id,
1850
+ self._download_dir,
1851
+ )
922
1852
 
923
- ## Add metadata to asset and add to new list
924
- asset.update(self.get_asset_metadata(asset_id))
1853
+ # Add additional metadata to asset and add to new list
1854
+ asset.update(self.prepare_asset_data(asset_id=asset_id))
925
1855
 
926
- # Now we add the article to the Pandas Data Frame in the Data class:
1856
+ # Now we add the assets processed by the worker
1857
+ # to the Pandas Data Frame in the Data class:
927
1858
  with self._data.lock():
928
- self._data.append(worker_asset_list)
1859
+ # Check if we have added the temporary key "included"
1860
+ # to handle inclusions or exclusions. Then we want to
1861
+ # a) remove the excluded items
1862
+ # b) remove the "included" key to avoid polluting the
1863
+ # data frame with an additional temp column
1864
+ if self._asset_exclusions or self._asset_inclusions:
1865
+ self._data.append(
1866
+ [
1867
+ {k: v for k, v in item.items() if k != "included"}
1868
+ for item in worker_asset_list
1869
+ if item.get("included")
1870
+ ],
1871
+ )
1872
+ else:
1873
+ self._data.append(worker_asset_list)
1874
+
1875
+ # end method definition