pyxecm 1.6__py3-none-any.whl → 2.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pyxecm might be problematic. Click here for more details.

Files changed (78) hide show
  1. pyxecm/__init__.py +7 -4
  2. pyxecm/avts.py +727 -254
  3. pyxecm/coreshare.py +686 -467
  4. pyxecm/customizer/__init__.py +16 -4
  5. pyxecm/customizer/__main__.py +58 -0
  6. pyxecm/customizer/api/__init__.py +5 -0
  7. pyxecm/customizer/api/__main__.py +6 -0
  8. pyxecm/customizer/api/app.py +163 -0
  9. pyxecm/customizer/api/auth/__init__.py +1 -0
  10. pyxecm/customizer/api/auth/functions.py +92 -0
  11. pyxecm/customizer/api/auth/models.py +13 -0
  12. pyxecm/customizer/api/auth/router.py +78 -0
  13. pyxecm/customizer/api/common/__init__.py +1 -0
  14. pyxecm/customizer/api/common/functions.py +47 -0
  15. pyxecm/customizer/api/common/metrics.py +92 -0
  16. pyxecm/customizer/api/common/models.py +21 -0
  17. pyxecm/customizer/api/common/payload_list.py +870 -0
  18. pyxecm/customizer/api/common/router.py +72 -0
  19. pyxecm/customizer/api/settings.py +128 -0
  20. pyxecm/customizer/api/terminal/__init__.py +1 -0
  21. pyxecm/customizer/api/terminal/router.py +87 -0
  22. pyxecm/customizer/api/v1_csai/__init__.py +1 -0
  23. pyxecm/customizer/api/v1_csai/router.py +87 -0
  24. pyxecm/customizer/api/v1_maintenance/__init__.py +1 -0
  25. pyxecm/customizer/api/v1_maintenance/functions.py +100 -0
  26. pyxecm/customizer/api/v1_maintenance/models.py +12 -0
  27. pyxecm/customizer/api/v1_maintenance/router.py +76 -0
  28. pyxecm/customizer/api/v1_otcs/__init__.py +1 -0
  29. pyxecm/customizer/api/v1_otcs/functions.py +61 -0
  30. pyxecm/customizer/api/v1_otcs/router.py +179 -0
  31. pyxecm/customizer/api/v1_payload/__init__.py +1 -0
  32. pyxecm/customizer/api/v1_payload/functions.py +179 -0
  33. pyxecm/customizer/api/v1_payload/models.py +51 -0
  34. pyxecm/customizer/api/v1_payload/router.py +499 -0
  35. pyxecm/customizer/browser_automation.py +721 -286
  36. pyxecm/customizer/customizer.py +1076 -1425
  37. pyxecm/customizer/exceptions.py +35 -0
  38. pyxecm/customizer/guidewire.py +1186 -0
  39. pyxecm/customizer/k8s.py +901 -379
  40. pyxecm/customizer/log.py +107 -0
  41. pyxecm/customizer/m365.py +2967 -920
  42. pyxecm/customizer/nhc.py +1169 -0
  43. pyxecm/customizer/openapi.py +258 -0
  44. pyxecm/customizer/payload.py +18228 -7820
  45. pyxecm/customizer/pht.py +717 -286
  46. pyxecm/customizer/salesforce.py +516 -342
  47. pyxecm/customizer/sap.py +58 -41
  48. pyxecm/customizer/servicenow.py +611 -372
  49. pyxecm/customizer/settings.py +445 -0
  50. pyxecm/customizer/successfactors.py +408 -346
  51. pyxecm/customizer/translate.py +83 -48
  52. pyxecm/helper/__init__.py +5 -2
  53. pyxecm/helper/assoc.py +83 -43
  54. pyxecm/helper/data.py +2406 -870
  55. pyxecm/helper/logadapter.py +27 -0
  56. pyxecm/helper/web.py +229 -101
  57. pyxecm/helper/xml.py +596 -171
  58. pyxecm/maintenance_page/__init__.py +5 -0
  59. pyxecm/maintenance_page/__main__.py +6 -0
  60. pyxecm/maintenance_page/app.py +51 -0
  61. pyxecm/maintenance_page/settings.py +28 -0
  62. pyxecm/maintenance_page/static/favicon.avif +0 -0
  63. pyxecm/maintenance_page/templates/maintenance.html +165 -0
  64. pyxecm/otac.py +235 -141
  65. pyxecm/otawp.py +2668 -1220
  66. pyxecm/otca.py +569 -0
  67. pyxecm/otcs.py +7956 -3237
  68. pyxecm/otds.py +2178 -925
  69. pyxecm/otiv.py +36 -21
  70. pyxecm/otmm.py +1272 -325
  71. pyxecm/otpd.py +231 -127
  72. pyxecm-2.0.1.dist-info/METADATA +122 -0
  73. pyxecm-2.0.1.dist-info/RECORD +76 -0
  74. {pyxecm-1.6.dist-info → pyxecm-2.0.1.dist-info}/WHEEL +1 -1
  75. pyxecm-1.6.dist-info/METADATA +0 -53
  76. pyxecm-1.6.dist-info/RECORD +0 -32
  77. {pyxecm-1.6.dist-info → pyxecm-2.0.1.dist-info/licenses}/LICENSE +0 -0
  78. {pyxecm-1.6.dist-info → pyxecm-2.0.1.dist-info}/top_level.txt +0 -0
@@ -1,84 +1,71 @@
1
- """
2
- ServiceNow Module to interact with the ServiceNow API
3
- See:
4
-
5
- Class: ServiceNow
6
- Methods:
7
-
8
- __init__ : class initializer
9
- thread_wrapper: Function to wrap around threads to catch exceptions during exection
10
- config : Returns the configuration dictionary
11
- get_data: Get the Data object that holds all processed Knowledge base Articles (Pandas Data Frame)
12
- request_header: Returns the request header for ServiceNow API calls
13
- parse_request_response: Parse the REST API responses and convert
14
- them to Python dict in a safe way
15
- exist_result_item: Check if an dict item is in the response
16
- of the ServiceNow API call
17
- get_result_value: Check if a defined value (based on a key) is in the ServiceNow API response
18
-
19
- authenticate : Authenticates at ServiceNow API
20
- get_oauth_token: Returns the OAuth access token.
21
-
22
- get_object: Get an ServiceNow object based on table name and ID
23
- get_summary: Get summary object for an article.
24
- get_table: Retrieve a specified ServiceNow table data (row or values)
25
- get_table_count: Get number of table rows (e.g. Knowledge Base Articles) matching the query
26
- (or if query = "" it should be the total number)
27
- get_knowledge_bases: Get the configured knowledge bases in ServiceNow
28
- get_knowledge_base_articles: Get selected / filtered Knowledge Base articles
29
- make_file_names_unique: Make file names unique if required. The mutable
30
- list is changed "in-place".
31
- download_attachments: Download the attachments of a Knowledge Base Article (KBA) in ServiceNow.
32
- load_articles: Main method to load ServiceNow articles in a Data Frame and
33
- download the attchments.
34
- load_articles_worker: Worker Method for multi-threading.
35
- load_article: Process a single KBA: download attachments (if any)
36
- and add the KBA to the Data Frame.
1
+ """ServiceNow Module to interact with the ServiceNow API.
2
+
3
+ See: https://developer.servicenow.com
37
4
  """
38
5
 
39
6
  __author__ = "Dr. Marc Diefenbruch"
40
- __copyright__ = "Copyright 2024, OpenText"
7
+ __copyright__ = "Copyright (C) 2024-2025, OpenText"
41
8
  __credits__ = ["Kai-Philip Gatzweiler"]
42
9
  __maintainer__ = "Dr. Marc Diefenbruch"
43
10
  __email__ = "mdiefenb@opentext.com"
44
11
 
45
- import os
46
12
  import json
47
13
  import logging
48
- import urllib.parse
14
+ import os
15
+ import platform
16
+ import sys
17
+ import tempfile
49
18
  import threading
50
- import traceback
51
- from functools import cache
52
19
  import time
20
+ import urllib.parse
21
+ from collections.abc import Callable
22
+ from functools import cache
23
+ from importlib.metadata import version
24
+ from typing import Any
53
25
 
54
26
  import requests
55
27
  from requests.auth import HTTPBasicAuth
56
28
  from requests.exceptions import HTTPError, RequestException
57
- from pyxecm.helper.data import Data
58
29
 
59
- logger = logging.getLogger("pyxecm.customizer.servicenow")
30
+ from pyxecm.helper import Data
31
+
32
+ APP_NAME = "pyxecm"
33
+ APP_VERSION = version("pyxecm")
34
+ MODULE_NAME = APP_NAME + ".customizer.servicenow"
35
+
36
+ PYTHON_VERSION = f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}"
37
+ OS_INFO = f"{platform.system()} {platform.release()}"
38
+ ARCH_INFO = platform.machine()
39
+ REQUESTS_VERSION = requests.__version__
40
+
41
+ USER_AGENT = (
42
+ f"{APP_NAME}/{APP_VERSION} ({MODULE_NAME}/{APP_VERSION}; "
43
+ f"Python/{PYTHON_VERSION}; {OS_INFO}; {ARCH_INFO}; Requests/{REQUESTS_VERSION})"
44
+ )
60
45
 
61
- REQUEST_HEADERS = {"Accept": "application/json", "Content-Type": "application/json"}
46
+ REQUEST_HEADERS = {"User-Agent": USER_AGENT, "Accept": "application/json", "Content-Type": "application/json"}
62
47
 
63
48
  REQUEST_TIMEOUT = 60
64
49
 
65
- KNOWLEDGE_BASE_PATH = "/tmp/attachments"
50
+ KNOWLEDGE_BASE_PATH = os.path.join(tempfile.gettempdir(), "attachments")
51
+
52
+ default_logger = logging.getLogger(MODULE_NAME)
66
53
 
67
54
  # ServiceNow database tables. Table names starting with "u_" are custom OpenText tables:
68
55
  SN_TABLE_CATEGORIES = "kb_category"
69
56
  SN_TABLE_KNOWLEDGE_BASES = "kb_knowledge_base"
70
57
  SN_TABLE_KNOWLEDGE_BASE_ARTICLES = "u_kb_template_technical_article_public"
71
- SN_TABLE_KNOWLEDGE_BASE_ARTICLES_PRODUCT = (
72
- "u_kb_template_product_documentation_standard"
73
- )
58
+ SN_TABLE_KNOWLEDGE_BASE_ARTICLES_PRODUCT = "u_kb_template_product_documentation_standard"
74
59
  SN_TABLE_RELATED_PRODUCTS = "cmdb_model"
75
60
  SN_TABLE_PRODUCT_LINES = "u_ot_product_model"
76
61
  SN_TABLE_PRODUCT_VERSIONS = "u_ot_product_model_version"
77
62
  SN_TABLE_ATTACHMENTS = "sys_attachment"
78
63
 
79
64
 
80
- class ServiceNow(object):
81
- """Used to retrieve and automate stettings in ServiceNow."""
65
+ class ServiceNow:
66
+ """Class used to retrieve and automate settings in ServiceNow."""
67
+
68
+ logger: logging.Logger = default_logger
82
69
 
83
70
  _config: dict
84
71
  _access_token = None
@@ -86,6 +73,7 @@ class ServiceNow(object):
86
73
  _data: Data = None
87
74
  _thread_number = 3
88
75
  _download_dir = ""
76
+ _product_exclusions = None
89
77
 
90
78
  def __init__(
91
79
  self,
@@ -98,21 +86,42 @@ class ServiceNow(object):
98
86
  token_url: str = "",
99
87
  thread_number: int = 3,
100
88
  download_dir: str = KNOWLEDGE_BASE_PATH,
101
- ):
102
- """Initialize the Service Now object
89
+ product_exclusions: list | None = None,
90
+ logger: logging.Logger = default_logger,
91
+ ) -> None:
92
+ """Initialize the Service Now object.
103
93
 
104
94
  Args:
105
- base_url (str): base URL of the ServiceNow tenant
106
- auth_type (str): authorization type, either "oauth" or "basic"
107
- client_id (str): ServiceNow Client ID
108
- client_secret (str): ServiceNow Client Secret
109
- username (str): user name in Saleforce
110
- password (str): password of the user
111
- token_url (str, optional): Token URL for ServiceNow login via OAuth.
112
- thread_number (int, optional): number of threads for parallel processing. Default is 3.
113
- download_path (str): path to stored downloaded files from ServiceNow
95
+ base_url (str):
96
+ The base URL of the ServiceNow tenant.
97
+ auth_type (str):
98
+ The authorization type, either "oauth" or "basic".
99
+ client_id (str):
100
+ ServiceNow Client ID.
101
+ client_secret (str):
102
+ The ServiceNow client secret.
103
+ username (str):
104
+ The user name in ServiceNow.
105
+ password (str):
106
+ The password of the ServiceNow user.
107
+ token_url (str, optional):
108
+ Token URL for ServiceNow login via OAuth.
109
+ thread_number (int, optional):
110
+ The number of threads for parallel processing. Default is 3.
111
+ download_dir (str, optional):
112
+ The path to stored downloaded files from ServiceNow.
113
+ product_exclusions (list | None, optional):
114
+ List of products that should NOT be loaded from ServiceNow.
115
+ logger:
116
+ The logging object used for all log messages. Default is default_logger.
117
+
114
118
  """
115
119
 
120
+ if logger != default_logger:
121
+ self.logger = logger.getChild("servicenow")
122
+ for logfilter in logger.filters:
123
+ self.logger.addFilter(logfilter)
124
+
116
125
  servicenow_config = {}
117
126
 
118
127
  # Store the credentials and parameters in a config dictionary:
@@ -129,61 +138,68 @@ class ServiceNow(object):
129
138
 
130
139
  servicenow_config["restUrl"] = servicenow_config["baseUrl"] + "/api/now/"
131
140
  servicenow_config["tableUrl"] = servicenow_config["restUrl"] + "table"
132
- servicenow_config["knowledgeUrl"] = (
133
- servicenow_config["restUrl"] + "table/kb_knowledge"
134
- )
135
- servicenow_config["knowledgeBaseUrl"] = (
136
- servicenow_config["restUrl"] + "table/" + SN_TABLE_KNOWLEDGE_BASES
137
- )
138
- servicenow_config["attachmentsUrl"] = (
139
- servicenow_config["restUrl"] + "table/" + SN_TABLE_ATTACHMENTS
140
- )
141
- servicenow_config["attachmentDownloadUrl"] = (
142
- servicenow_config["restUrl"] + "attachment"
143
- )
141
+ servicenow_config["knowledgeUrl"] = servicenow_config["restUrl"] + "table/kb_knowledge"
142
+ servicenow_config["knowledgeBaseUrl"] = servicenow_config["restUrl"] + "table/" + SN_TABLE_KNOWLEDGE_BASES
143
+ servicenow_config["attachmentsUrl"] = servicenow_config["restUrl"] + "table/" + SN_TABLE_ATTACHMENTS
144
+ servicenow_config["attachmentDownloadUrl"] = servicenow_config["restUrl"] + "attachment"
144
145
  servicenow_config["statsUrl"] = servicenow_config["restUrl"] + "stats"
145
146
 
146
147
  self._config = servicenow_config
147
148
 
148
149
  self._session = requests.Session()
149
150
 
150
- self._data = Data()
151
+ self._data = Data(logger=self.logger)
151
152
 
152
153
  self._thread_number = thread_number
153
-
154
154
  self._download_dir = download_dir
155
+ self._product_exclusions = product_exclusions
155
156
 
156
157
  # end method definition
157
158
 
158
- def thread_wrapper(self, target, *args, **kwargs):
159
- """Function to wrap around threads to catch exceptions during exection"""
159
+ def thread_wrapper(self, target: Callable, *args: tuple, **kwargs: dict[str, Any]) -> None:
160
+ """Wrap around threads to catch exceptions during execution.
161
+
162
+ Args:
163
+ target (Callable):
164
+ The method (callable) the Thread should run.
165
+ args (tuple):
166
+ The arguments for the method.
167
+ kwargs (dict):
168
+ Keyword arguments for the method.
169
+
170
+ """
160
171
 
161
172
  try:
162
173
  target(*args, **kwargs)
163
- except Exception as e:
174
+ except Exception:
164
175
  thread_name = threading.current_thread().name
165
- logger.error(
166
- "Thread '%s': failed with exception -> %s", thread_name, str(e)
176
+ self.logger.error(
177
+ "Thread '%s': failed!",
178
+ thread_name,
167
179
  )
168
- logger.error(traceback.format_exc())
169
180
 
170
181
  # end method definition
171
182
 
172
183
  def config(self) -> dict:
173
- """Returns the configuration dictionary
184
+ """Return the configuration dictionary.
174
185
 
175
186
  Returns:
176
- dict: Configuration dictionary
187
+ dict:
188
+ The configuration dictionary with all settings.
189
+
177
190
  """
191
+
178
192
  return self._config
179
193
 
180
194
  # end method definition
181
195
 
182
196
  def get_data(self) -> Data:
183
- """Get the Data object that holds all processed Knowledge base Articles
197
+ """Get the Data object that holds all processed Knowledge base Articles.
184
198
 
185
199
  Returns:
186
- Data: Datastructure with all processed articles.
200
+ Data:
201
+ Data object (with embedded data frame) holding all processed articles.
202
+
187
203
  """
188
204
 
189
205
  return self._data
@@ -191,17 +207,22 @@ class ServiceNow(object):
191
207
  # end method definition
192
208
 
193
209
  def request_header(self, content_type: str = "") -> dict:
194
- """Returns the request header used for Application calls.
195
- Consists of Bearer access token and Content Type
210
+ """Return the request header used for Application calls.
211
+
212
+ Consists of Bearer access token and Content Type.
196
213
 
197
214
  Args:
198
- content_type (str, optional): custom content type for the request.
199
- Typical values:
200
- * application/json - Used for sending JSON-encoded data
201
- * application/x-www-form-urlencoded - The default for HTML forms. Data is sent as key-value pairs in the body of the request, similar to query parameters
202
- * multipart/form-data - Used for file uploads or when a form includes non-ASCII characters
215
+ content_type (str, optional):
216
+ Custom content type for the request.
217
+ Typical values:
218
+ * application/json - Used for sending JSON-encoded data
219
+ * application/x-www-form-urlencoded - The default for HTML forms.
220
+ Data is sent as key-value pairs in the body of the request, similar to query parameters.
221
+ * multipart/form-data - Used for file uploads or when a form includes non-ASCII characters
203
222
  Return:
204
- dict: request header values
223
+ dict:
224
+ The request header values.
225
+
205
226
  """
206
227
 
207
228
  request_header = {}
@@ -224,43 +245,48 @@ class ServiceNow(object):
224
245
  additional_error_message: str = "",
225
246
  show_error: bool = True,
226
247
  ) -> dict | None:
227
- """Converts the request response (JSon) to a Python dict in a safe way
228
- that also handles exceptions. It first tries to load the response.text
229
- via json.loads() that produces a dict output. Only if response.text is
230
- not set or is empty it just converts the response_object to a dict using
231
- the vars() built-in method.
248
+ """Convert the request response (JSON) to a Python dict in a safe way.
249
+
250
+ It handles exceptions and first tries to load the response.text
251
+ via json.loads() that produces a dict output. Only if response.text is
252
+ not set or is empty it just converts the response_object to a dict using
253
+ the vars() built-in method.
232
254
 
233
255
  Args:
234
- response_object (object): this is reponse object delivered by the request call
235
- additional_error_message (str, optional): use a more specific error message
236
- in case of an error
237
- show_error (bool): True: write an error to the log file
238
- False: write a warning to the log file
256
+ response_object (object):
257
+ This is the response object delivered by the request call.
258
+ additional_error_message (str, optional):
259
+ If provided, use a more specific error message
260
+ in case of an error.
261
+ show_error (bool, optional):
262
+ True: write an error to the log file.
263
+ False: write a warning to the log file.
264
+
239
265
  Returns:
240
- dict: response information or None in case of an error
266
+ dict | None:
267
+ Response information or None in case of an error.
268
+
241
269
  """
242
270
 
243
271
  if not response_object:
244
272
  return None
245
273
 
246
274
  try:
247
- if response_object.text:
248
- dict_object = json.loads(response_object.text)
249
- else:
250
- dict_object = vars(response_object)
275
+ dict_object = json.loads(response_object.text) if response_object.text else vars(response_object)
251
276
  except json.JSONDecodeError as exception:
252
277
  if additional_error_message:
253
278
  message = "Cannot decode response as JSON. {}; error -> {}".format(
254
- additional_error_message, exception
279
+ additional_error_message,
280
+ exception,
255
281
  )
256
282
  else:
257
283
  message = "Cannot decode response as JSON; error -> {}".format(
258
- exception
284
+ exception,
259
285
  )
260
286
  if show_error:
261
- logger.error(message)
287
+ self.logger.error(message)
262
288
  else:
263
- logger.warning(message)
289
+ self.logger.warning(message)
264
290
  return None
265
291
  else:
266
292
  return dict_object
@@ -271,11 +297,17 @@ class ServiceNow(object):
271
297
  """Check existence of key / value pair in the response properties of an ServiceNow API call.
272
298
 
273
299
  Args:
274
- response (dict): REST response from an Salesforce API call
275
- key (str): property name (key)
276
- value (str): value to find in the item with the matching key
300
+ response (dict):
301
+ REST response from an ServiceNow API call.
302
+ key (str):
303
+ The property name (key) to check the value of.
304
+ value (str):
305
+ Value to find in the item with the matching key.
306
+
277
307
  Returns:
278
- bool: True if the value was found, False otherwise
308
+ bool:
309
+ True if the value was found, False otherwise.
310
+
279
311
  """
280
312
 
281
313
  if not response:
@@ -290,7 +322,7 @@ class ServiceNow(object):
290
322
  if value == record[key]:
291
323
  return True
292
324
  else:
293
- if not key in response:
325
+ if key not in response:
294
326
  return False
295
327
  if value == response[key]:
296
328
  return True
@@ -308,16 +340,22 @@ class ServiceNow(object):
308
340
  """Get value of a result property with a given key of an ServiceNow API call.
309
341
 
310
342
  Args:
311
- response (dict): REST response from an Salesforce REST Call
312
- key (str): property name (key)
313
- index (int, optional): Index to use (1st element has index 0).
314
- Defaults to 0.
343
+ response (dict):
344
+ REST response from an ServiceNow REST call.
345
+ key (str):
346
+ The property name (key) to get the value of.
347
+ index (int, optional):
348
+ Index to use (1st element has index 0).
349
+ Defaults to 0.
350
+
315
351
  Returns:
316
- str: value for the key, None otherwise
352
+ str:
353
+ The value for the key, None otherwise.
354
+
317
355
  """
318
356
 
319
357
  # ServiceNow responses should always have a "result":
320
- if not response or not "result" in response:
358
+ if not response or "result" not in response:
321
359
  return None
322
360
 
323
361
  values = response["result"]
@@ -331,7 +369,7 @@ class ServiceNow(object):
331
369
  elif isinstance(values, dict) and key in values:
332
370
  value = values[key]
333
371
  else:
334
- logger.error("Illegal data type in ServiceNow response!")
372
+ self.logger.error("Illegal data type in ServiceNow response!")
335
373
  return None
336
374
 
337
375
  return value
@@ -342,9 +380,12 @@ class ServiceNow(object):
342
380
  """Authenticate at ServiceNow with client ID and client secret or with basic authentication.
343
381
 
344
382
  Args:
345
- auth_type (str): this can be "basic" or "oauth"
383
+ auth_type (str):
384
+ The Authorization type. This can be "basic" or "oauth".
385
+
346
386
  Returns:
347
- str: session token or None in case of an error
387
+ str:
388
+ The session token or None in case of an error.
348
389
 
349
390
  """
350
391
 
@@ -363,16 +404,18 @@ class ServiceNow(object):
363
404
 
364
405
  return token
365
406
  else:
366
- logger.error("Unsupported authentication type")
407
+ self.logger.error("Unsupported authentication type -> %s!", auth_type)
367
408
  return None
368
409
 
369
410
  # end method definition
370
411
 
371
412
  def get_oauth_token(self) -> str:
372
- """Returns the OAuth access token.
413
+ """Return the OAuth access token.
373
414
 
374
415
  Returns:
375
- str: Access token
416
+ str:
417
+ The access token.
418
+
376
419
  """
377
420
 
378
421
  token_post_body = {
@@ -394,9 +437,9 @@ class ServiceNow(object):
394
437
  else:
395
438
  # Store authentication access_token:
396
439
  self._access_token = authenticate_dict["access_token"]
397
- logger.debug("Access Token -> %s", self._access_token)
440
+ self.logger.debug("Access Token -> %s", self._access_token)
398
441
  else:
399
- logger.error(
442
+ self.logger.error(
400
443
  "Failed to request an Service Now Access Token; error -> %s",
401
444
  response.text,
402
445
  )
@@ -408,57 +451,59 @@ class ServiceNow(object):
408
451
 
409
452
  @cache
410
453
  def get_object(self, table_name: str, sys_id: str) -> dict | None:
411
- """Get an ServiceNow object based on table name and ID
454
+ """Get an ServiceNow object based on table name and ID.
412
455
 
413
456
  Args:
414
- table_name (str): Name of the ServiceNow table.
415
- sys_id (str): ID of the data set to resolve.
457
+ table_name (str):
458
+ The name of the ServiceNow table.
459
+ sys_id (str):
460
+ The ID of the data set to resolve.
416
461
 
417
462
  Returns:
418
- dict | None: dictionary of fields of resulting table row or None
419
- in case an error occured.
463
+ dict | None:
464
+ The dictionary of fields of resulting table row or None
465
+ in case an error occurred.
466
+
420
467
  """
421
468
 
422
469
  if not table_name:
423
- logger.error("Table name is missing!")
470
+ self.logger.error("Table name is missing!")
424
471
  return None
425
472
 
426
473
  if not sys_id:
427
- logger.error("System ID of item to lookup is missing!")
474
+ self.logger.error("System ID of item to lookup is missing!")
428
475
  return None
429
476
 
430
477
  request_header = self.request_header()
431
478
 
432
479
  request_url = self.config()["restUrl"] + "table/{}/{}".format(
433
- table_name, sys_id
480
+ table_name,
481
+ sys_id,
434
482
  )
435
483
 
436
484
  try:
437
485
  response = self._session.get(url=request_url, headers=request_header)
438
486
  data = self.parse_request_response(response)
439
-
440
- return data
441
- except HTTPError as http_err:
442
- logger.error(
443
- "HTTP error occurred while resolving -> %s in table -> '%s': %s",
487
+ except HTTPError:
488
+ self.logger.error(
489
+ "HTTP error occurred while resolving -> '%s' in table -> '%s'!",
444
490
  sys_id,
445
491
  table_name,
446
- str(http_err),
447
492
  )
448
- except RequestException as req_err:
449
- logger.error(
450
- "Request error occurred while resolving -> %s in table -> '%s': %s",
493
+ except RequestException:
494
+ self.logger.error(
495
+ "Request error occurred while resolving -> '%s' in table -> '%s'!",
451
496
  sys_id,
452
497
  table_name,
453
- str(req_err),
454
498
  )
455
- except Exception as err:
456
- logger.error(
457
- "An error occurred while resolving -> %s in table -> '%s': %s",
499
+ except Exception:
500
+ self.logger.error(
501
+ "An error occurred while resolving -> '%s' in table -> '%s'!",
458
502
  sys_id,
459
503
  table_name,
460
- str(err),
461
504
  )
505
+ else:
506
+ return data
462
507
 
463
508
  return None
464
509
 
@@ -468,10 +513,13 @@ class ServiceNow(object):
468
513
  """Get summary object for an article.
469
514
 
470
515
  Args:
471
- summary_sys_id (str): System ID of the article
516
+ summary_sys_id (str):
517
+ The system ID of the article.
472
518
 
473
519
  Returns:
474
- dict | None: dictionary with the summary
520
+ dict | None:
521
+ The dictionary with the summary.
522
+
475
523
  """
476
524
 
477
525
  return self.get_object(table_name="kb_knowledge_summary", sys_id=summary_sys_id)
@@ -490,17 +538,24 @@ class ServiceNow(object):
490
538
  """Retrieve a specified ServiceNow table data (row or values).
491
539
 
492
540
  Args:
493
- table_name (str): Name of the ServiceNow table
494
- query (str, optional): Query to filter the table rows (e.g. articles).
495
- fields (list, optional): Just return the fileds in this list.
496
- Defaults to None which means to deliver
497
- all fields.
498
- limit (int, optional): Number of results to return. None = unlimited.
499
- offset (int, optional): first item to return (for chunking)
500
- error_string (str, optional): custom error string
541
+ table_name (str):
542
+ The name of the ServiceNow table to retrieve.
543
+ query (str, optional):
544
+ Query to filter the table rows (e.g. articles).
545
+ fields (list, optional):
546
+ Just return the fields in this list.
547
+ Defaults to None which means to deliver all fields.
548
+ limit (int, optional):
549
+ Number of results to return. None = unlimited.
550
+ offset (int, optional):
551
+ First item to return (for chunking).
552
+ error_string (str, optional):
553
+ A custom error string can be provided by this parameter.
501
554
 
502
555
  Returns:
503
- list | None: List or articles or None if the request fails.
556
+ list | None:
557
+ List of articles or None if the request fails.
558
+
504
559
  """
505
560
 
506
561
  request_header = self.request_header()
@@ -519,32 +574,34 @@ class ServiceNow(object):
519
574
  encoded_query = urllib.parse.urlencode(params, doseq=True)
520
575
 
521
576
  request_url = self.config()["tableUrl"] + "/{}?{}".format(
522
- table_name, encoded_query
577
+ table_name,
578
+ encoded_query,
523
579
  )
524
580
 
525
581
  try:
526
582
  while True:
527
583
  response = self._session.get(
528
- url=request_url, headers=request_header # , params=params
584
+ url=request_url,
585
+ headers=request_header, # , params=params
529
586
  )
530
587
  data = self.parse_request_response(response)
531
588
 
532
589
  if response.status_code == 200:
533
590
  return data.get("result", [])
534
591
  elif response.status_code == 202:
535
- logger.warning(
536
- "Service Now returned <202 Accepted> -> throtteling, retrying ..."
592
+ self.logger.warning(
593
+ "Service Now returned <202 Accepted> -> throtteling, retrying ...",
537
594
  )
538
595
  time.sleep(1000)
539
596
  else:
540
597
  return None
541
598
 
542
- except HTTPError as http_err:
543
- logger.error("%sHTTP error -> %s!", error_string, str(http_err))
544
- except RequestException as req_err:
545
- logger.error("%sRequest error -> %s!", error_string, str(req_err))
546
- except Exception as err:
547
- logger.error("%sError -> %s!", error_string, str(err))
599
+ except HTTPError:
600
+ self.logger.error("%sHTTP error!", error_string)
601
+ except RequestException:
602
+ self.logger.error("%sRequest error!", error_string)
603
+ except Exception:
604
+ self.logger.error("%s", error_string)
548
605
 
549
606
  return None
550
607
 
@@ -555,15 +612,20 @@ class ServiceNow(object):
555
612
  table_name: str,
556
613
  query: str | None = None,
557
614
  ) -> int:
558
- """Get number of table rows (e.g. Knowledge Base Articles) matching the query
559
- (or if query = "" it should be the total number)
615
+ """Get number of table rows (e.g. Knowledge Base Articles) matching the query.
616
+
617
+ (or if query = "" it should be the total number).
560
618
 
561
619
  Args:
562
- table_name (str): name of the ServiceNow table
563
- query (str, optional): Query string to filter the results. Defaults to "".
620
+ table_name (str):
621
+ The name of the ServiceNow table.
622
+ query (str, optional):
623
+ A query string to filter the results. Defaults to "".
564
624
 
565
625
  Returns:
566
- int: Number of table rows.
626
+ int:
627
+ Number of table rows.
628
+
567
629
  """
568
630
 
569
631
  request_header = self.request_header()
@@ -576,21 +638,24 @@ class ServiceNow(object):
576
638
  encoded_query = urllib.parse.urlencode(params, doseq=True)
577
639
 
578
640
  request_url = self.config()["statsUrl"] + "/{}?{}".format(
579
- table_name, encoded_query
641
+ table_name,
642
+ encoded_query,
580
643
  )
581
644
 
582
645
  try:
583
646
  response = self._session.get(
584
- url=request_url, headers=request_header, timeout=600
647
+ url=request_url,
648
+ headers=request_header,
649
+ timeout=600,
585
650
  )
586
651
  data = self.parse_request_response(response)
587
652
  return int(data["result"]["stats"]["count"])
588
- except HTTPError as http_err:
589
- logger.error("HTTP error occurred -> %s!", str(http_err))
590
- except RequestException as req_err:
591
- logger.error("Request error occurred -> %s!", str(req_err))
592
- except Exception as err:
593
- logger.error("An error occurred -> %s!", str(err))
653
+ except HTTPError:
654
+ self.logger.error("HTTP error occurred!")
655
+ except RequestException:
656
+ self.logger.error("Request error occurred!")
657
+ except Exception:
658
+ self.logger.error("An error occurred!")
594
659
 
595
660
  return None
596
661
 
@@ -600,9 +665,11 @@ class ServiceNow(object):
600
665
  """Get the configured knowledge base categories in ServiceNow.
601
666
 
602
667
  Returns:
603
- list | None: list of configured knowledge base categories or None in case of an error.
668
+ list | None:
669
+ A list of configured knowledge base categories
670
+ or None in case of an error.
604
671
 
605
- Example:
672
+ Example:
606
673
  [
607
674
  {
608
675
  'sys_mod_count': '2',
@@ -628,6 +695,7 @@ class ServiceNow(object):
628
695
  'sys_created_by': 'tiychowdhury@opentext.com'
629
696
  }
630
697
  ]
698
+
631
699
  """
632
700
 
633
701
  return self.get_table(
@@ -642,9 +710,10 @@ class ServiceNow(object):
642
710
  """Get the configured knowledge bases in ServiceNow.
643
711
 
644
712
  Returns:
645
- list | None: list of configured knowledge bases or None in case of an error.
713
+ list | None:
714
+ The list of configured knowledge bases or None in case of an error.
646
715
 
647
- Example:
716
+ Example:
648
717
  [
649
718
  {
650
719
  'mandatory_fields': '',
@@ -694,10 +763,11 @@ class ServiceNow(object):
694
763
  'card_color': '',
695
764
  'disable_rating': 'false',
696
765
  'create_translation_task': 'false',
697
- 'kb_managers': 'acab67001b6b811461a7a8e22a4bcbbe,7ab0b6801ba205d061a7a8e22a4bcbec,2a685f4c1be7811461a7a8e22a4bcbfd,6cc3c3d2db21781068cfd6c4e2961962,053429e31b5f0114fea2ec20604bcb95,5454eb441b6b0514fea2ec20604bcbfc,3a17970c1be7811461a7a8e22a4bcb23'
766
+ 'kb_managers': 'acab67001b6b811461a7a8e22a4bcbbe,7ab0b6801ba205d061a7a8e22a4bcbec'
698
767
  },
699
768
  ...
700
769
  ]
770
+
701
771
  """
702
772
 
703
773
  return self.get_table(
@@ -715,20 +785,26 @@ class ServiceNow(object):
715
785
  limit: int | None = 10,
716
786
  offset: int = 0,
717
787
  ) -> list | None:
718
- """Get selected / filtered Knowledge Base articles
788
+ """Get selected / filtered Knowledge Base articles.
719
789
 
720
790
  Args:
721
- query (str, optional): Query to filter the the articles.
722
- fields (list, optional): Just return the fileds in this list.
723
- Defaults to None which means to deliver
724
- all fields.
725
- limit (int, optional): Number of results to return. None = unlimited.
726
- offset (int, optional): first item to return (for chunking)
791
+ table_name (str, optional):
792
+ The name of the ServiceNow table.
793
+ query (str, optional):
794
+ Query to filter the articles.
795
+ fields (list, optional):
796
+ Just return the fields in this list.
797
+ Defaults to None which means to deliver all fields.
798
+ limit (int, optional):
799
+ Number of results to return. None = unlimited.
800
+ offset (int, optional):
801
+ The first item to return (for chunking).
727
802
 
728
803
  Returns:
729
- list | None: List or articles or None if the request fails.
804
+ list | None:
805
+ List of articles or None if the request fails.
730
806
 
731
- Example:
807
+ Example:
732
808
  [
733
809
  {
734
810
  'parent': '',
@@ -832,6 +908,7 @@ class ServiceNow(object):
832
908
  },
833
909
  ...
834
910
  ]
911
+
835
912
  """
836
913
 
837
914
  return self.get_table(
@@ -845,13 +922,16 @@ class ServiceNow(object):
845
922
 
846
923
  # end method definition
847
924
 
848
- def make_file_names_unique(self, file_list: list):
849
- """Make file names unique if required. The mutable
850
- list is changed "in-place".
925
+ def make_file_names_unique(self, file_list: list) -> None:
926
+ """Make file names unique if required.
927
+
928
+ The mutable list is changed "in-place".
851
929
 
852
930
  Args:
853
- file_list (list): list of attachments as dictionaries
854
- with "sys_id" and "file_name" keys.
931
+ file_list (list):
932
+ List of attachments as dictionaries
933
+ with "sys_id" and "file_name" keys.
934
+
855
935
  """
856
936
 
857
937
  # Dictionary to keep track of how many times each file name has been encountered
@@ -882,13 +962,16 @@ class ServiceNow(object):
882
962
  # end method definition
883
963
 
884
964
  def get_article_attachments(self, article: dict) -> list | None:
885
- """Get a list of attachments for an article
965
+ """Get a list of attachments for an article.
886
966
 
887
967
  Args:
888
- article (dict): Article information
968
+ article (dict):
969
+ Article information.
889
970
 
890
971
  Returns:
891
- list | None: list of attachments
972
+ list | None:
973
+ List of attachments for the article.
974
+
892
975
  """
893
976
 
894
977
  article_sys_id = article["sys_id"]
@@ -904,30 +987,32 @@ class ServiceNow(object):
904
987
 
905
988
  try:
906
989
  response = self._session.get(
907
- url=request_url, headers=request_header, params=params
990
+ url=request_url,
991
+ headers=request_header,
992
+ params=params,
908
993
  )
909
994
  data = self.parse_request_response(response)
910
995
  attachments = data.get("result", [])
911
996
  if not attachments:
912
- logger.debug(
997
+ self.logger.debug(
913
998
  "Knowledge base article -> %s does not have attachments!",
914
999
  article_number,
915
1000
  )
916
1001
  return []
917
1002
  else:
918
- logger.info(
1003
+ self.logger.debug(
919
1004
  "Knowledge base article -> %s has %s attachments.",
920
1005
  article_number,
921
1006
  len(attachments),
922
1007
  )
923
1008
  return attachments
924
1009
 
925
- except HTTPError as http_err:
926
- logger.error("HTTP error occurred -> %s!", str(http_err))
927
- except RequestException as req_err:
928
- logger.error("Request error occurred -> %s!", str(req_err))
929
- except Exception as err:
930
- logger.error("An error occurred -> %s!", str(err))
1010
+ except HTTPError:
1011
+ self.logger.error("HTTP error occurred!")
1012
+ except RequestException:
1013
+ self.logger.error("Request error occurred!")
1014
+ except Exception:
1015
+ self.logger.error("An error occurred!")
931
1016
 
932
1017
  return None
933
1018
 
@@ -941,26 +1026,30 @@ class ServiceNow(object):
941
1026
  """Download the attachments of a Knowledge Base Article (KBA) in ServiceNow.
942
1027
 
943
1028
  Args:
944
- article (dict): dictionary holding the Service Now article data
945
- skip_existing (bool, optional): skip download if file has been downloaded before
1029
+ article (dict):
1030
+ The dictionary holding the ServiceNow article data.
1031
+ skip_existing (bool, optional):
1032
+ If True, skip download if file has been downloaded before.
946
1033
 
947
1034
  Returns:
948
- bool: True = success, False = failure
1035
+ bool:
1036
+ True = success, False = failure.
1037
+
949
1038
  """
950
1039
 
951
1040
  article_number = article["number"]
952
1041
 
953
- attachments = self.get_article_attachments(article)
1042
+ attachments = self.get_article_attachments(article=article)
954
1043
 
955
1044
  if not attachments:
956
- logger.debug(
1045
+ self.logger.debug(
957
1046
  "Knowledge base article -> %s does not have attachments to download!",
958
1047
  article_number,
959
1048
  )
960
1049
  article["has_attachments"] = False
961
1050
  return False
962
1051
  else:
963
- logger.info(
1052
+ self.logger.info(
964
1053
  "Knowledge base article -> %s has %s attachments to download...",
965
1054
  article_number,
966
1055
  len(attachments),
@@ -980,79 +1069,108 @@ class ServiceNow(object):
980
1069
  article["download_files_ids"] = []
981
1070
 
982
1071
  if not os.path.exists(base_dir):
983
- os.makedirs(base_dir)
1072
+ try:
1073
+ os.makedirs(base_dir)
1074
+ except FileExistsError:
1075
+ self.logger.error(
1076
+ "Directory -> '%s' already exists. Race condition occurred.",
1077
+ base_dir,
1078
+ )
1079
+ except PermissionError:
1080
+ self.logger.error("Permission error with directory -> %s", base_dir)
1081
+ return False
1082
+ except OSError:
1083
+ self.logger.error("OS error with directory -> %s", base_dir)
1084
+ return False
1085
+ except TypeError:
1086
+ self.logger.error("Invalid path type -> %s", base_dir)
1087
+ return False
984
1088
 
985
1089
  for attachment in attachments:
986
1090
  file_path = os.path.join(base_dir, attachment["file_name"])
987
1091
 
988
1092
  if os.path.exists(file_path) and skip_existing:
989
- logger.info(
990
- "File -> %s has been downloaded before. Skipping download...",
1093
+ self.logger.info(
1094
+ "File -> '%s' has been downloaded before. Skipping download...",
991
1095
  file_path,
992
1096
  )
993
1097
 
994
- # we need to add file_name and sys_id in the list of files and for later use in bulkDocument processing...
1098
+ # We need to add file_name and sys_id in the list of files and file IDs
1099
+ # for later use in bulkDocument processing...
1100
+ # This creates two new columns "download_files" and "download_files_ids"
1101
+ # in the data frame:
995
1102
  article["download_files"].append(attachment["file_name"])
996
1103
  article["download_files_ids"].append(attachment["sys_id"])
997
1104
  continue
998
- attachment_download_url = (
999
- self.config()["attachmentDownloadUrl"]
1000
- + "/"
1001
- + attachment["sys_id"]
1002
- + "/file"
1003
- )
1105
+ attachment_download_url = self.config()["attachmentDownloadUrl"] + "/" + attachment["sys_id"] + "/file"
1004
1106
  try:
1005
- logger.info(
1006
- "Downloading attachment file -> '%s' for article -> %s from ServiceNow...",
1107
+ self.logger.info(
1108
+ "Downloading attachment file -> '%s' for article -> '%s' from ServiceNow...",
1007
1109
  file_path,
1008
1110
  article_number,
1009
1111
  )
1010
1112
 
1113
+ # Request the attachment as a stream from ServiceNow.
1114
+ # This initiates the download process...
1011
1115
  attachment_response = self._session.get(
1012
- attachment_download_url, stream=True
1116
+ attachment_download_url,
1117
+ stream=True,
1013
1118
  )
1014
1119
  attachment_response.raise_for_status()
1015
1120
 
1016
- with open(file_path, "wb") as file:
1121
+ # Read and write the attachment file in chunks:
1122
+ with open(file_path, "wb") as attachment_file:
1017
1123
  for chunk in attachment_response.iter_content(chunk_size=8192):
1018
- file.write(chunk)
1124
+ attachment_file.write(chunk)
1019
1125
 
1020
- # we build a list of filenames and ids.
1021
- # the ids we want to use as nicknames later on
1126
+ # We build a list of filenames and IDs.
1127
+ # The IDs we want to use as nicknames later on.
1022
1128
  article["download_files"].append(attachment["file_name"])
1023
1129
  article["download_files_ids"].append(attachment["sys_id"])
1024
1130
 
1025
- except HTTPError as e:
1026
- logger.error(
1027
- "Failed to download -> '%s' using url -> %s; error -> %s",
1131
+ except HTTPError:
1132
+ self.logger.error(
1133
+ "Failed to download -> '%s' using url -> %s",
1028
1134
  attachment["file_name"],
1029
1135
  attachment_download_url,
1030
- str(e),
1031
1136
  )
1032
1137
 
1033
1138
  return True
1034
1139
 
1035
1140
  # end method definition
1036
1141
 
1037
- def load_articles(self, table_name: str, query: str | None) -> bool:
1038
- """Main method to load ServiceNow articles in a Data Frame and
1039
- download the attchments.
1142
+ def load_articles(
1143
+ self,
1144
+ table_name: str,
1145
+ query: str | None,
1146
+ skip_existing_downloads: bool = True,
1147
+ ) -> bool:
1148
+ """Load ServiceNow articles in a data frame and download the attachments.
1040
1149
 
1041
1150
  Args:
1042
- query (str): Filter criteria for the articles.
1151
+ table_name (str):
1152
+ The name of the ServiceNow table.
1153
+ query (str | None):
1154
+ Filter criteria for the articles.
1155
+ skip_existing_downloads (bool, optional):
1156
+ If True, it tries to optimize the processing by reusing
1157
+ existing downloads of attachments in the file system.
1043
1158
 
1044
1159
  Returns:
1045
- bool: True = Success, False = Failure
1160
+ bool:
1161
+ True = Success, False = Failure.
1162
+
1046
1163
  """
1047
1164
 
1048
1165
  total_count = self.get_table_count(table_name=table_name, query=query)
1049
1166
 
1050
- logger.info(
1051
- "Total number of Knowledge Base Articles (KBA) -> %s", str(total_count)
1167
+ self.logger.info(
1168
+ "Total number of Knowledge Base Articles (KBA) -> %s",
1169
+ str(total_count),
1052
1170
  )
1053
1171
 
1054
1172
  if total_count == 0:
1055
- logger.info(
1173
+ self.logger.info(
1056
1174
  "Query does not return any value from ServiceNow table -> '%s'. Finishing.",
1057
1175
  table_name,
1058
1176
  )
@@ -1068,7 +1186,7 @@ class ServiceNow(object):
1068
1186
  remainder = 0
1069
1187
  number = 1
1070
1188
 
1071
- logger.info(
1189
+ self.logger.info(
1072
1190
  "Processing -> %s Knowledge Base Articles (KBA), table name -> '%s', thread number -> %s, partition size -> %s",
1073
1191
  str(total_count),
1074
1192
  table_name,
@@ -1082,7 +1200,7 @@ class ServiceNow(object):
1082
1200
  for i in range(number):
1083
1201
  current_partition_size = partition_size + (1 if i < remainder else 0)
1084
1202
  thread = threading.Thread(
1085
- name=f"load_articles_{i+1:02}",
1203
+ name=f"load_articles_{i + 1:02}",
1086
1204
  target=self.thread_wrapper,
1087
1205
  args=(
1088
1206
  self.load_articles_worker,
@@ -1090,6 +1208,7 @@ class ServiceNow(object):
1090
1208
  query,
1091
1209
  current_partition_size,
1092
1210
  current_offset,
1211
+ skip_existing_downloads,
1093
1212
  ),
1094
1213
  )
1095
1214
  thread.start()
@@ -1104,17 +1223,31 @@ class ServiceNow(object):
1104
1223
  # end method definition
1105
1224
 
1106
1225
  def load_articles_worker(
1107
- self, table_name: str, query: str, partition_size: int, partition_offset: int
1226
+ self,
1227
+ table_name: str,
1228
+ query: str,
1229
+ partition_size: int,
1230
+ partition_offset: int,
1231
+ skip_existing_downloads: bool = True,
1108
1232
  ) -> None:
1109
- """Worker Method for multi-threading.
1233
+ """Worker method for multi-threading.
1110
1234
 
1111
1235
  Args:
1112
- query (str): Query to select the relevant KBA.
1113
- partition_size (int): Total size of the partition assigned to this thread.
1114
- partition_offset (int): Starting offset for the KBAs this thread is processing.
1236
+ table_name (str):
1237
+ Name of the ServiceNow table.
1238
+ query (str):
1239
+ Query to select the relevant KBA.
1240
+ partition_size (int):
1241
+ Total size of the partition assigned to this thread.
1242
+ partition_offset (int):
1243
+ Starting offset for the KBAs this thread is processing.
1244
+ skip_existing_downloads (bool, optional):
1245
+ If True, it tries to optimize the processing by reusing
1246
+ existing downloads of attachments in the file system.
1247
+
1115
1248
  """
1116
1249
 
1117
- logger.info(
1250
+ self.logger.info(
1118
1251
  "Start processing KBAs in range from -> %s to -> %s from table -> '%s'...",
1119
1252
  partition_offset,
1120
1253
  partition_offset + partition_size,
@@ -1125,23 +1258,29 @@ class ServiceNow(object):
1125
1258
  # So we define "limit" as the maximum number of KBAs we want to retrieve for one REST call.
1126
1259
  # This should be a reasonable number to avoid timeouts. We also need to make sure
1127
1260
  # the limit is not bigger than the partition size:
1128
- limit = 100 if partition_size > 100 else partition_size
1261
+ limit = min(partition_size, 100)
1129
1262
 
1130
1263
  for offset in range(partition_offset, partition_offset + partition_size, limit):
1131
1264
  articles = self.get_table(
1132
- table_name=table_name, query=query, limit=limit, offset=offset
1265
+ table_name=table_name,
1266
+ query=query,
1267
+ limit=limit,
1268
+ offset=offset,
1133
1269
  )
1134
- logger.info(
1270
+ self.logger.info(
1135
1271
  "Retrieved a list of %s KBAs starting at offset -> %s to process.",
1136
1272
  str(len(articles)),
1137
1273
  offset,
1138
1274
  )
1139
1275
  for article in articles:
1140
- logger.info("Processing KBA -> %s...", article["number"])
1276
+ self.logger.info("Processing KBA -> %s...", article["number"])
1141
1277
  article["source_table"] = table_name
1142
- self.load_article(article)
1278
+ self.load_article(
1279
+ article=article,
1280
+ skip_existing_downloads=skip_existing_downloads,
1281
+ )
1143
1282
 
1144
- logger.info(
1283
+ self.logger.info(
1145
1284
  "Finished processing KBAs in range from -> %s to -> %s from table -> '%s'.",
1146
1285
  partition_offset,
1147
1286
  partition_offset + partition_size,
@@ -1150,15 +1289,19 @@ class ServiceNow(object):
1150
1289
 
1151
1290
  # end method definition
1152
1291
 
1153
- def load_article(self, article: dict, skip_existing_downloads: bool = True):
1154
- """Process a single KBA: download attachments (if any), add additional
1155
- keys / values to the article from other ServiceNow tables,
1156
- and finally add the KBA to the Data Frame.
1292
+ def load_article(self, article: dict, skip_existing_downloads: bool = True) -> None:
1293
+ """Process a single KBA.
1294
+
1295
+ Download attachments (if any), add additional keys / values to the article from
1296
+ other ServiceNow tables, and finally add the KBA to the data frame.
1157
1297
 
1158
1298
  Args:
1159
- article (dict): Dictionary inclusing all fields of
1160
- a single KBA. This is a mutable variable
1161
- that gets modified by this method!
1299
+ article (dict):
1300
+ Dictionary including all fields of a single KBA.
1301
+ This is a mutable variable that gets modified by this method!
1302
+ skip_existing_downloads (bool, optional):
1303
+ If True it tries to optimize the processing by reusing
1304
+ existing downloads of attachments.
1162
1305
 
1163
1306
  Side effect:
1164
1307
  The article dict is modified by adding additional key / value
@@ -1177,209 +1320,272 @@ class ServiceNow(object):
1177
1320
 
1178
1321
  """
1179
1322
 
1323
+ #
1324
+ # Download the attachments of the KBA:
1325
+ #
1326
+
1180
1327
  _ = self.download_attachments(
1181
- article=article, skip_existing=skip_existing_downloads
1328
+ article=article,
1329
+ skip_existing=skip_existing_downloads,
1182
1330
  )
1183
1331
 
1184
1332
  #
1185
1333
  # Add additional columns from related ServiceNow tables:
1186
1334
  #
1187
1335
 
1188
- if "kb_category" in article and article["kb_category"]:
1336
+ if article.get("kb_category"):
1189
1337
  category_key = article.get("kb_category")["value"]
1190
1338
  category_table_name = SN_TABLE_CATEGORIES
1191
1339
  category = self.get_object(
1192
- table_name=category_table_name, sys_id=category_key
1340
+ table_name=category_table_name,
1341
+ sys_id=category_key,
1193
1342
  )
1194
1343
  if category:
1195
1344
  article["kb_category_name"] = self.get_result_value(
1196
- response=category, key="full_category"
1345
+ response=category,
1346
+ key="full_category",
1197
1347
  )
1198
1348
  else:
1199
- logger.warning(
1200
- "Article -> %s has no category value!", article["number"]
1349
+ self.logger.warning(
1350
+ "Article -> %s has no category value!",
1351
+ article["number"],
1201
1352
  )
1202
1353
  article["kb_category_name"] = ""
1203
1354
  else:
1204
- logger.warning(
1205
- "Article -> %s has no value for category!", article["number"]
1355
+ self.logger.warning(
1356
+ "Article -> %s has no value for category!",
1357
+ article["number"],
1206
1358
  )
1207
1359
  article["kb_category_name"] = ""
1208
1360
 
1209
1361
  knowledge_base_key = article.get("kb_knowledge_base")["value"]
1210
1362
  knowledge_base_table_name = SN_TABLE_KNOWLEDGE_BASES
1211
1363
  knowledge_base = self.get_object(
1212
- table_name=knowledge_base_table_name, sys_id=knowledge_base_key
1364
+ table_name=knowledge_base_table_name,
1365
+ sys_id=knowledge_base_key,
1213
1366
  )
1214
1367
  if knowledge_base:
1215
1368
  article["kb_knowledge_base_name"] = self.get_result_value(
1216
- response=knowledge_base, key="title"
1369
+ response=knowledge_base,
1370
+ key="title",
1217
1371
  )
1218
1372
  else:
1219
- logger.warning(
1220
- "Article -> %s has no value for Knowledge Base!",
1373
+ self.logger.warning(
1374
+ "Article -> %s has no value for knowledge base!",
1221
1375
  article["number"],
1222
1376
  )
1223
1377
  article["kb_knowledge_base_name"] = ""
1224
1378
 
1225
- related_product_names = []
1226
- if article.get("related_products", None):
1379
+ # We use a set to make sure the resulting related items are unique:
1380
+ related_product_names: set = set()
1381
+ if article.get("related_products"):
1227
1382
  related_product_keys = article.get("related_products").split(",")
1228
1383
  for related_product_key in related_product_keys:
1229
1384
  related_product = self.get_object(
1230
- table_name=SN_TABLE_RELATED_PRODUCTS, sys_id=related_product_key
1385
+ table_name=SN_TABLE_RELATED_PRODUCTS,
1386
+ sys_id=related_product_key,
1231
1387
  )
1232
1388
  if related_product:
1233
1389
  related_product_name = self.get_result_value(
1234
- response=related_product, key="name"
1390
+ response=related_product,
1391
+ key="name",
1235
1392
  )
1236
- logger.debug(
1237
- "Found related Product -> '%s' (%s)",
1393
+ # Remove leading or trailing spaces (simple cleansing effort):
1394
+ related_product_name = related_product_name.strip() if related_product_name else ""
1395
+ if self._product_exclusions and related_product_name in self._product_exclusions:
1396
+ self.logger.info(
1397
+ "Found related product -> '%s' (%s) but it is on the product exclusion list. Skipping...",
1398
+ related_product_name,
1399
+ related_product_key,
1400
+ )
1401
+ continue
1402
+ self.logger.debug(
1403
+ "Found related product -> '%s' (%s)",
1238
1404
  related_product_name,
1239
1405
  related_product_key,
1240
1406
  )
1241
- related_product_names.append(related_product_name)
1407
+ # Add the related item to the resulting set
1408
+ # (duplicates will not be added as it is a set):
1409
+ related_product_names.add(related_product_name)
1242
1410
  # Extended ECM can only handle a maximum of 50 line items:
1243
1411
  if len(related_product_names) == 49:
1244
- logger.info(
1245
- "Reached maximum of 50 multi-value items for related Products of article -> %s",
1412
+ self.logger.info(
1413
+ "Reached maximum of 50 multi-value items for related products of article -> %s",
1246
1414
  article["number"],
1247
1415
  )
1248
1416
  break
1249
1417
  else:
1250
- logger.warning(
1251
- "Article -> %s: Cannot lookup related Product name in table -> '%s' with ID -> %s",
1418
+ self.logger.warning(
1419
+ "Article -> %s: Cannot lookup related product name in table -> '%s' with key -> '%s'",
1252
1420
  article["number"],
1253
1421
  SN_TABLE_RELATED_PRODUCTS,
1254
1422
  related_product_key,
1255
1423
  )
1256
1424
  else:
1257
- logger.warning(
1258
- "Article -> %s has no value related Products!",
1425
+ self.logger.debug(
1426
+ "Article -> %s has no related products!",
1259
1427
  article["number"],
1260
1428
  )
1261
- article["related_product_names"] = related_product_names
1429
+ # This adds a column to the data frame with the name "related_product_names"
1430
+ # (we convert the set to a list):
1431
+ article["related_product_names"] = list(related_product_names)
1262
1432
 
1263
- product_line_names = []
1264
- if article.get("u_product_line", None):
1433
+ # We use a set to make sure the resulting related items are unique:
1434
+ product_line_names: set = set()
1435
+ if article.get("u_product_line"):
1265
1436
  product_line_keys = article.get("u_product_line").split(",")
1266
1437
  product_line_table = SN_TABLE_PRODUCT_LINES
1267
1438
  for product_line_key in product_line_keys:
1268
1439
  product_line = self.get_object(
1269
- table_name=product_line_table, sys_id=product_line_key
1440
+ table_name=product_line_table,
1441
+ sys_id=product_line_key,
1270
1442
  )
1271
1443
  if product_line:
1272
1444
  product_line_name = self.get_result_value(
1273
- response=product_line, key="name"
1445
+ response=product_line,
1446
+ key="name",
1274
1447
  )
1275
- logger.debug(
1276
- "Found related Product Line -> '%s' (%s)",
1448
+ # Remove leading or trailing spaces (simple cleansing effort):
1449
+ product_line_name = product_line_name.strip() if product_line_name else ""
1450
+ self.logger.debug(
1451
+ "Found related product line -> '%s' (%s)",
1277
1452
  product_line_name,
1278
1453
  product_line_key,
1279
1454
  )
1280
- product_line_names.append(product_line_name)
1455
+ # Add the related item to the resulting set
1456
+ # (duplicates will not be added as it is a set):
1457
+ product_line_names.add(product_line_name)
1281
1458
  # Extended ECM can only handle a maximum of 50 line items:
1282
1459
  if len(product_line_names) == 49:
1283
- logger.info(
1284
- "Reached maximum of 50 multi-value items for related Product Lines of article -> %s",
1460
+ self.logger.info(
1461
+ "Reached maximum of 50 multi-value items for related product lines of article -> %s",
1285
1462
  article["number"],
1286
1463
  )
1287
1464
  break
1465
+ # end if product_line:
1288
1466
  else:
1289
- logger.warning(
1290
- "Article -> %s: Cannot lookup related Product Line name in table -> '%s' with ID -> %s",
1467
+ self.logger.warning(
1468
+ "Article -> %s: Cannot lookup related product line name in table -> '%s' with key -> '%s'",
1291
1469
  article["number"],
1292
1470
  product_line_table,
1293
1471
  product_line_key,
1294
1472
  )
1295
1473
  else:
1296
- logger.warning(
1297
- "Article -> %s has no value for related Product Lines!",
1474
+ self.logger.debug(
1475
+ "Article -> %s has no related product lines!",
1298
1476
  article["number"],
1299
1477
  )
1300
- article["u_product_line_names"] = product_line_names
1478
+ # This adds a column to the data frame with the name "u_product_line_names"
1479
+ # (we convert the set to a list):
1480
+ article["u_product_line_names"] = list(product_line_names)
1301
1481
 
1302
- sub_product_line_names = []
1303
- if article.get("u_sub_product_line", None):
1482
+ # We use a set to make sure the resulting related items are unique:
1483
+ sub_product_line_names: set = set()
1484
+ if article.get("u_sub_product_line"):
1304
1485
  sub_product_line_keys = article.get("u_sub_product_line").split(",")
1305
1486
  sub_product_line_table = SN_TABLE_PRODUCT_LINES
1306
1487
  for sub_product_line_key in sub_product_line_keys:
1307
1488
  sub_product_line = self.get_object(
1308
- table_name=sub_product_line_table, sys_id=sub_product_line_key
1489
+ table_name=sub_product_line_table,
1490
+ sys_id=sub_product_line_key,
1309
1491
  )
1310
1492
  if sub_product_line:
1311
1493
  sub_product_line_name = self.get_result_value(
1312
- response=sub_product_line, key="name"
1494
+ response=sub_product_line,
1495
+ key="name",
1313
1496
  )
1314
- logger.debug(
1315
- "Found related Sub Product Line -> '%s' (%s)",
1497
+ # Remove leading or trailing spaces (simple cleansing effort):
1498
+ sub_product_line_name = sub_product_line_name.strip() if sub_product_line_name else ""
1499
+ self.logger.debug(
1500
+ "Found related sub product line -> '%s' (%s)",
1316
1501
  sub_product_line_name,
1317
1502
  sub_product_line_key,
1318
1503
  )
1319
- sub_product_line_names.append(sub_product_line_name)
1504
+ # Add the related item to the resulting set
1505
+ # (duplicates will not be added as it is a set):
1506
+ sub_product_line_names.add(sub_product_line_name)
1320
1507
  # Extended ECM can only handle a maximum of 50 line items:
1321
1508
  if len(sub_product_line_names) == 49:
1322
- logger.info(
1323
- "Reached maximum of 50 multi-value items for related Sub Product Lines of article -> %s",
1509
+ self.logger.info(
1510
+ "Reached maximum of 50 multi-value items for related sub product lines of article -> %s",
1324
1511
  article["number"],
1325
1512
  )
1326
1513
  break
1327
1514
  else:
1328
- logger.warning(
1329
- "Article -> %s: Cannot lookup related Sub Product Line name in table -> '%s' with ID -> %s",
1515
+ self.logger.warning(
1516
+ "Article -> %s: Cannot lookup related sub product line name in table -> '%s' with key -> '%s'",
1330
1517
  article["number"],
1331
1518
  sub_product_line_table,
1332
1519
  sub_product_line_key,
1333
1520
  )
1334
1521
  else:
1335
- logger.warning(
1336
- "Article -> %s has no value for related Sub Product Lines!",
1522
+ self.logger.debug(
1523
+ "Article -> %s has no related sub product lines!",
1337
1524
  article["number"],
1338
1525
  )
1339
- article["u_sub_product_line_names"] = sub_product_line_names
1526
+ # This adds a column to the data frame with the name "u_sub_product_line_names"
1527
+ # (we convert the set to a list):
1528
+ article["u_sub_product_line_names"] = list(sub_product_line_names)
1340
1529
 
1341
- application_names = []
1342
- if article.get("u_application", None):
1530
+ # We use a set to make sure the resulting related items are unique:
1531
+ application_names: set = set()
1532
+ if article.get("u_application"):
1343
1533
  application_keys = article.get("u_application").split(",")
1344
1534
  application_table_name = SN_TABLE_PRODUCT_LINES
1345
1535
  for application_key in application_keys:
1346
1536
  application = self.get_object(
1347
- table_name=application_table_name, sys_id=application_key
1537
+ table_name=application_table_name,
1538
+ sys_id=application_key,
1348
1539
  )
1349
1540
  if application:
1350
1541
  application_name = self.get_result_value(
1351
- response=application, key="name"
1542
+ response=application,
1543
+ key="name",
1352
1544
  )
1353
- logger.debug(
1354
- "Found related Application -> '%s' (%s)",
1545
+ # Remove leading or trailing spaces (simple cleansing effort):
1546
+ application_name = application_name.strip() if application_name else ""
1547
+ if self._product_exclusions and application_name in self._product_exclusions:
1548
+ self.logger.info(
1549
+ "Found related application -> '%s' (%s) but it is on the product exclusion list. Skipping...",
1550
+ application_name,
1551
+ application_key,
1552
+ )
1553
+ continue
1554
+ self.logger.debug(
1555
+ "Found related application -> '%s' (%s)",
1355
1556
  application_name,
1356
1557
  application_key,
1357
1558
  )
1358
- application_names.append(application_name)
1559
+ # Add the related item to the resulting set
1560
+ # (duplicates will not be added as it is a set):
1561
+ application_names.add(application_name)
1359
1562
  # Extended ECM can only handle a maximum of 50 line items:
1360
1563
  if len(application_names) == 49:
1361
- logger.info(
1362
- "Reached maximum of 50 multi-value items for related Applications of article -> %s",
1564
+ self.logger.info(
1565
+ "Reached maximum of 50 multi-value items for related applications of article -> %s",
1363
1566
  article["number"],
1364
1567
  )
1365
1568
  break
1569
+ # end if application
1366
1570
  else:
1367
- logger.warning(
1368
- "Article -> %s: Cannot lookup related Application name in table -> '%s' with ID -> %s",
1571
+ self.logger.warning(
1572
+ "Article -> %s: Cannot lookup related application name in table -> '%s' with key -> %s",
1369
1573
  article["number"],
1370
1574
  application_table_name,
1371
1575
  application_key,
1372
1576
  )
1373
1577
  else:
1374
- logger.warning(
1375
- "Article -> %s has no value for related Applications!",
1578
+ self.logger.debug(
1579
+ "Article -> %s has no related applications!",
1376
1580
  article["number"],
1377
1581
  )
1378
- article["u_application_names"] = application_names
1582
+ # This adds a column to the data frame with the name "u_application_names"
1583
+ # (we convert the set to a list):
1584
+ article["u_application_names"] = list(application_names)
1379
1585
 
1380
- application_versions = []
1586
+ application_versions: set = set()
1381
1587
  application_version_sets = []
1382
- if article.get("u_application_version", None):
1588
+ if article.get("u_application_version"):
1383
1589
  application_version_keys = article.get("u_application_version").split(",")
1384
1590
  for application_version_key in application_version_keys:
1385
1591
  # Get the version object from ServiceNow. It includes both,
@@ -1390,74 +1596,107 @@ class ServiceNow(object):
1390
1596
  )
1391
1597
  if application_version:
1392
1598
  application_version_name = self.get_result_value(
1393
- response=application_version, key="u_version_name"
1599
+ response=application_version,
1600
+ key="u_version_name",
1394
1601
  )
1395
- logger.debug(
1396
- "Found related Application Version -> '%s' (%s)",
1602
+ self.logger.debug(
1603
+ "Found related application version -> '%s' in table -> '%s' with key -> '%s'",
1604
+ application_version_name,
1397
1605
  SN_TABLE_PRODUCT_LINES,
1398
1606
  application_version_key,
1399
1607
  )
1400
1608
 
1401
- application_versions.append(application_version_name)
1402
-
1403
- # Lookup application name of version and fill the set
1609
+ # Add the related version to the resulting set
1610
+ # (duplicates will not be added as it is a set):
1611
+ application_versions.add(application_version_name)
1404
1612
 
1613
+ # Use the application key to lookup application name
1614
+ # for the version and fill a set
1405
1615
  application_key = self.get_result_value(
1406
- response=application_version, key="u_product_model"
1616
+ response=application_version,
1617
+ key="u_product_model",
1407
1618
  )
1408
1619
 
1409
1620
  if application_key:
1410
- # u_applicatio_model has a substructure like this:
1411
- # {
1412
- # 'link': 'https://support.opentext.com/api/now/table/u_ot_product_model/9b2dcea747f6d910ab0a9ed7536d4364',
1413
- # 'value': '9b2dcea747f6d910ab0a9ed7536d4364'
1414
- # }
1415
- # We want the value:
1621
+ """
1622
+ u_application_model has a substructure like this:
1623
+ {
1624
+ 'link': 'https://support.opentext.com/api/now/table/u_ot_product_model/9b2dcea747f6d910ab0a9ed7536d4364',
1625
+ 'value': '9b2dcea747f6d910ab0a9ed7536d4364'
1626
+ }
1627
+ """
1628
+ # We want the value which represents the key to lookup the application name:
1416
1629
  application_key = application_key.get("value")
1417
1630
 
1418
1631
  if application_key:
1632
+ # Retrieve the application with the application key from ServiceNow:
1419
1633
  application = self.get_object(
1420
1634
  table_name=SN_TABLE_PRODUCT_LINES,
1421
1635
  sys_id=application_key,
1422
1636
  )
1423
-
1424
1637
  application_name = self.get_result_value(
1425
- response=application, key="name"
1638
+ response=application,
1639
+ key="name",
1640
+ )
1641
+ # Remove leading or trailing spaces (simple cleansing effort):
1642
+ application_name = application_name.strip() if application_name else ""
1643
+
1644
+ # We check if the application name is in the product exclusions list.
1645
+ # If this is the case we skip it from being added to the Application Version Set
1646
+ # as we don't want to create a workspace relationship.
1647
+ if (
1648
+ self._product_exclusions
1649
+ and application_name
1650
+ and application_name in self._product_exclusions
1651
+ ):
1652
+ self.logger.info(
1653
+ "Found related application -> '%s' (%s) but it is on the product exclusion list. Skipping...",
1654
+ application_name,
1655
+ application_key,
1656
+ )
1657
+ continue
1658
+ self.logger.debug(
1659
+ "Found related application -> '%s' for version -> '%s' in table -> '%s' with key -> '%s'",
1660
+ application_name,
1661
+ application_version_name,
1662
+ SN_TABLE_PRODUCT_LINES,
1663
+ application_key,
1426
1664
  )
1427
1665
 
1428
1666
  if application_name:
1429
1667
  application_version_sets.append(
1430
1668
  {
1431
- # "Application": application_name,
1432
- # "Version": application_version_name,
1433
1669
  "u_product_model": application_name,
1434
1670
  "u_version_name": application_version_name,
1435
- }
1671
+ },
1436
1672
  )
1673
+ # end if application_key
1437
1674
 
1438
1675
  # Extended ECM can only handle a maximum of 50 line items:
1439
- if len(application_versions) == 49:
1440
- logger.info(
1441
- "Reached maximum of 50 multi-value items for related Application Version of article -> %s",
1676
+ if len(application_version_sets) == 49:
1677
+ self.logger.info(
1678
+ "Reached maximum of 50 multi-value items for related application versions of article -> %s",
1442
1679
  article["number"],
1443
1680
  )
1444
1681
  break
1682
+ # end if application_version
1445
1683
  else:
1446
- logger.warning(
1447
- "Article -> %s: Cannot lookup related Application Version in table -> '%s' with ID -> %s",
1684
+ self.logger.warning(
1685
+ "Article -> %s: Cannot lookup related application version in table -> '%s' with key -> '%s'",
1448
1686
  article["number"],
1449
1687
  SN_TABLE_PRODUCT_VERSIONS,
1450
1688
  application_version_key,
1451
1689
  )
1452
1690
  else:
1453
- logger.warning(
1454
- "Article -> %s has no value for related Application Version!",
1691
+ self.logger.debug(
1692
+ "Article -> %s has no related application version!",
1455
1693
  article["number"],
1456
1694
  )
1457
- # Convert to list and set to remove duplicates:
1458
- article["u_application_versions"] = list(set(application_versions))
1695
+ # This adds a column to the data frame with the name "u_application_versions"
1696
+ # (we convert the set to a list):
1697
+ article["u_application_versions"] = list(application_versions)
1459
1698
 
1460
- # This set maps the applications and the versions (table-like structure)
1699
+ # This list of dictionaries maps the applications and the versions (table-like structure)
1461
1700
  article["u_application_version_sets"] = application_version_sets
1462
1701
 
1463
1702
  # Now we add the article to the Pandas Data Frame in the Data class: