pyxecm 1.5__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pyxecm might be problematic. Click here for more details.

Files changed (56) hide show
  1. pyxecm/__init__.py +6 -2
  2. pyxecm/avts.py +1492 -0
  3. pyxecm/coreshare.py +1075 -960
  4. pyxecm/customizer/__init__.py +16 -4
  5. pyxecm/customizer/__main__.py +58 -0
  6. pyxecm/customizer/api/__init__.py +5 -0
  7. pyxecm/customizer/api/__main__.py +6 -0
  8. pyxecm/customizer/api/app.py +914 -0
  9. pyxecm/customizer/api/auth.py +154 -0
  10. pyxecm/customizer/api/metrics.py +92 -0
  11. pyxecm/customizer/api/models.py +13 -0
  12. pyxecm/customizer/api/payload_list.py +865 -0
  13. pyxecm/customizer/api/settings.py +103 -0
  14. pyxecm/customizer/browser_automation.py +332 -139
  15. pyxecm/customizer/customizer.py +1075 -1057
  16. pyxecm/customizer/exceptions.py +35 -0
  17. pyxecm/customizer/guidewire.py +322 -0
  18. pyxecm/customizer/k8s.py +787 -338
  19. pyxecm/customizer/log.py +107 -0
  20. pyxecm/customizer/m365.py +3424 -2270
  21. pyxecm/customizer/nhc.py +1169 -0
  22. pyxecm/customizer/openapi.py +258 -0
  23. pyxecm/customizer/payload.py +18201 -7030
  24. pyxecm/customizer/pht.py +1047 -210
  25. pyxecm/customizer/salesforce.py +836 -727
  26. pyxecm/customizer/sap.py +58 -41
  27. pyxecm/customizer/servicenow.py +851 -383
  28. pyxecm/customizer/settings.py +442 -0
  29. pyxecm/customizer/successfactors.py +408 -346
  30. pyxecm/customizer/translate.py +83 -48
  31. pyxecm/helper/__init__.py +5 -2
  32. pyxecm/helper/assoc.py +98 -38
  33. pyxecm/helper/data.py +2482 -742
  34. pyxecm/helper/logadapter.py +27 -0
  35. pyxecm/helper/web.py +229 -101
  36. pyxecm/helper/xml.py +528 -172
  37. pyxecm/maintenance_page/__init__.py +5 -0
  38. pyxecm/maintenance_page/__main__.py +6 -0
  39. pyxecm/maintenance_page/app.py +51 -0
  40. pyxecm/maintenance_page/settings.py +28 -0
  41. pyxecm/maintenance_page/static/favicon.avif +0 -0
  42. pyxecm/maintenance_page/templates/maintenance.html +165 -0
  43. pyxecm/otac.py +234 -140
  44. pyxecm/otawp.py +2689 -0
  45. pyxecm/otcs.py +12344 -7547
  46. pyxecm/otds.py +3166 -2219
  47. pyxecm/otiv.py +36 -21
  48. pyxecm/otmm.py +1363 -296
  49. pyxecm/otpd.py +231 -127
  50. pyxecm-2.0.0.dist-info/METADATA +145 -0
  51. pyxecm-2.0.0.dist-info/RECORD +54 -0
  52. {pyxecm-1.5.dist-info → pyxecm-2.0.0.dist-info}/WHEEL +1 -1
  53. pyxecm-1.5.dist-info/METADATA +0 -51
  54. pyxecm-1.5.dist-info/RECORD +0 -30
  55. {pyxecm-1.5.dist-info → pyxecm-2.0.0.dist-info/licenses}/LICENSE +0 -0
  56. {pyxecm-1.5.dist-info → pyxecm-2.0.0.dist-info}/top_level.txt +0 -0
pyxecm/avts.py ADDED
@@ -0,0 +1,1492 @@
1
+ """AVTS stands for Aviator Search and is an OpenText offering for LLM-based search across multiple repositories."""
2
+
3
+ __author__ = "Dr. Marc Diefenbruch"
4
+ __copyright__ = "Copyright (C) 2024-2025, OpenText"
5
+ __credits__ = ["Kai-Philip Gatzweiler"]
6
+ __maintainer__ = "Dr. Marc Diefenbruch"
7
+ __email__ = "mdiefenb@opentext.com"
8
+
9
+ import base64
10
+ import json
11
+ import logging
12
+ import os
13
+ import time
14
+
15
+ import requests
16
+
17
# Module-level logger; AVTS uses it as the class-level default and as the
# default value of the constructor's ``logger`` argument.
default_logger = logging.getLogger("pyxecm.customizer.avts")

# Base HTTP headers that AVTS.request_header() starts from (JSON in, JSON out).
REQUEST_HEADERS = {"Accept": "application/json", "Content-Type": "application/json"}

# Default request timeout in seconds (default of do_request's ``timeout``).
REQUEST_TIMEOUT = 60
# Seconds do_request sleeps before retrying after a timeout / connection error.
REQUEST_RETRY_DELAY = 20
# Default of do_request's ``max_retries``.
REQUEST_MAX_RETRIES = 2
24
+
25
+
26
+ class AVTS:
27
+ """Configure and interact with Aviator Search REST API."""
28
+
29
+ logger: logging.Logger = default_logger
30
+
31
+ _config: dict
32
+ _session = None
33
+
34
def __init__(
    self,
    otds_url: str,
    client_id: str,
    client_secret: str,
    base_url: str,
    username: str,
    password: str,
    logger: logging.Logger = default_logger,
) -> None:
    """Set up the Aviator Search client.

    Stores all connection parameters in the configuration dictionary,
    derives the OTDS token URL and the repository admin URL from them,
    and opens a fresh HTTP session. No network call is made here.

    Args:
        otds_url (str):
            The URL of the OTDS Server used by Aviator Search.
        client_id (str):
            The client ID for the Aviator Search oAuth client.
        client_secret (str):
            The client secret for the Aviator Search oAuth client.
        base_url (str):
            The Aviator Search base URL.
        username (str):
            User with administrative permissions in Aviator Search.
        password (str):
            Password of the user with administrative permissions in Aviator Search.
        logger (logging.Logger, optional):
            The logging object to use for all log messages. Defaults to default_logger.

    """

    # Connection settings first; the two service URLs are derived from the base URLs.
    self._config = {
        "otdsUrl": otds_url,
        "clientId": client_id,
        "clientSecret": client_secret,
        "baseUrl": base_url,
        "username": username,
        "password": password,
        "tokenUrl": otds_url + "/otdsws/oauth2/token",
        "repoUrl": base_url + "/aviator-gateway/avts-api/admin/v1/repo",
    }

    # No token until authenticate() has been called successfully.
    self._accesstoken = None
    self._session = requests.Session()

    # A custom logger gets its own "avts" child that inherits the parent's filters.
    if logger != default_logger:
        child_logger = logger.getChild("avts")
        self.logger = child_logger
        for parent_filter in logger.filters:
            child_logger.addFilter(parent_filter)

# end method definition
88
+
89
def config(self) -> dict:
    """Give access to the settings collected by the constructor.

    Returns:
        dict:
            The configuration dictionary (the stored object itself, not a copy).

    """

    settings = self._config
    return settings

# end method definition
100
+
101
def request_header(self, content_type: str = "") -> dict:
    """Return the request header used for Application calls.

    Consists of the default JSON headers, an optional custom content type
    and - once authenticated - the Bearer access token.

    A fresh dictionary is built on every call. The module-level
    REQUEST_HEADERS template is copied, never modified, so a custom
    content type or an access token cannot leak into later calls or
    into other AVTS instances.

    Args:
        content_type (str, optional):
            Custom content type for the request.
            Typical values:
            * application/json - Used for sending JSON-encoded data
            * application/x-www-form-urlencoded - The default for HTML forms.
              Data is sent as key-value pairs in the body of the request, similar to query parameters.
            * multipart/form-data - Used for file uploads or when a form includes non-ASCII characters

    Returns:
        dict:
            The request header values.

    """

    # Copy the shared template - mutating it in place would change the
    # defaults for every subsequent request in the process.
    request_header = dict(REQUEST_HEADERS)

    if content_type:
        request_header["Content-Type"] = content_type

    if self._accesstoken is not None:
        request_header["Authorization"] = f"Bearer {self._accesstoken}"

    return request_header

# end method definition
133
+
134
def do_request(
    self,
    url: str,
    method: str = "GET",
    headers: dict | None = None,
    data: dict | None = None,
    json_data: dict | None = None,
    files: dict | None = None,
    timeout: int | None = REQUEST_TIMEOUT,
    show_error: bool = True,
    failure_message: str = "",
    success_message: str = "",
    max_retries: int = REQUEST_MAX_RETRIES,
    retry_forever: bool = False,
) -> dict | None:
    """Call an Aviator Search REST API in a safe way.

    Successful responses are parsed via parse_request_response(). A 401
    response triggers one re-authentication followed by one retry with the
    refreshed token. Timeouts and connection errors are retried after a
    delay of REQUEST_RETRY_DELAY seconds.

    Args:
        url (str):
            URL to send the request to.
        method (str, optional):
            HTTP method (GET, POST, etc.). Defaults to "GET".
        headers (dict | None, optional):
            Request headers. Defaults to None. The passed dictionary is never modified.
        data (dict | None, optional):
            Request payload. Defaults to None.
        json_data (dict | None, optional):
            Request payload for the JSON parameter. Defaults to None.
        files (dict | None, optional):
            Dictionary of {"name": file-tuple} for multipart encoding upload.
            The file-tuple can be a 2-tuple ("filename", fileobj) or a 3-tuple
            ("filename", fileobj, "content_type").
        timeout (int | None, optional):
            Timeout for the request in seconds. Defaults to REQUEST_TIMEOUT.
        show_error (bool, optional):
            Whether or not an error should be logged in case of a failed REST call.
            If False, then only a warning is logged. Defaults to True.
        failure_message (str, optional):
            Specific error message. Defaults to "".
        success_message (str, optional):
            Specific success message. Defaults to "".
        max_retries (int, optional):
            Number of retries on connection errors. Defaults to REQUEST_MAX_RETRIES.
        retry_forever (bool, optional):
            Whether to wait forever without timeout. Defaults to False.

    Returns:
        dict | None:
            Response of Aviator Search REST API or None in case of an error.

    """

    retries = 0
    while True:
        try:
            response = self._session.request(
                method=method,
                url=url,
                data=data,
                json=json_data,
                files=files,
                headers=headers,
                timeout=timeout,
            )

            if response.ok:
                if success_message:
                    self.logger.debug(success_message)
                return self.parse_request_response(response)

            # authenticate() itself calls do_request() for the token URL.
            # Re-authenticating on a 401 from that very URL would recurse
            # without bound, so token requests are excluded here.
            is_token_request = url == self.config().get("tokenUrl")

            # Check if Session has expired - then re-authenticate and try once more
            if response.status_code == 401 and retries == 0 and not is_token_request:
                self.logger.debug("Session has expired - try to re-authenticate...")
                self.authenticate()
                # The caller built its headers with the expired token; the
                # retry must carry the new one. Work on a copy to leave the
                # caller's dictionary untouched.
                if headers is not None and "Authorization" in headers and self._accesstoken:
                    headers = {**headers, "Authorization": f"Bearer {self._accesstoken}"}
                retries += 1
                continue

            # Handle plain HTML responses to not pollute the logs.
            # The header may carry a charset suffix ("text/html; charset=utf-8").
            content_type = response.headers.get("content-type", None)
            is_html = (content_type or "").startswith("text/html")
            response_text = "HTML content (see debug log)" if is_html else response.text

            if show_error:
                self.logger.error(
                    "%s; status -> %s; error -> %s",
                    failure_message,
                    response.status_code,
                    response_text,
                )
            else:
                self.logger.warning(
                    "%s; status -> %s; warning -> %s",
                    failure_message,
                    response.status_code,
                    response_text,
                )

            # The full HTML body is only written to the debug log
            if is_html:
                self.logger.debug(
                    "%s; status -> %s; warning -> %s",
                    failure_message,
                    response.status_code,
                    response.text,
                )

            return None
        except requests.exceptions.Timeout:
            if retries <= max_retries:
                self.logger.warning(
                    "Request timed out. Retrying in %s seconds...",
                    str(REQUEST_RETRY_DELAY),
                )
                retries += 1
                time.sleep(REQUEST_RETRY_DELAY)  # Add a delay before retrying
            else:
                self.logger.error(
                    "%s; timeout error.",
                    failure_message,
                )
                if retry_forever:
                    # If it fails after REQUEST_MAX_RETRIES retries we let it wait forever
                    self.logger.warning("Turn timeouts off and wait forever...")
                    timeout = None
                else:
                    return None
        except requests.exceptions.ConnectionError:
            if retries <= max_retries:
                self.logger.warning(
                    "Connection error. Retrying in %s seconds...",
                    str(REQUEST_RETRY_DELAY),
                )
                retries += 1
                time.sleep(REQUEST_RETRY_DELAY)  # Add a delay before retrying
            else:
                self.logger.error(
                    "%s; connection error.",
                    failure_message,
                )
                if retry_forever:
                    # If it fails after REQUEST_MAX_RETRIES retries we let it wait forever
                    self.logger.warning("Turn timeouts off and wait forever...")
                    timeout = None
                    time.sleep(REQUEST_RETRY_DELAY)  # Add a delay before retrying
                else:
                    return None

# end method definition
277
+
278
def parse_request_response(
    self,
    response_object: requests.Response,
    additional_error_message: str = "",
    show_error: bool = True,
) -> list | None:
    """Turn a REST response into a Python object without raising on bad JSON.

    A non-empty response text is decoded with json.loads(). If the text
    is empty, the attributes of the response object itself are returned
    (via the vars() built-in) instead.

    Args:
        response_object (object):
            This is the response object delivered by the request call.
        additional_error_message (str, optional):
            Use a more specific error message in case of an error.
        show_error (bool, optional):
            If True, write an error to the log file.
            If False, write a warning to the log file.

    Returns:
        list | None:
            The response information or None in case of an error.

    """

    # Nothing to parse for a missing / falsy response object
    if not response_object:
        return None

    try:
        if response_object.text:
            parsed = json.loads(response_object.text)
        else:
            parsed = vars(response_object)
    except json.JSONDecodeError as exception:
        if additional_error_message:
            message = f"Cannot decode response as JSON. {additional_error_message}; error -> {exception}"
        else:
            message = f"Cannot decode response as JSON; error -> {exception}"
        # Severity of the log entry is chosen by the caller
        write_log = self.logger.error if show_error else self.logger.warning
        write_log(message)
        return None

    return parsed

# end method definition
330
+
331
+ def authenticate(self) -> str | None:
332
+ """Authenticate at Aviator Search via OAuth.
333
+
334
+ Returns:
335
+ str | None:
336
+ The access token or None in case of an error.
337
+
338
+ """
339
+
340
+ if not self._session:
341
+ self._session = requests.Session()
342
+
343
+ self._session.headers.update(self.request_header())
344
+
345
+ request_url = self.config()["tokenUrl"]
346
+ request_header = {
347
+ "Authorization": "Bearer ",
348
+ "content-type": "application/x-www-form-urlencoded",
349
+ }
350
+ request_payload = {
351
+ "client_id": self.config()["clientId"],
352
+ "grant_type": "password",
353
+ "client_secret": self.config()["clientSecret"],
354
+ "username": self.config()["username"],
355
+ "password": self.config()["password"],
356
+ "scope": "otds:roles",
357
+ }
358
+
359
+ response = self.do_request(
360
+ url=request_url,
361
+ method="POST",
362
+ headers=request_header,
363
+ data=request_payload,
364
+ timeout=None,
365
+ failure_message="Failed to authenticate to OTDS with username -> {} and client_id -> {}".format(
366
+ self.config()["username"],
367
+ self.config()["clientId"],
368
+ ),
369
+ )
370
+
371
+ if response is not None:
372
+ self._accesstoken = response.get("access_token", None)
373
+
374
+ return response
375
+
376
+ # end method definition
377
+
378
+ def create_extended_ecm_repo(
379
+ self,
380
+ name: str,
381
+ username: str,
382
+ password: str,
383
+ otcs_url: str,
384
+ otcs_api_url: str,
385
+ node_id: int,
386
+ ) -> dict | None:
387
+ """Create a new Extended ECM repository to crawl with Aviator Search.
388
+
389
+ Args:
390
+ name (str):
391
+ The name of the repository.
392
+ username (str):
393
+ Username to use for crawling.
394
+ password (str):
395
+ Password of the user used for crawling.
396
+ otcs_url (str):
397
+ Base URL of Content Server e.g. https://otcs.base-url.tld/cs/cs
398
+ otcs_api_url (str):
399
+ The REST API URL of Content Server.
400
+ node_id (int):
401
+ Root Node ID for crawling
402
+ version (str, optional):
403
+ TODO: The version number of ???
404
+
405
+ Returns:
406
+ dict | None:
407
+ Parsed response object from the API or None in case of an error
408
+
409
+ """
410
+
411
+ payload = {
412
+ "authType": "Basic",
413
+ "params": [
414
+ {
415
+ "id": "OpenTextApiUrl",
416
+ "label": "Service URL",
417
+ "ctlType": "text",
418
+ "description": "OpenText Content Management API URL",
419
+ "required": True,
420
+ "defaultValue": "localhost",
421
+ "visible": True,
422
+ "editable": False,
423
+ "value": otcs_api_url,
424
+ },
425
+ {
426
+ "id": "Username",
427
+ "label": "Username",
428
+ "ctlType": "text",
429
+ "description": "OpenText Content Management Username",
430
+ "required": True,
431
+ "defaultValue": "",
432
+ "visible": True,
433
+ "editable": True,
434
+ "value": username,
435
+ },
436
+ {
437
+ "id": "Password",
438
+ "label": "Password",
439
+ "ctlType": "password",
440
+ "description": "OpenText Content Management password",
441
+ "required": True,
442
+ "defaultValue": "",
443
+ "visible": True,
444
+ "editable": True,
445
+ "value": password,
446
+ },
447
+ {
448
+ "id": "sourceLink",
449
+ "label": "Source Link",
450
+ "ctlType": "text",
451
+ "description": "Example: <OpenText Content Management API URL>/app/nodes/${NODE}/metadata",
452
+ "required": False,
453
+ "defaultValue": otcs_url + "/app/nodes/${NODE}/metadata",
454
+ "visible": True,
455
+ "editable": True,
456
+ },
457
+ {
458
+ "id": "RootNodeIds",
459
+ "label": "Root Node ID's",
460
+ "ctlType": "text",
461
+ "description": "List of nodes to be crawled(comma seperated)",
462
+ "required": True,
463
+ "defaultValue": "",
464
+ "visible": True,
465
+ "editable": False,
466
+ "value": "2000",
467
+ },
468
+ {
469
+ "id": "proxy",
470
+ "label": "Proxy Service",
471
+ "ctlType": "boolean",
472
+ "description": "",
473
+ "required": False,
474
+ "defaultValue": "false",
475
+ "value": "false",
476
+ "visible": True,
477
+ "editable": True,
478
+ },
479
+ {
480
+ "id": "proxyScheme",
481
+ "label": "Proxy Scheme",
482
+ "ctlType": "select",
483
+ "description": "",
484
+ "required": False,
485
+ "defaultValue": "HTTP",
486
+ "value": "HTTP",
487
+ "visible": True,
488
+ "acceptedValues": ["HTTP", "HTTPS", "SOCKS5"],
489
+ "editable": True,
490
+ },
491
+ {
492
+ "id": "proxyHost",
493
+ "label": "Proxy Host",
494
+ "ctlType": "text",
495
+ "description": "",
496
+ "required": False,
497
+ "defaultValue": "",
498
+ "value": "",
499
+ "visible": True,
500
+ "editable": True,
501
+ },
502
+ {
503
+ "id": "proxyPort",
504
+ "label": "Proxy Port",
505
+ "ctlType": "text",
506
+ "description": "",
507
+ "required": False,
508
+ "defaultValue": "",
509
+ "value": "",
510
+ "visible": True,
511
+ "editable": True,
512
+ },
513
+ {
514
+ "id": "ProxyConfigService",
515
+ "label": "Proxy Config Service",
516
+ "ctlType": "text",
517
+ "description": "",
518
+ "required": False,
519
+ "defaultValue": "",
520
+ "value": "",
521
+ "visible": False,
522
+ "editable": True,
523
+ },
524
+ ],
525
+ "config": {
526
+ "type": "nifi",
527
+ "id": "xECM",
528
+ "crawlConfig": {
529
+ "name": "GetOpenText",
530
+ "type": "idol.nifi.connector.GetOpenText",
531
+ "group": "idol.nifi.connector",
532
+ "artifact": "idol-nifi-connector-opentext",
533
+ "version": "25.1.0-nifi1",
534
+ },
535
+ "viewConfig": {
536
+ "name": "ViewOpenText",
537
+ "type": "idol.nifi.connector.ViewOpenText",
538
+ "group": "idol.nifi.connector",
539
+ "artifact": "idol-nifi-connector-opentext",
540
+ "version": "25.1.0-nifi1",
541
+ },
542
+ "omniConfig": {
543
+ "name": "GetOpenTextGroups",
544
+ "type": "idol.nifi.connector.GetOpenTextGroups",
545
+ "group": "idol.nifi.connector",
546
+ "artifact": "idol-nifi-connector-opentext",
547
+ "version": "25.1.0-nifi1",
548
+ "repoName": "ECM",
549
+ },
550
+ "crawlProps": {
551
+ "Password": "${Password}",
552
+ "Username": "${UserName}",
553
+ "META:SOURCE": "OPENTEXT",
554
+ "RootNodeIds": "${RootNodeIds}",
555
+ "MappedSecurity": "true",
556
+ "OpenTextApiUrl": "${OpenTextApiUrl}",
557
+ "ProxyConfigService": "${ProxyConfigService}",
558
+ },
559
+ "viewProps": {
560
+ "Password": "${Password}",
561
+ "Username": "${UserName}",
562
+ "OpenTextApiUrl": "${OpenTextApiUrl}",
563
+ "ProxyConfigService": "${ProxyConfigService}",
564
+ },
565
+ "omniProps": {
566
+ "Password": "${Password}",
567
+ "Username": "${UserName}",
568
+ "OpenTextApiUrl": "${OpenTextApiUrl}",
569
+ "ProxyConfigService": "${ProxyConfigService}",
570
+ "OpenTextApiPageSize": "10",
571
+ },
572
+ "metadataFields": ["NODE"],
573
+ },
574
+ "name": name,
575
+ "id": "xECM",
576
+ "sourceId": "xECM",
577
+ }
578
+
579
+ request_header = self.request_header()
580
+ request_url = self.config()["repoUrl"]
581
+
582
+ response = self.do_request(
583
+ url=request_url,
584
+ method="POST",
585
+ json_data=payload,
586
+ headers=request_header,
587
+ timeout=None,
588
+ failure_message="Failed to create repository -> '{}' ({})".format(
589
+ name,
590
+ node_id,
591
+ ),
592
+ show_error=False,
593
+ )
594
+
595
+ if response is None:
596
+ self.logger.error("Failed to create repository -> %s (%s)", name, node_id)
597
+
598
+ return response
599
+
600
+ # end method definition
601
+
602
+ def create_msteams_repo(
603
+ self,
604
+ name: str,
605
+ client_id: str,
606
+ tenant_id: str,
607
+ certificate_file: str,
608
+ certificate_password: int,
609
+ index_attachments: bool = True,
610
+ index_call_recordings: bool = True,
611
+ index_message_replies: bool = True,
612
+ index_user_chats: bool = True,
613
+ ) -> dict | None:
614
+ """Create a new Microsoft Teams repository to crawl with Aviator Search.
615
+
616
+ Args:
617
+ name (str):
618
+ The name of the repository.
619
+ client_id (str):
620
+ The M365 client ID.
621
+ tenant_id (str):
622
+ The M365 tenant ID.
623
+ certificate_file (str):
624
+ The path to the certificate file.
625
+ certificate_password (str):
626
+ The password for the certificate.
627
+ index_attachments (bool, optional):
628
+ Whether or not to index / crawl attachments.
629
+ index_call_recordings (bool, optional):
630
+ Whether or not to index / crawl meeting recordings.
631
+ index_message_replies (bool, optional):
632
+ Whether or not to index / crawl message replies.
633
+ index_user_chats (bool, optional):
634
+ Whether or not to index / crawl user chats.
635
+ version(str, optional): default 24.3.0
636
+
637
+ # TODO: add more params
638
+
639
+ Returns:
640
+ dict | None:
641
+ Parsed response object from the API or None in case of an error
642
+
643
+ """
644
+
645
+ certificate_file_content_base64 = self.get_certificate_file_content_base64(
646
+ certificate_file,
647
+ )
648
+
649
+ payload = {
650
+ "authType": "OAUTH",
651
+ "params": [
652
+ {
653
+ "id": "OAuth2SiteName",
654
+ "label": "OAuth2 Site Name",
655
+ "ctlType": "text",
656
+ "required": False,
657
+ "defaultValue": "AVTS",
658
+ "value": "AVTS",
659
+ "visible": False,
660
+ "editable": True,
661
+ },
662
+ {
663
+ "id": "OAuth2SitesFile",
664
+ "label": "OAuth2 Sites File",
665
+ "ctlType": "text",
666
+ "required": False,
667
+ "defaultValue": "",
668
+ "value": "",
669
+ "visible": False,
670
+ "editable": True,
671
+ },
672
+ {
673
+ "id": "sourceLink",
674
+ "label": "Source Link",
675
+ "ctlType": "text",
676
+ "required": False,
677
+ "defaultValue": "",
678
+ "visible": True,
679
+ "editable": True,
680
+ },
681
+ {
682
+ "id": "clientID",
683
+ "label": "Client ID",
684
+ "ctlType": "text",
685
+ "description": "Microsoft Entra client ID",
686
+ "required": True,
687
+ "defaultValue": "",
688
+ "value": client_id,
689
+ "visible": True,
690
+ "editable": True,
691
+ },
692
+ {
693
+ "id": "tenant",
694
+ "label": "Tenant ID",
695
+ "ctlType": "text",
696
+ "description": "Microsoft Entra tenant ID",
697
+ "required": True,
698
+ "defaultValue": "",
699
+ "value": tenant_id,
700
+ "visible": True,
701
+ "editable": False,
702
+ },
703
+ {
704
+ "id": "IndexAttachments",
705
+ "label": "Index Attachments",
706
+ "ctlType": "boolean",
707
+ "description": "Specifies whether to index attachments",
708
+ "required": False,
709
+ "defaultValue": "true",
710
+ "value": "true",
711
+ "visible": str(index_attachments).lower(),
712
+ "editable": True,
713
+ },
714
+ {
715
+ "id": "IndexCallRecordings",
716
+ "label": "Index Call Recordings",
717
+ "ctlType": "boolean",
718
+ "description": "Specifies whether to index call recordings",
719
+ "required": False,
720
+ "defaultValue": "true",
721
+ "value": str(index_call_recordings).lower(),
722
+ "visible": True,
723
+ "editable": True,
724
+ },
725
+ {
726
+ "id": "IndexMessageReplies",
727
+ "label": "Index Message Replies",
728
+ "ctlType": "boolean",
729
+ "description": "Specifies whether to index replies to messages",
730
+ "required": False,
731
+ "defaultValue": "true",
732
+ "value": str(index_message_replies).lower(),
733
+ "visible": True,
734
+ "editable": True,
735
+ },
736
+ {
737
+ "id": "IndexUserChats",
738
+ "label": "Index User Chats",
739
+ "ctlType": "boolean",
740
+ "description": "Specifies whether to synchronize one-to-one and group messages for each user",
741
+ "required": False,
742
+ "defaultValue": "true",
743
+ "value": str(index_user_chats).lower(),
744
+ "visible": True,
745
+ "editable": True,
746
+ },
747
+ {
748
+ "id": "certificateFile",
749
+ "label": "Certificate File",
750
+ "ctlType": "file",
751
+ "description": 'Please upload a valid "*.pfx" certificate file',
752
+ "required": True,
753
+ "defaultValue": "",
754
+ "value": "C:\\fakepath\\certificate 1 3 (1).pfx",
755
+ "visible": True,
756
+ "editable": True,
757
+ "fileDatabase64": f"data:application/x-pkcs12;base64,{certificate_file_content_base64}",
758
+ },
759
+ {
760
+ "id": "certificateFilePassword",
761
+ "label": "Certificate File Password",
762
+ "ctlType": "password",
763
+ "required": True,
764
+ "defaultValue": "",
765
+ "value": certificate_password,
766
+ "visible": True,
767
+ "editable": True,
768
+ },
769
+ {
770
+ "id": "proxy",
771
+ "label": "Proxy Service",
772
+ "ctlType": "boolean",
773
+ "description": "",
774
+ "required": False,
775
+ "defaultValue": "false",
776
+ "value": "true",
777
+ "visible": True,
778
+ "editable": True,
779
+ },
780
+ {
781
+ "id": "proxyScheme",
782
+ "label": "Proxy Scheme",
783
+ "ctlType": "select",
784
+ "description": "",
785
+ "required": False,
786
+ "defaultValue": "HTTP",
787
+ "value": "HTTP",
788
+ "visible": True,
789
+ "acceptedValues": [
790
+ "HTTP",
791
+ "HTTPS",
792
+ "SOCKS5",
793
+ ],
794
+ "editable": True,
795
+ },
796
+ {
797
+ "id": "proxyHost",
798
+ "label": "Proxy Host",
799
+ "ctlType": "text",
800
+ "description": "",
801
+ "required": False,
802
+ "defaultValue": "",
803
+ "value": "10.194.10.21",
804
+ "visible": True,
805
+ "editable": True,
806
+ },
807
+ {
808
+ "id": "proxyPort",
809
+ "label": "Proxy Port",
810
+ "ctlType": "text",
811
+ "description": "",
812
+ "required": False,
813
+ "defaultValue": "",
814
+ "value": "3128",
815
+ "visible": True,
816
+ "editable": True,
817
+ },
818
+ {
819
+ "id": "ProxyConfigService",
820
+ "label": "Proxy Config Service",
821
+ "ctlType": "text",
822
+ "description": "",
823
+ "required": False,
824
+ "defaultValue": "",
825
+ "value": "",
826
+ "visible": False,
827
+ "editable": True,
828
+ },
829
+ ],
830
+ "config": {
831
+ "type": "nifi",
832
+ "id": "MSTeams",
833
+ "crawlConfig": {
834
+ "name": "GetMicrosoftTeams",
835
+ "type": "idol.nifi.connector.GetMicrosoftTeams",
836
+ "group": "idol.nifi.connector",
837
+ "artifact": "idol-nifi-connector-officeteams",
838
+ "version": "25.1.0-nifi1",
839
+ },
840
+ "viewConfig": {
841
+ "name": "ViewMicrosoftTeams",
842
+ "type": "idol.nifi.connector.ViewMicrosoftTeams",
843
+ "group": "idol.nifi.connector",
844
+ "artifact": "idol-nifi-connector-officeteams",
845
+ "version": "25.1.0-nifi1",
846
+ },
847
+ "omniConfig": {
848
+ "name": "GetMicrosoftTeamsGroups",
849
+ "type": "idol.nifi.connector.GetMicrosoftTeamsGroups",
850
+ "group": "idol.nifi.connector",
851
+ "artifact": "idol-nifi-connector-officeteams",
852
+ "version": "25.1.0-nifi1",
853
+ "repoName": "OneDrive",
854
+ },
855
+ "crawlProps": {
856
+ "META:SOURCE": "Microsoft Teams",
857
+ "IndexUserChats": "${IndexUserChats}",
858
+ "MappedSecurity": "true",
859
+ "Oauth2SiteName": "${OAuth2SiteName}",
860
+ "Oauth2SitesFile": "${OAuth2SitesFile}",
861
+ "IndexAttachments": "${IndexAttachments}",
862
+ "ProxyConfigService": "${ProxyConfigService}",
863
+ "IndexCallRecordings": "${IndexCallRecordings}",
864
+ "IndexMessageReplies": "${IndexMessageReplies}",
865
+ "ChatMessageGroupingSection": "chat",
866
+ "ChannelMessageGroupingSection": "channel",
867
+ "[chat]MessageGroupingInterval": "24 hour",
868
+ "[chat]MessageGroupingStrategy": "Interval",
869
+ "[channel]MessageGroupingInterval": "24 hour",
870
+ "[channel]MessageGroupingStrategy": "Interval",
871
+ },
872
+ "viewProps": {
873
+ "Oauth2SiteName": "${OAuth2SiteName}",
874
+ "Oauth2SitesFile": "${OAuth2SitesFile}",
875
+ "ProxyConfigService": "${ProxyConfigService}",
876
+ },
877
+ "omniProps": {
878
+ "Oauth2SiteName": "${OAuth2SiteName}",
879
+ "Oauth2SitesFile": "${OAuth2SitesFile}",
880
+ "ProxyConfigService": "${ProxyConfigService}",
881
+ },
882
+ "metadataFields": [],
883
+ },
884
+ "name": name,
885
+ "id": "MSTeams",
886
+ "sourceId": "MSTeams",
887
+ }
888
+
889
+ request_header = self.request_header()
890
+ request_url = self.config()["repoUrl"]
891
+
892
+ response = self.do_request(
893
+ url=request_url,
894
+ method="POST",
895
+ json_data=payload,
896
+ headers=request_header,
897
+ timeout=None,
898
+ failure_message="Failed to create repository -> '{}'".format(name),
899
+ show_error=False,
900
+ )
901
+
902
+ if response is None:
903
+ self.logger.error("Failed to create repository -> %s", name)
904
+ return None
905
+
906
+ self.repo_admin_consent(response["id"])
907
+
908
+ return response
909
+
910
+ # end method definition
911
+
912
+ def create_sharepoint_repo(
913
+ self,
914
+ name: str,
915
+ client_id: str,
916
+ tenant_id: str,
917
+ certificate_file: str,
918
+ certificate_password: int,
919
+ sharepoint_url: str,
920
+ sharepoint_url_type: str,
921
+ sharepoint_mysite_url: str,
922
+ sharepoint_admin_url: str,
923
+ index_user_profiles: bool = True,
924
+ oauth2_site_name: str = "AVTS",
925
+ oauth2_sites_file: str = "",
926
+ ) -> dict | None:
927
+ """Create a new Microsoft SharePoint repository to crawl with Aviator Search.
928
+
929
+ Args:
930
+ name (str):
931
+ The name of the repository.
932
+ client_id (str):
933
+ The M365 client ID.
934
+ tenant_id (str):
935
+ The M365 tenant ID.
936
+ certificate_file (str):
937
+ TODO: _description_
938
+ certificate_password (int):
939
+ TODO: _description_
940
+ sharepoint_url (str):
941
+ The SharePoint URL.
942
+ sharepoint_url_type (str):
943
+ The SharePoint URL type.
944
+ sharepoint_mysite_url (str):
945
+ The SharePoint MySite URL.
946
+ sharepoint_admin_url (str):
947
+ The SharePoint administration URL.
948
+ index_user_profiles (bool, optional):
949
+ TODO: _description_. Defaults to True.
950
+ oauth2_site_name (str, optional):
951
+ TODO: _description_. Defaults to "AVTS".
952
+ oauth2_sites_file (str, optional):
953
+ TODO: _description_. Defaults to "".
954
+ version (str, optional):
955
+ TODO: _description_. Defaults to "24.3.0".
956
+
957
+ Returns:
958
+ dict | None:
959
+ Parsed response object from the API or None in case of an error
960
+
961
+ """
962
+
963
+ certificate_file_content_base64 = self.get_certificate_file_content_base64(
964
+ certificate_file,
965
+ )
966
+
967
+ payload = {
968
+ "authType": "OAUTH",
969
+ "params": [
970
+ {
971
+ "id": "OAuth2SiteName",
972
+ "label": "OAuth2 Site Name",
973
+ "ctlType": "text",
974
+ "required": False,
975
+ "defaultValue": "AVTS",
976
+ "value": oauth2_site_name,
977
+ "visible": False,
978
+ "editable": True,
979
+ },
980
+ {
981
+ "id": "OAuth2SitesFile",
982
+ "label": "OAuth2 Sites File",
983
+ "ctlType": "text",
984
+ "required": False,
985
+ "defaultValue": "",
986
+ "value": oauth2_sites_file,
987
+ "visible": False,
988
+ "editable": True,
989
+ },
990
+ {
991
+ "id": "sourceLink",
992
+ "label": "Source Link",
993
+ "ctlType": "text",
994
+ "description": "Example: https://<sharepoint host>${FILEDIRREF}/Forms/AllItems.aspx?id=${FILEREF}&parent=${FILEDIRREF}",
995
+ "required": False,
996
+ "defaultValue": "",
997
+ "visible": True,
998
+ "editable": True,
999
+ "value": sharepoint_url + "${FILEDIRREF}/Forms/AllItems.aspx?id=${FILEREF}&parent=${FILEDIRREF}",
1000
+ },
1001
+ {
1002
+ "id": "clientID",
1003
+ "label": "Client ID",
1004
+ "ctlType": "text",
1005
+ "description": "Microsoft Entra client ID",
1006
+ "required": True,
1007
+ "defaultValue": "",
1008
+ "value": client_id,
1009
+ "visible": True,
1010
+ "editable": True,
1011
+ },
1012
+ {
1013
+ "id": "tenant",
1014
+ "label": "Tenant ID",
1015
+ "ctlType": "text",
1016
+ "description": "Microsoft Entra tenant ID",
1017
+ "required": True,
1018
+ "defaultValue": "",
1019
+ "value": tenant_id,
1020
+ "visible": True,
1021
+ "editable": True,
1022
+ },
1023
+ {
1024
+ "id": "sharePointUrl",
1025
+ "label": "SharePoint URL",
1026
+ "ctlType": "text",
1027
+ "description": 'The URL to start synchronizing from. Specify a URL that matches "SharePoint URL type"',
1028
+ "required": True,
1029
+ "defaultValue": "",
1030
+ "value": sharepoint_mysite_url,
1031
+ "visible": True,
1032
+ "editable": False,
1033
+ },
1034
+ {
1035
+ "id": "MappedWebApplicationPolicies",
1036
+ "label": "Mapped Web Application Policies",
1037
+ "ctlType": "boolean",
1038
+ "required": False,
1039
+ "defaultValue": "false",
1040
+ "value": "false",
1041
+ "visible": True,
1042
+ "editable": False,
1043
+ },
1044
+ {
1045
+ "id": "sharePointAdminUrl",
1046
+ "label": "SharePoint Admin URL",
1047
+ "ctlType": "text",
1048
+ "description": "The URL of the admin site collection, for retrieving user profiles from SharePoint Online",
1049
+ "required": True,
1050
+ "defaultValue": "",
1051
+ "value": sharepoint_admin_url,
1052
+ "visible": True,
1053
+ "editable": False,
1054
+ },
1055
+ {
1056
+ "id": "sharePointMySiteUrl",
1057
+ "label": "SharePoint MySite URL",
1058
+ "ctlType": "text",
1059
+ "description": "The URL of the MySites site collection, for retrieving user profiles from SharePoint Online",
1060
+ "required": True,
1061
+ "defaultValue": "",
1062
+ "value": sharepoint_mysite_url,
1063
+ "visible": True,
1064
+ "editable": False,
1065
+ },
1066
+ {
1067
+ "id": "sharePointOnline",
1068
+ "label": "SharePoint Online",
1069
+ "ctlType": "boolean",
1070
+ "description": "Specifies whether to retrieve data from SharePoint Online. To retrieve data from a SharePoint Online dedicated server set this to false",
1071
+ "required": False,
1072
+ "defaultValue": "true",
1073
+ "value": "true",
1074
+ "visible": False,
1075
+ "editable": False,
1076
+ },
1077
+ {
1078
+ "id": "TenantAdminSitesIncludeTypes",
1079
+ "label": "Tenant Admin Sites IncludeTypes",
1080
+ "ctlType": "text",
1081
+ "description": "This parameter helps to filter the results to include only specific types of sites",
1082
+ "required": False,
1083
+ "defaultValue": "all",
1084
+ "value": "all",
1085
+ "visible": False,
1086
+ "editable": False,
1087
+ },
1088
+ {
1089
+ "id": "URLType",
1090
+ "label": "SharePoint URL Type",
1091
+ "ctlType": "select",
1092
+ "description": 'The type of URL specified by "Sharepoint URL"',
1093
+ "required": True,
1094
+ "defaultValue": "",
1095
+ "value": sharepoint_url_type,
1096
+ "visible": True,
1097
+ "acceptedValues": [
1098
+ "WebApplication",
1099
+ "SiteCollection",
1100
+ "PersonalSiteCollection",
1101
+ "TenantAdmin",
1102
+ ],
1103
+ "editable": False,
1104
+ },
1105
+ {
1106
+ "id": "IndexUserProfiles",
1107
+ "label": "Index User Profiles",
1108
+ "ctlType": "boolean",
1109
+ "description": "Specifies whether to index information from user profiles",
1110
+ "required": True,
1111
+ "defaultValue": "false",
1112
+ "value": str(index_user_profiles).lower(),
1113
+ "visible": True,
1114
+ "editable": True,
1115
+ },
1116
+ {
1117
+ "id": "certificateFile",
1118
+ "label": "Certificate File",
1119
+ "ctlType": "file",
1120
+ "description": 'Please upload a valid "*.pfx" certificate file',
1121
+ "required": True,
1122
+ "defaultValue": "",
1123
+ "value": "C:\\fakepath\\certificate 1 3 (1).pfx",
1124
+ "visible": True,
1125
+ "editable": True,
1126
+ "fileDatabase64": f"data:application/x-pkcs12;base64,{certificate_file_content_base64}",
1127
+ },
1128
+ {
1129
+ "id": "certificateFilePassword",
1130
+ "label": "Certificate File Password",
1131
+ "ctlType": "password",
1132
+ "required": True,
1133
+ "defaultValue": "",
1134
+ "value": certificate_password,
1135
+ "visible": True,
1136
+ "editable": True,
1137
+ },
1138
+ {
1139
+ "id": "proxy",
1140
+ "label": "Proxy Service",
1141
+ "ctlType": "boolean",
1142
+ "description": "",
1143
+ "required": False,
1144
+ "defaultValue": "false",
1145
+ "value": "true",
1146
+ "visible": True,
1147
+ "editable": True,
1148
+ },
1149
+ {
1150
+ "id": "proxyScheme",
1151
+ "label": "Proxy Scheme",
1152
+ "ctlType": "select",
1153
+ "description": "",
1154
+ "required": False,
1155
+ "defaultValue": "HTTP",
1156
+ "value": "HTTP",
1157
+ "visible": True,
1158
+ "acceptedValues": [
1159
+ "HTTP",
1160
+ "HTTPS",
1161
+ "SOCKS5",
1162
+ ],
1163
+ "editable": True,
1164
+ },
1165
+ {
1166
+ "id": "proxyHost",
1167
+ "label": "Proxy Host",
1168
+ "ctlType": "text",
1169
+ "description": "",
1170
+ "required": False,
1171
+ "defaultValue": "",
1172
+ "value": "10.194.10.21",
1173
+ "visible": True,
1174
+ "editable": True,
1175
+ },
1176
+ {
1177
+ "id": "proxyPort",
1178
+ "label": "Proxy Port",
1179
+ "ctlType": "text",
1180
+ "description": "",
1181
+ "required": False,
1182
+ "defaultValue": "",
1183
+ "value": "3128",
1184
+ "visible": True,
1185
+ "editable": True,
1186
+ },
1187
+ {
1188
+ "id": "ProxyConfigService",
1189
+ "label": "Proxy Config Service",
1190
+ "ctlType": "text",
1191
+ "description": "",
1192
+ "required": False,
1193
+ "defaultValue": "",
1194
+ "value": "",
1195
+ "visible": False,
1196
+ "editable": True,
1197
+ },
1198
+ ],
1199
+ "config": {
1200
+ "type": "nifi",
1201
+ "id": "SharePoint",
1202
+ "crawlConfig": {
1203
+ "name": "GetSharePointOData",
1204
+ "type": "idol.nifi.connector.GetSharePointOData",
1205
+ "group": "idol.nifi.connector",
1206
+ "artifact": "idol-nifi-connector-sharepointodata",
1207
+ "version": "25.1.0-nifi1",
1208
+ },
1209
+ "viewConfig": {
1210
+ "name": "ViewSharePointOData",
1211
+ "type": "idol.nifi.connector.ViewSharePointOData",
1212
+ "group": "idol.nifi.connector",
1213
+ "artifact": "idol-nifi-connector-sharepointodata",
1214
+ "version": "25.1.0-nifi1",
1215
+ },
1216
+ "omniConfig": {
1217
+ "name": "GetSharePointGroupsOData",
1218
+ "type": "idol.nifi.connector.GetSharePointGroupsOData",
1219
+ "group": "idol.nifi.connector",
1220
+ "artifact": "idol-nifi-connector-sharepointodata",
1221
+ "version": "25.1.0-nifi1",
1222
+ "repoName": "SharePoint",
1223
+ },
1224
+ "crawlProps": {
1225
+ "META:SOURCE": "SharePoint Online",
1226
+ "SharepointUrl": "${sharePointUrl}",
1227
+ "MappedSecurity": "true",
1228
+ "Oauth2SiteName": "${OAuth2SiteName}",
1229
+ "Oauth2SitesFile": "${OAuth2SitesFile}",
1230
+ "SharepointOnline": "${sharePointOnline}",
1231
+ "IndexUserProfiles": "${IndexUserProfiles}",
1232
+ "SharepointUrlType": "${URLType}",
1233
+ "ProxyConfigService": "${ProxyConfigService}",
1234
+ "SharepointAdminUrl": "${sharePointAdminUrl}",
1235
+ "SharepointMySiteUrl": "${sharePointMySiteUrl}",
1236
+ "RetrieveUserDetailsAs": "Title",
1237
+ "MappedWebApplicationPolicies": "${MappedWebApplicationPolicies}",
1238
+ "TenantAdminSitesIncludeTypes": "${TenantAdminSitesIncludeTypes}",
1239
+ },
1240
+ "viewProps": {
1241
+ "SharepointUrl": "${sharePointUrl}",
1242
+ "Oauth2SiteName": "${OAuth2SiteName}",
1243
+ "Oauth2SitesFile": "${OAuth2SitesFile}",
1244
+ "SharepointOnline": "${sharePointOnline}",
1245
+ "SharepointUrlType": "${URLType}",
1246
+ "ProxyConfigService": "${ProxyConfigService}",
1247
+ "SharepointAdminUrl": "${sharePointAdminUrl}",
1248
+ "SharepointMySiteUrl": "${sharePointMySiteUrl}",
1249
+ "MappedWebApplicationPolicies": "${MappedWebApplicationPolicies}",
1250
+ },
1251
+ "omniProps": {
1252
+ "SharepointUrl": "${sharePointUrl}",
1253
+ "Oauth2SiteName": "${OAuth2SiteName}",
1254
+ "Oauth2SitesFile": "${OAuth2SitesFile}",
1255
+ "SharepointOnline": "true",
1256
+ "SharepointUrlType": "${URLType}",
1257
+ "ProxyConfigService": "${ProxyConfigService}",
1258
+ "SharepointAdminUrl": "${sharePointAdminUrl}",
1259
+ "SharepointMySiteUrl": "${sharePointMySiteUrl}",
1260
+ "MappedWebApplicationPolicies": "false",
1261
+ "TenantAdminSitesIncludeTypes": "${TenantAdminSitesIncludeTypes}",
1262
+ },
1263
+ "metadataFields": [
1264
+ "FILEREF",
1265
+ "FILEDIRREF",
1266
+ ],
1267
+ },
1268
+ "name": name,
1269
+ "id": "SharePoint",
1270
+ "sourceId": "SharePoint",
1271
+ }
1272
+
1273
+ request_header = self.request_header()
1274
+ request_url = self.config()["repoUrl"]
1275
+
1276
+ response = self.do_request(
1277
+ url=request_url,
1278
+ method="POST",
1279
+ json_data=payload,
1280
+ headers=request_header,
1281
+ timeout=None,
1282
+ failure_message="Failed to create repository -> '{}'".format(name),
1283
+ show_error=False,
1284
+ )
1285
+
1286
+ if response is None:
1287
+ self.logger.error("Failed to create repository -> %s", name)
1288
+ return None
1289
+
1290
+ self.repo_admin_consent(response["id"])
1291
+
1292
+ return response
1293
+
1294
+ # end method definition
1295
+
1296
+ def repo_admin_consent(self, repo_id: str) -> dict | None:
1297
+ """Send admin consent information for a repository.
1298
+
1299
+ Args:
1300
+ repo_id (str):
1301
+ The ID of the repository.
1302
+
1303
+ Returns:
1304
+ dict | None:
1305
+ Parsed response object from the API or None in case of an error
1306
+
1307
+ """
1308
+
1309
+ request_header = self.request_header()
1310
+ request_url = self.config()["repoUrl"]
1311
+
1312
+ request_url = self.config()["repoUrl"] + "/" + repo_id + "/authorize?admin_consent=true"
1313
+
1314
+ return self.do_request(
1315
+ url=request_url,
1316
+ method="GET",
1317
+ headers=request_header,
1318
+ timeout=None,
1319
+ failure_message="Failed to set admin_consent for repository -> '{}'".format(
1320
+ repo_id,
1321
+ ),
1322
+ )
1323
+
1324
+ # end method definition
1325
+
1326
+ def start_crawling(self, repo_name: str) -> list | None:
1327
+ """Start crawling of a repository.
1328
+
1329
+ Args:
1330
+ repo_name (str):
1331
+ The name of the repository.
1332
+
1333
+ Returns:
1334
+ list | None:
1335
+ Parsed response object from the API or None in case of an error
1336
+
1337
+ """
1338
+
1339
+ self.logger.info("Start crawling repository -> %s", repo_name)
1340
+
1341
+ repo = self.get_repo_by_name(name=repo_name)
1342
+ if repo is None:
1343
+ return None
1344
+
1345
+ request_header = self.request_header()
1346
+ request_url = self.config()["repoUrl"] + "/start/" + repo.get("repoId")
1347
+
1348
+ return self.do_request(
1349
+ url=request_url,
1350
+ method="POST",
1351
+ headers=request_header,
1352
+ timeout=None,
1353
+ failure_message="Failed to start crawling repository -> '{}'".format(
1354
+ repo_name,
1355
+ ),
1356
+ )
1357
+
1358
+ # end method definition
1359
+
1360
+ def stop_crawling(self, repo_name: str) -> list | None:
1361
+ """Stop the crawling of a repository.
1362
+
1363
+ Args:
1364
+ repo_name (str):
1365
+ The name of the repository.
1366
+
1367
+ Returns:
1368
+ list | None:
1369
+ Parsed response object from the API or None in case of an error
1370
+
1371
+ """
1372
+
1373
+ repo = self.get_repo_by_name(name=repo_name)
1374
+ if repo is None:
1375
+ return None
1376
+
1377
+ request_header = self.request_header()
1378
+ request_url = self.config()["repoUrl"] + "/stop/" + repo.get("repoId")
1379
+
1380
+ return self.do_request(
1381
+ url=request_url,
1382
+ method="POST",
1383
+ headers=request_header,
1384
+ timeout=None,
1385
+ failure_message="Failed to stop crawling repository -> '{}'".format(
1386
+ repo_name,
1387
+ ),
1388
+ )
1389
+
1390
+ # end method definition
1391
+
1392
+ def get_repo_list(self) -> list | None:
1393
+ """Get a list of all repositories.
1394
+
1395
+ Returns:
1396
+ list | None:
1397
+ Parsed response object from the API listing all repositories or None in case of an error.
1398
+
1399
+ """
1400
+
1401
+ request_header = self.request_header()
1402
+ request_url = self.config()["repoUrl"]
1403
+
1404
+ return self.do_request(
1405
+ url=request_url,
1406
+ method="GET",
1407
+ headers=request_header,
1408
+ timeout=None,
1409
+ failure_message="Failed to get list of repositories to crawl",
1410
+ )
1411
+
1412
+ # end method definition
1413
+
1414
+ def get_repo_by_name(self, name: str) -> dict | None:
1415
+ """Get a repository by name.
1416
+
1417
+ Args:
1418
+ name (str):
1419
+ The name of the repository.
1420
+
1421
+ Returns:
1422
+ dict | None:
1423
+ ID of a repostiory by name or None in case of an error
1424
+
1425
+ """
1426
+
1427
+ repo_list = self.get_repo_list()
1428
+
1429
+ if repo_list is None:
1430
+ return None
1431
+
1432
+ return next(
1433
+ (repo for repo in repo_list if repo.get("repoName", "") == name),
1434
+ None,
1435
+ )
1436
+
1437
+ # end method definition
1438
+
1439
+ def get_certificate_file_content_base64(self, filepath: str) -> str | None:
1440
+ """Return the certificate as a base64 string.
1441
+
1442
+ In Kubernetes deploymnets the certificate is already mounted base64 encoded.
1443
+
1444
+ Args:
1445
+ filepath (str):
1446
+ The path to the certificate file.
1447
+
1448
+ Returns:
1449
+ str | None:
1450
+ Base64 encoded certificate file content.
1451
+
1452
+ """
1453
+
1454
+ if not os.path.isfile(filepath):
1455
+ return None
1456
+
1457
+ file_ext = os.path.splitext(filepath)[1].lower()
1458
+
1459
+ if self.running_in_kubernetes_pod() and file_ext == ".pfx":
1460
+ # Return file directly as already base64 encoded
1461
+ self.logger.warning(
1462
+ "Detected a binary pfx file in Kubernetes environment, expecting it to be already base64 encoded",
1463
+ )
1464
+ with open(filepath, encoding="UTF-8") as file:
1465
+ return file.read().strip()
1466
+
1467
+ else:
1468
+ # Return file as base64 encoded
1469
+ with open(filepath, "rb") as file:
1470
+ # Read the content of the file
1471
+ file_content = file.read()
1472
+ # Convert the bytes to a base64 string
1473
+ return base64.b64encode(file_content).decode("utf-8")
1474
+
1475
+ # end method definition
1476
+
1477
def running_in_kubernetes_pod(self) -> bool:
    """Tell whether the process runs inside a Kubernetes pod.

    The check is based on two environment variables:
    `KUBERNETES_SERVICE_HOST` and `KUBERNETES_SERVICE_PORT`.
    Both have to be set to a non-empty value.

    Returns:
        bool:
            True if both variables are set and non-empty, False otherwise.

    """

    required_variables = ("KUBERNETES_SERVICE_HOST", "KUBERNETES_SERVICE_PORT")

    return all(os.getenv(variable) for variable in required_variables)
1491
+
1492
+ # end method definition