pyxecm 1.5__py3-none-any.whl → 1.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pyxecm might be problematic. Click here for more details.

pyxecm/avts.py ADDED
@@ -0,0 +1,1065 @@
1
+ """
2
+ AVTS stands for Aviator Search and is an OpenText offering for LLMM-based search across multiple repositories
3
+
4
+ Class: AVTS
5
+ Methods:
6
+ __init__: class initializer
7
+ request_header: Returns the request header used for Application calls.
8
+ do_request: Call an Aviator Search REST API in a safe way
9
+ parse_request_response: Converts the request response (JSon) to a Python list in a safe way
10
+ authenticate: Authenticate at Search Aviator via oAuth authentication
11
+ repo_create_extended_ecm: Create a new repository to crawl in Aviator Search
12
+ start_crawling: Start crawling of a repository
13
+ stop_crawling: Stop the crawling of a repository
14
+ get_repo_list: Get a list of all repositories
15
+ get_repo_by_name: Get a repository by name
16
+ """
17
+
18
+ __author__ = "Dr. Marc Diefenbruch"
19
+ __copyright__ = "Copyright 2024, OpenText"
20
+ __credits__ = ["Kai-Philip Gatzweiler"]
21
+ __maintainer__ = "Dr. Marc Diefenbruch"
22
+ __email__ = "mdiefenb@opentext.com"
23
+
24
+ import json
25
+ import logging
26
+ import time
27
+ import os
28
+ import base64
29
+
30
+ import requests
31
+
32
# Logger used by every method in this module.
logger = logging.getLogger("pyxecm.customizer.avts")

# Base headers for JSON REST calls (see AVTS.request_header()).
REQUEST_HEADERS = {
    "Accept": "application/json",
    "Content-Type": "application/json",
}

# Defaults used by AVTS.do_request(): timeout of a single call (seconds),
# pause between two attempts (seconds) and number of retries on
# timeouts / connection errors.
REQUEST_TIMEOUT, REQUEST_RETRY_DELAY, REQUEST_MAX_RETRIES = 60, 20, 2
39
+
40
+
41
+ class AVTS(object):
42
+ """Used to configure and interact with Aviator Search"""
43
+
44
+ _config: dict
45
+ _session = None
46
+
47
def __init__(
    self,
    otds_url: str,
    client_id: str,
    client_secret: str,
    base_url: str,
    username: str,
    password: str,
):
    """Set up the AVTS object.

    All parameters are kept in a configuration dictionary together with
    the two URLs derived from them (OTDS token endpoint and Aviator
    Search repository endpoint). A fresh requests session is created and
    no access token is known yet.

    Args:
        otds_url (str): URL of the OTDS Server used by Aviator Search
        client_id (str): Client ID for the Aviator Search oAuth client
        client_secret (str): Client Secret for the Aviator Search oAuth client
        base_url (str): Aviator Search base URL
        username (str): User with administrative permissions in Aviator Search
        password (str): Password of the user with administrative permissions in Aviator Search
    """

    self._config = {
        # Credentials and parameters as handed in by the caller:
        "otdsUrl": otds_url,
        "clientId": client_id,
        "clientSecret": client_secret,
        "baseUrl": base_url,
        "username": username,
        "password": password,
        # Endpoints derived from the two base URLs:
        "tokenUrl": otds_url + "/otdsws/oauth2/token",
        "repoUrl": base_url + "/aviator-gateway/avts-api/admin/v1/repo",
    }
    self._accesstoken = None

    self._session = requests.Session()
86
+
87
+ # end method definition
88
+
89
def config(self) -> dict:
    """Give access to the settings stored on this object.

    Returns:
        dict: the configuration dictionary itself (not a copy)
    """
    settings = self._config
    return settings
96
+
97
+ # end method definition
98
+
99
def request_header(self, content_type: str = "") -> dict:
    """Returns the request header used for Application calls.
    Consists of Bearer access token and Content Type

    Args:
        content_type (str, optional): custom content type for the request.
                                      If empty the content type of
                                      REQUEST_HEADERS is kept.
    Return:
        dict: request header values. A new dictionary is returned on every
              call, so callers may modify it freely.
    """

    # Work on a copy: REQUEST_HEADERS is a module-level constant. Assigning
    # it directly and then adding "Content-Type" / "Authorization" would
    # modify the constant itself and leak a custom content type or an
    # access token into every later call (and every other AVTS object).
    request_header = dict(REQUEST_HEADERS)

    if content_type:
        request_header["Content-Type"] = content_type

    if self._accesstoken is not None:
        request_header["Authorization"] = f"Bearer {self._accesstoken}"

    return request_header
120
+
121
+ # end method definition
122
+
123
def do_request(
    self,
    url: str,
    method: str = "GET",
    headers: dict | None = None,
    data: dict | None = None,
    json_data: dict | None = None,
    files: dict | None = None,
    timeout: int | None = REQUEST_TIMEOUT,
    show_error: bool = True,
    failure_message: str = "",
    success_message: str = "",
    max_retries: int = REQUEST_MAX_RETRIES,
    retry_forever: bool = False,
) -> dict | None:
    """Call an Aviator Search REST API in a safe way

    Args:
        url (str): URL to send the request to.
        method (str, optional): HTTP method (GET, POST, etc.). Defaults to "GET".
        headers (dict | None, optional): Request Headers. Defaults to None.
        data (dict | None, optional): Request payload sent as "data" (form encoded). Defaults to None.
        json_data (dict | None, optional): Request payload sent as JSON. Defaults to None.
        files (dict | None, optional): Dictionary of {"name": file-tuple} for multipart encoding upload.
                                       file-tuple can be a 2-tuple ("filename", fileobj) or a 3-tuple ("filename", fileobj, "content_type")
        timeout (int | None, optional): Timeout for the request in seconds. Defaults to REQUEST_TIMEOUT.
        show_error (bool, optional): Whether or not an error should be logged in case of a failed REST call.
                                     If False, then only a warning is logged. Defaults to True.
        failure_message (str, optional): Specific error message. Defaults to "".
        success_message (str, optional): Message written to the debug log if the call succeeds. Defaults to "".
        max_retries (int, optional): How many retries on Connection errors? Default is REQUEST_MAX_RETRIES.
        retry_forever (bool, optional): Eventually wait forever - without timeout. Defaults to False.

    Returns:
        dict | None: Response of Aviator Search REST API or None in case of an error.
    """

    retries = 0
    # Re-authentication is attempted at most once per call and is tracked
    # separately from the connection retries, so that a previous timeout
    # does not suppress it.
    reauthenticated = False
    # A 401 returned by the token endpoint means the credentials were
    # rejected. Calling authenticate() from there would call this method
    # for the token endpoint again and recurse without end.
    is_token_request = url == self.config().get("tokenUrl")

    while True:
        try:
            response = self._session.request(
                method=method,
                url=url,
                data=data,
                json=json_data,
                files=files,
                headers=headers,
                timeout=timeout,
            )

            if response.ok:
                if success_message:
                    logger.debug(success_message)
                return self.parse_request_response(response)

            # Check if Session has expired - then re-authenticate and try once more
            if (
                response.status_code == 401
                and not reauthenticated
                and not is_token_request
            ):
                logger.debug("Session has expired - try to re-authenticate...")
                reauthenticated = True
                self.authenticate()
                # The headers were built by the caller before the token was
                # renewed. Replace the stale bearer token (in a copy, the
                # caller's dictionary stays untouched).
                if (
                    headers
                    and "Authorization" in headers
                    and self._accesstoken is not None
                ):
                    headers = {
                        **headers,
                        "Authorization": f"Bearer {self._accesstoken}",
                    }
                continue

            # Handle plain HTML responses to not pollute the logs.
            # The header may carry parameters ("text/html; charset=utf-8").
            content_type = response.headers.get("content-type") or ""
            is_html = content_type.lower().startswith("text/html")
            response_text = (
                "HTML content (see debug log)" if is_html else response.text
            )

            if show_error:
                logger.error(
                    "%s; status -> %s; error -> %s",
                    failure_message,
                    response.status_code,
                    response_text,
                )
            else:
                logger.warning(
                    "%s; status -> %s; warning -> %s",
                    failure_message,
                    response.status_code,
                    response_text,
                )

            if is_html:
                logger.debug(
                    "%s; status -> %s; warning -> %s",
                    failure_message,
                    response.status_code,
                    response.text,
                )

            return None
        except requests.exceptions.Timeout:
            if retries <= max_retries:
                logger.warning(
                    "Request timed out. Retrying in %s seconds...",
                    str(REQUEST_RETRY_DELAY),
                )
                retries += 1
                time.sleep(REQUEST_RETRY_DELAY)  # Add a delay before retrying
            else:
                logger.error(
                    "%s; timeout error",
                    failure_message,
                )
                if retry_forever:
                    # If it fails after REQUEST_MAX_RETRIES retries we let it wait forever
                    logger.warning("Turn timeouts off and wait forever...")
                    timeout = None
                else:
                    return None
        except requests.exceptions.ConnectionError:
            if retries <= max_retries:
                logger.warning(
                    "Connection error. Retrying in %s seconds...",
                    str(REQUEST_RETRY_DELAY),
                )
                retries += 1
                time.sleep(REQUEST_RETRY_DELAY)  # Add a delay before retrying
            else:
                logger.error(
                    "%s; connection error",
                    failure_message,
                )
                if retry_forever:
                    # If it fails after REQUEST_MAX_RETRIES retries we let it wait forever
                    logger.warning("Turn timeouts off and wait forever...")
                    timeout = None
                    time.sleep(REQUEST_RETRY_DELAY)  # Add a delay before retrying
                else:
                    return None
251
+
252
+ # end method definition
253
+
254
+ def parse_request_response(
255
+ self,
256
+ response_object: requests.Response,
257
+ additional_error_message: str = "",
258
+ show_error: bool = True,
259
+ ) -> list | None:
260
+ """Converts the request response (JSon) to a Python list in a safe way
261
+ that also handles exceptions. It first tries to load the response.text
262
+ via json.loads() that produces a dict output. Only if response.text is
263
+ not set or is empty it just converts the response_object to a dict using
264
+ the vars() built-in method.
265
+
266
+ Args:
267
+ response_object (object): this is reponse object delivered by the request call
268
+ additional_error_message (str, optional): use a more specific error message
269
+ in case of an error
270
+ show_error (bool): True: write an error to the log file
271
+ False: write a warning to the log file
272
+ Returns:
273
+ list: response information or None in case of an error
274
+ """
275
+
276
+ if not response_object:
277
+ return None
278
+
279
+ try:
280
+ if response_object.text:
281
+ list_object = json.loads(response_object.text)
282
+ else:
283
+ list_object = vars(response_object)
284
+ except json.JSONDecodeError as exception:
285
+ if additional_error_message:
286
+ message = "Cannot decode response as JSON. {}; error -> {}".format(
287
+ additional_error_message, exception
288
+ )
289
+ else:
290
+ message = "Cannot decode response as JSON; error -> {}".format(
291
+ exception
292
+ )
293
+ if show_error:
294
+ logger.error(message)
295
+ else:
296
+ logger.warning(message)
297
+ return None
298
+ else:
299
+ return list_object
300
+
301
+ # end method definition
302
+
303
+ def authenticate(self) -> str | None:
304
+ """Authenticate at Search Aviator via oAuth authentication."""
305
+
306
+ if not self._session:
307
+ self._session = requests.Session()
308
+
309
+ self._session.headers.update(self.request_header())
310
+
311
+ request_url = self.config()["tokenUrl"]
312
+ request_header = {
313
+ "Authorization": "Bearer ",
314
+ "content-type": "application/x-www-form-urlencoded",
315
+ }
316
+ request_payload = {
317
+ "client_id": self.config()["clientId"],
318
+ "grant_type": "password",
319
+ "client_secret": self.config()["clientSecret"],
320
+ "username": self.config()["username"],
321
+ "password": self.config()["password"],
322
+ }
323
+
324
+ response = self.do_request(
325
+ url=request_url,
326
+ method="POST",
327
+ headers=request_header,
328
+ data=request_payload,
329
+ timeout=None,
330
+ failure_message=f"Failed to authenticate to OTDS with username -> {self.config()['username']} and client_id -> {self.config()['clientId']}",
331
+ )
332
+
333
+ if response is not None:
334
+ self._accesstoken = response.get("access_token", None)
335
+
336
+ return response
337
+
338
+ # end method definition
339
+
340
+ def repo_create_extended_ecm(
341
+ self,
342
+ name: str,
343
+ username: str,
344
+ password: str,
345
+ otcs_url: str,
346
+ otcs_api_url: str,
347
+ node_id: int,
348
+ version: str = "24.3.0",
349
+ ) -> dict | None:
350
+ """Create a new repository to crawl in Aviator Search
351
+
352
+ Args:
353
+ id (str): ID of the repository
354
+ name (str): socName of the repository
355
+ username (str): Username to use for crawling
356
+ password (str): Password of the user used for crawling
357
+ otcs_url (str): Base URL of Content Server e.g. https://otcs.base-url.tld/cs/cs
358
+ node_id (int): Root Node ID for crawling
359
+
360
+ Returns:
361
+ dict | None: Parsed response object from the API or None in case of an error
362
+ """
363
+
364
+ payload = {
365
+ "id": "xECM",
366
+ "name": name,
367
+ "metadataFields": ["NODE"],
368
+ "socName": "xECM",
369
+ "params": [
370
+ {
371
+ "id": "OpenTextApiUrl",
372
+ "label": "xECM API URL",
373
+ "ctlType": "text",
374
+ "required": True,
375
+ "value": otcs_api_url,
376
+ },
377
+ {
378
+ "id": "Username",
379
+ "label": "xECM username",
380
+ "ctlType": "text",
381
+ "required": True,
382
+ "value": username,
383
+ },
384
+ {
385
+ "id": "Password",
386
+ "label": "xECM Password",
387
+ "ctlType": "password",
388
+ "required": True,
389
+ "value": password,
390
+ },
391
+ {
392
+ "id": "RootNodeId",
393
+ "label": "Root Node ID",
394
+ "ctlType": "text",
395
+ "required": True,
396
+ "value": node_id,
397
+ },
398
+ {
399
+ "id": "sourceLink",
400
+ "label": "Source Link( ex:https://<xECM host>/cs/cs/app/nodes/${NODE}/metadata )",
401
+ "ctlType": "text",
402
+ "required": False,
403
+ "defaultValue": otcs_url + "/app/nodes/${NODE}/metadata",
404
+ "visible": True,
405
+ },
406
+ ],
407
+ "idolConfig": {
408
+ "view": {
409
+ "name": "ViewOpenText",
410
+ "type": "idol.nifi.connector.ViewOpenText",
411
+ "group": "idol.nifi.connector",
412
+ "artifact": "idol-nifi-connector-opentext",
413
+ "version": version,
414
+ },
415
+ "crawler": {
416
+ "name": "GetOpenText",
417
+ "type": "idol.nifi.connector.GetOpenText",
418
+ "group": "idol.nifi.connector",
419
+ "artifact": "idol-nifi-connector-opentext",
420
+ "version": version,
421
+ },
422
+ "omniGroup": {
423
+ "name": "GetOpenTextGroups",
424
+ "type": "idol.nifi.connector.GetOpenTextGroups",
425
+ "group": "idol.nifi.connector",
426
+ "artifact": "idol-nifi-connector-opentext",
427
+ "version": version,
428
+ },
429
+ },
430
+ "idolProperties": {
431
+ "view": {
432
+ "Password": "${Password}",
433
+ "Username": "${UserName}",
434
+ "OpenTextApiUrl": "${OpenTextApiUrl}",
435
+ },
436
+ "crawler": {
437
+ "Password": "${Password}",
438
+ "Username": "${UserName}",
439
+ "RootNodeId": "${RootNodeId}",
440
+ "META:SOURCE": "OPENTEXT",
441
+ "MappedSecurity": "true",
442
+ "OpenTextApiUrl": "${OpenTextApiUrl}",
443
+ },
444
+ "omniGroup": {
445
+ "Password": "${Password}",
446
+ "Username": "${UserName}",
447
+ "OpenTextApiUrl": "${OpenTextApiUrl}",
448
+ "OpenTextApiPageSize": "10",
449
+ },
450
+ },
451
+ }
452
+
453
+ request_header = self.request_header()
454
+ request_url = self.config()["repoUrl"]
455
+
456
+ return self.do_request(
457
+ url=request_url,
458
+ method="POST",
459
+ json_data=payload,
460
+ headers=request_header,
461
+ timeout=None,
462
+ failure_message="Failed to create repository -> '{}' ({})".format(
463
+ name, node_id
464
+ ),
465
+ )
466
+
467
+ # end method definition
468
+
469
+ def repo_create_msteams(
470
+ self,
471
+ name: str,
472
+ client_id: str,
473
+ tenant_id: str,
474
+ certificate_file: str,
475
+ certificate_password: int,
476
+ index_attachments: bool = True,
477
+ index_call_recordings: bool = True,
478
+ index_message_replies: bool = True,
479
+ index_user_chats: bool = True,
480
+ oauth2_site_name: str = "AVTS",
481
+ oauth2_sites_file: str = "",
482
+ version: str = "24.3.0",
483
+ ) -> dict | None:
484
+ """Create a new repository to crawl in Aviator Search
485
+
486
+ Args:
487
+ id (str): ID of the repository
488
+ name (str): socName of the repository
489
+ #todo: add more params
490
+
491
+ Returns:
492
+ dict | None: Parsed response object from the API or None in case of an error
493
+ """
494
+
495
+ if os.path.isfile(certificate_file):
496
+ # Open the file in binary mode
497
+ with open(certificate_file, "rb") as file:
498
+ # Read the content of the file
499
+ certificate_file_content = file.read()
500
+ # Convert the bytes to a base64 string
501
+ certificate_file_content_base64 = base64.b64encode(
502
+ certificate_file_content
503
+ ).decode("utf-8")
504
+
505
+ payload = {
506
+ "id": "MSTeams",
507
+ "socName": "Microsoft Teams",
508
+ "authType": "OAUTH",
509
+ "name": name,
510
+ "params": [
511
+ {
512
+ "id": "OAuth2SiteName",
513
+ "label": "OAuth2 Site Name",
514
+ "ctlType": "text",
515
+ "required": False,
516
+ "defaultValue": "AVTS",
517
+ "value": "AVTS",
518
+ "visible": False,
519
+ },
520
+ {
521
+ "id": "OAuth2SitesFile",
522
+ "label": "OAuth2 Sites File",
523
+ "ctlType": "text",
524
+ "required": False,
525
+ "defaultValue": "",
526
+ "value": "",
527
+ "visible": False,
528
+ },
529
+ {
530
+ "id": "sourceLink",
531
+ "label": "Source Link",
532
+ "ctlType": "text",
533
+ "required": False,
534
+ "defaultValue": "",
535
+ "visible": True,
536
+ },
537
+ {
538
+ "id": "clientID",
539
+ "label": "Client ID",
540
+ "ctlType": "text",
541
+ "description": "Microsoft Entra client ID",
542
+ "required": True,
543
+ "defaultValue": "",
544
+ "value": client_id,
545
+ "visible": True,
546
+ },
547
+ {
548
+ "id": "tenant",
549
+ "label": "Tenant ID",
550
+ "ctlType": "text",
551
+ "description": "Microsoft Entra tenant ID",
552
+ "required": True,
553
+ "defaultValue": "",
554
+ "value": tenant_id,
555
+ "visible": True,
556
+ },
557
+ {
558
+ "id": "IndexAttachments",
559
+ "label": "Index Attachments",
560
+ "ctlType": "boolean",
561
+ "description": "Specifies whether to index attachments",
562
+ "required": False,
563
+ "defaultValue": "true",
564
+ "value": str(index_attachments).lower(),
565
+ "visible": True,
566
+ },
567
+ {
568
+ "id": "IndexCallRecordings",
569
+ "label": "Index Call Recordings",
570
+ "ctlType": "boolean",
571
+ "description": "Specifies whether to index call recordings",
572
+ "required": False,
573
+ "defaultValue": "true",
574
+ "value": str(index_call_recordings).lower(),
575
+ "visible": True,
576
+ },
577
+ {
578
+ "id": "IndexMessageReplies",
579
+ "label": "Index Message Replies",
580
+ "ctlType": "boolean",
581
+ "description": "Specifies whether to index replies to messages",
582
+ "required": False,
583
+ "defaultValue": "true",
584
+ "value": str(index_message_replies).lower(),
585
+ "visible": True,
586
+ },
587
+ {
588
+ "id": "IndexUserChats",
589
+ "label": "Index User Chats",
590
+ "ctlType": "boolean",
591
+ "description": "Specifies whether to synchronize one-to-one and group messages for each user",
592
+ "required": False,
593
+ "defaultValue": "true",
594
+ "value": str(index_user_chats).lower(),
595
+ "visible": True,
596
+ },
597
+ {
598
+ "id": "certificateFile",
599
+ "label": "Certificate File",
600
+ "ctlType": "file",
601
+ "description": 'Please upload a valid "*.pfx" certificate file',
602
+ "required": True,
603
+ "defaultValue": "",
604
+ "value": "C:\\fakepath\\certificate.pfx",
605
+ "visible": True,
606
+ "fileDatabase64": f"data:application/x-pkcs12;base64,{certificate_file_content_base64}",
607
+ },
608
+ {
609
+ "id": "certificateFilePassword",
610
+ "label": "Certificate File Password",
611
+ "ctlType": "password",
612
+ "required": True,
613
+ "defaultValue": "",
614
+ "value": certificate_password,
615
+ "visible": True,
616
+ },
617
+ ],
618
+ "idolConfig": {
619
+ "view": {
620
+ "name": "ViewMicrosoftTeams",
621
+ "type": "idol.nifi.connector.ViewMicrosoftTeams",
622
+ "group": "idol.nifi.connector",
623
+ "artifact": "idol-nifi-connector-officeteams",
624
+ "version": version,
625
+ },
626
+ "crawler": {
627
+ "name": "GetMicrosoftTeams",
628
+ "type": "idol.nifi.connector.GetMicrosoftTeams",
629
+ "group": "idol.nifi.connector",
630
+ "artifact": "idol-nifi-connector-officeteams",
631
+ "version": version,
632
+ },
633
+ },
634
+ "idolProperties": {
635
+ "view": {
636
+ "Oauth2SiteName": "${OAuth2SiteName}",
637
+ "Oauth2SitesFile": "${OAuth2SitesFile}",
638
+ "IndexCallRecordings": "true",
639
+ },
640
+ "crawler": {
641
+ "META:SOURCE": "MSTeams",
642
+ "IndexUserChats": "${IndexUserChats}",
643
+ "Oauth2SiteName": "${OAuth2SiteName}",
644
+ "Oauth2SitesFile": "${OAuth2SitesFile}",
645
+ "IndexAttachments": "${IndexAttachments}",
646
+ "IndexCallRecordings": "${IndexCallRecordings}",
647
+ "IndexMessageReplies": "${IndexMessageReplies}",
648
+ },
649
+ },
650
+ "authRedirect": "",
651
+ "metadataFields": [],
652
+ }
653
+
654
+ request_header = self.request_header()
655
+ request_url = self.config()["repoUrl"]
656
+
657
+ response = self.do_request(
658
+ url=request_url,
659
+ method="POST",
660
+ json_data=payload,
661
+ headers=request_header,
662
+ timeout=None,
663
+ failure_message="Failed to create repository -> '{}'".format(name),
664
+ )
665
+
666
+ if response is None:
667
+ return None
668
+
669
+ self.repo_admin_consent(response["id"])
670
+
671
+ return response
672
+
673
+ # end method definition
674
+
675
+ def repo_create_sharepoint(
676
+ self,
677
+ name: str,
678
+ client_id: str,
679
+ tenant_id: str,
680
+ certificate_file: str,
681
+ certificate_password: int,
682
+ sharepoint_url: str,
683
+ sharepoint_url_type: str,
684
+ sharepoint_mysite_url: str,
685
+ sharepoint_admin_url: str,
686
+ index_user_profiles: bool = True,
687
+ oauth2_site_name: str = "AVTS",
688
+ oauth2_sites_file: str = "",
689
+ version: str = "24.3.0",
690
+ ) -> dict | None:
691
+ """Create a new repository to crawl in Aviator Search
692
+
693
+ Args:
694
+ id (str): ID of the repository
695
+ name (str): socName of the repository
696
+ #todo: add more params
697
+
698
+ Returns:
699
+ dict | None: Parsed response object from the API or None in case of an error
700
+ """
701
+
702
+ if os.path.isfile(certificate_file):
703
+ # Open the file in binary mode
704
+ with open(certificate_file, "rb") as file:
705
+ # Read the content of the file
706
+ certificate_file_content = file.read()
707
+ # Convert the bytes to a base64 string
708
+ certificate_file_content_base64 = base64.b64encode(
709
+ certificate_file_content
710
+ ).decode("utf-8")
711
+
712
+ payload = {
713
+ "id": "SharePoint",
714
+ "socName": "SharePoint Online",
715
+ "authType": "OAUTH",
716
+ "name": name,
717
+ "params": [
718
+ {
719
+ "id": "OAuth2SiteName",
720
+ "label": "OAuth2 Site Name",
721
+ "ctlType": "text",
722
+ "required": False,
723
+ "defaultValue": "AVTS",
724
+ "value": oauth2_site_name,
725
+ "visible": False,
726
+ },
727
+ {
728
+ "id": "OAuth2SitesFile",
729
+ "label": "OAuth2 Sites File",
730
+ "ctlType": "text",
731
+ "required": False,
732
+ "defaultValue": "",
733
+ "value": oauth2_sites_file,
734
+ "visible": False,
735
+ },
736
+ {
737
+ "id": "sourceLink",
738
+ "label": "Source Link",
739
+ "ctlType": "text",
740
+ "description": "Example: https://<sharepoint host>${FILEDIRREF}/Forms/AllItems.aspx?id=${FILEREF}&parent=${FILEDIRREF}",
741
+ "required": False,
742
+ "defaultValue": "",
743
+ "visible": True,
744
+ "value": sharepoint_url
745
+ + "${FILEDIRREF}/Forms/AllItems.aspx?id=${FILEREF}&parent=${FILEDIRREF}",
746
+ },
747
+ {
748
+ "id": "clientID",
749
+ "label": "Client ID",
750
+ "ctlType": "text",
751
+ "description": "Microsoft Entra client ID",
752
+ "required": True,
753
+ "defaultValue": "",
754
+ "value": client_id,
755
+ "visible": True,
756
+ },
757
+ {
758
+ "id": "tenant",
759
+ "label": "Tenant ID",
760
+ "ctlType": "text",
761
+ "description": "Microsoft Entra tenant ID",
762
+ "required": True,
763
+ "defaultValue": "",
764
+ "value": tenant_id,
765
+ "visible": True,
766
+ },
767
+ {
768
+ "id": "sharePointUrl",
769
+ "label": "SharePoint URL",
770
+ "ctlType": "text",
771
+ "description": 'The URL to start synchronizing from. Specify a URL that matches "SharePoint URL type"',
772
+ "required": True,
773
+ "defaultValue": "",
774
+ "value": sharepoint_url + "/",
775
+ "visible": True,
776
+ },
777
+ {
778
+ "id": "sharePointAdminUrl",
779
+ "label": "SharePoint Admin URL",
780
+ "ctlType": "text",
781
+ "description": "The URL of the admin site collection, for retrieving user profiles from SharePoint Online",
782
+ "required": True,
783
+ "defaultValue": "",
784
+ "value": sharepoint_admin_url,
785
+ "visible": True,
786
+ },
787
+ {
788
+ "id": "sharePointMySiteUrl",
789
+ "label": "SharePoint MySite URL",
790
+ "ctlType": "text",
791
+ "description": "The URL of the MySites site collection, for retrieving user profiles from SharePoint Online",
792
+ "required": True,
793
+ "defaultValue": "",
794
+ "value": sharepoint_mysite_url,
795
+ "visible": True,
796
+ },
797
+ {
798
+ "id": "sharePointOnline",
799
+ "label": "SharePoint Online",
800
+ "ctlType": "boolean",
801
+ "description": "Specifies whether to retrieve data from SharePoint Online. To retrieve data from a SharePoint Online dedicated server set this to false",
802
+ "required": False,
803
+ "defaultValue": "true",
804
+ "value": "true",
805
+ "visible": False,
806
+ },
807
+ {
808
+ "id": "MappedWebApplicationPolicies",
809
+ "label": "Mapped Web Application Policies",
810
+ "ctlType": "text",
811
+ "required": False,
812
+ "defaultValue": "false",
813
+ "value": "false",
814
+ "visible": False,
815
+ },
816
+ {
817
+ "id": "TenantAdminSitesIncludeTypes",
818
+ "label": "Tenant Admin Sites IncludeTypes",
819
+ "ctlType": "text",
820
+ "description": "This parameter helps to filter the results to include only specific types of sites",
821
+ "required": False,
822
+ "defaultValue": "all",
823
+ "value": "all",
824
+ "visible": False,
825
+ },
826
+ {
827
+ "id": "URLType",
828
+ "label": "SharePoint URL Type",
829
+ "ctlType": "select",
830
+ "description": 'The type of URL specified by "Sharepoint URL"',
831
+ "required": True,
832
+ "defaultValue": "",
833
+ "value": "SiteCollection",
834
+ "visible": True,
835
+ "acceptedValues": [
836
+ "WebApplication",
837
+ "SiteCollection",
838
+ "PersonalSiteCollection",
839
+ "TenantAdmin",
840
+ ],
841
+ },
842
+ {
843
+ "id": "IndexUserProfiles",
844
+ "label": "Index User Profiles",
845
+ "ctlType": "boolean",
846
+ "description": "Specifies whether to index information from user profiles",
847
+ "required": True,
848
+ "defaultValue": "false",
849
+ "value": str(index_user_profiles).lower(),
850
+ "visible": True,
851
+ },
852
+ {
853
+ "id": "certificateFile",
854
+ "label": "Certificate File",
855
+ "ctlType": "file",
856
+ "description": 'Please upload a valid "*.pfx" certificate file',
857
+ "required": True,
858
+ "defaultValue": "",
859
+ "value": "C:\\fakepath\\certificate.pfx",
860
+ "visible": True,
861
+ "fileDatabase64": f"data:application/x-pkcs12;base64,{certificate_file_content_base64}",
862
+ },
863
+ {
864
+ "id": "certificateFilePassword",
865
+ "label": "Certificate File Password",
866
+ "ctlType": "password",
867
+ "required": True,
868
+ "defaultValue": "",
869
+ "value": certificate_password,
870
+ "visible": True,
871
+ },
872
+ ],
873
+ "idolConfig": {
874
+ "view": {
875
+ "name": "ViewSharePointOData",
876
+ "type": "idol.nifi.connector.ViewSharePointOData",
877
+ "group": "idol.nifi.connector",
878
+ "artifact": "idol-nifi-connector-sharepointodata",
879
+ "version": version,
880
+ },
881
+ "crawler": {
882
+ "name": "GetSharePointOData",
883
+ "type": "idol.nifi.connector.GetSharePointOData",
884
+ "group": "idol.nifi.connector",
885
+ "artifact": "idol-nifi-connector-sharepointodata",
886
+ "version": version,
887
+ },
888
+ },
889
+ "idolProperties": {
890
+ "view": {
891
+ "SharepointUrl": "${sharePointUrl}",
892
+ "Oauth2SiteName": "${OAuth2SiteName}",
893
+ "Oauth2SitesFile": "${OAuth2SitesFile}",
894
+ "SharepointOnline": "${sharePointOnline}",
895
+ "SharepointUrlType": "${URLType}",
896
+ "SharepointAdminUrl": "${sharePointAdminUrl}",
897
+ "SharepointMySiteUrl": "${sharePointMySiteUrl}",
898
+ "MappedWebApplicationPolicies": "${MappedWebApplicationPolicies}",
899
+ },
900
+ "crawler": {
901
+ "META:SOURCE": "SharePoint",
902
+ "SharepointUrl": "${sharePointUrl}",
903
+ "Oauth2SiteName": "${OAuth2SiteName}",
904
+ "Oauth2SitesFile": "${OAuth2SitesFile}",
905
+ "SharepointOnline": "${sharePointOnline}",
906
+ "IndexUserProfiles": "${IndexUserProfiles}",
907
+ "SharepointUrlType": "${URLType}",
908
+ "SharepointAdminUrl": "${sharePointAdminUrl}",
909
+ "SharepointMySiteUrl": "${sharePointMySiteUrl}",
910
+ "MappedWebApplicationPolicies": "${MappedWebApplicationPolicies}",
911
+ "TenantAdminSitesIncludeTypes": "${TenantAdminSitesIncludeTypes}",
912
+ },
913
+ },
914
+ "authRedirect": "",
915
+ "metadataFields": ["FILEREF", "FILEDIRREF"],
916
+ }
917
+
918
+ request_header = self.request_header()
919
+ request_url = self.config()["repoUrl"]
920
+
921
+ response = self.do_request(
922
+ url=request_url,
923
+ method="POST",
924
+ json_data=payload,
925
+ headers=request_header,
926
+ timeout=None,
927
+ failure_message="Failed to create repository -> '{}'".format(name),
928
+ )
929
+
930
+ if response is None:
931
+ return None
932
+
933
+ self.repo_admin_consent(response["id"])
934
+
935
+ return response
936
+
937
+ # end method definition
938
+
939
+ def repo_admin_consent(self, repo_id: str) -> dict | None:
940
+ """Send admin consent information for a repository
941
+
942
+ Args:
943
+ repo_id (str): id of the repository
944
+
945
+ Returns:
946
+ dict | None: Parsed response object from the API or None in case of an error
947
+ """
948
+
949
+ request_header = self.request_header()
950
+ request_url = self.config()["repoUrl"]
951
+
952
+ request_url = (
953
+ self.config()["repoUrl"] + "/" + repo_id + "/authorize?admin_consent=true"
954
+ )
955
+
956
+ return self.do_request(
957
+ url=request_url,
958
+ method="GET",
959
+ headers=request_header,
960
+ timeout=None,
961
+ failure_message="Failed to set admin_consent for repository -> '{}'".format(
962
+ repo_id
963
+ ),
964
+ )
965
+
966
+ # end method definition
967
+
968
def start_crawling(self, repo_name: str) -> list | None:
    """Trigger the crawling of a repository.

    The repository is looked up by its name; its "repoId" is then used
    to call the "start" resource of the repository API.

    Args:
        repo_name (str): name of the repository
    Returns:
        list | None: Parsed response object from the API or None in case of an error
                     (this includes a repository that cannot be found)
    """

    logger.info("Start crawling repository -> %s", repo_name)

    repository = self.get_repo_by_name(name=repo_name)
    if repository is not None:
        headers = self.request_header()
        start_url = self.config()["repoUrl"] + "/start/" + repository.get("repoId")

        return self.do_request(
            url=start_url,
            method="POST",
            headers=headers,
            timeout=None,
            failure_message=f"Failed to start crawling repository -> '{repo_name}'",
        )

    return None
995
+
996
+ # end method definition
997
+
998
+ def stop_crawling(self, repo_name: str) -> list | None:
999
+ """Stop the crawling of a repository
1000
+
1001
+ Args:
1002
+ repo_name (str): name of the repository
1003
+ Returns:
1004
+ list | None: Parsed response object from the API or None in case of an error
1005
+ """
1006
+
1007
+ repo = self.get_repo_by_name(name=repo_name)
1008
+ if repo is None:
1009
+ return None
1010
+
1011
+ request_header = self.request_header()
1012
+ request_url = self.config()["repoUrl"] + "/stop/" + repo.get("repoId")
1013
+
1014
+ return self.do_request(
1015
+ url=request_url,
1016
+ method="POST",
1017
+ headers=request_header,
1018
+ timeout=None,
1019
+ failure_message="Failed to stop crawling repository -> '{}'".format(
1020
+ repo_name
1021
+ ),
1022
+ )
1023
+
1024
+ # end method definition
1025
+
1026
+ def get_repo_list(self) -> list | None:
1027
+ """Get a list of all repositories
1028
+
1029
+ Returns:
1030
+ list | None: Parsed response object from the API listing all repositories or None in case of an error
1031
+ """
1032
+
1033
+ request_header = self.request_header()
1034
+ request_url = self.config()["repoUrl"]
1035
+
1036
+ return self.do_request(
1037
+ url=request_url,
1038
+ method="GET",
1039
+ headers=request_header,
1040
+ timeout=None,
1041
+ failure_message="Failed to get list of repositories to crawl.",
1042
+ )
1043
+
1044
+ # end method definition
1045
+
1046
+ def get_repo_by_name(self, name: str) -> dict | None:
1047
+ """Get a repository by name
1048
+
1049
+ Args:
1050
+ name (str): name of the repository
1051
+ Returns:
1052
+ dict | None: ID of a repostiory by name or None in case of an error
1053
+ """
1054
+
1055
+ repo_list = self.get_repo_list()
1056
+
1057
+ if repo_list is None:
1058
+ return None
1059
+
1060
+ return next(
1061
+ (repo for repo in repo_list if repo.get("repoName", "") == name),
1062
+ None,
1063
+ )
1064
+
1065
+ # end method definition