pyxecm 2.0.2__py3-none-any.whl → 2.0.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pyxecm might be problematic. Click here for more details.

pyxecm/otkd.py ADDED
@@ -0,0 +1,1369 @@
1
+ """OTKD Module to implement functions to communicate with Knowledge Discovery (Nifi).
2
+
3
+ Nifi API documentation: https://nifi.apache.org/nifi-docs/rest-api.html
4
+
5
+ """
6
+
7
+ __author__ = "Dr. Marc Diefenbruch"
8
+ __copyright__ = "Copyright (C) 2024-2025, OpenText"
9
+ __credits__ = ["Kai-Philip Gatzweiler"]
10
+ __maintainer__ = "Dr. Marc Diefenbruch"
11
+ __email__ = "mdiefenb@opentext.com"
12
+
13
+ import json
14
+ import logging
15
+ import platform
16
+ import sys
17
+ import time
18
+ from http import HTTPStatus
19
+ from importlib.metadata import version
20
+
21
+ import requests
22
+
23
# Identification of this package / module (used to build the User-Agent):
APP_NAME = "pyxecm"
APP_VERSION = version(APP_NAME)
MODULE_NAME = f"{APP_NAME}.otkd"

# Details about the runtime environment that are reported in the User-Agent:
PYTHON_VERSION = ".".join(str(part) for part in sys.version_info[:3])
OS_INFO = " ".join((platform.system(), platform.release()))
ARCH_INFO = platform.machine()
REQUESTS_VERSION = requests.__version__

USER_AGENT = "{app}/{ver} ({module}/{ver}; Python/{python}; {os}; {arch}; Requests/{req})".format(
    app=APP_NAME,
    ver=APP_VERSION,
    module=MODULE_NAME,
    python=PYTHON_VERSION,
    os=OS_INFO,
    arch=ARCH_INFO,
    req=REQUESTS_VERSION,
)

# Header set for form-encoded requests:
REQUEST_FORM_HEADERS = {
    "User-Agent": USER_AGENT,
    "Content-Type": "application/x-www-form-urlencoded",
}

# Header set for JSON requests:
REQUEST_JSON_HEADERS = {
    "User-Agent": USER_AGENT,
    "accept": "application/json;charset=utf-8",
    "Content-Type": "application/json",
}

# Header set for file uploads.
# DO NOT set "Content-Type" manually here.
REQUEST_UPLOAD_HEADERS = {
    "User-Agent": USER_AGENT,
}

# Request timeout in seconds, delay between retries in seconds and
# the default number of retries:
REQUEST_TIMEOUT = 60
REQUEST_RETRY_DELAY = 20
REQUEST_MAX_RETRIES = 2

default_logger = logging.getLogger(MODULE_NAME)
58
+
59
+
60
+ class OTKD:
61
+ """Class OTKD is used to communicate Knowledge Discovery via REST API."""
62
+
63
+ logger: logging.Logger = default_logger
64
+
65
+ _config = None
66
+ _otkd_token = None
67
+
68
def __init__(
    self,
    protocol: str,
    hostname: str,
    port: int,
    username: str,
    password: str,
    logger: logging.Logger = default_logger,
) -> None:
    """Initialize the OTKD object.

    Args:
        protocol (str):
            Either http or https. Falls back to "http" if empty.
        hostname (str):
            The hostname of the Knowledge Discovery to communicate with.
        port (int):
            The port number used to talk to the Knowledge Discovery.
            Falls back to 80 if empty.
        username (str):
            The admin user name of Knowledge Discovery. Falls back to "admin" if empty.
        password (str):
            The admin password of Knowledge Discovery.
        logger (logging.Logger, optional):
            The logging object to use for all log messages. Defaults to default_logger.

    """

    # A custom logger gets a dedicated child logger that inherits the filters:
    if logger != default_logger:
        self.logger = logger.getChild("otkd")
        for logfilter in logger.filters:
            self.logger.addFilter(logfilter)

    otkd_config = {
        "hostname": hostname or "",
        "protocol": protocol or "http",
        "port": port or 80,
        "username": username or "admin",
        "password": password or "",
    }
    if not otkd_config["password"]:
        self.logger.warning("Missing password for user -> '%s'.", otkd_config["username"])

    # Build the URLs from the effective (defaulted) configuration values and
    # not from the raw arguments. Otherwise an empty protocol or port would
    # raise a TypeError or produce a broken URL like "http://host:None".
    otkd_base_url = "{}://{}".format(otkd_config["protocol"], otkd_config["hostname"])
    # The standard ports are not made explicit in the URL:
    if str(otkd_config["port"]) not in ["80", "443"]:
        otkd_base_url += ":{}".format(otkd_config["port"])
    otkd_config["baseUrl"] = otkd_base_url
    otkd_config["restUrl"] = otkd_config["baseUrl"] + "/nifi-api"
    otkd_config["flowUrl"] = otkd_config["restUrl"] + "/flow"
    otkd_config["authenticationUrl"] = otkd_config["restUrl"] + "/access/token"

    self._config = otkd_config
124
+
125
+ # end method definition
126
+
127
def config(self) -> dict:
    """Deliver the configuration dictionary of this object.

    Returns:
        dict:
            The configuration dictionary (the stored object itself, not a copy).

    """

    configuration = self._config

    return configuration
135
+
136
+ # end method definition
137
+
138
def hostname(self) -> str:
    """Deliver the configured Knowledge Discovery hostname.

    Returns:
        str:
            The value stored under "hostname" in the configuration.

    """

    configuration = self.config()

    return configuration["hostname"]
146
+
147
+ # end method definition
148
+
149
def set_hostname(self, hostname: str) -> None:
    """Store a new Knowledge Discovery hostname in the configuration.

    Only the "hostname" entry is changed. URLs that are already
    stored in the configuration are not touched by this method.

    Args:
        hostname (str):
            The new Knowledge Discovery hostname.

    """

    configuration = self.config()
    configuration["hostname"] = hostname
158
+
159
+ # end method definition
160
+
161
+ def credentials(self, basic_auth: bool = False) -> dict:
162
+ """Get credentials (username + password).
163
+
164
+ Returns:
165
+ dict:
166
+ A dictionary with username and password.
167
+
168
+ """
169
+
170
+ if basic_auth:
171
+ return (self.config()["username"], self.config()["password"])
172
+
173
+ return {
174
+ "username": self.config()["username"],
175
+ "password": self.config()["password"],
176
+ }
177
+
178
+ # end method definition
179
+
180
def set_credentials(
    self,
    username: str = "",
    password: str = "",
) -> None:
    """Store new credentials for Knowledge Discovery in the configuration.

    Args:
        username (str, optional):
            The user name to use. An empty value is replaced by "admin".
            Defaults to "".
        password (str, optional):
            The password of the user. An empty value is stored as "".
            Defaults to "".

    """

    configuration = self.config()
    configuration["username"] = username if username else "admin"
    configuration["password"] = password if password else ""
197
+
198
+ # end method definition
199
+
200
def base_url(self) -> str:
    """Deliver the Knowledge Discovery base URL.

    Returns:
        str:
            The value stored under "baseUrl" in the configuration.

    """

    configuration = self.config()

    return configuration["baseUrl"]
209
+
210
+ # end method definition
211
+
212
def rest_url(self) -> str:
    """Deliver the Knowledge Discovery REST URL.

    Returns:
        str:
            The value stored under "restUrl" in the configuration.

    """

    configuration = self.config()

    return configuration["restUrl"]
222
+
223
+ # end method definition
224
+
225
def request_form_header(self) -> dict:
    """Deliver the request header for form-encoded REST API calls.

    Consists of the form headers (see global variable REQUEST_FORM_HEADERS)
    and - if a token has been stored in this object - an Authorization
    bearer token.

    Returns:
        dict:
            A new dictionary with the form headers that optionally
            includes the authorization token.

    """

    token = self._otkd_token
    if not token:
        return dict(REQUEST_FORM_HEADERS)

    return {**REQUEST_FORM_HEADERS, "Authorization": "Bearer {}".format(token)}
245
+
246
+ # end method definition
247
+
248
def request_json_header(self) -> dict:
    """Deliver the request header for the JSON-based REST API calls.

    Consists of the JSON headers (see global variable REQUEST_JSON_HEADERS)
    and - if a token has been stored in this object - an Authorization
    bearer token.

    Returns:
        dict:
            A new dictionary with the JSON headers that optionally
            includes the authorization token.

    """

    json_header = dict(REQUEST_JSON_HEADERS)

    token = self._otkd_token
    if token:
        json_header["Authorization"] = "Bearer {}".format(token)

    return json_header
268
+
269
+ # end method definition
270
+
271
def request_upload_header(self) -> dict:
    """Deliver the request header for upload REST API calls that use the 'file' parameter.

    Consists of the upload headers (see global variable REQUEST_UPLOAD_HEADERS,
    which only carries the 'User-Agent') and - if a token has been stored in
    this object - an Authorization bearer token.
    For uploads it is IMPORTANT to NOT set the 'Content-Type' header.

    Returns:
        dict:
            A new dictionary without 'Content-Type' that includes the
            'User-Agent' header and optionally the authorization token.

    """

    upload_header = {**REQUEST_UPLOAD_HEADERS}

    if self._otkd_token:
        bearer = "Bearer {}".format(self._otkd_token)
        upload_header["Authorization"] = bearer

    return upload_header
293
+
294
+ # end method definition
295
+
296
def do_request(
    self,
    url: str,
    method: str = "GET",
    headers: dict | None = None,
    data: dict | None = None,
    json_data: dict | None = None,
    files: dict | None = None,
    timeout: int | None = REQUEST_TIMEOUT,
    show_error: bool = True,
    show_warning: bool = False,
    warning_message: str = "",
    failure_message: str = "",
    success_message: str = "",
    max_retries: int = REQUEST_MAX_RETRIES,
    retry_forever: bool = False,
    parse_request_response: bool = True,
) -> dict | None:
    """Call a Nifi REST API in a safe way.

    Args:
        url (str):
            The URL to send the request to.
        method (str, optional):
            The HTTP method (GET, POST, etc.). Defaults to "GET".
        headers (dict | None, optional):
            The request headers. Defaults to None.
        data (dict | None, optional):
            Request payload. Defaults to None.
        json_data (dict | None, optional):
            Request payload for the JSON parameter. Defaults to None.
        files (dict | None, optional):
            Dictionary of {"name": file-tuple} for multipart encoding upload.
            File-tuple can be a 2-tuple ("filename", fileobj) or a 3-tuple ("filename", fileobj, "content_type")
        timeout (int | None, optional):
            The timeout for the request in seconds. Defaults to REQUEST_TIMEOUT.
        show_error (bool, optional):
            Whether or not an error should be logged in case of a failed REST call.
            Defaults to True.
        show_warning (bool, optional):
            Whether or not a warning should be logged in case of a failed REST call.
            Only evaluated if show_error is False. Defaults to False.
        warning_message (str, optional):
            Specific warning message. Defaults to "". If not given the failure_message will be used.
        failure_message (str, optional):
            Specific error message. Defaults to "".
        success_message (str, optional):
            Specific success message. Defaults to "".
        max_retries (int, optional):
            How many retries on timeouts and connection errors? Default is REQUEST_MAX_RETRIES.
        retry_forever (bool, optional):
            Eventually wait forever - without timeout. Defaults to False.
        parse_request_response (bool, optional):
            Defines if the response.text should be interpreted as json and loaded into a dictionary.
            True is the default. If False, the response object itself is returned.

    Returns:
        dict | None:
            Response of the Nifi REST API or None in case of an error.

    """

    retries = 0
    # In case of an expired session we re-authenticate and try 1 more time.
    # Session expiration should not happen twice in a row. This is tracked
    # separately from the retry counter - otherwise a preceding timeout or
    # connection error would silently disable the re-authentication.
    reauthenticated = False

    while True:
        try:
            # NOTE(review): according to the requests documentation an
            # 'Authorization' header passed via 'headers' is overridden by
            # the 'auth' parameter - confirm that basic auth is intended here.
            response = requests.request(
                method=method,
                url=url,
                data=data,
                json=json_data,
                files=files,
                headers=headers,
                auth=(self.credentials(basic_auth=True)),
                timeout=timeout,
            )

            if response.ok:
                if success_message:
                    self.logger.info(success_message)
                if parse_request_response:
                    return self.parse_request_response(response)
                return response

            # Check if Session has expired - then re-authenticate and try once more
            if response.status_code == 401 and not reauthenticated:
                self.logger.info("Session has expired - try to re-authenticate...")
                token = self.authenticate(revalidate=True)
                # The headers have been built by the caller with the old token.
                # Work on a copy to not modify the dictionary of the caller:
                if token and headers and "Authorization" in headers:
                    headers = {**headers, "Authorization": "Bearer {}".format(token)}
                reauthenticated = True
                retries += 1
            else:
                # HTTPStatus() raises a ValueError for non-standard status
                # codes - fall back to the reason delivered by the server:
                try:
                    status_phrase = HTTPStatus(response.status_code).phrase
                except ValueError:
                    status_phrase = response.reason or ""
                if show_error:
                    self.logger.error(
                        "%s; status -> %s/%s; error -> %s",
                        failure_message,
                        response.status_code,
                        status_phrase,
                        response.text,
                    )
                elif show_warning:
                    self.logger.warning(
                        "%s; status -> %s/%s; warning -> %s",
                        warning_message if warning_message else failure_message,
                        response.status_code,
                        status_phrase,
                        response.text,
                    )
                return None
        except requests.exceptions.Timeout:
            if retries <= max_retries:
                self.logger.warning(
                    "Request timed out. Retrying in %s seconds...",
                    str(REQUEST_RETRY_DELAY),
                )
                retries += 1
                time.sleep(REQUEST_RETRY_DELAY)  # Add a delay before retrying
            else:
                self.logger.error(
                    "%s; timeout error.",
                    failure_message,
                )
                if retry_forever:
                    # If it fails after the configured retries we let it wait forever
                    self.logger.warning("Turn timeouts off and wait forever...")
                    timeout = None
                else:
                    return None
        except requests.exceptions.ConnectionError:
            if retries <= max_retries:
                self.logger.warning(
                    "Connection error. Retrying in %s seconds...",
                    str(REQUEST_RETRY_DELAY),
                )
                retries += 1
                time.sleep(REQUEST_RETRY_DELAY)  # Add a delay before retrying
            else:
                self.logger.error(
                    "%s; connection error.",
                    failure_message,
                )
                if retry_forever:
                    # If it fails after the configured retries we let it wait forever
                    self.logger.warning("Turn timeouts off and wait forever...")
                    timeout = None
                    time.sleep(REQUEST_RETRY_DELAY)  # Add a delay before retrying
                else:
                    return None
        # end try
        self.logger.warning(
            "Retrying Nifi REST API %s call -> %s... (retry = %s)",
            method,
            url,
            str(retries),
        )
    # end while True
454
+
455
+ # end method definition
456
+
457
+ def parse_request_response(
458
+ self,
459
+ response_object: object,
460
+ additional_error_message: str = "",
461
+ show_error: bool = True,
462
+ ) -> dict | None:
463
+ """Convert the text property of a request response object to a dictionary.
464
+
465
+ This is done in a safe way that also handles exceptions.
466
+
467
+ Args:
468
+ response_object (object):
469
+ The reponse object delivered by the request call.
470
+ additional_error_message (str):
471
+ To print a custom error message.
472
+ show_error (bool):
473
+ If True, log an error, if False log a warning.
474
+
475
+ Returns:
476
+ dict:
477
+ The response or None in case of an error.
478
+
479
+ """
480
+
481
+ if not response_object:
482
+ return None
483
+
484
+ try:
485
+ dict_object = json.loads(response_object.text)
486
+ except json.JSONDecodeError as exception:
487
+ if additional_error_message:
488
+ message = "Cannot decode response as JSon. {}; error -> {}".format(
489
+ additional_error_message,
490
+ exception,
491
+ )
492
+ else:
493
+ message = "Cannot decode response as JSon; error -> {}".format(
494
+ exception,
495
+ )
496
+ if show_error:
497
+ self.logger.error(message)
498
+ else:
499
+ self.logger.debug(message)
500
+ return None
501
+ else:
502
+ return dict_object
503
+
504
+ # end method definition
505
+
506
+ def authenticate(self, revalidate: bool = False) -> str | None:
507
+ """Authenticate at Knowledge Discovery and retrieve Ticket.
508
+
509
+ Args:
510
+ revalidate (bool, optional):
511
+ Determins if a re-athentication is enforced
512
+ (e.g. if session has timed out with 401 error).
513
+ By default we use the OTDS ticket (if exists) for the authentication with OTCS.
514
+ This switch allows the forced usage of username / password for the authentication.
515
+
516
+ Returns:
517
+ str | None:
518
+ Token information of None in case of an error.
519
+ Also stores cookie information in self._cookie
520
+
521
+ """
522
+
523
+ # Already authenticated and session still valid?
524
+ if self._otkd_token and not revalidate:
525
+ self.logger.debug(
526
+ "Session still valid - return existing ticket -> %s",
527
+ str(self._otkd_token),
528
+ )
529
+ return self._otkd_token
530
+
531
+ request_url = self.config()["authenticationUrl"]
532
+
533
+ # Check if previous authentication was not successful.
534
+ # Then we do the normal username + password authentication:
535
+ self.logger.debug(
536
+ "Requesting OTKD ticket with username and password; calling -> %s",
537
+ request_url,
538
+ )
539
+
540
+ response = None
541
+ try:
542
+ response = requests.post(
543
+ url=request_url,
544
+ data=self.credentials(),
545
+ timeout=REQUEST_TIMEOUT,
546
+ )
547
+ except requests.exceptions.RequestException as exception:
548
+ self.logger.warning(
549
+ "Unable to connect to -> %s; error -> %s",
550
+ request_url,
551
+ str(exception),
552
+ )
553
+ self.logger.warning("Nifi service may not be ready yet.")
554
+ return None
555
+
556
+ if response.ok:
557
+ token = response.text.strip()
558
+ self.logger.debug("NiFi access token -> %s", token)
559
+ self._otkd_token = token
560
+ return token
561
+ else:
562
+ self.logger.error(
563
+ "Failed to request an Nifi access token; status -> %s, error -> %s",
564
+ response.status_code,
565
+ response.text,
566
+ )
567
+ return None
568
+
569
+ # end method definition
570
+
571
+ def get_root_process_group(self) -> dict | None:
572
+ """Get the root process group in Nifi.
573
+
574
+ Returns:
575
+ dict | None:
576
+ The root process group. None in case of an error.
577
+
578
+ """
579
+
580
+ request_url = self.config()["restUrl"] + "/process-groups/root"
581
+ request_header = self.request_json_header()
582
+
583
+ return self.do_request(
584
+ url=request_url,
585
+ method="GET",
586
+ headers=request_header,
587
+ failure_message="Failed to get root process group",
588
+ )
589
+
590
+ # end method definition
591
+
592
+ def get_process_groups(self, parent_process_group_id: str) -> list | None:
593
+ """Get the (root) process groups.
594
+
595
+ Args:
596
+ parent_process_group_id (str):
597
+ The parent of the process groups to retrieve.
598
+
599
+ Returns:
600
+ list | None:
601
+ The list of process groups. None in case an error has occured.
602
+
603
+ Example:
604
+ [
605
+ {
606
+ 'revision': {
607
+ 'clientId': 'none',
608
+ 'version': 2
609
+ },
610
+ 'id': '17c9d355-0197-1000-ffff-fffff3783d78',
611
+ 'uri': 'https://nifi.master.terrarium.cloud:443/nifi-api/process-groups/17c9d355-0197-1000-ffff-fffff3783d78',
612
+ 'position': {
613
+ 'x': 0.0,
614
+ 'y': 0.0
615
+ },
616
+ 'permissions': {
617
+ 'canRead': True,
618
+ 'canWrite': True
619
+ },
620
+ 'bulletins': [],
621
+ 'component': {
622
+ 'id': '17c9d355-0197-1000-ffff-fffff3783d78',
623
+ 'parentGroupId': '1783526b-0197-1000-24bf-df0e446b7f0e',
624
+ 'position': {...},
625
+ 'name': 'KD_Demo_Integration',
626
+ 'comments': '',
627
+ 'parameterContext': {...},
628
+ 'flowfileConcurrency': 'UNBOUNDED',
629
+ 'flowfileOutboundPolicy': 'STREAM_WHEN_AVAILABLE',
630
+ 'defaultFlowFileExpiration': '0 sec',
631
+ 'defaultBackPressureObjectThreshold': 10000,
632
+ 'defaultBackPressureDataSizeThreshold': '1 GB',
633
+ 'logFileSuffix': '',
634
+ 'executionEngine': 'INHERITED',
635
+ 'maxConcurrentTasks': 1,
636
+ 'statelessFlowTimeout': '1 min',
637
+ 'runningCount': 34,
638
+ 'stoppedCount': 0,
639
+ 'invalidCount': 35,
640
+ 'disabledCount': 3,
641
+ ...
642
+ },
643
+ 'status': {
644
+ 'id': '17c9d355-0197-1000-ffff-fffff3783d78',
645
+ 'name': 'KD_Demo_Integration',
646
+ 'statsLastRefreshed': '17:05:34 GMT',
647
+ 'aggregateSnapshot': {...}
648
+ },
649
+ 'runningCount': 34,
650
+ 'stoppedCount': 0,
651
+ 'invalidCount': 35,
652
+ 'disabledCount': 3,
653
+ 'activeRemotePortCount': 0,
654
+ 'inactiveRemotePortCount': 0,
655
+ 'upToDateCount': 0,
656
+ 'locallyModifiedCount': 0,
657
+ 'staleCount': 0,
658
+ 'locallyModifiedAndStaleCount': 0,
659
+ 'syncFailureCount': 0,
660
+ 'localInputPortCount': 0,
661
+ 'localOutputPortCount': 0,
662
+ 'publicInputPortCount': 0,
663
+ 'publicOutputPortCount': 0,
664
+ 'parameterContext': {
665
+ 'id': 'efc9e58c-946a-3125-a52e-c395a6be2990',
666
+ 'permissions': {...},
667
+ 'component': {...}
668
+ },
669
+ inputPortCount': 0,
670
+ 'outputPortCount': 0
671
+ }
672
+ ]
673
+
674
+ """
675
+
676
+ request_url = self.config()["restUrl"] + "/process-groups/" + parent_process_group_id + "/process-groups"
677
+ request_header = self.request_json_header()
678
+
679
+ process_groups = self.do_request(
680
+ url=request_url, method="GET", headers=request_header, failure_message="Failed to get process groups"
681
+ )
682
+
683
+ if not process_groups:
684
+ return None
685
+
686
+ return process_groups.get("processGroups")
687
+
688
+ # end method definition
689
+
690
+ def get_process_group_by_parent_and_name(self, name: str, parent_id: str | None = None) -> dict | None:
691
+ """Get a process group based on the parent ID and name.
692
+
693
+ Args:
694
+ name (str):
695
+ The name of the parent group to retrieve.
696
+ parent_id (str | None):
697
+ The ID of the parent process group.
698
+
699
+ Returns:
700
+ dict | None:
701
+ Process group information, nor None if no process group
702
+ with the given name is found under the specified parent.
703
+
704
+ Example:
705
+ {
706
+ 'revision': {
707
+ 'clientId': '3bce7da0-b8f7-41de-87af-7245ea7203e6',
708
+ 'version': 4
709
+ },
710
+ 'id': '39fad8ec-0197-1000-0000-000042fa6c3d',
711
+ 'uri': 'https://nifi.master.terrarium.cloud:443/nifi-api/process-groups/39fad8ec-0197-1000-0000-000042fa6c3d',
712
+ 'position': {'x': 8.0, 'y': -48.0},
713
+ 'permissions': {
714
+ 'canRead': True,
715
+ 'canWrite': True
716
+ },
717
+ 'bulletins': [],
718
+ 'component': {
719
+ 'id': '39fad8ec-0197-1000-0000-000042fa6c3d',
720
+ 'parentGroupId': '39e6026f-0197-1000-507c-19c44bb6d518',
721
+ 'position': {...},
722
+ 'name': 'KD_Integration',
723
+ 'comments': '',
724
+ 'parameterContext': {...},
725
+ 'flowfileConcurrency': 'UNBOUNDED',
726
+ 'flowfileOutboundPolicy': 'STREAM_WHEN_AVAILABLE',
727
+ 'defaultFlowFileExpiration': '0 sec',
728
+ 'defaultBackPressureObjectThreshold': 10000,
729
+ 'defaultBackPressureDataSizeThreshold': '1 GB',
730
+ 'logFileSuffix': '',
731
+ 'executionEngine': 'INHERITED',
732
+ 'maxConcurrentTasks': 1,
733
+ 'statelessFlowTimeout': '1 min',
734
+ 'runningCount': 57,
735
+ 'stoppedCount': 0,
736
+ 'invalidCount': 0,
737
+ 'disabledCount': 1,
738
+ ...
739
+ },
740
+ 'status': {
741
+ 'id': '39fad8ec-0197-1000-0000-000042fa6c3d',
742
+ 'name': 'KD_Integration',
743
+ 'statsLastRefreshed': '18:04:47 GMT',
744
+ 'aggregateSnapshot': {...}
745
+ },
746
+ 'runningCount': 57,
747
+ 'stoppedCount': 0,
748
+ 'invalidCount': 0,
749
+ 'disabledCount': 1,
750
+ 'activeRemotePortCount': 0,
751
+ 'inactiveRemotePortCount': 0,
752
+ 'upToDateCount': 0,
753
+ 'locallyModifiedCount': 0,
754
+ 'staleCount': 0,
755
+ 'locallyModifiedAndStaleCount': 0,
756
+ 'syncFailureCount': 0,
757
+ 'localInputPortCount': 0,
758
+ 'localOutputPortCount': 0,
759
+ 'publicInputPortCount': 0,
760
+ 'publicOutputPortCount': 0,
761
+ 'parameterContext': {
762
+ 'id': 'd380a638-7b8d-39e4-bda8-c77fa2c7ddf0',
763
+ 'permissions': {...},
764
+ 'component': {...}
765
+ },
766
+ 'inputPortCount': 0,
767
+ 'outputPortCount':
768
+ }
769
+
770
+ """
771
+
772
+ # If no specific parent ID is provided we dtermine the root process ID:
773
+ if parent_id is None:
774
+ root_process_group = self.get_root_process_group()
775
+ if not root_process_group:
776
+ return None
777
+ parent_id = root_process_group.get("id")
778
+ if not parent_id:
779
+ return None
780
+
781
+ process_groups = self.get_process_groups(parent_process_group_id=parent_id)
782
+
783
+ process_group = next(
784
+ (group for group in process_groups if group["component"]["name"] == name),
785
+ None,
786
+ )
787
+
788
+ return process_group
789
+
790
+ # end method definition
791
+
792
+ def get_process_group_by_name(self, name: str) -> dict | None:
793
+ """Get a top-level process group based on the name.
794
+
795
+ This is a pure convenience wrapper for get_process_group_by_parent_and_name()
796
+ in cases you want to look process group under 'root'.
797
+
798
+ Args:
799
+ name (str):
800
+ The name of the parent group to retrieve.
801
+
802
+ Returns:
803
+ dict | None:
804
+ Process group information, nor None if no process group
805
+ with the given name is found under the specified parent.
806
+
807
+ """
808
+
809
+ # We let the parent_id undefined (None) - this will deliver the
810
+ # process group in root if it exists with the given name:
811
+ return self.get_process_group_by_parent_and_name(name=name)
812
+
813
+ # end method definition
814
+
815
+ def upload_process_group(
816
+ self, file_path: str, name: str, position_x: float = 0.0, position_y: float = 0.0
817
+ ) -> dict | None:
818
+ """Upload Nifi flow from JSON file.
819
+
820
+ Args:
821
+ file_path (str):
822
+ Path to JSON file.
823
+ name (str):
824
+ Name of the group to be added.
825
+ position_x (float, optional):
826
+ The layout position of the flow on the X-axis. Optional. Default 0.0.
827
+ position_y (float, optional):
828
+ The layout position of the flow on the Y-axis. Optional. Default 0.0.
829
+
830
+ Returns:
831
+ dict | None:
832
+ Request response. None in case an error has occured.
833
+
834
+ """
835
+
836
+ root_process_group = self.get_root_process_group()
837
+ if not root_process_group:
838
+ return None
839
+ root_process_group_id = root_process_group.get("id")
840
+
841
+ if not root_process_group_id:
842
+ return None
843
+
844
+ process_groups = self.get_process_groups(parent_process_group_id=root_process_group_id)
845
+
846
+ group_exists = next(
847
+ (True for group in process_groups if group["component"]["name"] == name),
848
+ False,
849
+ )
850
+
851
+ if group_exists:
852
+ self.logger.warning("Process group -> '%s' already exists!", name)
853
+ return None
854
+
855
+ request_url = self.config()["restUrl"] + "/process-groups/" + root_process_group_id + "/process-groups/upload"
856
+ request_header = self.request_upload_header()
857
+
858
+ # Upload the Template JSON file
859
+ with open(file_path, "rb") as pg_file:
860
+ response = self.do_request(
861
+ url=request_url,
862
+ method="POST",
863
+ headers=request_header,
864
+ data={
865
+ "positionX": str(position_x),
866
+ "positionY": str(position_y),
867
+ "groupName": name,
868
+ "clientId": "none",
869
+ },
870
+ files={"file": (file_path, pg_file, "multipart/form-data")},
871
+ )
872
+
873
+ if response:
874
+ self.logger.debug(
875
+ "The process group -> '%s' has been uploaded successfully!",
876
+ name,
877
+ )
878
+ return response
879
+
880
+ self.logger.error(
881
+ "The process group -> '%s' could not be uploaded!",
882
+ name,
883
+ )
884
+
885
+ return None
886
+
887
+ # end method definition
888
+
889
+ def get_flow_status(self) -> dict | None:
890
+ """Get the flow status.
891
+
892
+ Returns:
893
+ dict | None:
894
+ Status information of the flow.
895
+
896
+ Example:
897
+ {
898
+ 'controllerStatus': {
899
+ 'activeThreadCount': 1,
900
+ 'terminatedThreadCount': 0,
901
+ 'queued': '0 / 0 bytes',
902
+ 'flowFilesQueued': 0,
903
+ 'bytesQueued': 0,
904
+ 'runningCount': 57,
905
+ 'stoppedCount': 0,
906
+ 'invalidCount': 0,
907
+ 'disabledCount': 1,
908
+ 'activeRemotePortCount': 0,
909
+ 'inactiveRemotePortCount': 0,
910
+ 'upToDateCount': 0,
911
+ 'locallyModifiedCount': 0,
912
+ 'staleCount': 0,
913
+ 'locallyModifiedAndStaleCount': 0,
914
+ 'syncFailureCount': 0
915
+ }
916
+ }
917
+
918
+ """
919
+
920
+ request_url = self.config()["restUrl"] + "/flow/status"
921
+ request_header = self.request_json_header()
922
+
923
+ return self.do_request(
924
+ url=request_url,
925
+ method="GET",
926
+ headers=request_header,
927
+ failure_message="Cannot get flow status!",
928
+ )
929
+
930
+ # end method definition
931
+
932
+ def get_flow_config(self) -> dict | None:
933
+ """Get the flow configuration.
934
+
935
+ Returns:
936
+ dict | None:
937
+ Configuration information of the flow.
938
+
939
+ Example:
940
+ {
941
+ 'flowConfiguration': {
942
+ 'supportsManagedAuthorizer': False,
943
+ 'supportsConfigurableAuthorizer': False,
944
+ 'supportsConfigurableUsersAndGroups': False,
945
+ 'currentTime': '05:51:45 GMT',
946
+ 'timeOffset': 0,
947
+ 'defaultBackPressureObjectThreshold': 10000,
948
+ 'defaultBackPressureDataSizeThreshold': '1 GB'
949
+ }
950
+ }
951
+
952
+ """
953
+
954
+ request_url = self.config()["restUrl"] + "/flow/config"
955
+ request_header = self.request_json_header()
956
+
957
+ return self.do_request(
958
+ url=request_url,
959
+ method="GET",
960
+ headers=request_header,
961
+ failure_message="Cannot get flow status!",
962
+ )
963
+
964
+ # end method definition
965
+
966
+ def get_parameter_contexts(self) -> list | None:
967
+ """Get the list of parameter contexts.
968
+
969
+ Returns:
970
+ list | None:
971
+ The list of parameter contexts.
972
+
973
+ """
974
+
975
+ request_url = self.config()["restUrl"] + "/flow/parameter-contexts"
976
+ request_header = self.request_json_header()
977
+
978
+ parameter_contexts = self.do_request(
979
+ url=request_url,
980
+ method="GET",
981
+ headers=request_header,
982
+ failure_message="Cannot get parameter contexts!",
983
+ )
984
+
985
+ if not parameter_contexts:
986
+ return None
987
+
988
+ return parameter_contexts.get("parameterContexts")
989
+
990
+ # end method definition
991
+
992
+ def get_parameter_context_by_name(self, name: str) -> dict | None:
993
+ """Get the parameter context with the given name.
994
+
995
+ Returns:
996
+ dict | None:
997
+ The parameter contexts with the given name.
998
+
999
+ """
1000
+
1001
+ # Get a list of all parameter contexts:
1002
+ parameter_contexts = self.get_parameter_contexts()
1003
+
1004
+ parameter_context = next(
1005
+ (context for context in parameter_contexts if context["component"]["name"] == name), None
1006
+ )
1007
+
1008
+ if not parameter_context:
1009
+ self.logger.error("Cannot find parameter context with name -> '%s'!", name)
1010
+ return None
1011
+
1012
+ return parameter_context
1013
+
1014
+ # end method definition
1015
+
1016
+ def update_parameter(
1017
+ self, component: str, parameter: str, value: str | float | bool, sensitive: bool = False, description: str = ""
1018
+ ) -> dict | None:
1019
+ """Update a parameter in a given parameter context.
1020
+
1021
+ Args:
1022
+ component (str):
1023
+ Name of the component.
1024
+ parameter (str):
1025
+ Name of the parameter.
1026
+ value (str | float | bool):
1027
+ Value of the parameter.
1028
+ sensitive (bool, optional):
1029
+ Indication if parameter is sensitive. Defaults to False.
1030
+ description (str, optional):
1031
+ Description of the parameter.
1032
+
1033
+ Returns:
1034
+ dict | None:
1035
+ The updated parameterContext as dict.
1036
+
1037
+ Example:
1038
+ {
1039
+ 'request': {
1040
+ 'requestId': '8e111371-df57-4ef2-a36d-81fdc5158810',
1041
+ 'uri': 'https://nifi.master.terrarium.cloud:443/nifi-api/parameter-contexts/efc9e58c-946a-3125-916a-278f528ac0ab/update-requests/8e111371-df57-4ef2-a36d-81fdc5158810',
1042
+ 'lastUpdated': '05/29/2025 09:56:51.000 GMT',
1043
+ 'complete': False,
1044
+ 'percentCompleted': 0,
1045
+ 'state': 'Stopping Affected Processors',
1046
+ 'updateSteps': [
1047
+ {
1048
+ 'description': 'Stopping Affected Processors',
1049
+ 'complete': False
1050
+ },
1051
+ {
1052
+ 'description': 'Disabling Affected Controller Services',
1053
+ 'complete': False
1054
+ },
1055
+ {
1056
+ 'description': 'Updating Parameter Context',
1057
+ 'complete': False
1058
+ },
1059
+ {
1060
+ 'description': 'Re-Enabling Affected Controller Services',
1061
+ 'complete': False
1062
+ },
1063
+ {
1064
+ 'description': 'Restarting Affected Processors',
1065
+ 'complete': False
1066
+ }
1067
+ ],
1068
+ referencingComponents': [
1069
+ {
1070
+ 'revision': {...},
1071
+ 'id': '516d8089-9886-307a-99ba-f08ce519f446',
1072
+ 'permissions': {...},
1073
+ 'bulletins': [...],
1074
+ 'component': {
1075
+ 'processGroupId': '1b5bd4d5-0197-1000-ffff-ffffd6a2b035',
1076
+ 'id': '516d8089-9886-307a-99ba-f08ce519f446',
1077
+ 'referenceType': 'PROCESSOR',
1078
+ 'name': 'Unreserve Document',
1079
+ 'state': 'RUNNING',
1080
+ 'activeThreadCount': 0,
1081
+ 'validationErrors': [
1082
+ "'516d8089-9886-307a-99ba-f08ce519f446' validated against 'IDOL License Service' is invalid because IDOL License Service not enabled",
1083
+ "'IDOL License Service' validated against '01832b88-cf15-3a46-9d80-6be6247aa276' is invalid because Controller Service with ID 01832b88-cf15-3a46-9d80-6be6247aa276 is disabled"
1084
+ ]
1085
+ },
1086
+ 'processGroup': {...},
1087
+ 'referenceType': 'PROCESSOR'
1088
+ },
1089
+ ...
1090
+ ]
1091
+ }
1092
+ }
1093
+
1094
+ """
1095
+
1096
+ # Find the parameter context by its name:
1097
+ parameter_context = self.get_parameter_context_by_name(name=component)
1098
+ if not parameter_context:
1099
+ self.logger.error(
1100
+ "Parameter -> '%s' could not be updated because the parameter context -> '%s' was not found!",
1101
+ parameter,
1102
+ component,
1103
+ )
1104
+ return None
1105
+
1106
+ parameter_context_id = parameter_context["id"]
1107
+
1108
+ json_body = {
1109
+ "revision": parameter_context["revision"],
1110
+ "disconnectedNodeAcknowledged": False,
1111
+ "id": parameter_context_id,
1112
+ "component": {
1113
+ "id": parameter_context_id,
1114
+ "name": component,
1115
+ "description": None,
1116
+ "parameters": [
1117
+ {
1118
+ "parameter": {
1119
+ "name": parameter,
1120
+ "sensitive": sensitive,
1121
+ "description": description,
1122
+ "value": value,
1123
+ }
1124
+ },
1125
+ ],
1126
+ "inheritedParameterContexts": [],
1127
+ },
1128
+ }
1129
+
1130
+ request_url = self.config()["restUrl"] + "/parameter-contexts/" + parameter_context_id + "/update-requests"
1131
+ request_header = self.request_json_header()
1132
+
1133
+ response = self.do_request(
1134
+ url=request_url, method="POST", headers=request_header, json_data=json_body, failure_message=""
1135
+ )
1136
+
1137
+ if response:
1138
+ if sensitive:
1139
+ value = value[:2] + "*" * (len(value) - 2)
1140
+ self.logger.debug("Parameter -> '%s' has been updated to value -> '%s'.", parameter, value)
1141
+ return response
1142
+
1143
+ self.logger.error(
1144
+ "Parameter -> '%s' could not be updated!",
1145
+ parameter,
1146
+ )
1147
+
1148
+ return None
1149
+
1150
+ # end method definition
1151
+
1152
+ def start_all_processors(self, name: str) -> dict | None:
1153
+ """Start all processors in the process group given by its name.
1154
+
1155
+ Args:
1156
+ name (str):
1157
+ The name of the group to start the processors for.
1158
+
1159
+ Returns:
1160
+ dict | None:
1161
+ Response of the start command.
1162
+
1163
+ """
1164
+
1165
+ process_group = self.get_process_group_by_name(name=name)
1166
+ if not process_group:
1167
+ self.logger.error("Cannot find process group -> '%s' to start!", name)
1168
+ return None
1169
+
1170
+ process_group_id = process_group["id"]
1171
+
1172
+ request_url = self.config()["restUrl"] + "/flow/process-groups/" + process_group_id
1173
+ request_header = self.request_json_header()
1174
+
1175
+ json_body = {"id": process_group_id, "state": "RUNNING"}
1176
+
1177
+ response = self.do_request(url=request_url, method="PUT", headers=request_header, json_data=json_body)
1178
+
1179
+ if response:
1180
+ self.logger.debug("All processors in process-group -> '%s' have been started...", name)
1181
+ return response
1182
+
1183
+ self.logger.error(
1184
+ "Processors in process-group -> '%s' failed to start!",
1185
+ name,
1186
+ )
1187
+
1188
+ return None
1189
+
1190
+ # end method definition
1191
+
1192
+ def get_controller_services(self, process_group_id: str) -> list | None:
1193
+ """Get the list of controller services for a process group.
1194
+
1195
+ Args:
1196
+ process_group_id (str):
1197
+ The process group to retrieve controller services for.
1198
+
1199
+ Returns:
1200
+ list | None:
1201
+ The list of process groups. None in case an error has occured.
1202
+
1203
+ Example:
1204
+ [
1205
+ {
1206
+ "revision": {
1207
+ "version": 1
1208
+ },
1209
+ "id": "81076e51-13d4-3930-bb89-cd192ccb213a",
1210
+ "uri": "https://nifi.master.terrarium.cloud:443/nifi-api/controller-services/81076e51-13d4-3930-bb89-cd192ccb213a",
1211
+ "permissions": {
1212
+ "canRead": true,
1213
+ "canWrite": true
1214
+ },
1215
+ "bulletins": [],
1216
+ "parentGroupId": "337df1d6-0197-1000-ffff-ffffbc502fff",
1217
+ "component": {
1218
+ "id": "81076e51-13d4-3930-bb89-cd192ccb213a",
1219
+ "versionedComponentId": "e1e795b3-ce90-3eaa-a7fb-929c8bde75e1",
1220
+ "parentGroupId": "337df1d6-0197-1000-ffff-ffffbc502fff",
1221
+ "name": "StandardHttpContextMap",
1222
+ "type": "org.apache.nifi.http.StandardHttpContextMap",
1223
+ "bundle": {
1224
+ "group": "org.apache.nifi",
1225
+ "artifact": "nifi-http-context-map-nar",
1226
+ "version": "2.0.0"
1227
+ },
1228
+ "controllerServiceApis": [
1229
+ {
1230
+ "type": "org.apache.nifi.http.HttpContextMap",
1231
+ "bundle": {
1232
+ "group": "org.apache.nifi",
1233
+ "artifact": "nifi-standard-services-api-nar",
1234
+ "version": "2.0.0"
1235
+ }
1236
+ }
1237
+ ],
1238
+ "comments": "",
1239
+ "state": "ENABLED",
1240
+ "persistsState": false,
1241
+ "restricted": false,
1242
+ "deprecated": false,
1243
+ "multipleVersionsAvailable": false,
1244
+ "supportsSensitiveDynamicProperties": false,
1245
+ "properties": {
1246
+ "Maximum Outstanding Requests": "5000",
1247
+ "Request Expiration": "1 min"
1248
+ },
1249
+ "descriptors": {
1250
+ "Maximum Outstanding Requests": {
1251
+ "name": "Maximum Outstanding Requests",
1252
+ "displayName": "Maximum Outstanding Requests",
1253
+ "description": "The maximum number of HTTP requests that can be outstanding at any one time. Any attempt to register an additional HTTP Request will cause an error",
1254
+ "defaultValue": "5000",
1255
+ "required": true,
1256
+ "sensitive": false,
1257
+ "dynamic": false,
1258
+ "supportsEl": false,
1259
+ "expressionLanguageScope": "Not Supported",
1260
+ "dependencies": []
1261
+ },
1262
+ "Request Expiration": {
1263
+ "name": "Request Expiration",
1264
+ "displayName": "Request Expiration",
1265
+ "description": "Specifies how long an HTTP Request should be left unanswered before being evicted from the cache and being responded to with a Service Unavailable status code",
1266
+ "defaultValue": "1 min",
1267
+ "required": true,
1268
+ "sensitive": false,
1269
+ "dynamic": false,
1270
+ "supportsEl": false,
1271
+ "expressionLanguageScope": "Not Supported",
1272
+ "dependencies": []
1273
+ }
1274
+ },
1275
+ "inputPorts": [],
1276
+ "outputPorts": [],
1277
+ "schedulingPeriod": "1 sec",
1278
+ },
1279
+ ...
1280
+ "status" : {
1281
+ 'runStatus': 'ENABLED',
1282
+ 'validationStatus': 'VALID'
1283
+ }
1284
+ ]
1285
+
1286
+ """
1287
+
1288
+ request_url = self.config()["restUrl"] + "/flow/process-groups/" + process_group_id + "/controller-services"
1289
+ request_header = self.request_json_header()
1290
+
1291
+ controller_services = self.do_request(
1292
+ url=request_url,
1293
+ method="GET",
1294
+ headers=request_header,
1295
+ failure_message="Failed to get controller services for process group -> '{}'".format(process_group_id),
1296
+ )
1297
+
1298
+ if not controller_services:
1299
+ return None
1300
+
1301
+ return controller_services.get("controllerServices", [])
1302
+
1303
+ # end method definition
1304
+
1305
+ def set_controller_services_state(self, name: str, state: str = "ENABLED", components: dict | None = None) -> bool:
1306
+ """Enable or disable Controller Services in the specified Process Group.
1307
+
1308
+ Args:
1309
+ name (str):
1310
+ The name of the process group to enable the controller-services for.
1311
+ state (str, optional):
1312
+ Can either be "ENABLED" or "DISABLED". Default is "ENABLED".
1313
+ components (dict | None, optional):
1314
+ If provided the state is only set for the given components of the process group.
1315
+ If not provided ALL components will be enabled/disabled.
1316
+ The dictionary should have a structure like this:
1317
+ {
1318
+ "key" : {
1319
+ "clientId" : "clientId",
1320
+ "lastModifier" : "lastModifier",
1321
+ "version" : 2
1322
+ }
1323
+ }
1324
+
1325
+ Returns:
1326
+ dict | None:
1327
+ Response of the enable controller command.
1328
+
1329
+ """
1330
+
1331
+ state = state.upper()
1332
+ if state not in ["ENABLED", "DISABLED"]:
1333
+ self.logger.error(
1334
+ "Illegal state -> '%s' for process group controller service. Needs to be 'ENABLED' or 'DISABLED'!",
1335
+ state,
1336
+ )
1337
+
1338
+ process_group = self.get_process_group_by_name(name=name)
1339
+ if not process_group:
1340
+ self.logger.error(
1341
+ "Cannot find process group -> '%s' to %s controller service for!",
1342
+ name,
1343
+ "enable" if state == "ENABLED" else "disable",
1344
+ )
1345
+ return None
1346
+
1347
+ process_group_id = process_group["id"]
1348
+
1349
+ request_url = self.config()["restUrl"] + "/flow/process-groups/" + process_group_id + "/controller-services"
1350
+ request_header = self.request_json_header()
1351
+
1352
+ json_body = {"id": process_group_id, "state": state, "disconnectedNodeAcknowledged": False}
1353
+ if components:
1354
+ json_body["components"] = components
1355
+
1356
+ response = self.do_request(
1357
+ url=request_url,
1358
+ method="PUT",
1359
+ headers=request_header,
1360
+ json_data=json_body,
1361
+ failure_message="Unable to set state -> '{}' for controller-services in process-group -> '{}'".format(
1362
+ state, name
1363
+ ),
1364
+ show_error=True,
1365
+ )
1366
+
1367
+ return response
1368
+
1369
+ # end method definition