pyxecm-2.0.1-py3-none-any.whl → pyxecm-2.0.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pyxecm might be problematic. More details are linked from the original diff page (the link target is not preserved in this text).

@@ -200,7 +200,7 @@ class CustomizerSettingsOTPD(BaseModel):
200
200
  description="Username of the API user to configure OTPD",
201
201
  validation_alias=AliasChoices("username", "user"),
202
202
  )
203
- password: str = Field(default="", description="Password of the API user to configure OTPD")
203
+ password: SecretStr = Field(default="", description="Password of the API user to configure OTPD")
204
204
  url: HttpUrl | None = Field(default=None, description="URL of the OTPD service")
205
205
 
206
206
  db_importfile: str = Field(default="", description="Path to the OTPD import file")
@@ -303,7 +303,7 @@ class CustomizerSettingsM365(BaseModel):
303
303
  description="Username of the M365 tenant Admin.",
304
304
  validation_alias=AliasChoices("username", "user"),
305
305
  )
306
- password: str = Field(default="", description="Password of the M365 tenant Admin.")
306
+ password: SecretStr = Field(default="", description="Password of the M365 tenant Admin.")
307
307
  enabled: bool = Field(default=False, description="Enable/Disable the Microsoft 365 integration.")
308
308
  tenant_id: str = Field(default="", description="TennantID of the Microsoft 365 tenant")
309
309
  client_id: str = Field(default="", description="Client ID for the Microsoft 365 tenant.")
@@ -345,6 +345,23 @@ class CustomizerSettingsAviator(BaseModel):
345
345
  """Class for Aviator related settings."""
346
346
 
347
347
  enabled: bool = Field(default=False, description="Content Aviator enabled")
348
+ oauth_client: str = Field(default="", description="OAuth Client ID for Content Aviator")
349
+ oauth_secret: str = Field(default="", description="OAuth Client Secret for Content Aviator")
350
+ chat_svc_url: HttpUrl = Field(
351
+ default="http://csai-chat-svc:3000", description="Chat Service URL for Content Aviator"
352
+ )
353
+ embed_svc_url: HttpUrl = Field(
354
+ default="http://csai-embed-svc:3000", description="Embed Service URL for Content Aviator"
355
+ )
356
+
357
+
358
+ class CustomizerSettingsKnowledgeDiscovery(BaseModel):
359
+ """Class for Knowledge Discovery related settings."""
360
+
361
+ enabled: bool = Field(default=False, description="Knowledge Discovery enabled")
362
+ url: HttpUrl | None = Field(default=None, description="URL of the Nifi Server")
363
+ username: str = Field(default="admin", description="Admin username for Knowledge Dicovery (Nifi)")
364
+ password: SecretStr = Field(default="", description="Admin password for Knowledge Discovery (Nifi)")
348
365
 
349
366
 
350
367
  class CustomizerSettingsAVTS(BaseModel):
@@ -352,7 +369,7 @@ class CustomizerSettingsAVTS(BaseModel):
352
369
 
353
370
  enabled: bool = Field(default=False, description="Enable Aviator Search configuration")
354
371
  username: str = Field(default="", description="Admin username for Aviator Search")
355
- password: str = Field(default="", description="Admin password for Aviator Search")
372
+ password: SecretStr = Field(default="", description="Admin password for Aviator Search")
356
373
  client_id: str = Field(default="", description="OTDS Client ID for Aviator Search")
357
374
  client_secret: str = Field(default="", description="OTDS Client Secret for Aviator Search")
358
375
  base_url: HttpUrl | None = Field(
@@ -419,6 +436,7 @@ class Settings(BaseSettings):
419
436
  m365: CustomizerSettingsM365 = CustomizerSettingsM365()
420
437
  coreshare: CustomizerSettingsCoreShare = CustomizerSettingsCoreShare()
421
438
  aviator: CustomizerSettingsAviator = CustomizerSettingsAviator()
439
+ otkd: CustomizerSettingsKnowledgeDiscovery = CustomizerSettingsKnowledgeDiscovery()
422
440
  avts: CustomizerSettingsAVTS = CustomizerSettingsAVTS()
423
441
  otmm: CustomizerSettingsOTMM = CustomizerSettingsOTMM()
424
442
 
pyxecm/helper/xml.py CHANGED
@@ -17,7 +17,7 @@ from queue import Queue
17
17
 
18
18
  import xmltodict
19
19
 
20
- # we need lxml instead of stadard xml.etree to have xpath capabilities!
20
+ # we need lxml instead of standard xml.etree to have xpath capabilities!
21
21
  from lxml import etree
22
22
  from lxml.etree import Element
23
23
 
pyxecm/otawp.py CHANGED
@@ -56,7 +56,7 @@ REQUEST_HEADERS_JSON = {
56
56
  REQUEST_TIMEOUT = 120
57
57
  REQUEST_MAX_RETRIES = 10
58
58
  REQUEST_RETRY_DELAY = 30
59
- SYNC_PUBLISH_REQUEST_TIMEOUT = 300
59
+ SYNC_PUBLISH_REQUEST_TIMEOUT = 600
60
60
 
61
61
  default_logger = logging.getLogger(MODULE_NAME)
62
62
 
@@ -1642,15 +1642,19 @@ class OTAWP:
1642
1642
  continue
1643
1643
 
1644
1644
  if SOAP_FAULT_INDICATOR in response.text:
1645
- self.logger.error(
1646
- "Workspace synchronization failed with error -> '%s' when calling -> %s!",
1645
+ self.logger.warning(
1646
+ "Workspace synchronization failed with error -> '%s' when calling -> %s! Retry in %d seconds...",
1647
1647
  self.get_soap_element(soap_response=response.text, soap_tag="faultstring"),
1648
1648
  self.get_soap_element(soap_response=response.text, soap_tag="faultactor"),
1649
+ REQUEST_RETRY_DELAY,
1649
1650
  )
1650
1651
  self.logger.debug("SOAP message -> %s", response.text)
1651
- return None
1652
-
1653
- self.logger.error("Unexpected error during workspace synchronization -> %s", response.text)
1652
+ else:
1653
+ self.logger.warning(
1654
+ "Unexpected error during workspace synchronization -> %s. Retry in %d seconds...",
1655
+ response.text,
1656
+ REQUEST_RETRY_DELAY,
1657
+ )
1654
1658
  time.sleep(REQUEST_RETRY_DELAY)
1655
1659
  retries += 1
1656
1660
 
pyxecm/otca.py CHANGED
@@ -1,4 +1,9 @@
1
- """OTCA stands for Content Aviator and is an OpenText offering for LLMM-based Agentic AI."""
1
+ """OTCA stands for Content Aviator and is an OpenText offering for LLMM-based Agentic AI.
2
+
3
+ The REST API is documented here (OT internal):
4
+ https://confluence.opentext.com/display/CSAI/LLM+Project+REST+APIs
5
+
6
+ """
2
7
 
3
8
  __author__ = "Dr. Marc Diefenbruch"
4
9
  __copyright__ = "Copyright (C) 2024-2025, OpenText"
@@ -6,6 +11,7 @@ __credits__ = ["Kai-Philip Gatzweiler"]
6
11
  __maintainer__ = "Dr. Marc Diefenbruch"
7
12
  __email__ = "mdiefenb@opentext.com"
8
13
 
14
+ import hashlib
9
15
  import json
10
16
  import logging
11
17
  import platform
@@ -47,11 +53,16 @@ class OTCA:
47
53
 
48
54
  _config: dict
49
55
  _context = ""
56
+ _embed_token: str | None = None
57
+ _chat_token: str | None = None
50
58
 
51
59
  def __init__(
52
60
  self,
53
61
  chat_url: str,
54
62
  embed_url: str,
63
+ otds_url: str,
64
+ client_id: str,
65
+ client_secret: str,
55
66
  otcs_object: OTCS,
56
67
  synonyms: list | None = None,
57
68
  inline_citation: bool = True,
@@ -64,6 +75,12 @@ class OTCA:
64
75
  The Content Aviator base URL for chat.
65
76
  embed_url (str):
66
77
  The Content Aviator base URL for embedding.
78
+ otds_url (str):
79
+ The OTDS URL.
80
+ client_id (str):
81
+ The Core Share Client ID.
82
+ client_secret (str):
83
+ The Core Share client secret.
67
84
  otcs_object (OTCS):
68
85
  The OTCS object.
69
86
  synonyms (list):
@@ -85,12 +102,15 @@ class OTCA:
85
102
  otca_config["chatUrl"] = chat_url + "/v1/chat"
86
103
  otca_config["searchUrl"] = chat_url + "/v1/context"
87
104
  otca_config["embedUrl"] = embed_url + "/v1/embeddings"
105
+ otca_config["clientId"] = client_id
106
+ otca_config["clientSecret"] = client_secret
107
+ otca_config["otdsUrl"] = otds_url
88
108
 
89
109
  otca_config["synonyms"] = synonyms if synonyms else []
90
110
  otca_config["inlineCitation"] = inline_citation
91
111
 
92
112
  self._config = otca_config
93
- self._access_token = otcs_object.otcs_ticket()
113
+ self.otcs_object = otcs_object
94
114
 
95
115
  # end method definition
96
116
 
@@ -148,12 +168,16 @@ class OTCA:
148
168
 
149
169
  # end method definition
150
170
 
151
- def request_header(self, content_type: str = "") -> dict:
171
+ def request_header(self, service_type: str = "chat", content_type: str = "application/json") -> dict:
152
172
  """Return the request header used for requests.
153
173
 
154
174
  Consists of Bearer access token and Content Type
155
175
 
156
176
  Args:
177
+ service_type (str, optional):
178
+ Service type for which the header should be returned.
179
+ Either "chat" or "embed". "chat" is the default.
180
+
157
181
  content_type (str, optional):
158
182
  Custom content type for the request.
159
183
  Typical values:
@@ -172,8 +196,11 @@ class OTCA:
172
196
  if content_type:
173
197
  request_header["Content-Type"] = content_type
174
198
 
175
- if self._access_token is not None:
176
- request_header["Authorization"] = f"Bearer {self._access_token}"
199
+ if service_type == "chat" and self._chat_token is not None:
200
+ request_header["Authorization"] = "Bearer {}".format(self._chat_token)
201
+
202
+ elif service_type == "embed" and self._embed_token is not None:
203
+ request_header["Authorization"] = "Bearer {}".format(self._embed_token)
177
204
 
178
205
  return request_header
179
206
 
@@ -251,7 +278,7 @@ class OTCA:
251
278
  # Check if Session has expired - then re-authenticate and try once more
252
279
  elif response.status_code == 401 and retries == 0:
253
280
  self.logger.debug("Session has expired - try to re-authenticate...")
254
- self.authenticate()
281
+ self.authenticate_chat()
255
282
  retries += 1
256
283
  else:
257
284
  # Handle plain HTML responses to not pollute the logs
@@ -377,8 +404,70 @@ class OTCA:
377
404
 
378
405
  # end method definition
379
406
 
407
+ def authenticate_chat(self) -> str | None:
408
+ """Authenticate for Chat service at Content Aviator / CSAI.
409
+
410
+ Returns:
411
+ str | None:
412
+ Authentication token or None if the authentication fails.
413
+
414
+ """
415
+
416
+ token = self.otcs_object.otcs_ticket() or self.otcs_object.authenticate()
417
+
418
+ if token and "otcsticket" in token:
419
+ # Encode the input string before hashing
420
+ encoded_string = token["otcsticket"].encode("utf-8")
421
+
422
+ # Create a new SHA-512 hash object
423
+ sha512 = hashlib.sha512()
424
+
425
+ # Update the hash object with the input string
426
+ sha512.update(encoded_string)
427
+
428
+ # Get the hexadecimal representation of the hash
429
+ hashed_output = sha512.hexdigest()
430
+
431
+ self._chat_token = hashed_output
432
+
433
+ return self._chat_token
434
+
435
+ return None
436
+
437
+ # end method definition
438
+
439
+ def authenticate_embed(self) -> str | None:
440
+ """Authenticate as embedding service at Content Aviator / CSAI.
441
+
442
+ Returns:
443
+ str | None:
444
+ Authentication token or None if the authentication fails.
445
+
446
+ """
447
+
448
+ url = self.config()["otdsUrl"] + "/otdsws/login"
449
+
450
+ data = {
451
+ "grant_type": "client_credentials",
452
+ "client_id": self.config()["clientId"],
453
+ "client_secret": self.config()["clientSecret"],
454
+ }
455
+
456
+ result = self.do_request(url=url, method="Post", data=data)
457
+
458
+ if result:
459
+ self._embed_token = result["access_token"]
460
+ return self._embed_token
461
+ else:
462
+ self.logger.error(
463
+ "Authentication failed with client ID -> '%s' against -> %s", self.config()["clientId"], url
464
+ )
465
+ return None
466
+
467
+ # end method definition
468
+
380
469
  def chat(self, context: str | None, messages: list, where: list) -> dict:
381
- """Process a chat interaction with Content Aviator.
470
+ r"""Process a chat interaction with Content Aviator.
382
471
 
383
472
  Chat requests are meant to be called as end-users. This should involve
384
473
  passing the end-user's access token via the Authorization HTTP header.
@@ -396,7 +485,82 @@ class OTCA:
396
485
  Values need to match those passed as metadata to the embeddings API.
397
486
 
398
487
  Returns:
399
- dict: _description_
488
+ dict: Conversation status
489
+
490
+ Example:
491
+ {
492
+ 'result': 'I do not know.',
493
+ 'called': [
494
+ {
495
+ 'name': 'breakdown_query',
496
+ 'arguments': {},
497
+ 'result': '```json{"input": ["Tell me about the calibration equipment"]}```',
498
+ 'showInContext': False
499
+ },
500
+ {
501
+ 'name': 'store_subqueries',
502
+ 'arguments': {
503
+ '0': 'Tell me about the calibration equipment'
504
+ },
505
+ 'showInContext': False
506
+ },
507
+ {
508
+ 'name': 'get_next_subquery_and_reset_segment',
509
+ 'arguments': {},
510
+ 'result': 'Tell me about the calibration equipment',
511
+ 'showInContext': False
512
+ },
513
+ {
514
+ 'name': 'segmented_query',
515
+ 'arguments': {},
516
+ 'result': 'runQuery',
517
+ 'showInContext': False
518
+ },
519
+ {
520
+ 'name': 'get_context',
521
+ 'arguments': {
522
+ 'query': 'Tell me about the calibration equipment'
523
+ },
524
+ 'result': '',
525
+ 'showInContext': True
526
+ },
527
+ {
528
+ 'name': 'check_answer',
529
+ 'arguments': {},
530
+ 'result': 'noAnswer',
531
+ 'showInContext': False
532
+ },
533
+ {
534
+ 'name': 'segmented_query',
535
+ 'arguments': {},
536
+ 'result': 'answer',
537
+ 'showInContext': False
538
+ },
539
+ {
540
+ 'name': 'get_next_subquery_and_reset_segment',
541
+ 'arguments': {},
542
+ 'showInContext': False
543
+ },
544
+ {
545
+ 'name': 'general_prompt',
546
+ 'arguments': {...},
547
+ 'result': 'I do not know.',
548
+ 'showInContext': False
549
+ },
550
+ {
551
+ 'name': 'filter_references',
552
+ 'arguments': {},
553
+ 'result': '[]',
554
+ 'showInContext': False
555
+ }
556
+ ],
557
+ 'references': [],
558
+ 'context': 'Tool "get_context" called with arguments {"query":"Tell me about the calibration equipment"} and returned:',
559
+ 'queryMetadata': {
560
+ 'originalQuery': 'Tell me about the calibration equipment',
561
+ 'usedQuery': 'Tell me about the calibration equipment'
562
+ }
563
+ }
400
564
 
401
565
  """
402
566
 
@@ -407,22 +571,24 @@ class OTCA:
407
571
  "context": context,
408
572
  "messages": messages,
409
573
  "where": where,
410
- "synonyms": self.config()["synonyms"],
411
- "inlineCitation": self.config()["inlineCitation"],
574
+ # "synonyms": self.config()["synonyms"],
575
+ # "inlineCitation": self.config()["inlineCitation"],
412
576
  }
413
577
 
414
578
  return self.do_request(
415
579
  url=request_url,
416
580
  method="POST",
417
581
  headers=request_header,
418
- data=chat_data,
582
+ json_data=chat_data,
419
583
  timeout=None,
420
584
  failure_message="Failed to chat with Content Aviator",
421
585
  )
422
586
 
423
587
  # end method definition
424
588
 
425
- def search(self, query: str, threshold: float, num_results: int, document_ids: list, workspace_ids: list) -> dict:
589
+ def search(
590
+ self, query: str, document_ids: list, workspace_ids: list, threshold: float = 0.5, num_results: int = 10
591
+ ) -> dict:
426
592
  """Semantic search for text chunks.
427
593
 
428
594
  Search requests are meant to be called as end-users. This should involve
@@ -432,6 +598,10 @@ class OTCA:
432
598
  Args:
433
599
  query (str):
434
600
  The query.
601
+ document_ids (list):
602
+ List of documents (IDs) to use as scope for the query.
603
+ workspace_ids (list):
604
+ List of workspaces (IDs) to use as scope for the query.
435
605
  threshold (float):
436
606
  Minimum similarity score to accept a document. A value like 0.7 means
437
607
  only bring back documents that are at least 70% similar.
@@ -439,10 +609,6 @@ class OTCA:
439
609
  Also called "top-k". Defined how many "most similar" documents to retrieve.
440
610
  Typical value: 3-20. Higher values gets broader context but risks pulling
441
611
  in less relevant documents.
442
- document_ids (list):
443
- List of documents (IDs) to use as scope for the query.
444
- workspace_ids (list):
445
- List of workspaces (IDs) to use as scope for the query.
446
612
 
447
613
  Returns:
448
614
  dict:
@@ -487,9 +653,9 @@ class OTCA:
487
653
  }
488
654
 
489
655
  for document_id in document_ids:
490
- search_data["metadata"].append({"documentID": document_id})
656
+ search_data["metadata"].append({"documentID": str(document_id)})
491
657
  for workspace_id in workspace_ids:
492
- search_data["metadata"].append({"workspaceID": workspace_id})
658
+ search_data["metadata"].append({"workspaceID": str(workspace_id)})
493
659
 
494
660
  return self.do_request(
495
661
  url=request_url,
@@ -497,7 +663,7 @@ class OTCA:
497
663
  headers=request_header,
498
664
  data=search_data,
499
665
  timeout=None,
500
- failure_message="Failed to to do a semantic search with query -> '{}'!".format(query),
666
+ failure_message="Failed to to do a semantic search with query -> '{}'".format(query),
501
667
  )
502
668
 
503
669
  # end method definition
@@ -541,7 +707,7 @@ class OTCA:
541
707
  return None
542
708
 
543
709
  request_url = self.config()["embedUrl"]
544
- request_header = self.request_header()
710
+ request_header = self.request_header(service_type="embed")
545
711
 
546
712
  metadata = {}
547
713
  if workspace_id:
@@ -561,7 +727,7 @@ class OTCA:
561
727
  url=request_url,
562
728
  method="POST",
563
729
  headers=request_header,
564
- data=embed_data,
730
+ json_data=embed_data,
565
731
  timeout=None,
566
732
  failure_message="Failed to embed content",
567
733
  )