pyxecm 2.0.0__py3-none-any.whl → 2.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pyxecm might be problematic. Click here for more details.
- pyxecm/__init__.py +2 -1
- pyxecm/avts.py +79 -33
- pyxecm/customizer/api/app.py +45 -796
- pyxecm/customizer/api/auth/__init__.py +1 -0
- pyxecm/customizer/api/{auth.py → auth/functions.py} +2 -64
- pyxecm/customizer/api/auth/router.py +78 -0
- pyxecm/customizer/api/common/__init__.py +1 -0
- pyxecm/customizer/api/common/functions.py +47 -0
- pyxecm/customizer/api/{metrics.py → common/metrics.py} +1 -1
- pyxecm/customizer/api/common/models.py +21 -0
- pyxecm/customizer/api/{payload_list.py → common/payload_list.py} +6 -1
- pyxecm/customizer/api/common/router.py +72 -0
- pyxecm/customizer/api/settings.py +25 -0
- pyxecm/customizer/api/terminal/__init__.py +1 -0
- pyxecm/customizer/api/terminal/router.py +87 -0
- pyxecm/customizer/api/v1_csai/__init__.py +1 -0
- pyxecm/customizer/api/v1_csai/router.py +87 -0
- pyxecm/customizer/api/v1_maintenance/__init__.py +1 -0
- pyxecm/customizer/api/v1_maintenance/functions.py +100 -0
- pyxecm/customizer/api/v1_maintenance/models.py +12 -0
- pyxecm/customizer/api/v1_maintenance/router.py +76 -0
- pyxecm/customizer/api/v1_otcs/__init__.py +1 -0
- pyxecm/customizer/api/v1_otcs/functions.py +61 -0
- pyxecm/customizer/api/v1_otcs/router.py +179 -0
- pyxecm/customizer/api/v1_payload/__init__.py +1 -0
- pyxecm/customizer/api/v1_payload/functions.py +179 -0
- pyxecm/customizer/api/v1_payload/models.py +51 -0
- pyxecm/customizer/api/v1_payload/router.py +499 -0
- pyxecm/customizer/browser_automation.py +568 -326
- pyxecm/customizer/customizer.py +204 -430
- pyxecm/customizer/guidewire.py +907 -43
- pyxecm/customizer/k8s.py +243 -56
- pyxecm/customizer/m365.py +104 -15
- pyxecm/customizer/payload.py +1943 -885
- pyxecm/customizer/pht.py +19 -2
- pyxecm/customizer/servicenow.py +22 -5
- pyxecm/customizer/settings.py +9 -6
- pyxecm/helper/xml.py +69 -0
- pyxecm/otac.py +1 -1
- pyxecm/otawp.py +2104 -1535
- pyxecm/otca.py +569 -0
- pyxecm/otcs.py +201 -37
- pyxecm/otds.py +35 -13
- {pyxecm-2.0.0.dist-info → pyxecm-2.0.1.dist-info}/METADATA +6 -29
- pyxecm-2.0.1.dist-info/RECORD +76 -0
- {pyxecm-2.0.0.dist-info → pyxecm-2.0.1.dist-info}/WHEEL +1 -1
- pyxecm-2.0.0.dist-info/RECORD +0 -54
- /pyxecm/customizer/api/{models.py → auth/models.py} +0 -0
- {pyxecm-2.0.0.dist-info → pyxecm-2.0.1.dist-info}/licenses/LICENSE +0 -0
- {pyxecm-2.0.0.dist-info → pyxecm-2.0.1.dist-info}/top_level.txt +0 -0
pyxecm/otca.py
ADDED
|
@@ -0,0 +1,569 @@
|
|
|
1
|
+
"""OTCA stands for Content Aviator and is an OpenText offering for LLM-based Agentic AI."""
|
|
2
|
+
|
|
3
|
+
__author__ = "Dr. Marc Diefenbruch"
|
|
4
|
+
__copyright__ = "Copyright (C) 2024-2025, OpenText"
|
|
5
|
+
__credits__ = ["Kai-Philip Gatzweiler"]
|
|
6
|
+
__maintainer__ = "Dr. Marc Diefenbruch"
|
|
7
|
+
__email__ = "mdiefenb@opentext.com"
|
|
8
|
+
|
|
9
|
+
import json
|
|
10
|
+
import logging
|
|
11
|
+
import platform
|
|
12
|
+
import sys
|
|
13
|
+
import time
|
|
14
|
+
from importlib.metadata import version
|
|
15
|
+
|
|
16
|
+
import requests
|
|
17
|
+
|
|
18
|
+
from pyxecm.otcs import OTCS
|
|
19
|
+
|
|
20
|
+
# Identification of the package and of this module; both end up in the User-Agent header.
APP_NAME = "pyxecm"
APP_VERSION = version("pyxecm")
MODULE_NAME = f"{APP_NAME}.otca"

# Details about the runtime environment that are reported in the User-Agent header.
PYTHON_VERSION = ".".join(str(part) for part in sys.version_info[:3])
OS_INFO = " ".join((platform.system(), platform.release()))
ARCH_INFO = platform.machine()
REQUESTS_VERSION = requests.__version__

USER_AGENT = (
    f"{APP_NAME}/{APP_VERSION} ({MODULE_NAME}/{APP_VERSION}; "
    f"Python/{PYTHON_VERSION}; {OS_INFO}; {ARCH_INFO}; Requests/{REQUESTS_VERSION})"
)

# Default HTTP headers; request_header() starts from these values.
REQUEST_HEADERS = {
    "User-Agent": USER_AGENT,
    "accept": "application/json",
    "Content-Type": "application/json",
}

# Defaults used by do_request(): timeout per request (seconds), pause between
# two attempts (seconds) and the retry limit for timeouts / connection errors.
REQUEST_TIMEOUT = 60
REQUEST_RETRY_DELAY = 20
REQUEST_MAX_RETRIES = 2

# Logger that is used whenever no custom logger is handed to the OTCA class.
default_logger = logging.getLogger(MODULE_NAME)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class OTCA:
    """Interact with Content Aviator REST API."""

    # Class-level default logger. __init__() replaces it on the instance with a
    # child logger when a custom logger is passed in.
    logger: logging.Logger = default_logger

    # Configuration dictionary (endpoint URLs, synonyms, citation flag).
    # It is assigned in __init__() and returned by config().
    _config: dict
    # Chat context (history) returned by get_context(); starts out empty.
    _context = ""
|
|
50
|
+
|
|
51
|
+
def __init__(
    self,
    chat_url: str,
    embed_url: str,
    otcs_object: OTCS,
    synonyms: list | None = None,
    inline_citation: bool = True,
    logger: logging.Logger = default_logger,
) -> None:
    """Initialize the Content Aviator (OTCA) object.

    Builds the endpoint URLs from the two base URLs, stores the chat
    settings and fetches the access token once via otcs_object.otcs_ticket().

    Args:
        chat_url (str):
            The Content Aviator base URL for chat. "/v1/chat" and "/v1/context"
            are appended to it. A trailing slash is ignored.
        embed_url (str):
            The Content Aviator base URL for embedding. "/v1/embeddings" is
            appended to it. A trailing slash is ignored.
        otcs_object (OTCS):
            The OTCS object. Its otcs_ticket() result is used as bearer token.
        synonyms (list | None, optional):
            List of synonyms that are used to generate a better response to the user.
            The list is copied, later calls of add_synonyms() do not modify
            the list of the caller.
        inline_citation (bool, optional):
            Enable/Disable citations in the answers.
        logger (logging.Logger, optional):
            The logging object to use for all log messages. Defaults to default_logger.

    """

    if logger != default_logger:
        self.logger = logger.getChild("otca")
        for logfilter in logger.filters:
            self.logger.addFilter(logfilter)

    # Avoid "//v1/..." if the base URLs are configured with a trailing slash:
    chat_base_url = chat_url.rstrip("/")
    embed_base_url = embed_url.rstrip("/")

    self._config = {
        "chatUrl": chat_base_url + "/v1/chat",
        "searchUrl": chat_base_url + "/v1/context",
        "embedUrl": embed_base_url + "/v1/embeddings",
        # Copy the list: add_synonyms() extends it in place and must not
        # change the list object owned by the caller.
        "synonyms": list(synonyms) if synonyms else [],
        "inlineCitation": inline_citation,
    }
    self._access_token = otcs_object.otcs_ticket()
|
|
94
|
+
|
|
95
|
+
# end method definition
|
|
96
|
+
|
|
97
|
+
def config(self) -> dict:
    """Provide the configuration of this object.

    Returns:
        dict:
            The configuration dictionary stored on the object
            (the object itself, not a copy).

    """

    current_config = self._config
    return current_config
|
|
106
|
+
|
|
107
|
+
# end method definition
|
|
108
|
+
|
|
109
|
+
def get_context(self) -> str:
    """Provide the chat context (history) stored on this object.

    Returns:
        str:
            The current chat context.

    """

    chat_context = self._context
    return chat_context
|
|
119
|
+
|
|
120
|
+
# end method definition
|
|
121
|
+
|
|
122
|
+
def get_synonyms(self) -> list:
    """Get the configured synonyms.

    Returns:
        list:
            The "synonyms" entry of the configuration. It is meant to be a
            list of lists where each inner list holds terms that are
            synonyms of each other.

    """

    settings = self.config()
    return settings["synonyms"]
|
|
135
|
+
|
|
136
|
+
# end method definition
|
|
137
|
+
|
|
138
|
+
def add_synonyms(self, synonyms: list) -> None:
    """Append further synonyms to the configured synonyms.

    Args:
        synonyms (list):
            Synonyms to add. The elements are appended to the existing
            "synonyms" list of the configuration (in place).

    """

    configured_synonyms = self.config()["synonyms"]
    configured_synonyms.extend(synonyms)
|
|
148
|
+
|
|
149
|
+
# end method definition
|
|
150
|
+
|
|
151
|
+
def request_header(self, content_type: str = "") -> dict:
    """Return the request header used for requests.

    Consists of the default headers (REQUEST_HEADERS), an optional custom
    content type and - if an access token is available - the bearer token.
    A new dictionary is returned on every call, the module-level defaults
    are never modified.

    Args:
        content_type (str, optional):
            Custom content type for the request. If empty, the content type
            of the default headers is kept.
            Typical values:
            * application/json - Used for sending JSON-encoded data
            * application/x-www-form-urlencoded - The default for HTML forms.
              Data is sent as key-value pairs in the body of the request, similar to query parameters.
            * multipart/form-data - Used for file uploads or when a form includes non-ASCII characters

    Returns:
        dict:
            The request header values.

    """

    # Work on a copy. Writing into REQUEST_HEADERS itself would leak a custom
    # content type and the token into all later calls and all other instances.
    request_header = dict(REQUEST_HEADERS)

    if content_type:
        request_header["Content-Type"] = content_type

    if self._access_token is not None:
        request_header["Authorization"] = f"Bearer {self._access_token}"

    return request_header
|
|
179
|
+
|
|
180
|
+
# end method definition
|
|
181
|
+
|
|
182
|
+
def do_request(
    self,
    url: str,
    method: str = "GET",
    headers: dict | None = None,
    data: dict | list | None = None,
    json_data: dict | None = None,
    files: dict | None = None,
    timeout: int | None = REQUEST_TIMEOUT,
    show_error: bool = True,
    failure_message: str = "",
    success_message: str = "",
    max_retries: int = REQUEST_MAX_RETRIES,
    retry_forever: bool = False,
) -> dict | None:
    """Call a Content Aviator REST API in a safe way.

    Timeouts and connection errors are retried after a pause of
    REQUEST_RETRY_DELAY seconds as long as the retry counter does not
    exceed max_retries. A 401 response on the first attempt triggers one
    re-authentication attempt if the object provides an authenticate() method.

    Args:
        url (str):
            URL to send the request to.
        method (str, optional):
            HTTP method (GET, POST, etc.). Defaults to "GET".
        headers (dict | None, optional):
            Request headers. Defaults to None.
        data (dict | list | None, optional):
            Request payload (passed as "data" to requests). Defaults to None.
        json_data (dict | None, optional):
            Request payload for the JSON parameter. Defaults to None.
        files (dict | None, optional):
            Dictionary of {"name": file-tuple} for multipart encoding upload.
            The file-tuple can be a 2-tuple ("filename", fileobj) or a 3-tuple
            ("filename", fileobj, "content_type").
        timeout (int | None, optional):
            Timeout for the request in seconds. Defaults to REQUEST_TIMEOUT.
        show_error (bool, optional):
            Whether or not an error should be logged in case of a failed REST call.
            If False, then only a warning is logged. Defaults to True.
        failure_message (str, optional):
            Specific error message. Defaults to "".
        success_message (str, optional):
            Specific success message. Defaults to "".
        max_retries (int, optional):
            Retry limit for timeouts and connection errors. Defaults to REQUEST_MAX_RETRIES.
        retry_forever (bool, optional):
            If True, the timeout is switched off once the retry limit is
            exceeded and the request is repeated instead of giving up.
            Defaults to False.

    Returns:
        dict | None:
            Response of Content Aviator REST API or None in case of an error.

    """

    retries = 0
    while True:
        try:
            response = requests.request(
                method=method,
                url=url,
                data=data,
                json=json_data,
                files=files,
                headers=headers,
                timeout=timeout,
            )

            if response.ok:
                if success_message:
                    self.logger.debug(success_message)
                return self.parse_request_response(response)

            # Check if Session has expired - then re-authenticate and try once more.
            # authenticate() is looked up dynamically: if the object does not
            # provide it, the 401 is reported like any other failed request
            # instead of raising an AttributeError.
            authenticate = getattr(self, "authenticate", None)
            if response.status_code == 401 and retries == 0 and callable(authenticate):
                self.logger.debug("Session has expired - try to re-authenticate...")
                authenticate()
                # The headers passed by the caller still carry the old token -
                # replace it, otherwise the retry would fail the same way.
                if headers is not None and "Authorization" in headers:
                    new_authorization = self.request_header().get("Authorization")
                    if new_authorization:
                        headers = {**headers, "Authorization": new_authorization}
                retries += 1
                continue

            # Handle plain HTML responses to not pollute the logs.
            # Servers typically add parameters ("text/html; charset=utf-8"),
            # so only the media type itself is compared.
            content_type = response.headers.get("content-type", None)
            is_html = bool(content_type) and content_type.split(";")[0].strip().lower() == "text/html"
            response_text = "HTML content (see debug log)" if is_html else response.text

            if show_error:
                self.logger.error(
                    "%s; status -> %s; error -> %s",
                    failure_message,
                    response.status_code,
                    response_text,
                )
            else:
                self.logger.warning(
                    "%s; status -> %s; warning -> %s",
                    failure_message,
                    response.status_code,
                    response_text,
                )

            if is_html:
                self.logger.debug(
                    "%s; status -> %s; warning -> %s",
                    failure_message,
                    response.status_code,
                    response.text,
                )

            return None
        except requests.exceptions.Timeout:
            if retries <= max_retries:
                self.logger.warning(
                    "Request timed out. Retrying in %s seconds...",
                    str(REQUEST_RETRY_DELAY),
                )
                retries += 1
                time.sleep(REQUEST_RETRY_DELAY)  # Add a delay before retrying
            else:
                self.logger.error(
                    "%s; timeout error.",
                    failure_message,
                )
                if retry_forever:
                    # If it fails after REQUEST_MAX_RETRIES retries we let it wait forever
                    self.logger.warning("Turn timeouts off and wait forever...")
                    timeout = None
                else:
                    return None
        except requests.exceptions.ConnectionError:
            if retries <= max_retries:
                self.logger.warning(
                    "Connection error. Retrying in %s seconds...",
                    str(REQUEST_RETRY_DELAY),
                )
                retries += 1
                time.sleep(REQUEST_RETRY_DELAY)  # Add a delay before retrying
            else:
                self.logger.error(
                    "%s; connection error.",
                    failure_message,
                )
                if retry_forever:
                    # If it fails after REQUEST_MAX_RETRIES retries we let it wait forever
                    self.logger.warning("Turn timeouts off and wait forever...")
                    timeout = None
                    time.sleep(REQUEST_RETRY_DELAY)  # Add a delay before retrying
                else:
                    return None
|
|
324
|
+
|
|
325
|
+
# end method definition
|
|
326
|
+
|
|
327
|
+
def parse_request_response(
    self,
    response_object: requests.Response,
    additional_error_message: str = "",
    show_error: bool = True,
) -> list | None:
    """Convert the body of a response into a Python object without raising on invalid JSON.

    A non-empty response.text is decoded with json.loads(). If the text is
    empty, the attributes of the response object itself are returned
    (via the vars() built-in).

    Args:
        response_object (requests.Response):
            This is the response object delivered by the request call.
            A falsy response object yields None.
            NOTE(review): requests.Response is presumably falsy for
            non-OK status codes, too - confirm this is intended.
        additional_error_message (str, optional):
            Use a more specific error message in case of an error.
        show_error (bool, optional):
            If True, write an error to the log file.
            If False, write a warning to the log file.

    Returns:
        list | None:
            The decoded response information or None in case of an error.

    """

    if not response_object:
        return None

    try:
        body = response_object.text
        parsed = json.loads(body) if body else vars(response_object)
    except json.JSONDecodeError as exception:
        if additional_error_message:
            message = "Cannot decode response as JSON. {}; error -> {}".format(
                additional_error_message,
                exception,
            )
        else:
            message = "Cannot decode response as JSON; error -> {}".format(
                exception,
            )
        # Severity of the log entry is selected by the caller:
        write_log = self.logger.error if show_error else self.logger.warning
        write_log(message)
        return None

    return parsed
|
|
377
|
+
|
|
378
|
+
# end method definition
|
|
379
|
+
|
|
380
|
+
def chat(self, context: str | None, messages: list, where: list) -> dict:
|
|
381
|
+
"""Process a chat interaction with Content Aviator.
|
|
382
|
+
|
|
383
|
+
Chat requests are meant to be called as end-users. This should involve
|
|
384
|
+
passing the end-user's access token via the Authorization HTTP header.
|
|
385
|
+
The chat service use OTDS's token endpoint to ensure that the token is valid.
|
|
386
|
+
|
|
387
|
+
Args:
|
|
388
|
+
context (str | None):
|
|
389
|
+
Context for the current conversation
|
|
390
|
+
(empty initially, returned by previous responses from POST /v1/chat).
|
|
391
|
+
messages (list):
|
|
392
|
+
List of messages from conversation history.
|
|
393
|
+
where (list):
|
|
394
|
+
Metadata name/value pairs for the query.
|
|
395
|
+
Could be used to specify workspaces, documents, or other criteria in the future.
|
|
396
|
+
Values need to match those passed as metadata to the embeddings API.
|
|
397
|
+
|
|
398
|
+
Returns:
|
|
399
|
+
dict: _description_
|
|
400
|
+
|
|
401
|
+
"""
|
|
402
|
+
|
|
403
|
+
request_url = self.config()["chatUrl"]
|
|
404
|
+
request_header = self.request_header()
|
|
405
|
+
|
|
406
|
+
chat_data = {
|
|
407
|
+
"context": context,
|
|
408
|
+
"messages": messages,
|
|
409
|
+
"where": where,
|
|
410
|
+
"synonyms": self.config()["synonyms"],
|
|
411
|
+
"inlineCitation": self.config()["inlineCitation"],
|
|
412
|
+
}
|
|
413
|
+
|
|
414
|
+
return self.do_request(
|
|
415
|
+
url=request_url,
|
|
416
|
+
method="POST",
|
|
417
|
+
headers=request_header,
|
|
418
|
+
data=chat_data,
|
|
419
|
+
timeout=None,
|
|
420
|
+
failure_message="Failed to chat with Content Aviator",
|
|
421
|
+
)
|
|
422
|
+
|
|
423
|
+
# end method definition
|
|
424
|
+
|
|
425
|
+
def search(self, query: str, threshold: float, num_results: int, document_ids: list, workspace_ids: list) -> dict:
|
|
426
|
+
"""Semantic search for text chunks.
|
|
427
|
+
|
|
428
|
+
Search requests are meant to be called as end-users. This should involve
|
|
429
|
+
passing the end-user's access token via the Authorization HTTP header.
|
|
430
|
+
The chat service use OTDS's token endpoint to ensure that the token is valid.
|
|
431
|
+
|
|
432
|
+
Args:
|
|
433
|
+
query (str):
|
|
434
|
+
The query.
|
|
435
|
+
threshold (float):
|
|
436
|
+
Minimum similarity score to accept a document. A value like 0.7 means
|
|
437
|
+
only bring back documents that are at least 70% similar.
|
|
438
|
+
num_results (int):
|
|
439
|
+
Also called "top-k". Defined how many "most similar" documents to retrieve.
|
|
440
|
+
Typical value: 3-20. Higher values gets broader context but risks pulling
|
|
441
|
+
in less relevant documents.
|
|
442
|
+
document_ids (list):
|
|
443
|
+
List of documents (IDs) to use as scope for the query.
|
|
444
|
+
workspace_ids (list):
|
|
445
|
+
List of workspaces (IDs) to use as scope for the query.
|
|
446
|
+
|
|
447
|
+
Returns:
|
|
448
|
+
dict:
|
|
449
|
+
Results of the search.
|
|
450
|
+
|
|
451
|
+
Example:
|
|
452
|
+
[
|
|
453
|
+
{
|
|
454
|
+
"pageContent": "matched chunk"
|
|
455
|
+
"metadata": {
|
|
456
|
+
"documentID": 1234,
|
|
457
|
+
"workspaceID": 4711,
|
|
458
|
+
"some-id": 123
|
|
459
|
+
},
|
|
460
|
+
"distance": 0.13
|
|
461
|
+
},
|
|
462
|
+
{
|
|
463
|
+
"pageContent": "matched chunk1"
|
|
464
|
+
"metadata": {
|
|
465
|
+
"documentID": 5678,
|
|
466
|
+
"workspaceID": 47272
|
|
467
|
+
},
|
|
468
|
+
"distance": 0.22
|
|
469
|
+
}
|
|
470
|
+
]
|
|
471
|
+
|
|
472
|
+
"""
|
|
473
|
+
|
|
474
|
+
# Validations:
|
|
475
|
+
if not workspace_ids and not document_ids:
|
|
476
|
+
self.logger.error("Either workspace ID(s) or document ID(s) need to be provided!")
|
|
477
|
+
return None
|
|
478
|
+
|
|
479
|
+
request_url = self.config()["searchUrl"]
|
|
480
|
+
request_header = self.request_header()
|
|
481
|
+
|
|
482
|
+
search_data = {
|
|
483
|
+
"query": query,
|
|
484
|
+
"threshold": threshold,
|
|
485
|
+
"numResults": num_results,
|
|
486
|
+
"metadata": [],
|
|
487
|
+
}
|
|
488
|
+
|
|
489
|
+
for document_id in document_ids:
|
|
490
|
+
search_data["metadata"].append({"documentID": document_id})
|
|
491
|
+
for workspace_id in workspace_ids:
|
|
492
|
+
search_data["metadata"].append({"workspaceID": workspace_id})
|
|
493
|
+
|
|
494
|
+
return self.do_request(
|
|
495
|
+
url=request_url,
|
|
496
|
+
method="POST",
|
|
497
|
+
headers=request_header,
|
|
498
|
+
data=search_data,
|
|
499
|
+
timeout=None,
|
|
500
|
+
failure_message="Failed to to do a semantic search with query -> '{}'!".format(query),
|
|
501
|
+
)
|
|
502
|
+
|
|
503
|
+
# end method definition
|
|
504
|
+
|
|
505
|
+
def embed(
|
|
506
|
+
self,
|
|
507
|
+
content: str | None = None,
|
|
508
|
+
operation: str = "add",
|
|
509
|
+
document_id: int | None = None,
|
|
510
|
+
workspace_id: int | None = None,
|
|
511
|
+
additional_metadata: dict | None = None,
|
|
512
|
+
) -> dict:
|
|
513
|
+
"""Embed a given content.
|
|
514
|
+
|
|
515
|
+
Requests are meant to be called as a service user. This would involve passing a service user's access token
|
|
516
|
+
(token from a particular OAuth confidential client, using client credentials grant).
|
|
517
|
+
|
|
518
|
+
Args:
|
|
519
|
+
content (str | None):
|
|
520
|
+
Content to be embedded. Can be empty for "delete" operations.
|
|
521
|
+
operation (str):
|
|
522
|
+
This can be either "add", "update" or "delete".
|
|
523
|
+
document_id (int):
|
|
524
|
+
The ID of the document the content originates from.
|
|
525
|
+
workspace_id (int):
|
|
526
|
+
The ID of the workspace the content originates from.
|
|
527
|
+
additional_metadata (dict | None):
|
|
528
|
+
Dictionary with additional metadata.
|
|
529
|
+
|
|
530
|
+
Returns:
|
|
531
|
+
dict: _description_
|
|
532
|
+
|
|
533
|
+
"""
|
|
534
|
+
|
|
535
|
+
# Validations:
|
|
536
|
+
if operation not in ["add", "update", "delete"]:
|
|
537
|
+
self.logger.error("Illegal embed operation -> '%s'!", operation)
|
|
538
|
+
return None
|
|
539
|
+
if operation != "delete" and not content:
|
|
540
|
+
self.logger.error("Add or update operation require content to embed!")
|
|
541
|
+
return None
|
|
542
|
+
|
|
543
|
+
request_url = self.config()["embedUrl"]
|
|
544
|
+
request_header = self.request_header()
|
|
545
|
+
|
|
546
|
+
metadata = {}
|
|
547
|
+
if workspace_id:
|
|
548
|
+
metadata["workspaceID"] = workspace_id
|
|
549
|
+
if document_id:
|
|
550
|
+
metadata["documentID"] = document_id
|
|
551
|
+
if additional_metadata:
|
|
552
|
+
metadata.update(additional_metadata)
|
|
553
|
+
|
|
554
|
+
embed_data = {
|
|
555
|
+
"content": content,
|
|
556
|
+
"operation": operation,
|
|
557
|
+
"metadata": metadata,
|
|
558
|
+
}
|
|
559
|
+
|
|
560
|
+
return self.do_request(
|
|
561
|
+
url=request_url,
|
|
562
|
+
method="POST",
|
|
563
|
+
headers=request_header,
|
|
564
|
+
data=embed_data,
|
|
565
|
+
timeout=None,
|
|
566
|
+
failure_message="Failed to embed content",
|
|
567
|
+
)
|
|
568
|
+
|
|
569
|
+
# end method definition
|