alita-sdk 0.3.365__py3-none-any.whl → 0.3.462__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of alita-sdk might be problematic; see the registry advisory for details.
- alita_sdk/cli/__init__.py +10 -0
- alita_sdk/cli/__main__.py +17 -0
- alita_sdk/cli/agent_executor.py +144 -0
- alita_sdk/cli/agent_loader.py +197 -0
- alita_sdk/cli/agent_ui.py +166 -0
- alita_sdk/cli/agents.py +1069 -0
- alita_sdk/cli/callbacks.py +576 -0
- alita_sdk/cli/cli.py +159 -0
- alita_sdk/cli/config.py +153 -0
- alita_sdk/cli/formatting.py +182 -0
- alita_sdk/cli/mcp_loader.py +315 -0
- alita_sdk/cli/toolkit.py +330 -0
- alita_sdk/cli/toolkit_loader.py +55 -0
- alita_sdk/cli/tools/__init__.py +9 -0
- alita_sdk/cli/tools/filesystem.py +905 -0
- alita_sdk/configurations/bitbucket.py +95 -0
- alita_sdk/configurations/confluence.py +96 -1
- alita_sdk/configurations/gitlab.py +79 -0
- alita_sdk/configurations/jira.py +103 -0
- alita_sdk/configurations/testrail.py +88 -0
- alita_sdk/configurations/xray.py +93 -0
- alita_sdk/configurations/zephyr_enterprise.py +93 -0
- alita_sdk/configurations/zephyr_essential.py +75 -0
- alita_sdk/runtime/clients/artifact.py +1 -1
- alita_sdk/runtime/clients/client.py +47 -10
- alita_sdk/runtime/clients/mcp_discovery.py +342 -0
- alita_sdk/runtime/clients/mcp_manager.py +262 -0
- alita_sdk/runtime/clients/sandbox_client.py +373 -0
- alita_sdk/runtime/langchain/assistant.py +70 -41
- alita_sdk/runtime/langchain/constants.py +6 -1
- alita_sdk/runtime/langchain/document_loaders/AlitaDocxMammothLoader.py +315 -3
- alita_sdk/runtime/langchain/document_loaders/AlitaJSONLoader.py +4 -1
- alita_sdk/runtime/langchain/document_loaders/constants.py +73 -100
- alita_sdk/runtime/langchain/langraph_agent.py +164 -38
- alita_sdk/runtime/langchain/utils.py +43 -7
- alita_sdk/runtime/models/mcp_models.py +61 -0
- alita_sdk/runtime/toolkits/__init__.py +24 -0
- alita_sdk/runtime/toolkits/application.py +8 -1
- alita_sdk/runtime/toolkits/artifact.py +5 -6
- alita_sdk/runtime/toolkits/mcp.py +895 -0
- alita_sdk/runtime/toolkits/tools.py +140 -50
- alita_sdk/runtime/tools/__init__.py +7 -2
- alita_sdk/runtime/tools/application.py +7 -0
- alita_sdk/runtime/tools/function.py +94 -5
- alita_sdk/runtime/tools/graph.py +10 -4
- alita_sdk/runtime/tools/image_generation.py +104 -8
- alita_sdk/runtime/tools/llm.py +204 -114
- alita_sdk/runtime/tools/mcp_inspect_tool.py +284 -0
- alita_sdk/runtime/tools/mcp_remote_tool.py +166 -0
- alita_sdk/runtime/tools/mcp_server_tool.py +3 -1
- alita_sdk/runtime/tools/sandbox.py +180 -79
- alita_sdk/runtime/tools/vectorstore.py +22 -21
- alita_sdk/runtime/tools/vectorstore_base.py +79 -26
- alita_sdk/runtime/utils/mcp_oauth.py +164 -0
- alita_sdk/runtime/utils/mcp_sse_client.py +405 -0
- alita_sdk/runtime/utils/streamlit.py +34 -3
- alita_sdk/runtime/utils/toolkit_utils.py +14 -4
- alita_sdk/runtime/utils/utils.py +1 -0
- alita_sdk/tools/__init__.py +48 -31
- alita_sdk/tools/ado/repos/__init__.py +1 -0
- alita_sdk/tools/ado/test_plan/__init__.py +1 -1
- alita_sdk/tools/ado/wiki/__init__.py +1 -5
- alita_sdk/tools/ado/work_item/__init__.py +1 -5
- alita_sdk/tools/ado/work_item/ado_wrapper.py +17 -8
- alita_sdk/tools/base_indexer_toolkit.py +194 -112
- alita_sdk/tools/bitbucket/__init__.py +1 -0
- alita_sdk/tools/chunkers/sematic/proposal_chunker.py +1 -1
- alita_sdk/tools/code/sonar/__init__.py +1 -1
- alita_sdk/tools/code_indexer_toolkit.py +15 -5
- alita_sdk/tools/confluence/__init__.py +2 -2
- alita_sdk/tools/confluence/api_wrapper.py +110 -63
- alita_sdk/tools/confluence/loader.py +10 -0
- alita_sdk/tools/elitea_base.py +22 -22
- alita_sdk/tools/github/__init__.py +2 -2
- alita_sdk/tools/gitlab/__init__.py +2 -1
- alita_sdk/tools/gitlab/api_wrapper.py +11 -7
- alita_sdk/tools/gitlab_org/__init__.py +1 -2
- alita_sdk/tools/google_places/__init__.py +2 -1
- alita_sdk/tools/jira/__init__.py +1 -0
- alita_sdk/tools/jira/api_wrapper.py +1 -1
- alita_sdk/tools/memory/__init__.py +1 -1
- alita_sdk/tools/non_code_indexer_toolkit.py +2 -2
- alita_sdk/tools/openapi/__init__.py +10 -1
- alita_sdk/tools/pandas/__init__.py +1 -1
- alita_sdk/tools/postman/__init__.py +2 -1
- alita_sdk/tools/postman/api_wrapper.py +18 -8
- alita_sdk/tools/postman/postman_analysis.py +8 -1
- alita_sdk/tools/pptx/__init__.py +2 -2
- alita_sdk/tools/qtest/__init__.py +3 -3
- alita_sdk/tools/qtest/api_wrapper.py +1708 -76
- alita_sdk/tools/rally/__init__.py +1 -2
- alita_sdk/tools/report_portal/__init__.py +1 -0
- alita_sdk/tools/salesforce/__init__.py +1 -0
- alita_sdk/tools/servicenow/__init__.py +2 -3
- alita_sdk/tools/sharepoint/__init__.py +1 -0
- alita_sdk/tools/sharepoint/api_wrapper.py +125 -34
- alita_sdk/tools/sharepoint/authorization_helper.py +191 -1
- alita_sdk/tools/sharepoint/utils.py +8 -2
- alita_sdk/tools/slack/__init__.py +1 -0
- alita_sdk/tools/sql/__init__.py +2 -1
- alita_sdk/tools/sql/api_wrapper.py +71 -23
- alita_sdk/tools/testio/__init__.py +1 -0
- alita_sdk/tools/testrail/__init__.py +1 -3
- alita_sdk/tools/utils/__init__.py +17 -0
- alita_sdk/tools/utils/content_parser.py +35 -24
- alita_sdk/tools/vector_adapters/VectorStoreAdapter.py +67 -21
- alita_sdk/tools/xray/__init__.py +2 -1
- alita_sdk/tools/zephyr/__init__.py +2 -1
- alita_sdk/tools/zephyr_enterprise/__init__.py +1 -0
- alita_sdk/tools/zephyr_essential/__init__.py +1 -0
- alita_sdk/tools/zephyr_scale/__init__.py +1 -0
- alita_sdk/tools/zephyr_squad/__init__.py +1 -0
- {alita_sdk-0.3.365.dist-info → alita_sdk-0.3.462.dist-info}/METADATA +8 -2
- {alita_sdk-0.3.365.dist-info → alita_sdk-0.3.462.dist-info}/RECORD +118 -93
- alita_sdk-0.3.462.dist-info/entry_points.txt +2 -0
- {alita_sdk-0.3.365.dist-info → alita_sdk-0.3.462.dist-info}/WHEEL +0 -0
- {alita_sdk-0.3.365.dist-info → alita_sdk-0.3.462.dist-info}/licenses/LICENSE +0 -0
- {alita_sdk-0.3.365.dist-info → alita_sdk-0.3.462.dist-info}/top_level.txt +0 -0
@@ -207,9 +207,9 @@ class VectorStoreWrapper(BaseToolApiWrapper):
             tool_name="_remove_collection"
         )

-    def _get_indexed_ids(self,
+    def _get_indexed_ids(self, index_name: Optional[str] = '') -> List[str]:
         """Get all indexed document IDs from vectorstore"""
-        return self.vector_adapter.get_indexed_ids(self,
+        return self.vector_adapter.get_indexed_ids(self, index_name)

     def list_collections(self) -> Any:
         """List all collections in the vectorstore.
@@ -233,7 +233,7 @@ class VectorStoreWrapper(BaseToolApiWrapper):
             return {"collections": [], "message": "No indexed collections"}
         return cols

-    def _clean_collection(self,
+    def _clean_collection(self, index_name: str = ''):
         """
         Clean the vectorstore collection by deleting all indexed data.
         """
@@ -241,15 +241,15 @@ class VectorStoreWrapper(BaseToolApiWrapper):
             f"Cleaning collection '{self.dataset}'",
             tool_name="_clean_collection"
         )
-        self.vector_adapter.clean_collection(self,
+        self.vector_adapter.clean_collection(self, index_name)
         self._log_data(
             f"Collection '{self.dataset}' has been cleaned. ",
             tool_name="_clean_collection"
         )

-    def _get_code_indexed_data(self,
+    def _get_code_indexed_data(self, index_name: str) -> Dict[str, Dict[str, Any]]:
         """ Get all indexed data from vectorstore for code content """
-        return self.vector_adapter.get_code_indexed_data(self,
+        return self.vector_adapter.get_code_indexed_data(self, index_name)

     def _add_to_collection(self, entry_id, new_collection_value):
         """Add a new collection name to the `collection` key in the `metadata` column."""
@@ -258,7 +258,7 @@ class VectorStoreWrapper(BaseToolApiWrapper):
     def _reduce_duplicates(
             self,
             documents: Generator[Any, None, None],
-
+            index_name: str,
             get_indexed_data: Callable,
             key_fn: Callable,
             compare_fn: Callable,
@@ -267,7 +267,7 @@ class VectorStoreWrapper(BaseToolApiWrapper):
     ) -> List[Any]:
         """Generic duplicate reduction logic for documents."""
         self._log_data(log_msg, tool_name="index_documents")
-        indexed_data = get_indexed_data(
+        indexed_data = get_indexed_data(index_name)
         indexed_keys = set(indexed_data.keys())
         if not indexed_keys:
             self._log_data("Vectorstore is empty, indexing all incoming documents", tool_name="index_documents")
@@ -279,14 +279,14 @@ class VectorStoreWrapper(BaseToolApiWrapper):
         for document in documents:
             key = key_fn(document)
             key = key if isinstance(key, str) else str(key)
-            if key in indexed_keys and
+            if key in indexed_keys and index_name == indexed_data[key]['metadata'].get('collection'):
                 if compare_fn(document, indexed_data[key]):
                     # Disabled addition of new collection to already indexed documents
                     # # check metadata.collection and update if needed
                     # for update_collection_id in remove_ids_fn(indexed_data, key):
                     #     self._add_to_collection(
                     #         update_collection_id,
-                    #
+                    #         index_name
                     #     )
                     continue
             final_docs.append(document)
@@ -303,10 +303,10 @@ class VectorStoreWrapper(BaseToolApiWrapper):

         return final_docs

-    def _reduce_code_duplicates(self, documents: Generator[Any, None, None],
+    def _reduce_code_duplicates(self, documents: Generator[Any, None, None], index_name: str) -> List[Any]:
         return self._reduce_duplicates(
             documents,
-
+            index_name,
             self._get_code_indexed_data,
             lambda doc: doc.metadata.get('filename'),
             lambda doc, idx: (
@@ -318,7 +318,7 @@ class VectorStoreWrapper(BaseToolApiWrapper):
             log_msg="Verification of code documents to index started"
         )

-    def index_documents(self, documents: Generator[Document, None, None],
+    def index_documents(self, documents: Generator[Document, None, None], index_name: str, progress_step: int = 20, clean_index: bool = True, is_code: bool = True):
         """ Index documents in the vectorstore.

         Args:
@@ -329,13 +329,13 @@ class VectorStoreWrapper(BaseToolApiWrapper):

         from ..langchain.interfaces.llm_processor import add_documents

-        self._log_tool_event(message=f"Starting the indexing... Parameters: {
+        self._log_tool_event(message=f"Starting the indexing... Parameters: {index_name=}, {clean_index=}, {is_code}", tool_name="index_documents")
         # pre-process documents if needed (find duplicates, etc.)
         if clean_index:
             logger.info("Cleaning index before re-indexing all documents.")
             self._log_data("Cleaning index before re-indexing all documents. Previous index will be removed", tool_name="index_documents")
             try:
-                self._clean_collection(
+                self._clean_collection(index_name)
                 self.vectoradapter.persist()
                 self.vectoradapter.vacuum()
                 self._log_data("Previous index has been removed",
@@ -349,7 +349,7 @@ class VectorStoreWrapper(BaseToolApiWrapper):
             message="Filter for duplicates",
             tool_name="index_documents")
         # remove duplicates based on metadata 'id' and 'updated_on' or 'commit_hash' fields
-        documents = self._reduce_code_duplicates(documents,
+        documents = self._reduce_code_duplicates(documents, index_name)
         self._log_tool_event(
             message="All the duplicates were filtered out. Proceeding with indexing.",
             tool_name="index_documents")
@@ -377,13 +377,13 @@ class VectorStoreWrapper(BaseToolApiWrapper):
         self._log_tool_event(message=f"Documents for indexing were processed. Total documents: {len(documents)}",
                              tool_name="index_documents")

-        # if
-        if
+        # if index_name is provided, add it to metadata of each document
+        if index_name:
             for doc in documents:
                 if not doc.metadata.get('collection'):
-                    doc.metadata['collection'] =
+                    doc.metadata['collection'] = index_name
                 else:
-                    doc.metadata['collection'] += f";{
+                    doc.metadata['collection'] += f";{index_name}"

         total_docs = len(documents)
         documents_count = 0
@@ -414,7 +414,8 @@ class VectorStoreWrapper(BaseToolApiWrapper):
                 return {"status": "error", "message": f"Error: {format_exc()}"}
         if _documents:
             add_documents(vectorstore=self.vectorstore, documents=_documents)
-        return {"status": "ok", "message": f"successfully indexed {documents_count} documents"
+        return {"status": "ok", "message": f"successfully indexed {documents_count} documents" if documents_count > 0
+                else "No new documents to index."}

     def search_documents(self, query:str, doctype: str = 'code',
                          filter:dict|str={}, cut_off: float=0.5,
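The hunks above thread a new index_name parameter through VectorStoreWrapper.index_documents and its duplicate-reduction helpers, and also stamp it into each document's metadata['collection']. Below is a minimal, hypothetical usage sketch of the new signature; the pre-configured wrapper instance and the document generator are assumptions for illustration and are not part of this diff.

from langchain_core.documents import Document

# `wrapper` is assumed to be an already configured VectorStoreWrapper instance.
docs = (
    Document(page_content=src, metadata={"filename": name})
    for name, src in [("a.py", "a = 1"), ("b.py", "b = 2")]
)

result = wrapper.index_documents(
    docs,
    index_name="my_repo_main",  # new parameter; also written into doc.metadata['collection']
    clean_index=False,          # keep documents already indexed under other collections
    is_code=True,
)
print(result)  # e.g. {"status": "ok", "message": "successfully indexed 2 documents"}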
@@ -1,16 +1,18 @@
 import json
-import math
 from collections import OrderedDict
 from logging import getLogger
 from typing import Any, Optional, List, Dict, Generator

+import math
 from langchain_core.documents import Document
 from langchain_core.messages import HumanMessage
+from langchain_core.tools import ToolException
+from psycopg.errors import DataException
 from pydantic import BaseModel, model_validator, Field

 from alita_sdk.tools.elitea_base import BaseToolApiWrapper
 from alita_sdk.tools.vector_adapters.VectorStoreAdapter import VectorStoreAdapterFactory
-from
+from ...runtime.utils.utils import IndexerKeywords

 logger = getLogger(__name__)

@@ -175,6 +177,37 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
         except Exception as e:
             logger.error(f"Failed to initialize PGVectorSearch: {str(e)}")

+    def _similarity_search_with_score(self, query: str, filter: dict = None, k: int = 10):
+        """
+        Perform similarity search with proper exception handling for DataException.
+
+        Args:
+            query: Search query string
+            filter: Optional filter dictionary
+            k: Number of results to return
+
+        Returns:
+            List of (Document, score) tuples
+
+        Raises:
+            ToolException: When DataException occurs or other search errors
+        """
+        try:
+            return self.vectorstore.similarity_search_with_score(
+                query, filter=filter, k=k
+            )
+        except DataException as dimException:
+            exception_str = str(dimException)
+            if 'different vector dimensions' in exception_str:
+                logger.error(f"Data exception: {exception_str}")
+                raise ToolException(f"Global search cannot be completed since collections were indexed using "
+                                    f"different embedding models. Use search within a single collection."
+                                    f"\nDetails: {exception_str}")
+            raise ToolException(f"Data exception during search. Possibly invalid filter: {exception_str}")
+        except Exception as e:
+            logger.error(f"Error during similarity search: {str(e)}")
+            raise ToolException(f"Search failed: {str(e)}")
+
     def list_collections(self) -> List[str]:
         """List all collections in the vectorstore."""

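The new _similarity_search_with_score helper centralizes error handling for the search paths further down in this file: psycopg DataException errors (typically raised when collections built with different embedding dimensions are searched together) are converted into ToolException with an actionable message. A hedged sketch of how a caller might consume it follows; `wrapper` stands for an assumed, already-initialized VectorStoreWrapperBase instance and the filter shape is only illustrative.

from langchain_core.tools import ToolException

try:
    # Same call shape the search paths below now use internally.
    hits = wrapper._similarity_search_with_score("login flow", filter={"collection": "docs"}, k=5)
    for doc, score in hits:
        print(f"{score:.3f}", doc.metadata.get("source"))
except ToolException as exc:
    # e.g. a cross-collection search over indexes built with different embedding models
    print(f"Search rejected: {exc}")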
@@ -183,7 +216,28 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
             return "No indexed collections"
         return collections

-    def
+    def get_index_meta(self, index_name: str):
+        index_metas = self.vector_adapter.get_index_meta(self, index_name)
+        if len(index_metas) > 1:
+            raise RuntimeError(f"Multiple index_meta documents found: {index_metas}")
+        return index_metas[0] if index_metas else None
+
+    def get_indexed_count(self, index_name: str) -> int:
+        from sqlalchemy.orm import Session
+        from sqlalchemy import func, or_
+
+        with Session(self.vectorstore.session_maker.bind) as session:
+            return session.query(
+                self.vectorstore.EmbeddingStore.id,
+            ).filter(
+                func.jsonb_extract_path_text(self.vectorstore.EmbeddingStore.cmetadata, 'collection') == index_name,
+                or_(
+                    func.jsonb_extract_path_text(self.vectorstore.EmbeddingStore.cmetadata, 'type').is_(None),
+                    func.jsonb_extract_path_text(self.vectorstore.EmbeddingStore.cmetadata, 'type') != IndexerKeywords.INDEX_META_TYPE.value
+                )
+            ).count()
+
+    def _clean_collection(self, index_name: str = ''):
         """
         Clean the vectorstore collection by deleting all indexed data.
         """
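get_indexed_count counts the embeddings whose cmetadata collection value equals the given index name while skipping internal index_meta rows, and get_index_meta returns that single bookkeeping document (or None). A hypothetical call site, assuming a PGVector-backed wrapper that is already configured:

# `wrapper` is an assumed VectorStoreWrapperBase instance backed by PGVector.
index_name = "my_repo_main"

meta = wrapper.get_index_meta(index_name)      # None if no index_meta document exists
count = wrapper.get_indexed_count(index_name)  # embeddings only, index_meta rows excluded
print(f"'{index_name}': {count} indexed chunks, meta={meta}")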
@@ -191,13 +245,13 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
             f"Cleaning collection '{self.dataset}'",
             tool_name="_clean_collection"
         )
-        self.vector_adapter.clean_collection(self,
+        self.vector_adapter.clean_collection(self, index_name)
         self._log_tool_event(
             f"Collection '{self.dataset}' has been cleaned. ",
             tool_name="_clean_collection"
         )

-    def index_documents(self, documents: Generator[Document, None, None],
+    def index_documents(self, documents: Generator[Document, None, None], index_name: str, progress_step: int = 20, clean_index: bool = True):
         """ Index documents in the vectorstore.

         Args:
@@ -206,21 +260,21 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
             clean_index (bool): If True, clean the index before re-indexing all documents.
         """
         if clean_index:
-            self._clean_index(
+            self._clean_index(index_name)

-        return self._save_index(list(documents),
+        return self._save_index(list(documents), index_name, progress_step)

-    def _clean_index(self,
+    def _clean_index(self, index_name: str):
         logger.info("Cleaning index before re-indexing all documents.")
         self._log_tool_event("Cleaning index before re-indexing all documents. Previous index will be removed", tool_name="index_documents")
         try:
-            self._clean_collection(
+            self._clean_collection(index_name)
             self._log_tool_event("Previous index has been removed",
                                  tool_name="index_documents")
         except Exception as e:
             logger.warning(f"Failed to clean index: {str(e)}. Continuing with re-indexing.")

-    def _save_index(self, documents: list[Document],
+    def _save_index(self, documents: list[Document], index_name: Optional[str] = None, progress_step: int = 20):
         from ..langchain.interfaces.llm_processor import add_documents
         #
         for doc in documents:
@@ -229,13 +283,13 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):

         logger.debug(f"Indexing documents: {documents}")

-        # if
-        if
+        # if index_name is provided, add it to metadata of each document
+        if index_name:
             for doc in documents:
                 if not doc.metadata.get('collection'):
-                    doc.metadata['collection'] =
+                    doc.metadata['collection'] = index_name
                 else:
-                    doc.metadata['collection'] += f";{
+                    doc.metadata['collection'] += f";{index_name}"

         total_docs = len(documents)
         documents_count = 0
@@ -269,7 +323,8 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
                 return {"status": "error", "message": f"Error: {format_exc()}"}
         if _documents:
             add_documents(vectorstore=self.vectorstore, documents=_documents)
-        return {"status": "ok", "message": f"successfully indexed {documents_count} documents"
+        return {"status": "ok", "message": f"successfully indexed {documents_count} documents" if documents_count > 0
+                else "no documents to index"}

     def search_documents(self, query:str, doctype: str = 'code',
                          filter:dict|str={}, cut_off: float=0.5,
@@ -303,7 +358,7 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
             }

             try:
-                document_items = self.
+                document_items = self._similarity_search_with_score(
                     query, filter=document_filter, k=search_top
                 )
                 # Add document results to unique docs
@@ -336,18 +391,16 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
                 }

                 try:
-                    chunk_items = self.
+                    chunk_items = self._similarity_search_with_score(
                         query, filter=chunk_filter, k=search_top
                     )
-
-                    logger.debug(f"Chunk items for {chunk_type}: {chunk_items[0]}")
-
+
                     for doc, score in chunk_items:
                         # Create unique identifier for document
                         source = doc.metadata.get('source')
                         chunk_id = doc.metadata.get('chunk_id')
                         doc_id = f"{source}_{chunk_id}" if source and chunk_id else str(doc.metadata.get('id', id(doc)))
-
+
                         # Store document and its score
                         if doc_id not in unique_docs:
                             unique_docs[doc_id] = doc
@@ -367,9 +420,9 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
                     doc_filter = {
                         "$and": doc_filter_parts
                     }
-
+
                     try:
-                        fetch_items = self.
+                        fetch_items = self._similarity_search_with_score(
                             query, filter=doc_filter, k=1
                         )
                         if fetch_items:
@@ -383,7 +436,7 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
         else:
             # Default search behavior (unchanged)
             max_search_results = 30 if search_top * 3 > 30 else search_top * 3
-            vector_items = self.
+            vector_items = self._similarity_search_with_score(
                 query, filter=filter, k=max_search_results
             )

@@ -401,7 +454,7 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
         doc_map = OrderedDict(
             sorted(doc_map.items(), key=lambda x: x[1][1], reverse=True)
         )
-
+
         # Process full-text search if configured
         if full_text_search and full_text_search.get('enabled') and full_text_search.get('fields'):
             language = full_text_search.get('language', 'english')
@@ -414,7 +467,7 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
             for field_name in full_text_search.get('fields', []):
                 try:
                     text_results = self.pg_helper.full_text_search(field_name, query)
-
+
                     # Combine text search results with vector results
                     for result in text_results:
                         doc_id = result['id']
@@ -0,0 +1,164 @@
+import json
+import logging
+import re
+from typing import Any, Dict, Optional
+from urllib.parse import urlparse
+
+import requests
+from langchain_core.tools import ToolException
+
+logger = logging.getLogger(__name__)
+
+
+class McpAuthorizationRequired(ToolException):
+    """Raised when an MCP server requires OAuth authorization before use."""
+
+    def __init__(
+        self,
+        message: str,
+        server_url: str,
+        resource_metadata_url: Optional[str] = None,
+        www_authenticate: Optional[str] = None,
+        resource_metadata: Optional[Dict[str, Any]] = None,
+        status: Optional[int] = None,
+        tool_name: Optional[str] = None,
+    ):
+        super().__init__(message)
+        self.server_url = server_url
+        self.resource_metadata_url = resource_metadata_url
+        self.www_authenticate = www_authenticate
+        self.resource_metadata = resource_metadata
+        self.status = status
+        self.tool_name = tool_name
+
+    def to_dict(self) -> Dict[str, Any]:
+        return {
+            "message": str(self),
+            "server_url": self.server_url,
+            "resource_metadata_url": self.resource_metadata_url,
+            "www_authenticate": self.www_authenticate,
+            "resource_metadata": self.resource_metadata,
+            "status": self.status,
+            "tool_name": self.tool_name,
+        }
+
+
+def extract_resource_metadata_url(www_authenticate: Optional[str], server_url: Optional[str] = None) -> Optional[str]:
+    """
+    Pull the resource_metadata URL from a WWW-Authenticate header if present.
+    If not found and server_url is provided, try to construct resource metadata URLs.
+    """
+    if not www_authenticate and not server_url:
+        return None
+
+    # RFC9728 returns `resource_metadata="<url>"` inside the header value
+    if www_authenticate:
+        match = re.search(r'resource_metadata\s*=\s*\"?([^\", ]+)\"?', www_authenticate)
+        if match:
+            return match.group(1)
+
+    # For servers that don't provide resource_metadata in WWW-Authenticate,
+    # we'll return None and rely on inferring authorization servers from the realm
+    # or using well-known OAuth discovery endpoints directly
+    return None
+
+
+def fetch_oauth_authorization_server_metadata(base_url: str, timeout: int = 10) -> Optional[Dict[str, Any]]:
+    """
+    Fetch OAuth authorization server metadata from well-known endpoints.
+    Tries both oauth-authorization-server and openid-configuration discovery endpoints.
+    """
+    discovery_endpoints = [
+        f"{base_url}/.well-known/oauth-authorization-server",
+        f"{base_url}/.well-known/openid-configuration",
+    ]
+
+    for endpoint in discovery_endpoints:
+        try:
+            resp = requests.get(endpoint, timeout=timeout)
+            if resp.status_code == 200:
+                return resp.json()
+        except Exception as exc:
+            logger.debug(f"Failed to fetch OAuth metadata from {endpoint}: {exc}")
+            continue
+
+    return None
+
+
+def infer_authorization_servers_from_realm(www_authenticate: Optional[str], server_url: str) -> Optional[list]:
+    """
+    Infer authorization server URLs from WWW-Authenticate realm or server URL.
+    This is used when the server doesn't provide resource_metadata endpoint.
+    """
+    if not www_authenticate and not server_url:
+        return None
+
+    authorization_servers = []
+
+    # Try to extract realm from WWW-Authenticate header
+    realm = None
+    if www_authenticate:
+        realm_match = re.search(r'realm\s*=\s*\"([^\"]+)\"', www_authenticate)
+        if realm_match:
+            realm = realm_match.group(1)
+
+    # Parse the server URL to get base domain
+    parsed = urlparse(server_url)
+    base_url = f"{parsed.scheme}://{parsed.netloc}"
+
+    # Return the base authorization server URL (not the discovery endpoint)
+    # The client will append .well-known paths when fetching metadata
+    authorization_servers.append(base_url)
+
+    return authorization_servers if authorization_servers else None
+
+
+def fetch_resource_metadata(resource_metadata_url: str, timeout: int = 10) -> Optional[Dict[str, Any]]:
+    """Fetch and parse the protected resource metadata document."""
+    try:
+        resp = requests.get(resource_metadata_url, timeout=timeout)
+        resp.raise_for_status()
+        return resp.json()
+    except Exception as exc:  # broad catch – we want to surface auth requirement even if this fails
+        logger.warning("Failed to fetch resource metadata from %s: %s", resource_metadata_url, exc)
+        return None
+
+
+async def fetch_resource_metadata_async(resource_metadata_url: str, session=None, timeout: int = 10) -> Optional[Dict[str, Any]]:
+    """Async variant for fetching protected resource metadata."""
+    try:
+        import aiohttp
+
+        client_timeout = aiohttp.ClientTimeout(total=timeout)
+        if session:
+            async with session.get(resource_metadata_url, timeout=client_timeout) as resp:
+                text = await resp.text()
+        else:
+            async with aiohttp.ClientSession(timeout=client_timeout) as local_session:
+                async with local_session.get(resource_metadata_url) as resp:
+                    text = await resp.text()
+
+        try:
+            return json.loads(text)
+        except json.JSONDecodeError:
+            logger.warning("Resource metadata at %s is not valid JSON: %s", resource_metadata_url, text[:200])
+            return None
+    except Exception as exc:
+        logger.warning("Failed to fetch resource metadata from %s: %s", resource_metadata_url, exc)
+        return None
+
+
+def canonical_resource(server_url: str) -> str:
+    """Produce a canonical resource identifier for the MCP server."""
+    parsed = urlparse(server_url)
+    # Normalize scheme/host casing per RFC guidance
+    normalized = parsed._replace(
+        scheme=parsed.scheme.lower(),
+        netloc=parsed.netloc.lower(),
+    )
+    resource = normalized.geturl()
+
+    # Prefer form without trailing slash unless path is meaningful
+    if resource.endswith("/") and parsed.path in ("", "/"):
+        resource = resource[:-1]
+    return resource
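This added module (listed above as alita_sdk/runtime/utils/mcp_oauth.py) supplies the OAuth discovery primitives for the MCP clients: parse the WWW-Authenticate challenge, fetch protected-resource and authorization-server metadata, and raise McpAuthorizationRequired so a caller can start an authorization flow. A hedged sketch of how these pieces could be combined on a 401 response follows; the server URL and the probing request body are invented for illustration, and this is not necessarily how the SDK's MCP clients wire the helpers internally.

import requests

from alita_sdk.runtime.utils.mcp_oauth import (
    McpAuthorizationRequired,
    canonical_resource,
    extract_resource_metadata_url,
    fetch_resource_metadata,
    infer_authorization_servers_from_realm,
)

server_url = "https://mcp.example.com/sse"  # placeholder URL for illustration
resp = requests.post(server_url, json={"jsonrpc": "2.0", "method": "ping", "id": 1}, timeout=10)

if resp.status_code == 401:
    www_auth = resp.headers.get("WWW-Authenticate")
    metadata_url = extract_resource_metadata_url(www_auth, server_url)
    metadata = fetch_resource_metadata(metadata_url) if metadata_url else None
    if metadata is None:
        # Server did not expose resource metadata: fall back to guessing the
        # authorization server from the realm / base URL.
        metadata = {
            "resource": canonical_resource(server_url),
            "authorization_servers": infer_authorization_servers_from_realm(www_auth, server_url) or [],
        }
    raise McpAuthorizationRequired(
        "MCP server requires OAuth authorization",
        server_url=server_url,
        resource_metadata_url=metadata_url,
        www_authenticate=www_auth,
        resource_metadata=metadata,
        status=resp.status_code,
    )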