alita-sdk 0.3.365__py3-none-any.whl → 0.3.462__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of alita-sdk might be problematic.

Files changed (118)
  1. alita_sdk/cli/__init__.py +10 -0
  2. alita_sdk/cli/__main__.py +17 -0
  3. alita_sdk/cli/agent_executor.py +144 -0
  4. alita_sdk/cli/agent_loader.py +197 -0
  5. alita_sdk/cli/agent_ui.py +166 -0
  6. alita_sdk/cli/agents.py +1069 -0
  7. alita_sdk/cli/callbacks.py +576 -0
  8. alita_sdk/cli/cli.py +159 -0
  9. alita_sdk/cli/config.py +153 -0
  10. alita_sdk/cli/formatting.py +182 -0
  11. alita_sdk/cli/mcp_loader.py +315 -0
  12. alita_sdk/cli/toolkit.py +330 -0
  13. alita_sdk/cli/toolkit_loader.py +55 -0
  14. alita_sdk/cli/tools/__init__.py +9 -0
  15. alita_sdk/cli/tools/filesystem.py +905 -0
  16. alita_sdk/configurations/bitbucket.py +95 -0
  17. alita_sdk/configurations/confluence.py +96 -1
  18. alita_sdk/configurations/gitlab.py +79 -0
  19. alita_sdk/configurations/jira.py +103 -0
  20. alita_sdk/configurations/testrail.py +88 -0
  21. alita_sdk/configurations/xray.py +93 -0
  22. alita_sdk/configurations/zephyr_enterprise.py +93 -0
  23. alita_sdk/configurations/zephyr_essential.py +75 -0
  24. alita_sdk/runtime/clients/artifact.py +1 -1
  25. alita_sdk/runtime/clients/client.py +47 -10
  26. alita_sdk/runtime/clients/mcp_discovery.py +342 -0
  27. alita_sdk/runtime/clients/mcp_manager.py +262 -0
  28. alita_sdk/runtime/clients/sandbox_client.py +373 -0
  29. alita_sdk/runtime/langchain/assistant.py +70 -41
  30. alita_sdk/runtime/langchain/constants.py +6 -1
  31. alita_sdk/runtime/langchain/document_loaders/AlitaDocxMammothLoader.py +315 -3
  32. alita_sdk/runtime/langchain/document_loaders/AlitaJSONLoader.py +4 -1
  33. alita_sdk/runtime/langchain/document_loaders/constants.py +73 -100
  34. alita_sdk/runtime/langchain/langraph_agent.py +164 -38
  35. alita_sdk/runtime/langchain/utils.py +43 -7
  36. alita_sdk/runtime/models/mcp_models.py +61 -0
  37. alita_sdk/runtime/toolkits/__init__.py +24 -0
  38. alita_sdk/runtime/toolkits/application.py +8 -1
  39. alita_sdk/runtime/toolkits/artifact.py +5 -6
  40. alita_sdk/runtime/toolkits/mcp.py +895 -0
  41. alita_sdk/runtime/toolkits/tools.py +140 -50
  42. alita_sdk/runtime/tools/__init__.py +7 -2
  43. alita_sdk/runtime/tools/application.py +7 -0
  44. alita_sdk/runtime/tools/function.py +94 -5
  45. alita_sdk/runtime/tools/graph.py +10 -4
  46. alita_sdk/runtime/tools/image_generation.py +104 -8
  47. alita_sdk/runtime/tools/llm.py +204 -114
  48. alita_sdk/runtime/tools/mcp_inspect_tool.py +284 -0
  49. alita_sdk/runtime/tools/mcp_remote_tool.py +166 -0
  50. alita_sdk/runtime/tools/mcp_server_tool.py +3 -1
  51. alita_sdk/runtime/tools/sandbox.py +180 -79
  52. alita_sdk/runtime/tools/vectorstore.py +22 -21
  53. alita_sdk/runtime/tools/vectorstore_base.py +79 -26
  54. alita_sdk/runtime/utils/mcp_oauth.py +164 -0
  55. alita_sdk/runtime/utils/mcp_sse_client.py +405 -0
  56. alita_sdk/runtime/utils/streamlit.py +34 -3
  57. alita_sdk/runtime/utils/toolkit_utils.py +14 -4
  58. alita_sdk/runtime/utils/utils.py +1 -0
  59. alita_sdk/tools/__init__.py +48 -31
  60. alita_sdk/tools/ado/repos/__init__.py +1 -0
  61. alita_sdk/tools/ado/test_plan/__init__.py +1 -1
  62. alita_sdk/tools/ado/wiki/__init__.py +1 -5
  63. alita_sdk/tools/ado/work_item/__init__.py +1 -5
  64. alita_sdk/tools/ado/work_item/ado_wrapper.py +17 -8
  65. alita_sdk/tools/base_indexer_toolkit.py +194 -112
  66. alita_sdk/tools/bitbucket/__init__.py +1 -0
  67. alita_sdk/tools/chunkers/sematic/proposal_chunker.py +1 -1
  68. alita_sdk/tools/code/sonar/__init__.py +1 -1
  69. alita_sdk/tools/code_indexer_toolkit.py +15 -5
  70. alita_sdk/tools/confluence/__init__.py +2 -2
  71. alita_sdk/tools/confluence/api_wrapper.py +110 -63
  72. alita_sdk/tools/confluence/loader.py +10 -0
  73. alita_sdk/tools/elitea_base.py +22 -22
  74. alita_sdk/tools/github/__init__.py +2 -2
  75. alita_sdk/tools/gitlab/__init__.py +2 -1
  76. alita_sdk/tools/gitlab/api_wrapper.py +11 -7
  77. alita_sdk/tools/gitlab_org/__init__.py +1 -2
  78. alita_sdk/tools/google_places/__init__.py +2 -1
  79. alita_sdk/tools/jira/__init__.py +1 -0
  80. alita_sdk/tools/jira/api_wrapper.py +1 -1
  81. alita_sdk/tools/memory/__init__.py +1 -1
  82. alita_sdk/tools/non_code_indexer_toolkit.py +2 -2
  83. alita_sdk/tools/openapi/__init__.py +10 -1
  84. alita_sdk/tools/pandas/__init__.py +1 -1
  85. alita_sdk/tools/postman/__init__.py +2 -1
  86. alita_sdk/tools/postman/api_wrapper.py +18 -8
  87. alita_sdk/tools/postman/postman_analysis.py +8 -1
  88. alita_sdk/tools/pptx/__init__.py +2 -2
  89. alita_sdk/tools/qtest/__init__.py +3 -3
  90. alita_sdk/tools/qtest/api_wrapper.py +1708 -76
  91. alita_sdk/tools/rally/__init__.py +1 -2
  92. alita_sdk/tools/report_portal/__init__.py +1 -0
  93. alita_sdk/tools/salesforce/__init__.py +1 -0
  94. alita_sdk/tools/servicenow/__init__.py +2 -3
  95. alita_sdk/tools/sharepoint/__init__.py +1 -0
  96. alita_sdk/tools/sharepoint/api_wrapper.py +125 -34
  97. alita_sdk/tools/sharepoint/authorization_helper.py +191 -1
  98. alita_sdk/tools/sharepoint/utils.py +8 -2
  99. alita_sdk/tools/slack/__init__.py +1 -0
  100. alita_sdk/tools/sql/__init__.py +2 -1
  101. alita_sdk/tools/sql/api_wrapper.py +71 -23
  102. alita_sdk/tools/testio/__init__.py +1 -0
  103. alita_sdk/tools/testrail/__init__.py +1 -3
  104. alita_sdk/tools/utils/__init__.py +17 -0
  105. alita_sdk/tools/utils/content_parser.py +35 -24
  106. alita_sdk/tools/vector_adapters/VectorStoreAdapter.py +67 -21
  107. alita_sdk/tools/xray/__init__.py +2 -1
  108. alita_sdk/tools/zephyr/__init__.py +2 -1
  109. alita_sdk/tools/zephyr_enterprise/__init__.py +1 -0
  110. alita_sdk/tools/zephyr_essential/__init__.py +1 -0
  111. alita_sdk/tools/zephyr_scale/__init__.py +1 -0
  112. alita_sdk/tools/zephyr_squad/__init__.py +1 -0
  113. {alita_sdk-0.3.365.dist-info → alita_sdk-0.3.462.dist-info}/METADATA +8 -2
  114. {alita_sdk-0.3.365.dist-info → alita_sdk-0.3.462.dist-info}/RECORD +118 -93
  115. alita_sdk-0.3.462.dist-info/entry_points.txt +2 -0
  116. {alita_sdk-0.3.365.dist-info → alita_sdk-0.3.462.dist-info}/WHEEL +0 -0
  117. {alita_sdk-0.3.365.dist-info → alita_sdk-0.3.462.dist-info}/licenses/LICENSE +0 -0
  118. {alita_sdk-0.3.365.dist-info → alita_sdk-0.3.462.dist-info}/top_level.txt +0 -0
alita_sdk/runtime/tools/vectorstore.py

@@ -207,9 +207,9 @@ class VectorStoreWrapper(BaseToolApiWrapper):
             tool_name="_remove_collection"
         )
 
-    def _get_indexed_ids(self, collection_suffix: Optional[str] = '') -> List[str]:
+    def _get_indexed_ids(self, index_name: Optional[str] = '') -> List[str]:
         """Get all indexed document IDs from vectorstore"""
-        return self.vector_adapter.get_indexed_ids(self, collection_suffix)
+        return self.vector_adapter.get_indexed_ids(self, index_name)
 
     def list_collections(self) -> Any:
         """List all collections in the vectorstore.
@@ -233,7 +233,7 @@ class VectorStoreWrapper(BaseToolApiWrapper):
             return {"collections": [], "message": "No indexed collections"}
         return cols
 
-    def _clean_collection(self, collection_suffix: str = ''):
+    def _clean_collection(self, index_name: str = ''):
         """
         Clean the vectorstore collection by deleting all indexed data.
         """
@@ -241,15 +241,15 @@ class VectorStoreWrapper(BaseToolApiWrapper):
             f"Cleaning collection '{self.dataset}'",
             tool_name="_clean_collection"
         )
-        self.vector_adapter.clean_collection(self, collection_suffix)
+        self.vector_adapter.clean_collection(self, index_name)
         self._log_data(
             f"Collection '{self.dataset}' has been cleaned. ",
             tool_name="_clean_collection"
         )
 
-    def _get_code_indexed_data(self, collection_suffix: str) -> Dict[str, Dict[str, Any]]:
+    def _get_code_indexed_data(self, index_name: str) -> Dict[str, Dict[str, Any]]:
         """ Get all indexed data from vectorstore for code content """
-        return self.vector_adapter.get_code_indexed_data(self, collection_suffix)
+        return self.vector_adapter.get_code_indexed_data(self, index_name)
 
     def _add_to_collection(self, entry_id, new_collection_value):
         """Add a new collection name to the `collection` key in the `metadata` column."""
@@ -258,7 +258,7 @@ class VectorStoreWrapper(BaseToolApiWrapper):
     def _reduce_duplicates(
             self,
             documents: Generator[Any, None, None],
-            collection_suffix: str,
+            index_name: str,
             get_indexed_data: Callable,
             key_fn: Callable,
             compare_fn: Callable,
@@ -267,7 +267,7 @@ class VectorStoreWrapper(BaseToolApiWrapper):
     ) -> List[Any]:
         """Generic duplicate reduction logic for documents."""
         self._log_data(log_msg, tool_name="index_documents")
-        indexed_data = get_indexed_data(collection_suffix)
+        indexed_data = get_indexed_data(index_name)
         indexed_keys = set(indexed_data.keys())
         if not indexed_keys:
             self._log_data("Vectorstore is empty, indexing all incoming documents", tool_name="index_documents")
@@ -279,14 +279,14 @@ class VectorStoreWrapper(BaseToolApiWrapper):
         for document in documents:
             key = key_fn(document)
             key = key if isinstance(key, str) else str(key)
-            if key in indexed_keys and collection_suffix == indexed_data[key]['metadata'].get('collection'):
+            if key in indexed_keys and index_name == indexed_data[key]['metadata'].get('collection'):
                 if compare_fn(document, indexed_data[key]):
                     # Disabled addition of new collection to already indexed documents
                     # # check metadata.collection and update if needed
                     # for update_collection_id in remove_ids_fn(indexed_data, key):
                     #     self._add_to_collection(
                     #         update_collection_id,
-                    #         collection_suffix
+                    #         index_name
                     #     )
                     continue
             final_docs.append(document)
@@ -303,10 +303,10 @@ class VectorStoreWrapper(BaseToolApiWrapper):
 
         return final_docs
 
-    def _reduce_code_duplicates(self, documents: Generator[Any, None, None], collection_suffix: str) -> List[Any]:
+    def _reduce_code_duplicates(self, documents: Generator[Any, None, None], index_name: str) -> List[Any]:
         return self._reduce_duplicates(
             documents,
-            collection_suffix,
+            index_name,
             self._get_code_indexed_data,
             lambda doc: doc.metadata.get('filename'),
             lambda doc, idx: (
@@ -318,7 +318,7 @@ class VectorStoreWrapper(BaseToolApiWrapper):
             log_msg="Verification of code documents to index started"
         )
 
-    def index_documents(self, documents: Generator[Document, None, None], collection_suffix: str, progress_step: int = 20, clean_index: bool = True, is_code: bool = True):
+    def index_documents(self, documents: Generator[Document, None, None], index_name: str, progress_step: int = 20, clean_index: bool = True, is_code: bool = True):
         """ Index documents in the vectorstore.
 
         Args:
@@ -329,13 +329,13 @@ class VectorStoreWrapper(BaseToolApiWrapper):
 
         from ..langchain.interfaces.llm_processor import add_documents
 
-        self._log_tool_event(message=f"Starting the indexing... Parameters: {collection_suffix=}, {clean_index=}, {is_code}", tool_name="index_documents")
+        self._log_tool_event(message=f"Starting the indexing... Parameters: {index_name=}, {clean_index=}, {is_code}", tool_name="index_documents")
         # pre-process documents if needed (find duplicates, etc.)
         if clean_index:
             logger.info("Cleaning index before re-indexing all documents.")
             self._log_data("Cleaning index before re-indexing all documents. Previous index will be removed", tool_name="index_documents")
             try:
-                self._clean_collection(collection_suffix)
+                self._clean_collection(index_name)
                 self.vectoradapter.persist()
                 self.vectoradapter.vacuum()
                 self._log_data("Previous index has been removed",
@@ -349,7 +349,7 @@ class VectorStoreWrapper(BaseToolApiWrapper):
             message="Filter for duplicates",
             tool_name="index_documents")
         # remove duplicates based on metadata 'id' and 'updated_on' or 'commit_hash' fields
-        documents = self._reduce_code_duplicates(documents, collection_suffix)
+        documents = self._reduce_code_duplicates(documents, index_name)
         self._log_tool_event(
             message="All the duplicates were filtered out. Proceeding with indexing.",
             tool_name="index_documents")
@@ -377,13 +377,13 @@ class VectorStoreWrapper(BaseToolApiWrapper):
         self._log_tool_event(message=f"Documents for indexing were processed. Total documents: {len(documents)}",
                              tool_name="index_documents")
 
-        # if collection_suffix is provided, add it to metadata of each document
-        if collection_suffix:
+        # if index_name is provided, add it to metadata of each document
+        if index_name:
             for doc in documents:
                 if not doc.metadata.get('collection'):
-                    doc.metadata['collection'] = collection_suffix
+                    doc.metadata['collection'] = index_name
                 else:
-                    doc.metadata['collection'] += f";{collection_suffix}"
+                    doc.metadata['collection'] += f";{index_name}"
 
         total_docs = len(documents)
         documents_count = 0
@@ -414,7 +414,8 @@ class VectorStoreWrapper(BaseToolApiWrapper):
                 return {"status": "error", "message": f"Error: {format_exc()}"}
         if _documents:
             add_documents(vectorstore=self.vectorstore, documents=_documents)
-        return {"status": "ok", "message": f"successfully indexed {documents_count} documents"}
+        return {"status": "ok", "message": f"successfully indexed {documents_count} documents" if documents_count > 0
+                else "No new documents to index."}
 
     def search_documents(self, query:str, doctype: str = 'code',
                          filter:dict|str={}, cut_off: float=0.5,
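
Net effect of the vectorstore.py hunks: every indexing entry point now takes index_name instead of collection_suffix, and index_documents reports "No new documents to index." when the duplicate filter leaves nothing to write. A minimal caller sketch against the new signature (the wrapper construction and the document generator are assumptions, not part of this diff):

    # Hypothetical usage of the renamed API; `wrapper` is an already-configured
    # VectorStoreWrapper instance and `docs` a Generator[Document, None, None].
    def reindex_repo(wrapper, docs):
        # clean_index=True drops the previous data for this index_name before re-indexing;
        # with is_code=True duplicates are filtered via filename/commit metadata.
        return wrapper.index_documents(docs, index_name="my_repo", clean_index=True, is_code=True)
        # -> {"status": "ok", "message": "successfully indexed N documents"}
        #    or {"status": "ok", "message": "No new documents to index."}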
alita_sdk/runtime/tools/vectorstore_base.py

@@ -1,16 +1,18 @@
 import json
-import math
 from collections import OrderedDict
 from logging import getLogger
 from typing import Any, Optional, List, Dict, Generator
 
+import math
 from langchain_core.documents import Document
 from langchain_core.messages import HumanMessage
+from langchain_core.tools import ToolException
+from psycopg.errors import DataException
 from pydantic import BaseModel, model_validator, Field
 
 from alita_sdk.tools.elitea_base import BaseToolApiWrapper
 from alita_sdk.tools.vector_adapters.VectorStoreAdapter import VectorStoreAdapterFactory
-from ..utils.logging import dispatch_custom_event
+from ...runtime.utils.utils import IndexerKeywords
 
 logger = getLogger(__name__)
 
@@ -175,6 +177,37 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
         except Exception as e:
             logger.error(f"Failed to initialize PGVectorSearch: {str(e)}")
 
+    def _similarity_search_with_score(self, query: str, filter: dict = None, k: int = 10):
+        """
+        Perform similarity search with proper exception handling for DataException.
+
+        Args:
+            query: Search query string
+            filter: Optional filter dictionary
+            k: Number of results to return
+
+        Returns:
+            List of (Document, score) tuples
+
+        Raises:
+            ToolException: When DataException occurs or other search errors
+        """
+        try:
+            return self.vectorstore.similarity_search_with_score(
+                query, filter=filter, k=k
+            )
+        except DataException as dimException:
+            exception_str = str(dimException)
+            if 'different vector dimensions' in exception_str:
+                logger.error(f"Data exception: {exception_str}")
+                raise ToolException(f"Global search cannot be completed since collections were indexed using "
+                                    f"different embedding models. Use search within a single collection."
+                                    f"\nDetails: {exception_str}")
+            raise ToolException(f"Data exception during search. Possibly invalid filter: {exception_str}")
+        except Exception as e:
+            logger.error(f"Error during similarity search: {str(e)}")
+            raise ToolException(f"Search failed: {str(e)}")
+
     def list_collections(self) -> List[str]:
         """List all collections in the vectorstore."""
 
@@ -183,7 +216,28 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
             return "No indexed collections"
         return collections
 
-    def _clean_collection(self, collection_suffix: str = ''):
+    def get_index_meta(self, index_name: str):
+        index_metas = self.vector_adapter.get_index_meta(self, index_name)
+        if len(index_metas) > 1:
+            raise RuntimeError(f"Multiple index_meta documents found: {index_metas}")
+        return index_metas[0] if index_metas else None
+
+    def get_indexed_count(self, index_name: str) -> int:
+        from sqlalchemy.orm import Session
+        from sqlalchemy import func, or_
+
+        with Session(self.vectorstore.session_maker.bind) as session:
+            return session.query(
+                self.vectorstore.EmbeddingStore.id,
+            ).filter(
+                func.jsonb_extract_path_text(self.vectorstore.EmbeddingStore.cmetadata, 'collection') == index_name,
+                or_(
+                    func.jsonb_extract_path_text(self.vectorstore.EmbeddingStore.cmetadata, 'type').is_(None),
+                    func.jsonb_extract_path_text(self.vectorstore.EmbeddingStore.cmetadata, 'type') != IndexerKeywords.INDEX_META_TYPE.value
+                )
+            ).count()
+
+    def _clean_collection(self, index_name: str = ''):
         """
         Clean the vectorstore collection by deleting all indexed data.
         """
@@ -191,13 +245,13 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
             f"Cleaning collection '{self.dataset}'",
             tool_name="_clean_collection"
         )
-        self.vector_adapter.clean_collection(self, collection_suffix)
+        self.vector_adapter.clean_collection(self, index_name)
         self._log_tool_event(
             f"Collection '{self.dataset}' has been cleaned. ",
             tool_name="_clean_collection"
         )
 
-    def index_documents(self, documents: Generator[Document, None, None], collection_suffix: str, progress_step: int = 20, clean_index: bool = True):
+    def index_documents(self, documents: Generator[Document, None, None], index_name: str, progress_step: int = 20, clean_index: bool = True):
         """ Index documents in the vectorstore.
 
         Args:
@@ -206,21 +260,21 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
             clean_index (bool): If True, clean the index before re-indexing all documents.
         """
         if clean_index:
-            self._clean_index(collection_suffix)
+            self._clean_index(index_name)
 
-        return self._save_index(list(documents), collection_suffix, progress_step)
+        return self._save_index(list(documents), index_name, progress_step)
 
-    def _clean_index(self, collection_suffix: str):
+    def _clean_index(self, index_name: str):
         logger.info("Cleaning index before re-indexing all documents.")
         self._log_tool_event("Cleaning index before re-indexing all documents. Previous index will be removed", tool_name="index_documents")
         try:
-            self._clean_collection(collection_suffix)
+            self._clean_collection(index_name)
             self._log_tool_event("Previous index has been removed",
                                  tool_name="index_documents")
         except Exception as e:
             logger.warning(f"Failed to clean index: {str(e)}. Continuing with re-indexing.")
 
-    def _save_index(self, documents: list[Document], collection_suffix: Optional[str] = None, progress_step: int = 20):
+    def _save_index(self, documents: list[Document], index_name: Optional[str] = None, progress_step: int = 20):
         from ..langchain.interfaces.llm_processor import add_documents
         #
         for doc in documents:
@@ -229,13 +283,13 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
 
         logger.debug(f"Indexing documents: {documents}")
 
-        # if collection_suffix is provided, add it to metadata of each document
-        if collection_suffix:
+        # if index_name is provided, add it to metadata of each document
+        if index_name:
             for doc in documents:
                 if not doc.metadata.get('collection'):
-                    doc.metadata['collection'] = collection_suffix
+                    doc.metadata['collection'] = index_name
                 else:
-                    doc.metadata['collection'] += f";{collection_suffix}"
+                    doc.metadata['collection'] += f";{index_name}"
 
         total_docs = len(documents)
         documents_count = 0
@@ -269,7 +323,8 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
                 return {"status": "error", "message": f"Error: {format_exc()}"}
         if _documents:
             add_documents(vectorstore=self.vectorstore, documents=_documents)
-        return {"status": "ok", "message": f"successfully indexed {documents_count} documents"}
+        return {"status": "ok", "message": f"successfully indexed {documents_count} documents" if documents_count > 0
+                else "no documents to index"}
 
     def search_documents(self, query:str, doctype: str = 'code',
                          filter:dict|str={}, cut_off: float=0.5,
@@ -303,7 +358,7 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
             }
 
         try:
-            document_items = self.vectorstore.similarity_search_with_score(
+            document_items = self._similarity_search_with_score(
                 query, filter=document_filter, k=search_top
             )
             # Add document results to unique docs
@@ -336,18 +391,16 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
             }
 
             try:
-                chunk_items = self.vectorstore.similarity_search_with_score(
+                chunk_items = self._similarity_search_with_score(
                     query, filter=chunk_filter, k=search_top
                 )
-
-                logger.debug(f"Chunk items for {chunk_type}: {chunk_items[0]}")
-
+
                 for doc, score in chunk_items:
                     # Create unique identifier for document
                     source = doc.metadata.get('source')
                     chunk_id = doc.metadata.get('chunk_id')
                     doc_id = f"{source}_{chunk_id}" if source and chunk_id else str(doc.metadata.get('id', id(doc)))
-
+
                     # Store document and its score
                     if doc_id not in unique_docs:
                         unique_docs[doc_id] = doc
@@ -367,9 +420,9 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
                     doc_filter = {
                         "$and": doc_filter_parts
                     }
-
+
                     try:
-                        fetch_items = self.vectorstore.similarity_search_with_score(
+                        fetch_items = self._similarity_search_with_score(
                             query, filter=doc_filter, k=1
                         )
                         if fetch_items:
@@ -383,7 +436,7 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
         else:
             # Default search behavior (unchanged)
             max_search_results = 30 if search_top * 3 > 30 else search_top * 3
-            vector_items = self.vectorstore.similarity_search_with_score(
+            vector_items = self._similarity_search_with_score(
                 query, filter=filter, k=max_search_results
            )
 
@@ -401,7 +454,7 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
         doc_map = OrderedDict(
             sorted(doc_map.items(), key=lambda x: x[1][1], reverse=True)
         )
-
+
         # Process full-text search if configured
         if full_text_search and full_text_search.get('enabled') and full_text_search.get('fields'):
             language = full_text_search.get('language', 'english')
@@ -414,7 +467,7 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
             for field_name in full_text_search.get('fields', []):
                 try:
                     text_results = self.pg_helper.full_text_search(field_name, query)
-
+
                     # Combine text search results with vector results
                     for result in text_results:
                         doc_id = result['id']
alita_sdk/runtime/utils/mcp_oauth.py (new file)

@@ -0,0 +1,164 @@
+import json
+import logging
+import re
+from typing import Any, Dict, Optional
+from urllib.parse import urlparse
+
+import requests
+from langchain_core.tools import ToolException
+
+logger = logging.getLogger(__name__)
+
+
+class McpAuthorizationRequired(ToolException):
+    """Raised when an MCP server requires OAuth authorization before use."""
+
+    def __init__(
+        self,
+        message: str,
+        server_url: str,
+        resource_metadata_url: Optional[str] = None,
+        www_authenticate: Optional[str] = None,
+        resource_metadata: Optional[Dict[str, Any]] = None,
+        status: Optional[int] = None,
+        tool_name: Optional[str] = None,
+    ):
+        super().__init__(message)
+        self.server_url = server_url
+        self.resource_metadata_url = resource_metadata_url
+        self.www_authenticate = www_authenticate
+        self.resource_metadata = resource_metadata
+        self.status = status
+        self.tool_name = tool_name
+
+    def to_dict(self) -> Dict[str, Any]:
+        return {
+            "message": str(self),
+            "server_url": self.server_url,
+            "resource_metadata_url": self.resource_metadata_url,
+            "www_authenticate": self.www_authenticate,
+            "resource_metadata": self.resource_metadata,
+            "status": self.status,
+            "tool_name": self.tool_name,
+        }
+
+
+def extract_resource_metadata_url(www_authenticate: Optional[str], server_url: Optional[str] = None) -> Optional[str]:
+    """
+    Pull the resource_metadata URL from a WWW-Authenticate header if present.
+    If not found and server_url is provided, try to construct resource metadata URLs.
+    """
+    if not www_authenticate and not server_url:
+        return None
+
+    # RFC9728 returns `resource_metadata="<url>"` inside the header value
+    if www_authenticate:
+        match = re.search(r'resource_metadata\s*=\s*\"?([^\", ]+)\"?', www_authenticate)
+        if match:
+            return match.group(1)
+
+    # For servers that don't provide resource_metadata in WWW-Authenticate,
+    # we'll return None and rely on inferring authorization servers from the realm
+    # or using well-known OAuth discovery endpoints directly
+    return None
+
+
+def fetch_oauth_authorization_server_metadata(base_url: str, timeout: int = 10) -> Optional[Dict[str, Any]]:
+    """
+    Fetch OAuth authorization server metadata from well-known endpoints.
+    Tries both oauth-authorization-server and openid-configuration discovery endpoints.
+    """
+    discovery_endpoints = [
+        f"{base_url}/.well-known/oauth-authorization-server",
+        f"{base_url}/.well-known/openid-configuration",
+    ]
+
+    for endpoint in discovery_endpoints:
+        try:
+            resp = requests.get(endpoint, timeout=timeout)
+            if resp.status_code == 200:
+                return resp.json()
+        except Exception as exc:
+            logger.debug(f"Failed to fetch OAuth metadata from {endpoint}: {exc}")
+            continue
+
+    return None
+
+
+def infer_authorization_servers_from_realm(www_authenticate: Optional[str], server_url: str) -> Optional[list]:
+    """
+    Infer authorization server URLs from WWW-Authenticate realm or server URL.
+    This is used when the server doesn't provide resource_metadata endpoint.
+    """
+    if not www_authenticate and not server_url:
+        return None
+
+    authorization_servers = []
+
+    # Try to extract realm from WWW-Authenticate header
+    realm = None
+    if www_authenticate:
+        realm_match = re.search(r'realm\s*=\s*\"([^\"]+)\"', www_authenticate)
+        if realm_match:
+            realm = realm_match.group(1)
+
+    # Parse the server URL to get base domain
+    parsed = urlparse(server_url)
+    base_url = f"{parsed.scheme}://{parsed.netloc}"
+
+    # Return the base authorization server URL (not the discovery endpoint)
+    # The client will append .well-known paths when fetching metadata
+    authorization_servers.append(base_url)
+
+    return authorization_servers if authorization_servers else None
+
+
+def fetch_resource_metadata(resource_metadata_url: str, timeout: int = 10) -> Optional[Dict[str, Any]]:
+    """Fetch and parse the protected resource metadata document."""
+    try:
+        resp = requests.get(resource_metadata_url, timeout=timeout)
+        resp.raise_for_status()
+        return resp.json()
+    except Exception as exc:  # broad catch – we want to surface auth requirement even if this fails
+        logger.warning("Failed to fetch resource metadata from %s: %s", resource_metadata_url, exc)
+        return None
+
+
+async def fetch_resource_metadata_async(resource_metadata_url: str, session=None, timeout: int = 10) -> Optional[Dict[str, Any]]:
+    """Async variant for fetching protected resource metadata."""
+    try:
+        import aiohttp
+
+        client_timeout = aiohttp.ClientTimeout(total=timeout)
+        if session:
+            async with session.get(resource_metadata_url, timeout=client_timeout) as resp:
+                text = await resp.text()
+        else:
+            async with aiohttp.ClientSession(timeout=client_timeout) as local_session:
+                async with local_session.get(resource_metadata_url) as resp:
+                    text = await resp.text()
+
+        try:
+            return json.loads(text)
+        except json.JSONDecodeError:
+            logger.warning("Resource metadata at %s is not valid JSON: %s", resource_metadata_url, text[:200])
+            return None
+    except Exception as exc:
+        logger.warning("Failed to fetch resource metadata from %s: %s", resource_metadata_url, exc)
+        return None
+
+
+def canonical_resource(server_url: str) -> str:
+    """Produce a canonical resource identifier for the MCP server."""
+    parsed = urlparse(server_url)
+    # Normalize scheme/host casing per RFC guidance
+    normalized = parsed._replace(
+        scheme=parsed.scheme.lower(),
+        netloc=parsed.netloc.lower(),
+    )
+    resource = normalized.geturl()
+
+    # Prefer form without trailing slash unless path is meaningful
+    if resource.endswith("/") and parsed.path in ("", "/"):
+        resource = resource[:-1]
+    return resource
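
The new mcp_oauth helpers implement the discovery side of RFC 9728-style protected-resource metadata: parse WWW-Authenticate, fetch the metadata document, and carry everything in McpAuthorizationRequired. A hedged sketch of the intended flow on a 401 response (the requests response object and the wrapping function are assumptions; the import path follows the file location shown above):

    # Illustrative flow: turn a 401 from an MCP server into McpAuthorizationRequired.
    import requests
    from alita_sdk.runtime.utils.mcp_oauth import (
        McpAuthorizationRequired, canonical_resource,
        extract_resource_metadata_url, fetch_resource_metadata,
    )

    def require_authorization(resp: requests.Response, server_url: str, tool_name=None):
        www_auth = resp.headers.get("WWW-Authenticate")
        meta_url = extract_resource_metadata_url(www_auth, server_url)
        metadata = fetch_resource_metadata(meta_url) if meta_url else None
        raise McpAuthorizationRequired(
            "MCP server requires OAuth authorization",
            server_url=canonical_resource(server_url),
            resource_metadata_url=meta_url,
            www_authenticate=www_auth,
            resource_metadata=metadata,
            status=resp.status_code,
            tool_name=tool_name,
        )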