alita-sdk 0.3.449__py3-none-any.whl → 0.3.457__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of alita-sdk has been flagged as potentially problematic.
Files changed (47)
  1. alita_sdk/runtime/langchain/langraph_agent.py +57 -15
  2. alita_sdk/runtime/langchain/utils.py +19 -3
  3. alita_sdk/runtime/toolkits/artifact.py +5 -6
  4. alita_sdk/runtime/toolkits/mcp.py +5 -2
  5. alita_sdk/runtime/toolkits/tools.py +1 -0
  6. alita_sdk/runtime/tools/function.py +2 -1
  7. alita_sdk/runtime/tools/vectorstore_base.py +17 -2
  8. alita_sdk/runtime/utils/mcp_sse_client.py +64 -6
  9. alita_sdk/tools/ado/repos/__init__.py +1 -0
  10. alita_sdk/tools/ado/test_plan/__init__.py +1 -1
  11. alita_sdk/tools/ado/wiki/__init__.py +1 -5
  12. alita_sdk/tools/ado/work_item/__init__.py +1 -5
  13. alita_sdk/tools/base_indexer_toolkit.py +10 -6
  14. alita_sdk/tools/bitbucket/__init__.py +1 -0
  15. alita_sdk/tools/code/sonar/__init__.py +1 -1
  16. alita_sdk/tools/confluence/__init__.py +2 -2
  17. alita_sdk/tools/github/__init__.py +2 -2
  18. alita_sdk/tools/gitlab/__init__.py +2 -1
  19. alita_sdk/tools/gitlab_org/__init__.py +1 -2
  20. alita_sdk/tools/google_places/__init__.py +2 -1
  21. alita_sdk/tools/jira/__init__.py +1 -0
  22. alita_sdk/tools/memory/__init__.py +1 -1
  23. alita_sdk/tools/pandas/__init__.py +1 -1
  24. alita_sdk/tools/postman/__init__.py +2 -1
  25. alita_sdk/tools/pptx/__init__.py +2 -2
  26. alita_sdk/tools/qtest/__init__.py +3 -3
  27. alita_sdk/tools/qtest/api_wrapper.py +374 -29
  28. alita_sdk/tools/rally/__init__.py +1 -2
  29. alita_sdk/tools/report_portal/__init__.py +1 -0
  30. alita_sdk/tools/salesforce/__init__.py +1 -0
  31. alita_sdk/tools/servicenow/__init__.py +2 -3
  32. alita_sdk/tools/sharepoint/__init__.py +1 -0
  33. alita_sdk/tools/slack/__init__.py +1 -0
  34. alita_sdk/tools/sql/__init__.py +2 -1
  35. alita_sdk/tools/testio/__init__.py +1 -0
  36. alita_sdk/tools/testrail/__init__.py +1 -3
  37. alita_sdk/tools/xray/__init__.py +2 -1
  38. alita_sdk/tools/zephyr/__init__.py +2 -1
  39. alita_sdk/tools/zephyr_enterprise/__init__.py +1 -0
  40. alita_sdk/tools/zephyr_essential/__init__.py +1 -0
  41. alita_sdk/tools/zephyr_scale/__init__.py +1 -0
  42. alita_sdk/tools/zephyr_squad/__init__.py +1 -0
  43. {alita_sdk-0.3.449.dist-info → alita_sdk-0.3.457.dist-info}/METADATA +1 -1
  44. {alita_sdk-0.3.449.dist-info → alita_sdk-0.3.457.dist-info}/RECORD +47 -47
  45. {alita_sdk-0.3.449.dist-info → alita_sdk-0.3.457.dist-info}/WHEEL +0 -0
  46. {alita_sdk-0.3.449.dist-info → alita_sdk-0.3.457.dist-info}/licenses/LICENSE +0 -0
  47. {alita_sdk-0.3.449.dist-info → alita_sdk-0.3.457.dist-info}/top_level.txt +0 -0
alita_sdk/runtime/langchain/langraph_agent.py

```diff
@@ -475,10 +475,14 @@ def create_graph(
         if toolkit_name:
             tool_name = f"{clean_string(toolkit_name)}{TOOLKIT_SPLITTER}{tool_name}"
         logger.info(f"Node: {node_id} : {node_type} - {tool_name}")
-        if node_type in ['function', 'tool', 'loop', 'loop_from_tool', 'indexer', 'subgraph', 'pipeline', 'agent']:
+        if node_type in ['function', 'toolkit', 'mcp', 'tool', 'loop', 'loop_from_tool', 'indexer', 'subgraph', 'pipeline', 'agent']:
+            if node_type == 'mcp' and tool_name not in [tool.name for tool in tools]:
+                # MCP is not connected and node cannot be added
+                raise ToolException(f"MCP tool '{tool_name}' not found in the provided tools. "
+                                    f"Make sure it is connected properly. Available tools: {[tool.name for tool in tools]}")
            for tool in tools:
                if tool.name == tool_name:
-                    if node_type == 'function':
+                    if node_type in ['function', 'toolkit', 'mcp']:
                        lg_builder.add_node(node_id, FunctionTool(
                            tool=tool, name=node_id, return_type='dict',
                            output_variables=node.get('output', []),
```
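The new branch makes `mcp` nodes fail fast when the referenced tool never made it into the tool list, instead of silently skipping node creation. For illustration, a minimal sketch of that guard in isolation — `ensure_mcp_tool_present` and `StubTool` are invented names, not SDK API:

```python
from dataclasses import dataclass

from langchain_core.tools import ToolException


@dataclass
class StubTool:
    name: str


def ensure_mcp_tool_present(node_type: str, tool_name: str, tools: list) -> None:
    # 'mcp' nodes fail fast when the named tool was never registered,
    # instead of falling through without adding the node.
    if node_type == 'mcp' and tool_name not in [tool.name for tool in tools]:
        raise ToolException(
            f"MCP tool '{tool_name}' not found in the provided tools. "
            f"Make sure it is connected properly. "
            f"Available tools: {[tool.name for tool in tools]}"
        )


ensure_mcp_tool_present('mcp', 'search', [StubTool('search')])  # passes silently
```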
```diff
@@ -643,6 +647,7 @@ def create_graph(
                             default_output=node.get('default_output', 'END')
                         )
                     )
+                    continue
                 elif node_type == 'state_modifier':
                     lg_builder.add_node(node_id, StateModifierNode(
                         template=node.get('template', ''),
@@ -663,7 +668,7 @@ def create_graph(
                 lg_builder.add_node(reset_node_id, PrinterNode(
                     input_mapping={'printer': {'type': 'fixed', 'value': ''}}
                 ))
-                lg_builder.add_edge(node_id, reset_node_id)
+                lg_builder.add_conditional_edges(node_id, TransitionalEdge(reset_node_id))
                 lg_builder.add_conditional_edges(reset_node_id, TransitionalEdge(clean_string(node['transition'])))
                 continue
             if node.get('transition'):
```
```diff
@@ -814,35 +819,63 @@ class LangGraphAgentRunnable(CompiledStateGraph):
             input['messages'] = [convert_dict_to_message(msg) for msg in chat_history]

         # handler for LLM node: if no input (Chat perspective), then take last human message
+        # Track if input came from messages to handle content extraction properly
+        input_from_messages = False
         if not input.get('input'):
             if input.get('messages'):
                 input['input'] = [next((msg for msg in reversed(input['messages']) if isinstance(msg, HumanMessage)),
-                                       None)]
+                                      None)]
+                if input['input'] is not None:
+                    input_from_messages = True

         # Append current input to existing messages instead of overwriting
         if input.get('input'):
             if isinstance(input['input'], str):
                 current_message = input['input']
             else:
+                # input can be a list of messages or a single message object
                 current_message = input.get('input')[-1]

             # TODO: add handler after we add 2+ inputs (filterByType, etc.)
             if isinstance(current_message, HumanMessage):
                 current_content = current_message.content
                 if isinstance(current_content, list):
-                    text_contents = [
-                        item['text'] if isinstance(item, dict) and item.get('type') == 'text'
-                        else item if isinstance(item, str)
-                        else None
-                        for item in current_content
-                    ]
-                    text_contents = [text for text in text_contents if text is not None]
-                    input['input'] = ". ".join(text_contents)
+                    # Extract text parts and keep non-text parts (images, etc.)
+                    text_contents = []
+                    non_text_parts = []
+
+                    for item in current_content:
+                        if isinstance(item, dict) and item.get('type') == 'text':
+                            text_contents.append(item['text'])
+                        elif isinstance(item, str):
+                            text_contents.append(item)
+                        else:
+                            # Keep image_url and other non-text content
+                            non_text_parts.append(item)
+
+                    # Set input to the joined text
+                    input['input'] = ". ".join(text_contents) if text_contents else ""
+
+                    # If this message came from input['messages'], update or remove it
+                    if input_from_messages:
+                        if non_text_parts:
+                            # Keep the message but only with non-text content (images, etc.)
+                            current_message.content = non_text_parts
+                        else:
+                            # All content was text, remove this message from the list
+                            input['messages'] = [msg for msg in input['messages'] if msg is not current_message]
+
                 elif isinstance(current_content, str):
                     # on regenerate case
                     input['input'] = current_content
+                    # If from messages and all content is text, remove the message
+                    if input_from_messages:
+                        input['messages'] = [msg for msg in input['messages'] if msg is not current_message]
                 else:
                     input['input'] = str(current_content)
+                    # If from messages, remove since we extracted the content
+                    if input_from_messages:
+                        input['messages'] = [msg for msg in input['messages'] if msg is not current_message]
             elif isinstance(current_message, str):
                 input['input'] = current_message
             else:
@@ -852,9 +885,18 @@ class LangGraphAgentRunnable(CompiledStateGraph):
                 input['messages'] = [convert_dict_to_message(msg) for msg in input['messages']]
                 # Append to existing messages
                 # input['messages'].append(current_message)
-            else:
-                # No existing messages, create new list
-                input['messages'] = [current_message]
+            # else:
+            #     NOTE: Commented out to prevent duplicates with input['input']
+            #     input['messages'] = [current_message]
+
+        # Validate that input is not empty after all processing
+        if not input.get('input'):
+            raise RuntimeError(
+                "Empty input after processing. Cannot send empty string to LLM. "
+                "This likely means the message contained only non-text content "
+                "with no accompanying text."
+            )
+
         logging.info(f"Input: {thread_id} - {input}")
         if self.checkpointer and self.checkpointer.get_tuple(config):
             self.update_state(config, input)
```
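The rewritten handler splits multimodal `HumanMessage` content: text parts are joined into `input['input']`, while `image_url` and other non-text parts stay on the message, and a `RuntimeError` now rejects inputs that end up empty. A standalone sketch of the split — `split_content` is an illustrative helper, not SDK code:

```python
from langchain_core.messages import HumanMessage


def split_content(content: list) -> tuple[str, list]:
    text_parts, non_text_parts = [], []
    for item in content:
        if isinstance(item, dict) and item.get('type') == 'text':
            text_parts.append(item['text'])
        elif isinstance(item, str):
            text_parts.append(item)
        else:
            # image_url blocks and other non-text parts survive extraction
            non_text_parts.append(item)
    return ". ".join(text_parts), non_text_parts


msg = HumanMessage(content=[
    {'type': 'text', 'text': 'Describe this image'},
    {'type': 'image_url', 'image_url': {'url': 'https://example.com/cat.png'}},
])
text, rest = split_content(msg.content)
assert text == 'Describe this image'
assert len(rest) == 1  # the image part stays on the message
```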
alita_sdk/runtime/langchain/utils.py

```diff
@@ -2,7 +2,7 @@ import builtins
 import json
 import logging
 import re
-from pydantic import create_model, Field
+from pydantic import create_model, Field, Json
 from typing import Tuple, TypedDict, Any, Optional, Annotated
 from langchain_core.messages import AnyMessage
 from langgraph.graph import add_messages
@@ -208,5 +208,21 @@ def safe_format(template, mapping):
 def create_pydantic_model(model_name: str, variables: dict[str, dict]):
     fields = {}
     for var_name, var_data in variables.items():
-        fields[var_name] = (parse_type(var_data['type']), Field(description=var_data.get('description', None)))
-    return create_model(model_name, **fields)
+        fields[var_name] = (parse_pydantic_type(var_data['type']), Field(description=var_data.get('description', None)))
+    return create_model(model_name, **fields)
+
+def parse_pydantic_type(type_name: str):
+    """
+    Helper function to parse type names into Python types.
+    Extend this function to handle custom types like 'dict' -> Json[Any].
+    """
+    type_mapping = {
+        'str': str,
+        'int': int,
+        'float': float,
+        'bool': bool,
+        'dict': Json[Any],  # Map 'dict' to Pydantic's Json type
+        'list': list,
+        'any': Any
+    }
+    return type_mapping.get(type_name, Any)
```
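With the new mapping, a variable declared as `'dict'` becomes a `Json[Any]` field, which expects a JSON string and parses it during validation. A hypothetical usage sketch — the import path is assumed from the file's location, and the variable spec is invented:

```python
# Assumed import path; utils.py lives at alita_sdk/runtime/langchain/utils.py.
from alita_sdk.runtime.langchain.utils import create_pydantic_model

Model = create_pydantic_model("NodeArgs", {
    "query": {"type": "str", "description": "Search query"},
    "payload": {"type": "dict", "description": "JSON-encoded options"},
})

# Json[Any] parses the JSON string on validation, so a stringified dict
# coming from an LLM round-trips cleanly into a Python object.
instance = Model(query="status report", payload='{"limit": 5}')
print(instance.payload)  # {'limit': 5}
```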
alita_sdk/runtime/toolkits/artifact.py

```diff
@@ -23,11 +23,7 @@ class ArtifactToolkit(BaseToolkit):
             # client = (Any, FieldInfo(description="Client object", required=True, autopopulate=True)),
             bucket=(str, FieldInfo(
                 description="Bucket name",
-                pattern=r'^[a-z][a-z0-9-]*$',
-                json_schema_extra={
-                    'toolkit_name': True,
-                    'max_toolkit_length': ArtifactToolkit.toolkit_max_length
-                }
+                pattern=r'^[a-z][a-z0-9-]*$'
             )),
             selected_tools=(List[Literal[tuple(selected_tools)]], Field(default=[], json_schema_extra={'args_schemas': selected_tools})),
             # indexer settings
@@ -37,7 +33,10 @@ class ArtifactToolkit(BaseToolkit):
             embedding_model=(Optional[str], Field(default=None, description="Embedding configuration.",
                                                   json_schema_extra={'configuration_model': 'embedding'})),

-            __config__=ConfigDict(json_schema_extra={'metadata': {"label": "Artifact", "icon_url": None}})
+            __config__=ConfigDict(json_schema_extra={'metadata': {"label": "Artifact",
+                                                                  "icon_url": None,
+                                                                  "max_length": ArtifactToolkit.toolkit_max_length
+                                                                  }})
         )

     @classmethod
```
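The same relocation pattern recurs across the toolkits below: the max-length hint moves out of a field's `json_schema_extra` into the model-level `metadata` block. A minimal sketch of what that produces in the JSON schema — the model and values are invented for illustration:

```python
from pydantic import ConfigDict, Field, create_model

TOOLKIT_MAX_LENGTH = 25  # stand-in for ArtifactToolkit.toolkit_max_length

Example = create_model(
    'artifact',
    bucket=(str, Field(description="Bucket name", pattern=r'^[a-z][a-z0-9-]*$')),
    __config__=ConfigDict(json_schema_extra={'metadata': {
        "label": "Artifact",
        "icon_url": None,
        "max_length": TOOLKIT_MAX_LENGTH,
    }}),
)

# json_schema_extra is merged into the generated schema, so consumers can
# read the limit from the metadata block instead of a per-field flag.
print(Example.model_json_schema()['metadata']['max_length'])  # 25
```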
alita_sdk/runtime/toolkits/mcp.py

```diff
@@ -498,9 +498,12 @@ class McpToolkit(BaseToolkit):
         all_tools = []
         session_id = connection_config.session_id

+        # Generate temporary session_id if not provided (for OAuth flow)
+        # The real session_id should come from frontend after OAuth completes
         if not session_id:
-            logger.error(f"[MCP SSE] session_id is required for SSE servers")
-            raise ValueError("session_id is required. Frontend must generate UUID.")
+            import uuid
+            session_id = str(uuid.uuid4())
+            logger.info(f"[MCP SSE] Generated temporary session_id for OAuth: {session_id}")

         logger.info(f"[MCP SSE] Discovering from {connection_config.url} with session {session_id}")
```
alita_sdk/runtime/toolkits/tools.py

```diff
@@ -110,6 +110,7 @@ def get_tools(tools_list: list, alita_client, llm, memory_store: BaseStore = None
                                toolkit_name=tool.get('toolkit_name', ''),
                                **tool['settings']).get_tools())
         elif tool['type'] == 'mcp':
+            # remote mcp tool initialization with token injection
             settings = dict(tool['settings'])
             url = settings.get('url')
             headers = settings.get('headers')
```
alita_sdk/runtime/tools/function.py

```diff
@@ -120,7 +120,8 @@ class FunctionTool(BaseTool):
         messages_dict = {
             "messages": [{
                 "role": "assistant",
-                "content": dumps(tool_result) if not isinstance(tool_result, ToolException)
+                "content": dumps(tool_result)
+                if not isinstance(tool_result, ToolException) and not isinstance(tool_result, str)
                 else str(tool_result)
             }]
         }
```
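The added `isinstance` check matters because serializing a plain string would wrap it in an extra layer of JSON quoting. A sketch of the branch in isolation, assuming `dumps` here is `json.dumps`:

```python
from json import dumps  # assuming `dumps` in function.py is json.dumps

from langchain_core.tools import ToolException


def render_content(tool_result):
    # Strings and ToolExceptions pass through str(); everything else is
    # JSON-serialized.
    return (dumps(tool_result)
            if not isinstance(tool_result, ToolException) and not isinstance(tool_result, str)
            else str(tool_result))


print(render_content({"ok": True}))           # {"ok": true}
print(render_content("plain text"))           # plain text (no extra JSON quotes)
print(render_content(ToolException("boom")))  # boom
```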
alita_sdk/runtime/tools/vectorstore_base.py

```diff
@@ -1,9 +1,9 @@
 import json
-import math
 from collections import OrderedDict
 from logging import getLogger
 from typing import Any, Optional, List, Dict, Generator

+import math
 from langchain_core.documents import Document
 from langchain_core.messages import HumanMessage
 from langchain_core.tools import ToolException
@@ -12,7 +12,7 @@ from pydantic import BaseModel, model_validator, Field

 from alita_sdk.tools.elitea_base import BaseToolApiWrapper
 from alita_sdk.tools.vector_adapters.VectorStoreAdapter import VectorStoreAdapterFactory
-from ..utils.logging import dispatch_custom_event
+from ...runtime.utils.utils import IndexerKeywords

 logger = getLogger(__name__)

@@ -222,6 +222,21 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
             raise RuntimeError(f"Multiple index_meta documents found: {index_metas}")
         return index_metas[0] if index_metas else None

+    def get_indexed_count(self, index_name: str) -> int:
+        from sqlalchemy.orm import Session
+        from sqlalchemy import func, or_
+
+        with Session(self.vectorstore.session_maker.bind) as session:
+            return session.query(
+                self.vectorstore.EmbeddingStore.id,
+            ).filter(
+                func.jsonb_extract_path_text(self.vectorstore.EmbeddingStore.cmetadata, 'collection') == index_name,
+                or_(
+                    func.jsonb_extract_path_text(self.vectorstore.EmbeddingStore.cmetadata, 'type').is_(None),
+                    func.jsonb_extract_path_text(self.vectorstore.EmbeddingStore.cmetadata, 'type') != IndexerKeywords.INDEX_META_TYPE.value
+                )
+            ).count()
+
     def _clean_collection(self, index_name: str = ''):
         """
         Clean the vectorstore collection by deleting all indexed data.
```
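For reference, the new `get_indexed_count` query counts embedding rows for a collection while excluding the `index_meta` bookkeeping document. Roughly the SQL it compiles to — table and column names follow langchain-postgres conventions and may differ per deployment:

```python
from sqlalchemy import text

# Approximate equivalent of the ORM query above; parameters are bound at
# execution time.
indexed_count_sql = text("""
    SELECT count(*)
    FROM langchain_pg_embedding
    WHERE jsonb_extract_path_text(cmetadata, 'collection') = :index_name
      AND (jsonb_extract_path_text(cmetadata, 'type') IS NULL
           OR jsonb_extract_path_text(cmetadata, 'type') != :index_meta_type)
""")
```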
alita_sdk/runtime/utils/mcp_sse_client.py

```diff
@@ -65,6 +65,53 @@ class McpSseClient:

         logger.info(f"[MCP SSE Client] Stream opened: status={self._stream_response.status}")

+        # Handle 401 Unauthorized - need OAuth
+        if self._stream_response.status == 401:
+            from ..utils.mcp_oauth import (
+                McpAuthorizationRequired,
+                canonical_resource,
+                extract_resource_metadata_url,
+                fetch_resource_metadata_async,
+                infer_authorization_servers_from_realm,
+                fetch_oauth_authorization_server_metadata
+            )
+
+            auth_header = self._stream_response.headers.get('WWW-Authenticate', '')
+            resource_metadata_url = extract_resource_metadata_url(auth_header, self.url)
+
+            metadata = None
+            if resource_metadata_url:
+                metadata = await fetch_resource_metadata_async(
+                    resource_metadata_url,
+                    session=self._stream_session,
+                    timeout=30
+                )
+
+            # Infer authorization servers if not in metadata
+            if not metadata or not metadata.get('authorization_servers'):
+                inferred_servers = infer_authorization_servers_from_realm(auth_header, self.url)
+                if inferred_servers:
+                    if not metadata:
+                        metadata = {}
+                    metadata['authorization_servers'] = inferred_servers
+                    logger.info(f"[MCP SSE Client] Inferred authorization servers: {inferred_servers}")
+
+                    # Fetch OAuth metadata
+                    auth_server_metadata = fetch_oauth_authorization_server_metadata(inferred_servers[0], timeout=30)
+                    if auth_server_metadata:
+                        metadata['oauth_authorization_server'] = auth_server_metadata
+                        logger.info(f"[MCP SSE Client] Fetched OAuth metadata")
+
+            raise McpAuthorizationRequired(
+                message=f"MCP server {self.url} requires OAuth authorization",
+                server_url=canonical_resource(self.url),
+                resource_metadata_url=resource_metadata_url,
+                www_authenticate=auth_header,
+                resource_metadata=metadata,
+                status=self._stream_response.status,
+                tool_name=self.url,
+            )
+
         if self._stream_response.status != 200:
             error_text = await self._stream_response.text()
             raise Exception(f"Failed to open SSE stream: HTTP {self._stream_response.status}: {error_text}")
```
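A caller can now distinguish "needs OAuth" from a hard failure by catching the new exception. A hedged sketch of that pattern — the constructor signature, the `list_tools` method, and the exception attribute names are assumed from the code above, not confirmed API:

```python
from alita_sdk.runtime.utils.mcp_oauth import McpAuthorizationRequired
from alita_sdk.runtime.utils.mcp_sse_client import McpSseClient


async def discover_tools(url: str, session_id: str):
    client = McpSseClient(url, session_id)  # constructor signature assumed
    try:
        return await client.list_tools()    # method name assumed
    except McpAuthorizationRequired as exc:
        # Surface the discovery metadata so the frontend can run the OAuth
        # flow and retry with a real session_id.
        return {
            "authorization_required": True,
            "server_url": exc.server_url,               # attribute names assumed
            "resource_metadata": exc.resource_metadata,
        }
```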
```diff
@@ -248,18 +295,29 @@ class McpSseClient:
         """Close the persistent SSE stream."""
         logger.info(f"[MCP SSE Client] Closing connection...")

+        # Cancel background stream reader task
         if self._stream_task and not self._stream_task.done():
             self._stream_task.cancel()
             try:
                 await self._stream_task
-            except asyncio.CancelledError:
-                pass
+            except (asyncio.CancelledError, Exception) as e:
+                logger.debug(f"[MCP SSE Client] Stream task cleanup: {e}")

-        if self._stream_response:
-            self._stream_response.close()
+        # Close response stream
+        if self._stream_response and not self._stream_response.closed:
+            try:
+                self._stream_response.close()
+            except Exception as e:
+                logger.debug(f"[MCP SSE Client] Response close error: {e}")

-        if self._stream_session:
-            await self._stream_session.close()
+        # Close session
+        if self._stream_session and not self._stream_session.closed:
+            try:
+                await self._stream_session.close()
+                # Give aiohttp time to cleanup
+                await asyncio.sleep(0.1)
+            except Exception as e:
+                logger.debug(f"[MCP SSE Client] Session close error: {e}")

         logger.info(f"[MCP SSE Client] Connection closed")
```
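The `close()` rewrite follows a best-effort teardown pattern: each resource is released independently and failures are logged rather than raised, so one failing close cannot leak the others. A generic sketch of the same idea — `close_quietly` is an invented helper:

```python
import asyncio
import logging

logger = logging.getLogger(__name__)


async def close_quietly(name: str, closer) -> None:
    """Run a sync or async closer, logging (never raising) failures."""
    try:
        result = closer()
        if asyncio.iscoroutine(result):
            await result
    except Exception as e:
        logger.debug("%s close error: %s", name, e)

# Usage: await close_quietly("response", response.close)
#        await close_quietly("session", session.close)
```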
alita_sdk/tools/ado/repos/__init__.py

```diff
@@ -63,6 +63,7 @@ class AzureDevOpsReposToolkit(BaseToolkit):
                     "icon_url": "ado-repos-icon.svg",
                     "categories": ["code repositories"],
                     "extra_categories": ["code", "repository", "version control"],
+                    "max_length": AzureDevOpsReposToolkit.toolkit_max_length
                 }}}
         )
```
alita_sdk/tools/ado/test_plan/__init__.py

```diff
@@ -27,7 +27,6 @@ class AzureDevOpsPlansToolkit(BaseToolkit):
         AzureDevOpsPlansToolkit.toolkit_max_length = get_max_toolkit_length(selected_tools)
         m = create_model(
             name_alias,
-            name=(str, Field(description="Toolkit name", json_schema_extra={'toolkit_name': True, 'max_toolkit_length': AzureDevOpsPlansToolkit.toolkit_max_length})),
             ado_configuration=(AdoConfiguration, Field(description="Ado configuration", json_schema_extra={'configuration_types': ['ado']})),
             limit=(Optional[int], Field(description="ADO plans limit used for limitation of the list with results", default=5)),
             # indexer settings
@@ -40,6 +39,7 @@ class AzureDevOpsPlansToolkit(BaseToolkit):
                 {
                     "label": "ADO plans",
                     "icon_url": "ado-plans.svg",
+                    "max_length": AzureDevOpsPlansToolkit.toolkit_max_length,
                     "categories": ["test management"],
                     "extra_categories": ["test case management", "qa"],
                     "sections": {
```
alita_sdk/tools/ado/wiki/__init__.py

```diff
@@ -24,11 +24,6 @@ class AzureDevOpsWikiToolkit(BaseToolkit):
         AzureDevOpsWikiToolkit.toolkit_max_length = get_max_toolkit_length(selected_tools)
         m = create_model(
             name_alias,
-            name=(str, Field(description="Toolkit name",
-                             json_schema_extra={
-                                 'toolkit_name': True,
-                                 'max_toolkit_length': AzureDevOpsWikiToolkit.toolkit_max_length})
-                  ),
             ado_configuration=(AdoConfiguration, Field(description="Ado configuration", json_schema_extra={'configuration_types': ['ado']})),
             # indexer settings
             pgvector_configuration=(Optional[PgVectorConfiguration], Field(default=None,
@@ -42,6 +37,7 @@ class AzureDevOpsWikiToolkit(BaseToolkit):
             'metadata': {
                 "label": "ADO wiki",
                 "icon_url": "ado-wiki-icon.svg",
+                "max_length": AzureDevOpsWikiToolkit.toolkit_max_length,
                 "categories": ["documentation"],
                 "extra_categories": ["knowledge base", "documentation management", "wiki"],
                 "sections": {
```
alita_sdk/tools/ado/work_item/__init__.py

```diff
@@ -23,11 +23,6 @@ class AzureDevOpsWorkItemsToolkit(BaseToolkit):
         AzureDevOpsWorkItemsToolkit.toolkit_max_length = get_max_toolkit_length(selected_tools)
         m = create_model(
             name,
-            name=(str, Field(description="Toolkit name",
-                             json_schema_extra={
-                                 'toolkit_name': True,
-                                 'max_toolkit_length': AzureDevOpsWorkItemsToolkit.toolkit_max_length})
-                  ),
             ado_configuration=(AdoConfiguration, Field(description="Ado Work Item configuration", json_schema_extra={'configuration_types': ['ado']})),
             limit=(Optional[int], Field(description="ADO plans limit used for limitation of the list with results", default=5)),
             selected_tools=(List[Literal[tuple(selected_tools)]], Field(default=[], json_schema_extra={'args_schemas': selected_tools})),
@@ -42,6 +37,7 @@ class AzureDevOpsWorkItemsToolkit(BaseToolkit):
             'metadata': {
                 "label": "ADO boards",
                 "icon_url": "ado-boards-icon.svg",
+                "max_length": AzureDevOpsWorkItemsToolkit.toolkit_max_length,
                 "categories": ["project management"],
                 "extra_categories": ["work item management", "issue tracking", "agile boards"],
                 "sections": {
```
alita_sdk/tools/base_indexer_toolkit.py

```diff
@@ -15,6 +15,8 @@ from ..runtime.utils.utils import IndexerKeywords

 logger = logging.getLogger(__name__)

+DEFAULT_CUT_OFF = 0.2
+
 # Base Vector Store Schema Models
 BaseIndexParams = create_model(
     "BaseIndexParams",
```
```diff
@@ -37,7 +39,7 @@ BaseSearchParams = create_model(
         default={},
         examples=["{\"key\": \"value\"}", "{\"status\": \"active\"}"]
     )),
-    cut_off=(Optional[float], Field(description="Cut-off score for search results", default=0.5, ge=0, le=1)),
+    cut_off=(Optional[float], Field(description="Cut-off score for search results", default=DEFAULT_CUT_OFF, ge=0, le=1)),
     search_top=(Optional[int], Field(description="Number of top results to return", default=10)),
     full_text_search=(Optional[Dict[str, Any]], Field(
         description="Full text search parameters. Can be a dictionary with search options.",
@@ -67,7 +69,7 @@ BaseStepbackSearchParams = create_model(
         default={},
         examples=["{\"key\": \"value\"}", "{\"status\": \"active\"}"]
     )),
-    cut_off=(Optional[float], Field(description="Cut-off score for search results", default=0.5, ge=0, le=1)),
+    cut_off=(Optional[float], Field(description="Cut-off score for search results", default=DEFAULT_CUT_OFF, ge=0, le=1)),
     search_top=(Optional[int], Field(description="Number of top results to return", default=10)),
     full_text_search=(Optional[Dict[str, Any]], Field(
         description="Full text search parameters. Can be a dictionary with search options.",
@@ -380,7 +382,7 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
     def search_index(self,
                      query: str,
                      index_name: str = "",
-                     filter: dict | str = {}, cut_off: float = 0.5,
+                     filter: dict | str = {}, cut_off: float = DEFAULT_CUT_OFF,
                      search_top: int = 10, reranker: dict = {},
                      full_text_search: Optional[Dict[str, Any]] = None,
                      reranking_config: Optional[Dict[str, Dict[str, Any]]] = None,
@@ -411,7 +413,7 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
                      query: str,
                      messages: List[Dict[str, Any]] = [],
                      index_name: str = "",
-                     filter: dict | str = {}, cut_off: float = 0.5,
+                     filter: dict | str = {}, cut_off: float = DEFAULT_CUT_OFF,
                      search_top: int = 10, reranker: dict = {},
                      full_text_search: Optional[Dict[str, Any]] = None,
                      reranking_config: Optional[Dict[str, Dict[str, Any]]] = None,
@@ -436,7 +438,7 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
                      query: str,
                      messages: List[Dict[str, Any]] = [],
                      index_name: str = "",
-                     filter: dict | str = {}, cut_off: float = 0.5,
+                     filter: dict | str = {}, cut_off: float = DEFAULT_CUT_OFF,
                      search_top: int = 10, reranker: dict = {},
                      full_text_search: Optional[Dict[str, Any]] = None,
                      reranking_config: Optional[Dict[str, Dict[str, Any]]] = None,
@@ -470,6 +472,7 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
             "collection": index_name,
             "type": IndexerKeywords.INDEX_META_TYPE.value,
             "indexed": 0,
+            "updated": 0,
             "state": IndexerKeywords.INDEX_META_IN_PROGRESS.value,
             "index_configuration": index_configuration,
             "created_on": created_on,
@@ -487,7 +490,8 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
         #
         if index_meta_raw:
             metadata = copy.deepcopy(index_meta_raw.get("metadata", {}))
-            metadata["indexed"] = result
+            metadata["indexed"] = self.get_indexed_count(index_name)
+            metadata["updated"] = result
             metadata["state"] = state
             metadata["updated_on"] = time.time()
             #
```
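The metadata now distinguishes `updated` (documents written by this run) from `indexed` (the collection total, re-read from the store via `get_indexed_count`). A sketch of the bookkeeping with invented values:

```python
import time

meta = {"indexed": 0, "updated": 0, "state": "in_progress"}


def finish_run(meta: dict, docs_touched: int, total_in_collection: int, state: str) -> dict:
    meta["indexed"] = total_in_collection  # e.g. the get_indexed_count() result
    meta["updated"] = docs_touched         # what this run actually wrote
    meta["state"] = state
    meta["updated_on"] = time.time()
    return meta


print(finish_run(meta, docs_touched=12, total_in_collection=340, state="completed"))
```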
alita_sdk/tools/bitbucket/__init__.py

```diff
@@ -61,6 +61,7 @@ class AlitaBitbucketToolkit(BaseToolkit):
                 'metadata':
                 {
                     "label": "Bitbucket", "icon_url": "bitbucket-icon.svg",
+                    "max_length": AlitaBitbucketToolkit.toolkit_max_length,
                     "categories": ["code repositories"],
                     "extra_categories": ["bitbucket", "git", "repository", "code", "version control"],
                 }
```
29
29
  SonarToolkit.toolkit_max_length = get_max_toolkit_length(selected_tools)
30
30
  return create_model(
31
31
  name,
32
- sonar_project_name=(str, Field(description="Project name of the desired repository", json_schema_extra={'toolkit_name': True, 'max_toolkit_length': SonarToolkit.toolkit_max_length})),
32
+ sonar_project_name=(str, Field(description="Project name of the desired repository")),
33
33
  sonar_configuration=(SonarConfiguration, Field(description="Sonar Configuration", json_schema_extra={'configuration_types': ['sonar']})),
34
34
  selected_tools=(List[Literal[tuple(selected_tools)]], Field(default=[], json_schema_extra={'args_schemas': selected_tools})),
35
35
  __config__=ConfigDict(json_schema_extra=
alita_sdk/tools/confluence/__init__.py

```diff
@@ -67,8 +67,7 @@ class ConfluenceToolkit(BaseToolkit):

         model = create_model(
             name,
-            space=(str, Field(description="Space", json_schema_extra={'toolkit_name': True,
-                                                                      'max_toolkit_length': ConfluenceToolkit.toolkit_max_length})),
+            space=(str, Field(description="Space")),
             cloud=(bool, Field(description="Hosting Option", json_schema_extra={'configuration': True})),
             limit=(int, Field(description="Pages limit per request", default=5)),
             labels=(Optional[str], Field(
@@ -95,6 +94,7 @@ class ConfluenceToolkit(BaseToolkit):
             'metadata': {
                 "label": "Confluence",
                 "icon_url": None,
+                "max_length": ConfluenceToolkit.toolkit_max_length,
                 "categories": ["documentation"],
                 "extra_categories": ["confluence", "wiki", "knowledge base", "documentation", "atlassian"]
             }
```
alita_sdk/tools/github/__init__.py

```diff
@@ -53,6 +53,7 @@ class AlitaGitHubToolkit(BaseToolkit):
             'metadata': {
                 "label": "GitHub",
                 "icon_url": None,
+                "max_length": AlitaGitHubToolkit.toolkit_max_length,
                 "categories": ["code repositories"],
                 "extra_categories": ["github", "git", "repository", "code", "version control"],
             },
@@ -62,8 +63,7 @@ class AlitaGitHubToolkit(BaseToolkit):
                                            json_schema_extra={'configuration_types': ['github']})),
             pgvector_configuration=(Optional[PgVectorConfiguration], Field(description="PgVector configuration", default=None,
                                                                            json_schema_extra={'configuration_types': ['pgvector']})),
-            repository=(str, Field(description="Github repository", json_schema_extra={'toolkit_name': True,
-                                                                                       'max_toolkit_length': AlitaGitHubToolkit.toolkit_max_length})),
+            repository=(str, Field(description="Github repository")),
             active_branch=(Optional[str], Field(description="Active branch", default="main")),
             base_branch=(Optional[str], Field(description="Github Base branch", default="main")),
             # embedder settings
```
alita_sdk/tools/gitlab/__init__.py

```diff
@@ -43,7 +43,7 @@ class AlitaGitlabToolkit(BaseToolkit):
         AlitaGitlabToolkit.toolkit_max_length = get_max_toolkit_length(selected_tools)
         return create_model(
             name,
-            repository=(str, Field(description="GitLab repository", json_schema_extra={'toolkit_name': True, 'max_toolkit_length': AlitaGitlabToolkit.toolkit_max_length})),
+            repository=(str, Field(description="GitLab repository")),
             gitlab_configuration=(GitlabConfiguration, Field(description="GitLab configuration", json_schema_extra={'configuration_types': ['gitlab']})),
             branch=(str, Field(description="Main branch", default="main")),
             # indexer settings
@@ -57,6 +57,7 @@ class AlitaGitlabToolkit(BaseToolkit):
             'metadata': {
                 "label": "GitLab",
                 "icon_url": None,
+                "max_length": AlitaGitlabToolkit.toolkit_max_length,
                 "categories": ["code repositories"],
                 "extra_categories": ["gitlab", "git", "repository", "code", "version control"],
             }
```
alita_sdk/tools/gitlab_org/__init__.py

```diff
@@ -30,8 +30,6 @@ class AlitaGitlabSpaceToolkit(BaseToolkit):
         AlitaGitlabSpaceToolkit.toolkit_max_length = get_max_toolkit_length(selected_tools)
         return create_model(
             name,
-            name=(str, Field(description="Toolkit name", json_schema_extra={'toolkit_name': True,
-                                                                            'max_toolkit_length': AlitaGitlabSpaceToolkit.toolkit_max_length})),
             gitlab_configuration=(GitlabConfiguration, Field(description="GitLab configuration",
                                                              json_schema_extra={
                                                                  'configuration_types': ['gitlab']})),
@@ -46,6 +44,7 @@ class AlitaGitlabSpaceToolkit(BaseToolkit):
             'metadata': {
                 "label": "GitLab Org",
                 "icon_url": None,
+                "max_length": AlitaGitlabSpaceToolkit.toolkit_max_length,
                 "categories": ["code repositories"],
                 "extra_categories": ["gitlab", "git", "repository", "code", "version control"],
             }
```
alita_sdk/tools/google_places/__init__.py

```diff
@@ -30,7 +30,7 @@ class GooglePlacesToolkit(BaseToolkit):
         GooglePlacesToolkit.toolkit_max_length = get_max_toolkit_length(selected_tools)
         return create_model(
             name,
-            results_count=(Optional[int], Field(description="Results number to show", default=None, json_schema_extra={'toolkit_name': True, 'max_toolkit_length': GooglePlacesToolkit.toolkit_max_length})),
+            results_count=(Optional[int], Field(description="Results number to show", default=None)),
             google_places_configuration=(GooglePlacesConfiguration, Field(description="Google Places Configuration", json_schema_extra={'configuration_types': ['google_places']})),
             selected_tools=(List[Literal[tuple(selected_tools)]], Field(default=[], json_schema_extra={'args_schemas': selected_tools})),
             __config__=ConfigDict(json_schema_extra=
@@ -38,6 +38,7 @@ class GooglePlacesToolkit(BaseToolkit):
                 'metadata':
                 {
                     "label": "Google Places", "icon_url": "gplaces-icon.svg",
+                    "max_length": GooglePlacesToolkit.toolkit_max_length,
                     "categories": ["other"],
                     "extra_categories": ["google", "places", "maps", "location",
                                          "geolocation"],
```
alita_sdk/tools/jira/__init__.py

```diff
@@ -89,6 +89,7 @@ class JiraToolkit(BaseToolkit):
             'metadata': {
                 "label": "Jira",
                 "icon_url": "jira-icon.svg",
+                "max_length": JiraToolkit.toolkit_max_length,
                 "categories": ["project management"],
                 "extra_categories": ["jira", "atlassian", "issue tracking", "project management", "task management"],
             }
```
alita_sdk/tools/memory/__init__.py

```diff
@@ -61,7 +61,7 @@ class MemoryToolkit(BaseToolkit):

         return create_model(
             'memory',
-            namespace=(str, Field(description="Memory namespace", json_schema_extra={'toolkit_name': True})),
+            namespace=(str, Field(description="Memory namespace")),
             pgvector_configuration=(PgVectorConfiguration, Field(description="PgVector Configuration",
                                                                  json_schema_extra={
                                                                      'configuration_types': ['pgvector']})),
```
alita_sdk/tools/pandas/__init__.py

```diff
@@ -29,7 +29,7 @@ class PandasToolkit(BaseToolkit):
         PandasToolkit.toolkit_max_length = get_max_toolkit_length(selected_tools)
         return create_model(
             name,
-            bucket_name=(str, Field(default=None, title="Bucket name", description="Bucket where the content file is stored", json_schema_extra={'toolkit_name': True, 'max_toolkit_length': PandasToolkit.toolkit_max_length})),
+            bucket_name=(str, Field(default=None, title="Bucket name", description="Bucket where the content file is stored")),
             selected_tools=(List[Literal[tuple(selected_tools)]], Field(default=[], json_schema_extra={'args_schemas': selected_tools})),
             __config__=ConfigDict(json_schema_extra={'metadata': {"label": "Pandas", "icon_url": "pandas-icon.svg",
                                                                   "categories": ["analysis"],
```