alita-sdk 0.3.465__py3-none-any.whl → 0.3.486__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of alita-sdk might be problematic. Click here for more details.

Files changed (90) hide show
  1. alita_sdk/cli/agent/__init__.py +5 -0
  2. alita_sdk/cli/agent/default.py +83 -1
  3. alita_sdk/cli/agent_loader.py +6 -9
  4. alita_sdk/cli/agent_ui.py +13 -3
  5. alita_sdk/cli/agents.py +1866 -185
  6. alita_sdk/cli/callbacks.py +96 -25
  7. alita_sdk/cli/cli.py +10 -1
  8. alita_sdk/cli/config.py +151 -9
  9. alita_sdk/cli/context/__init__.py +30 -0
  10. alita_sdk/cli/context/cleanup.py +198 -0
  11. alita_sdk/cli/context/manager.py +731 -0
  12. alita_sdk/cli/context/message.py +285 -0
  13. alita_sdk/cli/context/strategies.py +289 -0
  14. alita_sdk/cli/context/token_estimation.py +127 -0
  15. alita_sdk/cli/input_handler.py +167 -4
  16. alita_sdk/cli/inventory.py +1256 -0
  17. alita_sdk/cli/toolkit.py +14 -17
  18. alita_sdk/cli/toolkit_loader.py +35 -5
  19. alita_sdk/cli/tools/__init__.py +8 -1
  20. alita_sdk/cli/tools/filesystem.py +815 -55
  21. alita_sdk/cli/tools/planning.py +143 -157
  22. alita_sdk/cli/tools/terminal.py +154 -20
  23. alita_sdk/community/__init__.py +64 -8
  24. alita_sdk/community/inventory/__init__.py +224 -0
  25. alita_sdk/community/inventory/config.py +257 -0
  26. alita_sdk/community/inventory/enrichment.py +2137 -0
  27. alita_sdk/community/inventory/extractors.py +1469 -0
  28. alita_sdk/community/inventory/ingestion.py +3172 -0
  29. alita_sdk/community/inventory/knowledge_graph.py +1457 -0
  30. alita_sdk/community/inventory/parsers/__init__.py +218 -0
  31. alita_sdk/community/inventory/parsers/base.py +295 -0
  32. alita_sdk/community/inventory/parsers/csharp_parser.py +907 -0
  33. alita_sdk/community/inventory/parsers/go_parser.py +851 -0
  34. alita_sdk/community/inventory/parsers/html_parser.py +389 -0
  35. alita_sdk/community/inventory/parsers/java_parser.py +593 -0
  36. alita_sdk/community/inventory/parsers/javascript_parser.py +629 -0
  37. alita_sdk/community/inventory/parsers/kotlin_parser.py +768 -0
  38. alita_sdk/community/inventory/parsers/markdown_parser.py +362 -0
  39. alita_sdk/community/inventory/parsers/python_parser.py +604 -0
  40. alita_sdk/community/inventory/parsers/rust_parser.py +858 -0
  41. alita_sdk/community/inventory/parsers/swift_parser.py +832 -0
  42. alita_sdk/community/inventory/parsers/text_parser.py +322 -0
  43. alita_sdk/community/inventory/parsers/yaml_parser.py +370 -0
  44. alita_sdk/community/inventory/patterns/__init__.py +61 -0
  45. alita_sdk/community/inventory/patterns/ast_adapter.py +380 -0
  46. alita_sdk/community/inventory/patterns/loader.py +348 -0
  47. alita_sdk/community/inventory/patterns/registry.py +198 -0
  48. alita_sdk/community/inventory/presets.py +535 -0
  49. alita_sdk/community/inventory/retrieval.py +1403 -0
  50. alita_sdk/community/inventory/toolkit.py +169 -0
  51. alita_sdk/community/inventory/visualize.py +1370 -0
  52. alita_sdk/configurations/bitbucket.py +0 -3
  53. alita_sdk/runtime/clients/client.py +84 -26
  54. alita_sdk/runtime/langchain/assistant.py +4 -2
  55. alita_sdk/runtime/langchain/langraph_agent.py +122 -31
  56. alita_sdk/runtime/llms/preloaded.py +2 -6
  57. alita_sdk/runtime/toolkits/__init__.py +2 -0
  58. alita_sdk/runtime/toolkits/application.py +1 -1
  59. alita_sdk/runtime/toolkits/mcp.py +46 -36
  60. alita_sdk/runtime/toolkits/planning.py +171 -0
  61. alita_sdk/runtime/toolkits/tools.py +39 -6
  62. alita_sdk/runtime/tools/llm.py +185 -8
  63. alita_sdk/runtime/tools/planning/__init__.py +36 -0
  64. alita_sdk/runtime/tools/planning/models.py +246 -0
  65. alita_sdk/runtime/tools/planning/wrapper.py +607 -0
  66. alita_sdk/runtime/tools/vectorstore_base.py +41 -6
  67. alita_sdk/runtime/utils/mcp_oauth.py +80 -0
  68. alita_sdk/runtime/utils/streamlit.py +6 -10
  69. alita_sdk/runtime/utils/toolkit_utils.py +19 -4
  70. alita_sdk/tools/__init__.py +54 -27
  71. alita_sdk/tools/ado/repos/repos_wrapper.py +1 -2
  72. alita_sdk/tools/base_indexer_toolkit.py +98 -19
  73. alita_sdk/tools/bitbucket/__init__.py +2 -2
  74. alita_sdk/tools/chunkers/__init__.py +3 -1
  75. alita_sdk/tools/chunkers/sematic/markdown_chunker.py +95 -6
  76. alita_sdk/tools/chunkers/universal_chunker.py +269 -0
  77. alita_sdk/tools/code_indexer_toolkit.py +55 -22
  78. alita_sdk/tools/elitea_base.py +86 -21
  79. alita_sdk/tools/jira/__init__.py +1 -1
  80. alita_sdk/tools/jira/api_wrapper.py +91 -40
  81. alita_sdk/tools/non_code_indexer_toolkit.py +1 -0
  82. alita_sdk/tools/qtest/__init__.py +1 -1
  83. alita_sdk/tools/vector_adapters/VectorStoreAdapter.py +8 -2
  84. alita_sdk/tools/zephyr_essential/api_wrapper.py +12 -13
  85. {alita_sdk-0.3.465.dist-info → alita_sdk-0.3.486.dist-info}/METADATA +2 -1
  86. {alita_sdk-0.3.465.dist-info → alita_sdk-0.3.486.dist-info}/RECORD +90 -50
  87. {alita_sdk-0.3.465.dist-info → alita_sdk-0.3.486.dist-info}/WHEEL +0 -0
  88. {alita_sdk-0.3.465.dist-info → alita_sdk-0.3.486.dist-info}/entry_points.txt +0 -0
  89. {alita_sdk-0.3.465.dist-info → alita_sdk-0.3.486.dist-info}/licenses/LICENSE +0 -0
  90. {alita_sdk-0.3.465.dist-info → alita_sdk-0.3.486.dist-info}/top_level.txt +0 -0
@@ -155,15 +155,45 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
155
155
  if values.get('alita') and values.get('embedding_model'):
156
156
  values['embeddings'] = values.get('alita').get_embeddings(values.get('embedding_model'))
157
157
 
158
- if values.get('vectorstore_type') and values.get('vectorstore_params') and values.get('embedding_model'):
159
- values['vectorstore'] = get_vectorstore(values['vectorstore_type'], values['vectorstore_params'], embedding_func=values['embeddings'])
160
- # Initialize the new vector adapter
161
- values['vector_adapter'] = VectorStoreAdapterFactory.create_adapter(values['vectorstore_type'])
162
- logger.debug(f"Vectorstore wrapper initialized: {values}")
158
+ # Lazy initialization: vectorstore and vector_adapter are initialized on-demand
159
+ # This prevents errors when using non-index tools with broken/missing vector DB
163
160
  return values
164
161
 
162
def _ensure_vectorstore_initialized(self):
    """Lazily initialize vectorstore and vector_adapter when needed for index operations.

    The vector store and its adapter are created on first use rather than at
    validation time, so tools that never touch the index keep working when the
    vector database is missing or misconfigured.

    Calling this method again is cheap: each part is only built while the
    corresponding attribute is still ``None``.

    Raises:
        ToolException: If the vector store is not configured, or if creating
            the vector store or the adapter fails. The original error is
            attached as ``__cause__`` so callers can inspect it.
    """
    if self.vectorstore is None:
        if not self.vectorstore_type or not self.vectorstore_params:
            raise ToolException(
                "Vector store is not configured. "
                "Please ensure embedding_model and pgvector_configuration are provided."
            )

        # Function-scope import, kept exactly where the original placed it.
        from ..langchain.interfaces.llm_processor import get_vectorstore
        try:
            self.vectorstore = get_vectorstore(
                self.vectorstore_type,
                self.vectorstore_params,
                embedding_func=self.embeddings
            )
            logger.debug(f"Vectorstore initialized: {self.vectorstore_type}")
        except Exception as e:
            # Chain explicitly: code that unwraps errors through `__cause__`
            # would otherwise lose the underlying failure.
            raise ToolException(
                f"Failed to initialize vector store: {str(e)}. "
                "Check your vector database configuration and connection."
            ) from e

    # The adapter is checked independently of the store, so an instance that
    # already has a vectorstore but no adapter still gets one.
    if self.vector_adapter is None:
        try:
            self.vector_adapter = VectorStoreAdapterFactory.create_adapter(self.vectorstore_type)
            logger.debug(f"Vector adapter initialized: {self.vectorstore_type}")
        except Exception as e:
            raise ToolException(
                f"Failed to initialize vector adapter: {str(e)}"
            ) from e
193
+
165
194
  def _init_pg_helper(self, language='english'):
166
195
  """Initialize PGVector helper if needed and not already initialized"""
196
+ self._ensure_vectorstore_initialized()
167
197
  if self.pg_helper is None and hasattr(self.vectorstore, 'connection_string') and hasattr(self.vectorstore, 'collection_name'):
168
198
  try:
169
199
  from .pgvector_search import PGVectorSearch
@@ -192,6 +222,7 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
192
222
  Raises:
193
223
  ToolException: When DataException occurs or other search errors
194
224
  """
225
+ self._ensure_vectorstore_initialized()
195
226
  try:
196
227
  return self.vectorstore.similarity_search_with_score(
197
228
  query, filter=filter, k=k
@@ -210,19 +241,21 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
210
241
 
211
242
  def list_collections(self) -> List[str]:
      """List all collections in the vectorstore."""
      # NOTE(review): the docstring above appears to be surfaced as a tool
      # description via `self.list_collections.__doc__` elsewhere in the
      # package, so it is deliberately left unchanged -- confirm before editing.

      # Build the vector store and adapter on first use (lazy initialization).
      self._ensure_vectorstore_initialized()
      collections = self.vector_adapter.list_collections(self)
      if not collections:
          # NOTE(review): this branch returns a plain string although the
          # annotation promises List[str]; callers must handle both shapes.
          return "No indexed collections"
      return collections
218
249
 
219
250
  def get_index_meta(self, index_name: str):
      """Return the single index_meta document stored for `index_name`.

      Args:
          index_name: Name of the index whose meta document is requested.

      Returns:
          The first (and only) item returned by the vector adapter, or None
          when the adapter returns nothing.

      Raises:
          RuntimeError: If the adapter returns more than one index_meta
              document for the index.
      """
      # Build the vector store and adapter on first use (lazy initialization).
      self._ensure_vectorstore_initialized()
      index_metas = self.vector_adapter.get_index_meta(self, index_name)
      # More than one meta document is treated as an error rather than
      # silently picking one of them.
      if len(index_metas) > 1:
          raise RuntimeError(f"Multiple index_meta documents found: {index_metas}")
      return index_metas[0] if index_metas else None
224
256
 
225
257
  def get_indexed_count(self, index_name: str) -> int:
258
+ self._ensure_vectorstore_initialized()
226
259
  from sqlalchemy.orm import Session
227
260
  from sqlalchemy import func, or_
228
261
 
@@ -241,6 +274,7 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
241
274
  """
242
275
  Clean the vectorstore collection by deleting all indexed data.
243
276
  """
277
+ self._ensure_vectorstore_initialized()
244
278
  self._log_tool_event(
245
279
  f"Cleaning collection '{self.dataset}'",
246
280
  tool_name="_clean_collection"
@@ -259,6 +293,7 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
259
293
  progress_step (int): Step for progress reporting, default is 20.
260
294
  clean_index (bool): If True, clean the index before re-indexing all documents.
261
295
  """
296
+ self._ensure_vectorstore_initialized()
262
297
  if clean_index:
263
298
  self._clean_index(index_name)
264
299
 
@@ -162,3 +162,83 @@ def canonical_resource(server_url: str) -> str:
162
162
  if resource.endswith("/") and parsed.path in ("", "/"):
163
163
  resource = resource[:-1]
164
164
  return resource
165
+
166
+
167
def exchange_oauth_token(
    token_endpoint: str,
    code: str,
    redirect_uri: str,
    client_id: str,
    client_secret: Optional[str] = None,
    code_verifier: Optional[str] = None,
    scope: Optional[str] = None,
    timeout: int = 30,
) -> Dict[str, Any]:
    """
    Exchange an OAuth authorization code for access tokens.

    This function performs the OAuth token exchange on the server side,
    avoiding CORS issues that would occur if done from a browser.

    Args:
        token_endpoint: OAuth token endpoint URL
        code: Authorization code from OAuth provider
        redirect_uri: Redirect URI used in authorization request
        client_id: OAuth client ID
        client_secret: OAuth client secret (optional for public clients)
        code_verifier: PKCE code verifier (optional)
        scope: OAuth scope (optional)
        timeout: Request timeout in seconds

    Returns:
        Token response from OAuth provider containing access_token, etc.

    Raises:
        requests.RequestException: If the HTTP request fails
        ValueError: If the token exchange fails, i.e. the provider answers
            with a non-2xx status or with a body that carries an ``error``
            field and no ``access_token``
    """
    # Build the token request body
    token_body = {
        "grant_type": "authorization_code",
        "code": code,
        "redirect_uri": redirect_uri,
        "client_id": client_id,
    }

    # Optional parameters are sent only when they carry a value.
    if client_secret:
        token_body["client_secret"] = client_secret
    if code_verifier:
        token_body["code_verifier"] = code_verifier
    if scope:
        token_body["scope"] = scope

    logger.info(f"MCP OAuth: exchanging code at {token_endpoint}")

    # Make the token exchange request
    response = requests.post(
        token_endpoint,
        data=token_body,
        headers={
            "Content-Type": "application/x-www-form-urlencoded",
            "Accept": "application/json",
        },
        timeout=timeout
    )

    # Try to parse as JSON. Decoding failures surface as ValueError
    # subclasses, so nothing broader needs to be caught here.
    try:
        token_data = response.json()
    except ValueError:
        # Some providers return URL-encoded response
        from urllib.parse import parse_qs
        token_data = {k: v[0] if len(v) == 1 else v
                      for k, v in parse_qs(response.text).items()}

    # A JSON body is not necessarily an object (it may be a list, string or
    # null); only dicts can be inspected for error fields.
    fields = token_data if isinstance(token_data, dict) else {}

    # A provider may answer with a 2xx status yet report the failure in the
    # body, so the status code alone does not prove that a token was issued.
    error_in_body = "error" in fields and "access_token" not in fields

    if response.ok and not error_in_body:
        logger.info("MCP OAuth: token exchange successful")
        return token_data

    error_msg = fields.get("error_description") or fields.get("error") or response.text
    logger.error(f"MCP OAuth: token exchange failed - {response.status_code}: {error_msg}")
    raise ValueError(f"Token exchange failed: {error_msg}")
+
@@ -287,7 +287,6 @@ def run_streamlit(st, ai_icon=None, user_icon=None):
287
287
  model_config={
288
288
  "temperature": 0.1,
289
289
  "max_tokens": 1000,
290
- "top_p": 1.0
291
290
  }
292
291
  )
293
292
  except Exception as e:
@@ -1256,7 +1255,6 @@ def run_streamlit(st, ai_icon=None, user_icon=None):
1256
1255
  model_config={
1257
1256
  "temperature": 0.1,
1258
1257
  "max_tokens": 1000,
1259
- "top_p": 1.0
1260
1258
  }
1261
1259
  )
1262
1260
  except Exception as e:
@@ -1387,20 +1385,18 @@ def run_streamlit(st, ai_icon=None, user_icon=None):
1387
1385
  help="Maximum number of tokens in the AI response"
1388
1386
  )
1389
1387
 
1390
- top_p = st.slider(
1391
- "Top-p:",
1392
- min_value=0.1,
1393
- max_value=1.0,
1394
- value=1.0,
1395
- step=0.1,
1396
- help="Controls diversity via nucleus sampling"
1388
+ reasoning_effort = st.selectbox(
1389
+ "Reasoning effort:",
1390
+ options=['null', 'low', 'medium', 'high'],
1391
+ index=0,
1392
+ help="Higher effort better reasoning, slower response"
1397
1393
  )
1398
1394
 
1399
1395
  # Create LLM config
1400
1396
  llm_config = {
1401
1397
  'max_tokens': max_tokens,
1402
1398
  'temperature': temperature,
1403
- 'top_p': top_p
1399
+ 'reasoning_effort': reasoning_effort
1404
1400
  }
1405
1401
 
1406
1402
  col1, col2 = st.columns([3, 1])
@@ -12,7 +12,8 @@ logger = logging.getLogger(__name__)
12
12
 
13
13
  def instantiate_toolkit_with_client(toolkit_config: Dict[str, Any],
14
14
  llm_client: Any,
15
- alita_client: Optional[Any] = None) -> List[Any]:
15
+ alita_client: Optional[Any] = None,
16
+ mcp_tokens: Optional[Dict[str, Any]] = None) -> List[Any]:
16
17
  """
17
18
  Instantiate a toolkit with LLM client support.
18
19
 
@@ -22,7 +23,8 @@ def instantiate_toolkit_with_client(toolkit_config: Dict[str, Any],
22
23
  Args:
23
24
  toolkit_config: Configuration dictionary for the toolkit
24
25
  llm_client: LLM client instance for tools that need LLM capabilities
25
- client: Optional additional client instance
26
+ alita_client: Optional additional client instance
27
+ mcp_tokens: Optional dictionary of MCP OAuth tokens by server URL
26
28
 
27
29
  Returns:
28
30
  List of instantiated tools from the toolkit
@@ -60,8 +62,8 @@ def instantiate_toolkit_with_client(toolkit_config: Dict[str, Any],
60
62
  }
61
63
 
62
64
  # Get tools using the toolkit configuration with clients
63
- # Parameter order: get_tools(tools_list, alita_client, llm, memory_store)
64
- tools = get_tools([tool_config], alita_client, llm_client)
65
+ # Parameter order: get_tools(tools_list, alita_client, llm, memory_store, debug_mode, mcp_tokens)
66
+ tools = get_tools([tool_config], alita_client, llm_client, mcp_tokens=mcp_tokens)
65
67
 
66
68
  if not tools:
67
69
  logger.warning(f"No tools returned for toolkit {toolkit_name}")
@@ -73,9 +75,22 @@ def instantiate_toolkit_with_client(toolkit_config: Dict[str, Any],
73
75
  except Exception as e:
74
76
  # Re-raise McpAuthorizationRequired without logging as error
75
77
  from ..utils.mcp_oauth import McpAuthorizationRequired
78
+
79
+ # Check if it's McpAuthorizationRequired directly
76
80
  if isinstance(e, McpAuthorizationRequired):
77
81
  logger.info(f"Toolkit {toolkit_name} requires MCP OAuth authorization")
78
82
  raise
83
+
84
+ # Also check for wrapped exceptions
85
+ if hasattr(e, '__cause__') and isinstance(e.__cause__, McpAuthorizationRequired):
86
+ logger.info(f"Toolkit {toolkit_name} requires MCP OAuth authorization (wrapped)")
87
+ raise e.__cause__
88
+
89
+ # Check exception class name as fallback
90
+ if e.__class__.__name__ == 'McpAuthorizationRequired':
91
+ logger.info(f"Toolkit {toolkit_name} requires MCP OAuth authorization (by name)")
92
+ raise
93
+
79
94
  # Log and re-raise other errors
80
95
  logger.error(f"Error instantiating toolkit {toolkit_name} with client: {str(e)}")
81
96
  raise
@@ -13,6 +13,30 @@ AVAILABLE_TOOLS = {}
13
13
  AVAILABLE_TOOLKITS = {}
14
14
  FAILED_IMPORTS = {}
15
15
 
16
+
17
def _inject_toolkit_id(tool_conf: dict, toolkit_tools) -> None:
    """Inject `toolkit_id` into tools that expose `api_wrapper.toolkit_id`.

    This reads 'id' from the tool configuration and, if it is an integer,
    assigns it to the 'toolkit_id' attribute of the 'api_wrapper' for each
    tool in 'toolkit_tools' that supports it. Tools without an 'api_wrapper',
    or whose wrapper has no 'toolkit_id' attribute, are left untouched.

    A missing or non-integer id is reported through the module logger, but
    only when there is at least one tool that could have received it.

    Args:
        tool_conf: Raw tool configuration item from 'tools_list'.
        toolkit_tools: List of instantiated tools produced by a toolkit.
    """
    toolkit_id = tool_conf.get('id')
    # `bool` is a subclass of `int`, so True/False must be rejected explicitly
    # or they would be stored as toolkit ids.
    if isinstance(toolkit_id, int) and not isinstance(toolkit_id, bool):
        for t in toolkit_tools:
            if hasattr(t, 'api_wrapper') and hasattr(t.api_wrapper, 'toolkit_id'):
                t.api_wrapper.toolkit_id = toolkit_id
    elif toolkit_tools:
        # Toolkits that produced no tools have nothing to inject into, so a
        # missing id is not an error for them.
        logger.error(
            f"Toolkit ID is missing or not an integer for tool "
            f"`{tool_conf.get('type', '')}` with name `{tool_conf.get('name', '')}`"
        )
+
39
+
16
40
  def _safe_import_tool(tool_name, module_path, get_tools_name=None, toolkit_class_name=None):
17
41
  """Safely import a tool module and register available functions/classes."""
18
42
  try:
@@ -34,6 +58,7 @@ def _safe_import_tool(tool_name, module_path, get_tools_name=None, toolkit_class
34
58
  FAILED_IMPORTS[tool_name] = str(e)
35
59
  logger.debug(f"Failed to import {tool_name}: {e}")
36
60
 
61
+
37
62
  # Safe imports for all tools
38
63
  _safe_import_tool('github', 'github', 'get_tools', 'AlitaGitHubToolkit')
39
64
  _safe_import_tool('openapi', 'openapi', 'get_tools')
@@ -90,11 +115,19 @@ available_count = len(AVAILABLE_TOOLS)
90
115
  total_attempted = len(AVAILABLE_TOOLS) + len(FAILED_IMPORTS)
91
116
  logger.info(f"Tool imports completed: {available_count}/{total_attempted} successful")
92
117
 
118
+ # Import community module to trigger community toolkit registration
119
+ try:
120
+ from alita_sdk import community # noqa: F401
121
+ logger.debug("Community toolkits registered successfully")
122
+ except ImportError as e:
123
+ logger.debug(f"Community module not available: {e}")
124
+
93
125
 
94
126
  def get_tools(tools_list, alita, llm, store: Optional[BaseStore] = None, *args, **kwargs):
95
127
  tools = []
96
128
 
97
129
  for tool in tools_list:
130
+ toolkit_tools = []
98
131
  settings = tool.get('settings')
99
132
 
100
133
  # Skip tools without settings early
@@ -116,53 +149,47 @@ def get_tools(tools_list, alita, llm, store: Optional[BaseStore] = None, *args,
116
149
 
117
150
  # Set pgvector collection schema if present
118
151
  if settings.get('pgvector_configuration'):
119
- settings['pgvector_configuration']['collection_schema'] = str(tool['id'])
152
+ # Use tool id if available, otherwise use toolkit_name or type as fallback
153
+ collection_id = tool.get('id') or tool.get('toolkit_name') or tool_type
154
+ settings['pgvector_configuration']['collection_schema'] = str(collection_id)
120
155
 
121
156
  # Handle ADO special cases
122
157
  if tool_type in ['ado_boards', 'ado_wiki', 'ado_plans']:
123
- tools.extend(AVAILABLE_TOOLS['ado']['get_tools'](tool_type, tool))
124
- continue
125
-
126
- # Handle ADO repos aliases
127
- if tool_type in ['ado_repos', 'azure_devops_repos'] and 'ado_repos' in AVAILABLE_TOOLS:
158
+ toolkit_tools.extend(AVAILABLE_TOOLS['ado']['get_tools'](tool_type, tool))
159
+ elif tool_type in ['ado_repos', 'azure_devops_repos'] and 'ado_repos' in AVAILABLE_TOOLS:
128
160
  try:
129
- tools.extend(AVAILABLE_TOOLS['ado_repos']['get_tools'](tool))
161
+ toolkit_tools.extend(AVAILABLE_TOOLS['ado_repos']['get_tools'](tool))
130
162
  except Exception as e:
131
163
  logger.error(f"Error getting ADO repos tools: {e}")
132
- continue
133
-
134
- # Skip MCP toolkit - it's handled by runtime/toolkits/tools.py to avoid duplicate loading
135
- if tool_type == 'mcp':
164
+ elif tool_type == 'mcp':
136
165
  logger.debug(f"Skipping MCP toolkit '{tool.get('toolkit_name')}' - handled by runtime toolkit system")
137
- continue
138
-
139
- # Handle standard tools
140
- if tool_type in AVAILABLE_TOOLS and 'get_tools' in AVAILABLE_TOOLS[tool_type]:
166
+ elif tool_type == 'planning':
167
+ logger.debug(f"Skipping planning toolkit '{tool.get('toolkit_name')}' - handled by runtime toolkit system")
168
+ elif tool_type in AVAILABLE_TOOLS and 'get_tools' in AVAILABLE_TOOLS[tool_type]:
141
169
  try:
142
- tools.extend(AVAILABLE_TOOLS[tool_type]['get_tools'](tool))
170
+ toolkit_tools.extend(AVAILABLE_TOOLS[tool_type]['get_tools'](tool))
143
171
  except Exception as e:
144
172
  logger.error(f"Error getting tools for {tool_type}: {e}")
145
173
  raise ToolException(f"Error getting tools for {tool_type}: {e}")
146
- continue
147
-
148
- # Handle custom modules
149
- if settings.get("module"):
174
+ elif settings.get("module"):
150
175
  try:
151
176
  mod = import_module(settings.pop("module"))
152
177
  tkitclass = getattr(mod, settings.pop("class"))
153
178
  get_toolkit_params = settings.copy()
154
179
  get_toolkit_params["name"] = tool.get("name")
155
180
  toolkit = tkitclass.get_toolkit(**get_toolkit_params)
156
- tools.extend(toolkit.get_tools())
181
+ toolkit_tools.extend(toolkit.get_tools())
157
182
  except Exception as e:
158
183
  logger.error(f"Error in getting custom toolkit: {e}")
159
- continue
160
-
161
- # Tool not available
162
- if tool_type in FAILED_IMPORTS:
163
- logger.warning(f"Tool '{tool_type}' is not available: {FAILED_IMPORTS[tool_type]}")
164
184
  else:
165
- logger.warning(f"Unknown tool type: {tool_type}")
185
+ if tool_type in FAILED_IMPORTS:
186
+ logger.warning(f"Tool '{tool_type}' is not available: {FAILED_IMPORTS[tool_type]}")
187
+ else:
188
+ logger.warning(f"Unknown tool type: {tool_type}")
189
+ #
190
+ # Always inject toolkit_id to each tool
191
+ _inject_toolkit_id(tool, toolkit_tools)
192
+ tools.extend(toolkit_tools)
166
193
 
167
194
  return tools
168
195
 
@@ -111,8 +111,7 @@ class ArgsSchema(Enum):
111
111
  Field(
112
112
  description=(
113
113
  "Branch to be used for read file operation."
114
- ),
115
- default=None
114
+ )
116
115
  ),
117
116
  )
118
117
  )
@@ -2,6 +2,7 @@ import copy
2
2
  import json
3
3
  import logging
4
4
  import time
5
+ from enum import Enum
5
6
  from typing import Any, Optional, List, Dict, Generator
6
7
 
7
8
  from langchain_core.callbacks import dispatch_custom_event
@@ -16,7 +17,17 @@ from ..runtime.utils.utils import IndexerKeywords
16
17
 
17
18
  logger = logging.getLogger(__name__)
18
19
 
19
- DEFAULT_CUT_OFF = 0.2
20
+ DEFAULT_CUT_OFF = 0.1
21
+ INDEX_META_UPDATE_INTERVAL = 600.0
22
+
23
class IndexTools(str, Enum):
    """Enum for index-related tool names."""
    # Mixing in `str` makes every member compare equal to its plain string
    # value, so existing comparisons against the literal names keep working.
    # Each value is used as both the "name" and the "mode" of the matching
    # tool entry elsewhere in this module.
    INDEX_DATA = "index_data"
    SEARCH_INDEX = "search_index"
    STEPBACK_SEARCH_INDEX = "stepback_search_index"
    STEPBACK_SUMMARY_INDEX = "stepback_summary_index"
    REMOVE_INDEX = "remove_index"
    LIST_COLLECTIONS = "list_collections"
20
31
 
21
32
  # Base Vector Store Schema Models
22
33
  BaseIndexParams = create_model(
@@ -157,6 +168,16 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
157
168
  clean_index = kwargs.get("clean_index")
158
169
  chunking_tool = kwargs.get("chunking_tool")
159
170
  chunking_config = kwargs.get("chunking_config")
171
+
172
+ # Store the interval in a private dict to avoid Pydantic field errors
173
+ if not hasattr(self, "_index_meta_config"):
174
+ self._index_meta_config: Dict[str, Any] = {}
175
+
176
+ self._index_meta_config["update_interval"] = kwargs.get(
177
+ "meta_update_interval",
178
+ INDEX_META_UPDATE_INTERVAL,
179
+ )
180
+
160
181
  result = {"count": 0}
161
182
  #
162
183
  try:
@@ -164,6 +185,7 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
164
185
  self._clean_index(index_name)
165
186
  #
166
187
  self.index_meta_init(index_name, kwargs)
188
+ self._emit_index_event(index_name)
167
189
  #
168
190
  self._log_tool_event(f"Indexing data into collection with suffix '{index_name}'. It can take some time...")
169
191
  self._log_tool_event(f"Loading the documents to index...{kwargs}")
@@ -179,18 +201,26 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
179
201
  self._save_index_generator(documents, documents_count, chunking_tool, chunking_config, index_name=index_name, result=result)
180
202
  #
181
203
  results_count = result["count"]
182
- self.index_meta_update(index_name, IndexerKeywords.INDEX_META_COMPLETED.value, results_count)
204
+ # Final update should always be forced
205
+ self.index_meta_update(index_name, IndexerKeywords.INDEX_META_COMPLETED.value, results_count, update_force=True)
183
206
  self._emit_index_event(index_name)
184
207
  #
185
208
  return {"status": "ok", "message": f"successfully indexed {results_count} documents" if results_count > 0
186
209
  else "no new documents to index"}
187
210
  except Exception as e:
188
- self.index_meta_update(index_name, IndexerKeywords.INDEX_META_FAILED.value, result["count"])
189
- self._emit_index_event(index_name, error=str(e))
211
+ # Do maximum effort at least send custom event for supposed changed status
212
+ msg = str(e)
213
+ try:
214
+ # Error update should also be forced
215
+ self.index_meta_update(index_name, IndexerKeywords.INDEX_META_FAILED.value, result["count"], update_force=True)
216
+ except Exception as ie:
217
+ logger.error(f"Failed to update index meta status to FAILED for index '{index_name}': {ie}")
218
+ msg = f"{msg}; additionally failed to update index meta status to FAILED: {ie}"
219
+ self._emit_index_event(index_name, error=msg)
190
220
  raise e
191
-
192
221
 
193
222
  def _save_index_generator(self, base_documents: Generator[Document, None, None], base_total: int, chunking_tool, chunking_config, result, index_name: Optional[str] = None):
223
+ self._ensure_vectorstore_initialized()
194
224
  self._log_tool_event(f"Base documents are ready for indexing. {base_total} base documents in total to index.")
195
225
  from ..runtime.langchain.interfaces.llm_processor import add_documents
196
226
  #
@@ -243,6 +273,11 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
243
273
  logger.debug(msg)
244
274
  self._log_tool_event(msg)
245
275
  result["count"] += dependent_docs_counter
276
+ # After each base document, try a non-forced meta update; throttling handled inside index_meta_update
277
+ try:
278
+ self.index_meta_update(index_name, IndexerKeywords.INDEX_META_IN_PROGRESS.value, result["count"], update_force=False)
279
+ except Exception as exc: # best-effort, do not break indexing
280
+ logger.warning(f"Failed to update index meta during indexing process for index '{index_name}': {exc}")
246
281
  if pg_vector_add_docs_chunk:
247
282
  add_documents(vectorstore=self.vectorstore, documents=pg_vector_add_docs_chunk)
248
283
 
@@ -308,6 +343,7 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
308
343
  log_msg: str = "Verification of documents to index started"
309
344
  ) -> Generator[Document, None, None]:
310
345
  """Generic duplicate reduction logic for documents."""
346
+ self._ensure_vectorstore_initialized()
311
347
  self._log_tool_event(log_msg, tool_name="index_documents")
312
348
  indexed_data = self._get_indexed_data(index_name)
313
349
  indexed_keys = set(indexed_data.keys())
@@ -463,6 +499,7 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
463
499
  )
464
500
 
465
501
  def index_meta_init(self, index_name: str, index_configuration: dict[str, Any]):
502
+ self._ensure_vectorstore_initialized()
466
503
  index_meta = super().get_index_meta(index_name)
467
504
  if not index_meta:
468
505
  self._log_tool_event(
@@ -482,12 +519,53 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
482
519
  "updated_on": created_on,
483
520
  "task_id": None,
484
521
  "conversation_id": None,
522
+ "toolkit_id": self.toolkit_id,
485
523
  }
486
524
  metadata["history"] = json.dumps([metadata])
487
525
  index_meta_doc = Document(page_content=f"{IndexerKeywords.INDEX_META_TYPE.value}_{index_name}", metadata=metadata)
488
526
  add_documents(vectorstore=self.vectorstore, documents=[index_meta_doc])
489
527
 
490
- def index_meta_update(self, index_name: str, state: str, result: int):
528
+ def index_meta_update(self, index_name: str, state: str, result: int, update_force: bool = True, interval: Optional[float] = None):
529
+ """Update `index_meta` document with optional time-based throttling.
530
+
531
+ Args:
532
+ index_name: Index name to update meta for.
533
+ state: New state value for the `index_meta` record.
534
+ result: Number of processed documents to store in the `updated` field.
535
+ update_force: If `True`, perform the update unconditionally, ignoring throttling.
536
+ If `False`, perform the update only when the effective time interval has passed.
537
+ interval: Optional custom interval (in seconds) for this call when `update_force` is `False`.
538
+ If `None`, falls back to the value stored in `self._index_meta_config["update_interval"]`
539
+ if present, otherwise uses `INDEX_META_UPDATE_INTERVAL`.
540
+ """
541
+ self._ensure_vectorstore_initialized()
542
+ if not hasattr(self, "_index_meta_last_update_time"):
543
+ self._index_meta_last_update_time: Dict[str, float] = {}
544
+
545
+ if not update_force:
546
+ # Resolve effective interval:
547
+ # 1\) explicit arg
548
+ # 2\) value from `_index_meta_config`
549
+ # 3\) default constant
550
+ cfg_interval = None
551
+ if hasattr(self, "_index_meta_config"):
552
+ cfg_interval = self._index_meta_config.get("update_interval")
553
+
554
+ eff_interval = (
555
+ interval
556
+ if interval is not None
557
+ else (cfg_interval if cfg_interval is not None else INDEX_META_UPDATE_INTERVAL)
558
+ )
559
+
560
+ last_time = self._index_meta_last_update_time.get(index_name)
561
+ now = time.time()
562
+ if last_time is not None and (now - last_time) < eff_interval:
563
+ return
564
+ self._index_meta_last_update_time[index_name] = now
565
+ else:
566
+ # For forced updates, always refresh last update time
567
+ self._index_meta_last_update_time[index_name] = time.time()
568
+
491
569
  index_meta_raw = super().get_index_meta(index_name)
492
570
  from ..runtime.langchain.interfaces.llm_processor import add_documents
493
571
  #
@@ -545,11 +623,12 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
545
623
  event_data = {
546
624
  "id": index_meta.get("id"),
547
625
  "index_name": index_name,
548
- "state": metadata.get("state"),
626
+ "state": "failed" if error is not None else metadata.get("state"),
549
627
  "error": error,
550
628
  "reindex": is_reindex,
551
629
  "indexed": metadata.get("indexed", 0),
552
630
  "updated": metadata.get("updated", 0),
631
+ "toolkit_id": metadata.get("toolkit_id"),
553
632
  }
554
633
 
555
634
  # Emit the event
@@ -572,8 +651,8 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
572
651
  """
573
652
  return [
574
653
  {
575
- "name": "index_data",
576
- "mode": "index_data",
654
+ "name": IndexTools.INDEX_DATA.value,
655
+ "mode": IndexTools.INDEX_DATA.value,
577
656
  "ref": self.index_data,
578
657
  "description": "Loads data to index.",
579
658
  "args_schema": create_model(
@@ -583,36 +662,36 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
583
662
  )
584
663
  },
585
664
  {
586
- "name": "search_index",
587
- "mode": "search_index",
665
+ "name": IndexTools.SEARCH_INDEX.value,
666
+ "mode": IndexTools.SEARCH_INDEX.value,
588
667
  "ref": self.search_index,
589
668
  "description": self.search_index.__doc__,
590
669
  "args_schema": BaseSearchParams
591
670
  },
592
671
  {
593
- "name": "stepback_search_index",
594
- "mode": "stepback_search_index",
672
+ "name": IndexTools.STEPBACK_SEARCH_INDEX.value,
673
+ "mode": IndexTools.STEPBACK_SEARCH_INDEX.value,
595
674
  "ref": self.stepback_search_index,
596
675
  "description": self.stepback_search_index.__doc__,
597
676
  "args_schema": BaseStepbackSearchParams
598
677
  },
599
678
  {
600
- "name": "stepback_summary_index",
601
- "mode": "stepback_summary_index",
679
+ "name": IndexTools.STEPBACK_SUMMARY_INDEX.value,
680
+ "mode": IndexTools.STEPBACK_SUMMARY_INDEX.value,
602
681
  "ref": self.stepback_summary_index,
603
682
  "description": self.stepback_summary_index.__doc__,
604
683
  "args_schema": BaseStepbackSearchParams
605
684
  },
606
685
  {
607
- "name": "remove_index",
608
- "mode": "remove_index",
686
+ "name": IndexTools.REMOVE_INDEX.value,
687
+ "mode": IndexTools.REMOVE_INDEX.value,
609
688
  "ref": self.remove_index,
610
689
  "description": self.remove_index.__doc__,
611
690
  "args_schema": RemoveIndexParams
612
691
  },
613
692
  {
614
- "name": "list_collections",
615
- "mode": "list_collections",
693
+ "name": IndexTools.LIST_COLLECTIONS.value,
694
+ "mode": IndexTools.LIST_COLLECTIONS.value,
616
695
  "ref": self.list_collections,
617
696
  "description": self.list_collections.__doc__,
618
697
  # No parameters