alita-sdk 0.3.465__py3-none-any.whl → 0.3.486__py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of alita-sdk might be problematic. Click here for more details.
- alita_sdk/cli/agent/__init__.py +5 -0
- alita_sdk/cli/agent/default.py +83 -1
- alita_sdk/cli/agent_loader.py +6 -9
- alita_sdk/cli/agent_ui.py +13 -3
- alita_sdk/cli/agents.py +1866 -185
- alita_sdk/cli/callbacks.py +96 -25
- alita_sdk/cli/cli.py +10 -1
- alita_sdk/cli/config.py +151 -9
- alita_sdk/cli/context/__init__.py +30 -0
- alita_sdk/cli/context/cleanup.py +198 -0
- alita_sdk/cli/context/manager.py +731 -0
- alita_sdk/cli/context/message.py +285 -0
- alita_sdk/cli/context/strategies.py +289 -0
- alita_sdk/cli/context/token_estimation.py +127 -0
- alita_sdk/cli/input_handler.py +167 -4
- alita_sdk/cli/inventory.py +1256 -0
- alita_sdk/cli/toolkit.py +14 -17
- alita_sdk/cli/toolkit_loader.py +35 -5
- alita_sdk/cli/tools/__init__.py +8 -1
- alita_sdk/cli/tools/filesystem.py +815 -55
- alita_sdk/cli/tools/planning.py +143 -157
- alita_sdk/cli/tools/terminal.py +154 -20
- alita_sdk/community/__init__.py +64 -8
- alita_sdk/community/inventory/__init__.py +224 -0
- alita_sdk/community/inventory/config.py +257 -0
- alita_sdk/community/inventory/enrichment.py +2137 -0
- alita_sdk/community/inventory/extractors.py +1469 -0
- alita_sdk/community/inventory/ingestion.py +3172 -0
- alita_sdk/community/inventory/knowledge_graph.py +1457 -0
- alita_sdk/community/inventory/parsers/__init__.py +218 -0
- alita_sdk/community/inventory/parsers/base.py +295 -0
- alita_sdk/community/inventory/parsers/csharp_parser.py +907 -0
- alita_sdk/community/inventory/parsers/go_parser.py +851 -0
- alita_sdk/community/inventory/parsers/html_parser.py +389 -0
- alita_sdk/community/inventory/parsers/java_parser.py +593 -0
- alita_sdk/community/inventory/parsers/javascript_parser.py +629 -0
- alita_sdk/community/inventory/parsers/kotlin_parser.py +768 -0
- alita_sdk/community/inventory/parsers/markdown_parser.py +362 -0
- alita_sdk/community/inventory/parsers/python_parser.py +604 -0
- alita_sdk/community/inventory/parsers/rust_parser.py +858 -0
- alita_sdk/community/inventory/parsers/swift_parser.py +832 -0
- alita_sdk/community/inventory/parsers/text_parser.py +322 -0
- alita_sdk/community/inventory/parsers/yaml_parser.py +370 -0
- alita_sdk/community/inventory/patterns/__init__.py +61 -0
- alita_sdk/community/inventory/patterns/ast_adapter.py +380 -0
- alita_sdk/community/inventory/patterns/loader.py +348 -0
- alita_sdk/community/inventory/patterns/registry.py +198 -0
- alita_sdk/community/inventory/presets.py +535 -0
- alita_sdk/community/inventory/retrieval.py +1403 -0
- alita_sdk/community/inventory/toolkit.py +169 -0
- alita_sdk/community/inventory/visualize.py +1370 -0
- alita_sdk/configurations/bitbucket.py +0 -3
- alita_sdk/runtime/clients/client.py +84 -26
- alita_sdk/runtime/langchain/assistant.py +4 -2
- alita_sdk/runtime/langchain/langraph_agent.py +122 -31
- alita_sdk/runtime/llms/preloaded.py +2 -6
- alita_sdk/runtime/toolkits/__init__.py +2 -0
- alita_sdk/runtime/toolkits/application.py +1 -1
- alita_sdk/runtime/toolkits/mcp.py +46 -36
- alita_sdk/runtime/toolkits/planning.py +171 -0
- alita_sdk/runtime/toolkits/tools.py +39 -6
- alita_sdk/runtime/tools/llm.py +185 -8
- alita_sdk/runtime/tools/planning/__init__.py +36 -0
- alita_sdk/runtime/tools/planning/models.py +246 -0
- alita_sdk/runtime/tools/planning/wrapper.py +607 -0
- alita_sdk/runtime/tools/vectorstore_base.py +41 -6
- alita_sdk/runtime/utils/mcp_oauth.py +80 -0
- alita_sdk/runtime/utils/streamlit.py +6 -10
- alita_sdk/runtime/utils/toolkit_utils.py +19 -4
- alita_sdk/tools/__init__.py +54 -27
- alita_sdk/tools/ado/repos/repos_wrapper.py +1 -2
- alita_sdk/tools/base_indexer_toolkit.py +98 -19
- alita_sdk/tools/bitbucket/__init__.py +2 -2
- alita_sdk/tools/chunkers/__init__.py +3 -1
- alita_sdk/tools/chunkers/sematic/markdown_chunker.py +95 -6
- alita_sdk/tools/chunkers/universal_chunker.py +269 -0
- alita_sdk/tools/code_indexer_toolkit.py +55 -22
- alita_sdk/tools/elitea_base.py +86 -21
- alita_sdk/tools/jira/__init__.py +1 -1
- alita_sdk/tools/jira/api_wrapper.py +91 -40
- alita_sdk/tools/non_code_indexer_toolkit.py +1 -0
- alita_sdk/tools/qtest/__init__.py +1 -1
- alita_sdk/tools/vector_adapters/VectorStoreAdapter.py +8 -2
- alita_sdk/tools/zephyr_essential/api_wrapper.py +12 -13
- {alita_sdk-0.3.465.dist-info → alita_sdk-0.3.486.dist-info}/METADATA +2 -1
- {alita_sdk-0.3.465.dist-info → alita_sdk-0.3.486.dist-info}/RECORD +90 -50
- {alita_sdk-0.3.465.dist-info → alita_sdk-0.3.486.dist-info}/WHEEL +0 -0
- {alita_sdk-0.3.465.dist-info → alita_sdk-0.3.486.dist-info}/entry_points.txt +0 -0
- {alita_sdk-0.3.465.dist-info → alita_sdk-0.3.486.dist-info}/licenses/LICENSE +0 -0
- {alita_sdk-0.3.465.dist-info → alita_sdk-0.3.486.dist-info}/top_level.txt +0 -0
|
@@ -155,15 +155,45 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
|
|
|
155
155
|
if values.get('alita') and values.get('embedding_model'):
|
|
156
156
|
values['embeddings'] = values.get('alita').get_embeddings(values.get('embedding_model'))
|
|
157
157
|
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
# Initialize the new vector adapter
|
|
161
|
-
values['vector_adapter'] = VectorStoreAdapterFactory.create_adapter(values['vectorstore_type'])
|
|
162
|
-
logger.debug(f"Vectorstore wrapper initialized: {values}")
|
|
158
|
+
# Lazy initialization: vectorstore and vector_adapter are initialized on-demand
|
|
159
|
+
# This prevents errors when using non-index tools with broken/missing vector DB
|
|
163
160
|
return values
|
|
164
161
|
|
|
162
|
+
def _ensure_vectorstore_initialized(self):
|
|
163
|
+
"""Lazily initialize vectorstore and vector_adapter when needed for index operations."""
|
|
164
|
+
if self.vectorstore is None:
|
|
165
|
+
if not self.vectorstore_type or not self.vectorstore_params:
|
|
166
|
+
raise ToolException(
|
|
167
|
+
"Vector store is not configured. "
|
|
168
|
+
"Please ensure embedding_model and pgvector_configuration are provided."
|
|
169
|
+
)
|
|
170
|
+
|
|
171
|
+
from ..langchain.interfaces.llm_processor import get_vectorstore
|
|
172
|
+
try:
|
|
173
|
+
self.vectorstore = get_vectorstore(
|
|
174
|
+
self.vectorstore_type,
|
|
175
|
+
self.vectorstore_params,
|
|
176
|
+
embedding_func=self.embeddings
|
|
177
|
+
)
|
|
178
|
+
logger.debug(f"Vectorstore initialized: {self.vectorstore_type}")
|
|
179
|
+
except Exception as e:
|
|
180
|
+
raise ToolException(
|
|
181
|
+
f"Failed to initialize vector store: {str(e)}. "
|
|
182
|
+
"Check your vector database configuration and connection."
|
|
183
|
+
)
|
|
184
|
+
|
|
185
|
+
if self.vector_adapter is None:
|
|
186
|
+
try:
|
|
187
|
+
self.vector_adapter = VectorStoreAdapterFactory.create_adapter(self.vectorstore_type)
|
|
188
|
+
logger.debug(f"Vector adapter initialized: {self.vectorstore_type}")
|
|
189
|
+
except Exception as e:
|
|
190
|
+
raise ToolException(
|
|
191
|
+
f"Failed to initialize vector adapter: {str(e)}"
|
|
192
|
+
)
|
|
193
|
+
|
|
165
194
|
def _init_pg_helper(self, language='english'):
|
|
166
195
|
"""Initialize PGVector helper if needed and not already initialized"""
|
|
196
|
+
self._ensure_vectorstore_initialized()
|
|
167
197
|
if self.pg_helper is None and hasattr(self.vectorstore, 'connection_string') and hasattr(self.vectorstore, 'collection_name'):
|
|
168
198
|
try:
|
|
169
199
|
from .pgvector_search import PGVectorSearch
|
|
@@ -192,6 +222,7 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
|
|
|
192
222
|
Raises:
|
|
193
223
|
ToolException: When DataException occurs or other search errors
|
|
194
224
|
"""
|
|
225
|
+
self._ensure_vectorstore_initialized()
|
|
195
226
|
try:
|
|
196
227
|
return self.vectorstore.similarity_search_with_score(
|
|
197
228
|
query, filter=filter, k=k
|
|
@@ -210,19 +241,21 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
|
|
|
210
241
|
|
|
211
242
|
def list_collections(self) -> List[str]:
|
|
212
243
|
"""List all collections in the vectorstore."""
|
|
213
|
-
|
|
244
|
+
self._ensure_vectorstore_initialized()
|
|
214
245
|
collections = self.vector_adapter.list_collections(self)
|
|
215
246
|
if not collections:
|
|
216
247
|
return "No indexed collections"
|
|
217
248
|
return collections
|
|
218
249
|
|
|
219
250
|
def get_index_meta(self, index_name: str):
|
|
251
|
+
self._ensure_vectorstore_initialized()
|
|
220
252
|
index_metas = self.vector_adapter.get_index_meta(self, index_name)
|
|
221
253
|
if len(index_metas) > 1:
|
|
222
254
|
raise RuntimeError(f"Multiple index_meta documents found: {index_metas}")
|
|
223
255
|
return index_metas[0] if index_metas else None
|
|
224
256
|
|
|
225
257
|
def get_indexed_count(self, index_name: str) -> int:
|
|
258
|
+
self._ensure_vectorstore_initialized()
|
|
226
259
|
from sqlalchemy.orm import Session
|
|
227
260
|
from sqlalchemy import func, or_
|
|
228
261
|
|
|
@@ -241,6 +274,7 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
|
|
|
241
274
|
"""
|
|
242
275
|
Clean the vectorstore collection by deleting all indexed data.
|
|
243
276
|
"""
|
|
277
|
+
self._ensure_vectorstore_initialized()
|
|
244
278
|
self._log_tool_event(
|
|
245
279
|
f"Cleaning collection '{self.dataset}'",
|
|
246
280
|
tool_name="_clean_collection"
|
|
@@ -259,6 +293,7 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
|
|
|
259
293
|
progress_step (int): Step for progress reporting, default is 20.
|
|
260
294
|
clean_index (bool): If True, clean the index before re-indexing all documents.
|
|
261
295
|
"""
|
|
296
|
+
self._ensure_vectorstore_initialized()
|
|
262
297
|
if clean_index:
|
|
263
298
|
self._clean_index(index_name)
|
|
264
299
|
|
|
@@ -162,3 +162,83 @@ def canonical_resource(server_url: str) -> str:
|
|
|
162
162
|
if resource.endswith("/") and parsed.path in ("", "/"):
|
|
163
163
|
resource = resource[:-1]
|
|
164
164
|
return resource
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
def exchange_oauth_token(
|
|
168
|
+
token_endpoint: str,
|
|
169
|
+
code: str,
|
|
170
|
+
redirect_uri: str,
|
|
171
|
+
client_id: str,
|
|
172
|
+
client_secret: Optional[str] = None,
|
|
173
|
+
code_verifier: Optional[str] = None,
|
|
174
|
+
scope: Optional[str] = None,
|
|
175
|
+
timeout: int = 30,
|
|
176
|
+
) -> Dict[str, Any]:
|
|
177
|
+
"""
|
|
178
|
+
Exchange an OAuth authorization code for access tokens.
|
|
179
|
+
|
|
180
|
+
This function performs the OAuth token exchange on the server side,
|
|
181
|
+
avoiding CORS issues that would occur if done from a browser.
|
|
182
|
+
|
|
183
|
+
Args:
|
|
184
|
+
token_endpoint: OAuth token endpoint URL
|
|
185
|
+
code: Authorization code from OAuth provider
|
|
186
|
+
redirect_uri: Redirect URI used in authorization request
|
|
187
|
+
client_id: OAuth client ID
|
|
188
|
+
client_secret: OAuth client secret (optional for public clients)
|
|
189
|
+
code_verifier: PKCE code verifier (optional)
|
|
190
|
+
scope: OAuth scope (optional)
|
|
191
|
+
timeout: Request timeout in seconds
|
|
192
|
+
|
|
193
|
+
Returns:
|
|
194
|
+
Token response from OAuth provider containing access_token, etc.
|
|
195
|
+
|
|
196
|
+
Raises:
|
|
197
|
+
requests.RequestException: If the HTTP request fails
|
|
198
|
+
ValueError: If the token exchange fails
|
|
199
|
+
"""
|
|
200
|
+
# Build the token request body
|
|
201
|
+
token_body = {
|
|
202
|
+
"grant_type": "authorization_code",
|
|
203
|
+
"code": code,
|
|
204
|
+
"redirect_uri": redirect_uri,
|
|
205
|
+
"client_id": client_id,
|
|
206
|
+
}
|
|
207
|
+
|
|
208
|
+
if client_secret:
|
|
209
|
+
token_body["client_secret"] = client_secret
|
|
210
|
+
if code_verifier:
|
|
211
|
+
token_body["code_verifier"] = code_verifier
|
|
212
|
+
if scope:
|
|
213
|
+
token_body["scope"] = scope
|
|
214
|
+
|
|
215
|
+
logger.info(f"MCP OAuth: exchanging code at {token_endpoint}")
|
|
216
|
+
|
|
217
|
+
# Make the token exchange request
|
|
218
|
+
response = requests.post(
|
|
219
|
+
token_endpoint,
|
|
220
|
+
data=token_body,
|
|
221
|
+
headers={
|
|
222
|
+
"Content-Type": "application/x-www-form-urlencoded",
|
|
223
|
+
"Accept": "application/json",
|
|
224
|
+
},
|
|
225
|
+
timeout=timeout
|
|
226
|
+
)
|
|
227
|
+
|
|
228
|
+
# Try to parse as JSON
|
|
229
|
+
try:
|
|
230
|
+
token_data = response.json()
|
|
231
|
+
except Exception:
|
|
232
|
+
# Some providers return URL-encoded response
|
|
233
|
+
from urllib.parse import parse_qs
|
|
234
|
+
token_data = {k: v[0] if len(v) == 1 else v
|
|
235
|
+
for k, v in parse_qs(response.text).items()}
|
|
236
|
+
|
|
237
|
+
if response.ok:
|
|
238
|
+
logger.info("MCP OAuth: token exchange successful")
|
|
239
|
+
return token_data
|
|
240
|
+
else:
|
|
241
|
+
error_msg = token_data.get("error_description") or token_data.get("error") or response.text
|
|
242
|
+
logger.error(f"MCP OAuth: token exchange failed - {response.status_code}: {error_msg}")
|
|
243
|
+
raise ValueError(f"Token exchange failed: {error_msg}")
|
|
244
|
+
|
|
@@ -287,7 +287,6 @@ def run_streamlit(st, ai_icon=None, user_icon=None):
|
|
|
287
287
|
model_config={
|
|
288
288
|
"temperature": 0.1,
|
|
289
289
|
"max_tokens": 1000,
|
|
290
|
-
"top_p": 1.0
|
|
291
290
|
}
|
|
292
291
|
)
|
|
293
292
|
except Exception as e:
|
|
@@ -1256,7 +1255,6 @@ def run_streamlit(st, ai_icon=None, user_icon=None):
|
|
|
1256
1255
|
model_config={
|
|
1257
1256
|
"temperature": 0.1,
|
|
1258
1257
|
"max_tokens": 1000,
|
|
1259
|
-
"top_p": 1.0
|
|
1260
1258
|
}
|
|
1261
1259
|
)
|
|
1262
1260
|
except Exception as e:
|
|
@@ -1387,20 +1385,18 @@ def run_streamlit(st, ai_icon=None, user_icon=None):
|
|
|
1387
1385
|
help="Maximum number of tokens in the AI response"
|
|
1388
1386
|
)
|
|
1389
1387
|
|
|
1390
|
-
|
|
1391
|
-
"
|
|
1392
|
-
|
|
1393
|
-
|
|
1394
|
-
|
|
1395
|
-
step=0.1,
|
|
1396
|
-
help="Controls diversity via nucleus sampling"
|
|
1388
|
+
reasoning_effort = st.selectbox(
|
|
1389
|
+
"Reasoning effort:",
|
|
1390
|
+
options=['null', 'low', 'medium', 'high'],
|
|
1391
|
+
index=0,
|
|
1392
|
+
help="Higher effort better reasoning, slower response"
|
|
1397
1393
|
)
|
|
1398
1394
|
|
|
1399
1395
|
# Create LLM config
|
|
1400
1396
|
llm_config = {
|
|
1401
1397
|
'max_tokens': max_tokens,
|
|
1402
1398
|
'temperature': temperature,
|
|
1403
|
-
'
|
|
1399
|
+
'reasoning_effort': reasoning_effort
|
|
1404
1400
|
}
|
|
1405
1401
|
|
|
1406
1402
|
col1, col2 = st.columns([3, 1])
|
|
@@ -12,7 +12,8 @@ logger = logging.getLogger(__name__)
|
|
|
12
12
|
|
|
13
13
|
def instantiate_toolkit_with_client(toolkit_config: Dict[str, Any],
|
|
14
14
|
llm_client: Any,
|
|
15
|
-
alita_client: Optional[Any] = None
|
|
15
|
+
alita_client: Optional[Any] = None,
|
|
16
|
+
mcp_tokens: Optional[Dict[str, Any]] = None) -> List[Any]:
|
|
16
17
|
"""
|
|
17
18
|
Instantiate a toolkit with LLM client support.
|
|
18
19
|
|
|
@@ -22,7 +23,8 @@ def instantiate_toolkit_with_client(toolkit_config: Dict[str, Any],
|
|
|
22
23
|
Args:
|
|
23
24
|
toolkit_config: Configuration dictionary for the toolkit
|
|
24
25
|
llm_client: LLM client instance for tools that need LLM capabilities
|
|
25
|
-
|
|
26
|
+
alita_client: Optional additional client instance
|
|
27
|
+
mcp_tokens: Optional dictionary of MCP OAuth tokens by server URL
|
|
26
28
|
|
|
27
29
|
Returns:
|
|
28
30
|
List of instantiated tools from the toolkit
|
|
@@ -60,8 +62,8 @@ def instantiate_toolkit_with_client(toolkit_config: Dict[str, Any],
|
|
|
60
62
|
}
|
|
61
63
|
|
|
62
64
|
# Get tools using the toolkit configuration with clients
|
|
63
|
-
# Parameter order: get_tools(tools_list, alita_client, llm, memory_store)
|
|
64
|
-
tools = get_tools([tool_config], alita_client, llm_client)
|
|
65
|
+
# Parameter order: get_tools(tools_list, alita_client, llm, memory_store, debug_mode, mcp_tokens)
|
|
66
|
+
tools = get_tools([tool_config], alita_client, llm_client, mcp_tokens=mcp_tokens)
|
|
65
67
|
|
|
66
68
|
if not tools:
|
|
67
69
|
logger.warning(f"No tools returned for toolkit {toolkit_name}")
|
|
@@ -73,9 +75,22 @@ def instantiate_toolkit_with_client(toolkit_config: Dict[str, Any],
|
|
|
73
75
|
except Exception as e:
|
|
74
76
|
# Re-raise McpAuthorizationRequired without logging as error
|
|
75
77
|
from ..utils.mcp_oauth import McpAuthorizationRequired
|
|
78
|
+
|
|
79
|
+
# Check if it's McpAuthorizationRequired directly
|
|
76
80
|
if isinstance(e, McpAuthorizationRequired):
|
|
77
81
|
logger.info(f"Toolkit {toolkit_name} requires MCP OAuth authorization")
|
|
78
82
|
raise
|
|
83
|
+
|
|
84
|
+
# Also check for wrapped exceptions
|
|
85
|
+
if hasattr(e, '__cause__') and isinstance(e.__cause__, McpAuthorizationRequired):
|
|
86
|
+
logger.info(f"Toolkit {toolkit_name} requires MCP OAuth authorization (wrapped)")
|
|
87
|
+
raise e.__cause__
|
|
88
|
+
|
|
89
|
+
# Check exception class name as fallback
|
|
90
|
+
if e.__class__.__name__ == 'McpAuthorizationRequired':
|
|
91
|
+
logger.info(f"Toolkit {toolkit_name} requires MCP OAuth authorization (by name)")
|
|
92
|
+
raise
|
|
93
|
+
|
|
79
94
|
# Log and re-raise other errors
|
|
80
95
|
logger.error(f"Error instantiating toolkit {toolkit_name} with client: {str(e)}")
|
|
81
96
|
raise
|
alita_sdk/tools/__init__.py
CHANGED
|
@@ -13,6 +13,30 @@ AVAILABLE_TOOLS = {}
|
|
|
13
13
|
AVAILABLE_TOOLKITS = {}
|
|
14
14
|
FAILED_IMPORTS = {}
|
|
15
15
|
|
|
16
|
+
|
|
17
|
+
def _inject_toolkit_id(tool_conf: dict, toolkit_tools) -> None:
|
|
18
|
+
"""Inject `toolkit_id` into tools that expose `api_wrapper.toolkit_id`.
|
|
19
|
+
|
|
20
|
+
This reads 'id' from the tool configuration and, if it is an integer,
|
|
21
|
+
assigns it to the 'toolkit_id' attribute of the 'api_wrapper' for each
|
|
22
|
+
tool in 'toolkit_tools' that supports it.
|
|
23
|
+
|
|
24
|
+
Args:
|
|
25
|
+
tool_conf: Raw tool configuration item from 'tools_list'.
|
|
26
|
+
toolkit_tools: List of instantiated tools produced by a toolkit.
|
|
27
|
+
"""
|
|
28
|
+
toolkit_id = tool_conf.get('id')
|
|
29
|
+
if isinstance(toolkit_id, int):
|
|
30
|
+
for t in toolkit_tools:
|
|
31
|
+
if hasattr(t, 'api_wrapper') and hasattr(t.api_wrapper, 'toolkit_id'):
|
|
32
|
+
t.api_wrapper.toolkit_id = toolkit_id
|
|
33
|
+
else:
|
|
34
|
+
logger.error(
|
|
35
|
+
f"Toolkit ID is missing or not an integer for tool "
|
|
36
|
+
f"`{tool_conf.get('type', '')}` with name `{tool_conf.get('name', '')}`"
|
|
37
|
+
)
|
|
38
|
+
|
|
39
|
+
|
|
16
40
|
def _safe_import_tool(tool_name, module_path, get_tools_name=None, toolkit_class_name=None):
|
|
17
41
|
"""Safely import a tool module and register available functions/classes."""
|
|
18
42
|
try:
|
|
@@ -34,6 +58,7 @@ def _safe_import_tool(tool_name, module_path, get_tools_name=None, toolkit_class
|
|
|
34
58
|
FAILED_IMPORTS[tool_name] = str(e)
|
|
35
59
|
logger.debug(f"Failed to import {tool_name}: {e}")
|
|
36
60
|
|
|
61
|
+
|
|
37
62
|
# Safe imports for all tools
|
|
38
63
|
_safe_import_tool('github', 'github', 'get_tools', 'AlitaGitHubToolkit')
|
|
39
64
|
_safe_import_tool('openapi', 'openapi', 'get_tools')
|
|
@@ -90,11 +115,19 @@ available_count = len(AVAILABLE_TOOLS)
|
|
|
90
115
|
total_attempted = len(AVAILABLE_TOOLS) + len(FAILED_IMPORTS)
|
|
91
116
|
logger.info(f"Tool imports completed: {available_count}/{total_attempted} successful")
|
|
92
117
|
|
|
118
|
+
# Import community module to trigger community toolkit registration
|
|
119
|
+
try:
|
|
120
|
+
from alita_sdk import community # noqa: F401
|
|
121
|
+
logger.debug("Community toolkits registered successfully")
|
|
122
|
+
except ImportError as e:
|
|
123
|
+
logger.debug(f"Community module not available: {e}")
|
|
124
|
+
|
|
93
125
|
|
|
94
126
|
def get_tools(tools_list, alita, llm, store: Optional[BaseStore] = None, *args, **kwargs):
|
|
95
127
|
tools = []
|
|
96
128
|
|
|
97
129
|
for tool in tools_list:
|
|
130
|
+
toolkit_tools = []
|
|
98
131
|
settings = tool.get('settings')
|
|
99
132
|
|
|
100
133
|
# Skip tools without settings early
|
|
@@ -116,53 +149,47 @@ def get_tools(tools_list, alita, llm, store: Optional[BaseStore] = None, *args,
|
|
|
116
149
|
|
|
117
150
|
# Set pgvector collection schema if present
|
|
118
151
|
if settings.get('pgvector_configuration'):
|
|
119
|
-
|
|
152
|
+
# Use tool id if available, otherwise use toolkit_name or type as fallback
|
|
153
|
+
collection_id = tool.get('id') or tool.get('toolkit_name') or tool_type
|
|
154
|
+
settings['pgvector_configuration']['collection_schema'] = str(collection_id)
|
|
120
155
|
|
|
121
156
|
# Handle ADO special cases
|
|
122
157
|
if tool_type in ['ado_boards', 'ado_wiki', 'ado_plans']:
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
# Handle ADO repos aliases
|
|
127
|
-
if tool_type in ['ado_repos', 'azure_devops_repos'] and 'ado_repos' in AVAILABLE_TOOLS:
|
|
158
|
+
toolkit_tools.extend(AVAILABLE_TOOLS['ado']['get_tools'](tool_type, tool))
|
|
159
|
+
elif tool_type in ['ado_repos', 'azure_devops_repos'] and 'ado_repos' in AVAILABLE_TOOLS:
|
|
128
160
|
try:
|
|
129
|
-
|
|
161
|
+
toolkit_tools.extend(AVAILABLE_TOOLS['ado_repos']['get_tools'](tool))
|
|
130
162
|
except Exception as e:
|
|
131
163
|
logger.error(f"Error getting ADO repos tools: {e}")
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
# Skip MCP toolkit - it's handled by runtime/toolkits/tools.py to avoid duplicate loading
|
|
135
|
-
if tool_type == 'mcp':
|
|
164
|
+
elif tool_type == 'mcp':
|
|
136
165
|
logger.debug(f"Skipping MCP toolkit '{tool.get('toolkit_name')}' - handled by runtime toolkit system")
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
if tool_type in AVAILABLE_TOOLS and 'get_tools' in AVAILABLE_TOOLS[tool_type]:
|
|
166
|
+
elif tool_type == 'planning':
|
|
167
|
+
logger.debug(f"Skipping planning toolkit '{tool.get('toolkit_name')}' - handled by runtime toolkit system")
|
|
168
|
+
elif tool_type in AVAILABLE_TOOLS and 'get_tools' in AVAILABLE_TOOLS[tool_type]:
|
|
141
169
|
try:
|
|
142
|
-
|
|
170
|
+
toolkit_tools.extend(AVAILABLE_TOOLS[tool_type]['get_tools'](tool))
|
|
143
171
|
except Exception as e:
|
|
144
172
|
logger.error(f"Error getting tools for {tool_type}: {e}")
|
|
145
173
|
raise ToolException(f"Error getting tools for {tool_type}: {e}")
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
# Handle custom modules
|
|
149
|
-
if settings.get("module"):
|
|
174
|
+
elif settings.get("module"):
|
|
150
175
|
try:
|
|
151
176
|
mod = import_module(settings.pop("module"))
|
|
152
177
|
tkitclass = getattr(mod, settings.pop("class"))
|
|
153
178
|
get_toolkit_params = settings.copy()
|
|
154
179
|
get_toolkit_params["name"] = tool.get("name")
|
|
155
180
|
toolkit = tkitclass.get_toolkit(**get_toolkit_params)
|
|
156
|
-
|
|
181
|
+
toolkit_tools.extend(toolkit.get_tools())
|
|
157
182
|
except Exception as e:
|
|
158
183
|
logger.error(f"Error in getting custom toolkit: {e}")
|
|
159
|
-
continue
|
|
160
|
-
|
|
161
|
-
# Tool not available
|
|
162
|
-
if tool_type in FAILED_IMPORTS:
|
|
163
|
-
logger.warning(f"Tool '{tool_type}' is not available: {FAILED_IMPORTS[tool_type]}")
|
|
164
184
|
else:
|
|
165
|
-
|
|
185
|
+
if tool_type in FAILED_IMPORTS:
|
|
186
|
+
logger.warning(f"Tool '{tool_type}' is not available: {FAILED_IMPORTS[tool_type]}")
|
|
187
|
+
else:
|
|
188
|
+
logger.warning(f"Unknown tool type: {tool_type}")
|
|
189
|
+
#
|
|
190
|
+
# Always inject toolkit_id to each tool
|
|
191
|
+
_inject_toolkit_id(tool, toolkit_tools)
|
|
192
|
+
tools.extend(toolkit_tools)
|
|
166
193
|
|
|
167
194
|
return tools
|
|
168
195
|
|
|
@@ -2,6 +2,7 @@ import copy
|
|
|
2
2
|
import json
|
|
3
3
|
import logging
|
|
4
4
|
import time
|
|
5
|
+
from enum import Enum
|
|
5
6
|
from typing import Any, Optional, List, Dict, Generator
|
|
6
7
|
|
|
7
8
|
from langchain_core.callbacks import dispatch_custom_event
|
|
@@ -16,7 +17,17 @@ from ..runtime.utils.utils import IndexerKeywords
|
|
|
16
17
|
|
|
17
18
|
logger = logging.getLogger(__name__)
|
|
18
19
|
|
|
19
|
-
DEFAULT_CUT_OFF = 0.
|
|
20
|
+
DEFAULT_CUT_OFF = 0.1
|
|
21
|
+
INDEX_META_UPDATE_INTERVAL = 600.0
|
|
22
|
+
|
|
23
|
+
class IndexTools(str, Enum):
|
|
24
|
+
"""Enum for index-related tool names."""
|
|
25
|
+
INDEX_DATA = "index_data"
|
|
26
|
+
SEARCH_INDEX = "search_index"
|
|
27
|
+
STEPBACK_SEARCH_INDEX = "stepback_search_index"
|
|
28
|
+
STEPBACK_SUMMARY_INDEX = "stepback_summary_index"
|
|
29
|
+
REMOVE_INDEX = "remove_index"
|
|
30
|
+
LIST_COLLECTIONS = "list_collections"
|
|
20
31
|
|
|
21
32
|
# Base Vector Store Schema Models
|
|
22
33
|
BaseIndexParams = create_model(
|
|
@@ -157,6 +168,16 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
|
|
|
157
168
|
clean_index = kwargs.get("clean_index")
|
|
158
169
|
chunking_tool = kwargs.get("chunking_tool")
|
|
159
170
|
chunking_config = kwargs.get("chunking_config")
|
|
171
|
+
|
|
172
|
+
# Store the interval in a private dict to avoid Pydantic field errors
|
|
173
|
+
if not hasattr(self, "_index_meta_config"):
|
|
174
|
+
self._index_meta_config: Dict[str, Any] = {}
|
|
175
|
+
|
|
176
|
+
self._index_meta_config["update_interval"] = kwargs.get(
|
|
177
|
+
"meta_update_interval",
|
|
178
|
+
INDEX_META_UPDATE_INTERVAL,
|
|
179
|
+
)
|
|
180
|
+
|
|
160
181
|
result = {"count": 0}
|
|
161
182
|
#
|
|
162
183
|
try:
|
|
@@ -164,6 +185,7 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
|
|
|
164
185
|
self._clean_index(index_name)
|
|
165
186
|
#
|
|
166
187
|
self.index_meta_init(index_name, kwargs)
|
|
188
|
+
self._emit_index_event(index_name)
|
|
167
189
|
#
|
|
168
190
|
self._log_tool_event(f"Indexing data into collection with suffix '{index_name}'. It can take some time...")
|
|
169
191
|
self._log_tool_event(f"Loading the documents to index...{kwargs}")
|
|
@@ -179,18 +201,26 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
|
|
|
179
201
|
self._save_index_generator(documents, documents_count, chunking_tool, chunking_config, index_name=index_name, result=result)
|
|
180
202
|
#
|
|
181
203
|
results_count = result["count"]
|
|
182
|
-
|
|
204
|
+
# Final update should always be forced
|
|
205
|
+
self.index_meta_update(index_name, IndexerKeywords.INDEX_META_COMPLETED.value, results_count, update_force=True)
|
|
183
206
|
self._emit_index_event(index_name)
|
|
184
207
|
#
|
|
185
208
|
return {"status": "ok", "message": f"successfully indexed {results_count} documents" if results_count > 0
|
|
186
209
|
else "no new documents to index"}
|
|
187
210
|
except Exception as e:
|
|
188
|
-
|
|
189
|
-
|
|
211
|
+
# Do maximum effort at least send custom event for supposed changed status
|
|
212
|
+
msg = str(e)
|
|
213
|
+
try:
|
|
214
|
+
# Error update should also be forced
|
|
215
|
+
self.index_meta_update(index_name, IndexerKeywords.INDEX_META_FAILED.value, result["count"], update_force=True)
|
|
216
|
+
except Exception as ie:
|
|
217
|
+
logger.error(f"Failed to update index meta status to FAILED for index '{index_name}': {ie}")
|
|
218
|
+
msg = f"{msg}; additionally failed to update index meta status to FAILED: {ie}"
|
|
219
|
+
self._emit_index_event(index_name, error=msg)
|
|
190
220
|
raise e
|
|
191
|
-
|
|
192
221
|
|
|
193
222
|
def _save_index_generator(self, base_documents: Generator[Document, None, None], base_total: int, chunking_tool, chunking_config, result, index_name: Optional[str] = None):
|
|
223
|
+
self._ensure_vectorstore_initialized()
|
|
194
224
|
self._log_tool_event(f"Base documents are ready for indexing. {base_total} base documents in total to index.")
|
|
195
225
|
from ..runtime.langchain.interfaces.llm_processor import add_documents
|
|
196
226
|
#
|
|
@@ -243,6 +273,11 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
|
|
|
243
273
|
logger.debug(msg)
|
|
244
274
|
self._log_tool_event(msg)
|
|
245
275
|
result["count"] += dependent_docs_counter
|
|
276
|
+
# After each base document, try a non-forced meta update; throttling handled inside index_meta_update
|
|
277
|
+
try:
|
|
278
|
+
self.index_meta_update(index_name, IndexerKeywords.INDEX_META_IN_PROGRESS.value, result["count"], update_force=False)
|
|
279
|
+
except Exception as exc: # best-effort, do not break indexing
|
|
280
|
+
logger.warning(f"Failed to update index meta during indexing process for index '{index_name}': {exc}")
|
|
246
281
|
if pg_vector_add_docs_chunk:
|
|
247
282
|
add_documents(vectorstore=self.vectorstore, documents=pg_vector_add_docs_chunk)
|
|
248
283
|
|
|
@@ -308,6 +343,7 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
|
|
|
308
343
|
log_msg: str = "Verification of documents to index started"
|
|
309
344
|
) -> Generator[Document, None, None]:
|
|
310
345
|
"""Generic duplicate reduction logic for documents."""
|
|
346
|
+
self._ensure_vectorstore_initialized()
|
|
311
347
|
self._log_tool_event(log_msg, tool_name="index_documents")
|
|
312
348
|
indexed_data = self._get_indexed_data(index_name)
|
|
313
349
|
indexed_keys = set(indexed_data.keys())
|
|
@@ -463,6 +499,7 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
|
|
|
463
499
|
)
|
|
464
500
|
|
|
465
501
|
def index_meta_init(self, index_name: str, index_configuration: dict[str, Any]):
|
|
502
|
+
self._ensure_vectorstore_initialized()
|
|
466
503
|
index_meta = super().get_index_meta(index_name)
|
|
467
504
|
if not index_meta:
|
|
468
505
|
self._log_tool_event(
|
|
@@ -482,12 +519,53 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
|
|
|
482
519
|
"updated_on": created_on,
|
|
483
520
|
"task_id": None,
|
|
484
521
|
"conversation_id": None,
|
|
522
|
+
"toolkit_id": self.toolkit_id,
|
|
485
523
|
}
|
|
486
524
|
metadata["history"] = json.dumps([metadata])
|
|
487
525
|
index_meta_doc = Document(page_content=f"{IndexerKeywords.INDEX_META_TYPE.value}_{index_name}", metadata=metadata)
|
|
488
526
|
add_documents(vectorstore=self.vectorstore, documents=[index_meta_doc])
|
|
489
527
|
|
|
490
|
-
def index_meta_update(self, index_name: str, state: str, result: int):
|
|
528
|
+
def index_meta_update(self, index_name: str, state: str, result: int, update_force: bool = True, interval: Optional[float] = None):
|
|
529
|
+
"""Update `index_meta` document with optional time-based throttling.
|
|
530
|
+
|
|
531
|
+
Args:
|
|
532
|
+
index_name: Index name to update meta for.
|
|
533
|
+
state: New state value for the `index_meta` record.
|
|
534
|
+
result: Number of processed documents to store in the `updated` field.
|
|
535
|
+
update_force: If `True`, perform the update unconditionally, ignoring throttling.
|
|
536
|
+
If `False`, perform the update only when the effective time interval has passed.
|
|
537
|
+
interval: Optional custom interval (in seconds) for this call when `update_force` is `False`.
|
|
538
|
+
If `None`, falls back to the value stored in `self._index_meta_config["update_interval"]`
|
|
539
|
+
if present, otherwise uses `INDEX_META_UPDATE_INTERVAL`.
|
|
540
|
+
"""
|
|
541
|
+
self._ensure_vectorstore_initialized()
|
|
542
|
+
if not hasattr(self, "_index_meta_last_update_time"):
|
|
543
|
+
self._index_meta_last_update_time: Dict[str, float] = {}
|
|
544
|
+
|
|
545
|
+
if not update_force:
|
|
546
|
+
# Resolve effective interval:
|
|
547
|
+
# 1\) explicit arg
|
|
548
|
+
# 2\) value from `_index_meta_config`
|
|
549
|
+
# 3\) default constant
|
|
550
|
+
cfg_interval = None
|
|
551
|
+
if hasattr(self, "_index_meta_config"):
|
|
552
|
+
cfg_interval = self._index_meta_config.get("update_interval")
|
|
553
|
+
|
|
554
|
+
eff_interval = (
|
|
555
|
+
interval
|
|
556
|
+
if interval is not None
|
|
557
|
+
else (cfg_interval if cfg_interval is not None else INDEX_META_UPDATE_INTERVAL)
|
|
558
|
+
)
|
|
559
|
+
|
|
560
|
+
last_time = self._index_meta_last_update_time.get(index_name)
|
|
561
|
+
now = time.time()
|
|
562
|
+
if last_time is not None and (now - last_time) < eff_interval:
|
|
563
|
+
return
|
|
564
|
+
self._index_meta_last_update_time[index_name] = now
|
|
565
|
+
else:
|
|
566
|
+
# For forced updates, always refresh last update time
|
|
567
|
+
self._index_meta_last_update_time[index_name] = time.time()
|
|
568
|
+
|
|
491
569
|
index_meta_raw = super().get_index_meta(index_name)
|
|
492
570
|
from ..runtime.langchain.interfaces.llm_processor import add_documents
|
|
493
571
|
#
|
|
@@ -545,11 +623,12 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
|
|
|
545
623
|
event_data = {
|
|
546
624
|
"id": index_meta.get("id"),
|
|
547
625
|
"index_name": index_name,
|
|
548
|
-
"state": metadata.get("state"),
|
|
626
|
+
"state": "failed" if error is not None else metadata.get("state"),
|
|
549
627
|
"error": error,
|
|
550
628
|
"reindex": is_reindex,
|
|
551
629
|
"indexed": metadata.get("indexed", 0),
|
|
552
630
|
"updated": metadata.get("updated", 0),
|
|
631
|
+
"toolkit_id": metadata.get("toolkit_id"),
|
|
553
632
|
}
|
|
554
633
|
|
|
555
634
|
# Emit the event
|
|
@@ -572,8 +651,8 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
|
|
|
572
651
|
"""
|
|
573
652
|
return [
|
|
574
653
|
{
|
|
575
|
-
"name":
|
|
576
|
-
"mode":
|
|
654
|
+
"name": IndexTools.INDEX_DATA.value,
|
|
655
|
+
"mode": IndexTools.INDEX_DATA.value,
|
|
577
656
|
"ref": self.index_data,
|
|
578
657
|
"description": "Loads data to index.",
|
|
579
658
|
"args_schema": create_model(
|
|
@@ -583,36 +662,36 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
|
|
|
583
662
|
)
|
|
584
663
|
},
|
|
585
664
|
{
|
|
586
|
-
"name":
|
|
587
|
-
"mode":
|
|
665
|
+
"name": IndexTools.SEARCH_INDEX.value,
|
|
666
|
+
"mode": IndexTools.SEARCH_INDEX.value,
|
|
588
667
|
"ref": self.search_index,
|
|
589
668
|
"description": self.search_index.__doc__,
|
|
590
669
|
"args_schema": BaseSearchParams
|
|
591
670
|
},
|
|
592
671
|
{
|
|
593
|
-
"name":
|
|
594
|
-
"mode":
|
|
672
|
+
"name": IndexTools.STEPBACK_SEARCH_INDEX.value,
|
|
673
|
+
"mode": IndexTools.STEPBACK_SEARCH_INDEX.value,
|
|
595
674
|
"ref": self.stepback_search_index,
|
|
596
675
|
"description": self.stepback_search_index.__doc__,
|
|
597
676
|
"args_schema": BaseStepbackSearchParams
|
|
598
677
|
},
|
|
599
678
|
{
|
|
600
|
-
"name":
|
|
601
|
-
"mode":
|
|
679
|
+
"name": IndexTools.STEPBACK_SUMMARY_INDEX.value,
|
|
680
|
+
"mode": IndexTools.STEPBACK_SUMMARY_INDEX.value,
|
|
602
681
|
"ref": self.stepback_summary_index,
|
|
603
682
|
"description": self.stepback_summary_index.__doc__,
|
|
604
683
|
"args_schema": BaseStepbackSearchParams
|
|
605
684
|
},
|
|
606
685
|
{
|
|
607
|
-
"name":
|
|
608
|
-
"mode":
|
|
686
|
+
"name": IndexTools.REMOVE_INDEX.value,
|
|
687
|
+
"mode": IndexTools.REMOVE_INDEX.value,
|
|
609
688
|
"ref": self.remove_index,
|
|
610
689
|
"description": self.remove_index.__doc__,
|
|
611
690
|
"args_schema": RemoveIndexParams
|
|
612
691
|
},
|
|
613
692
|
{
|
|
614
|
-
"name":
|
|
615
|
-
"mode":
|
|
693
|
+
"name": IndexTools.LIST_COLLECTIONS.value,
|
|
694
|
+
"mode": IndexTools.LIST_COLLECTIONS.value,
|
|
616
695
|
"ref": self.list_collections,
|
|
617
696
|
"description": self.list_collections.__doc__,
|
|
618
697
|
# No parameters
|