alita-sdk 0.3.374__py3-none-any.whl → 0.3.423__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of alita-sdk might be problematic.
- alita_sdk/configurations/bitbucket.py +95 -0
- alita_sdk/configurations/confluence.py +96 -1
- alita_sdk/configurations/gitlab.py +79 -0
- alita_sdk/configurations/jira.py +103 -0
- alita_sdk/configurations/testrail.py +88 -0
- alita_sdk/configurations/xray.py +93 -0
- alita_sdk/configurations/zephyr_enterprise.py +93 -0
- alita_sdk/configurations/zephyr_essential.py +75 -0
- alita_sdk/runtime/clients/client.py +3 -2
- alita_sdk/runtime/clients/sandbox_client.py +8 -0
- alita_sdk/runtime/langchain/assistant.py +56 -40
- alita_sdk/runtime/langchain/constants.py +4 -0
- alita_sdk/runtime/langchain/document_loaders/AlitaDocxMammothLoader.py +315 -3
- alita_sdk/runtime/langchain/document_loaders/AlitaJSONLoader.py +4 -1
- alita_sdk/runtime/langchain/document_loaders/constants.py +28 -12
- alita_sdk/runtime/langchain/langraph_agent.py +92 -28
- alita_sdk/runtime/langchain/utils.py +24 -4
- alita_sdk/runtime/toolkits/application.py +8 -1
- alita_sdk/runtime/toolkits/tools.py +80 -49
- alita_sdk/runtime/tools/__init__.py +7 -2
- alita_sdk/runtime/tools/application.py +7 -0
- alita_sdk/runtime/tools/function.py +28 -23
- alita_sdk/runtime/tools/graph.py +10 -4
- alita_sdk/runtime/tools/image_generation.py +104 -8
- alita_sdk/runtime/tools/llm.py +146 -114
- alita_sdk/runtime/tools/sandbox.py +166 -63
- alita_sdk/runtime/tools/vectorstore.py +22 -21
- alita_sdk/runtime/tools/vectorstore_base.py +16 -15
- alita_sdk/runtime/utils/utils.py +1 -0
- alita_sdk/tools/__init__.py +43 -31
- alita_sdk/tools/ado/work_item/ado_wrapper.py +17 -8
- alita_sdk/tools/base_indexer_toolkit.py +102 -93
- alita_sdk/tools/code_indexer_toolkit.py +15 -5
- alita_sdk/tools/confluence/api_wrapper.py +30 -8
- alita_sdk/tools/confluence/loader.py +10 -0
- alita_sdk/tools/elitea_base.py +22 -22
- alita_sdk/tools/gitlab/api_wrapper.py +8 -9
- alita_sdk/tools/jira/api_wrapper.py +1 -1
- alita_sdk/tools/non_code_indexer_toolkit.py +2 -2
- alita_sdk/tools/openapi/__init__.py +10 -1
- alita_sdk/tools/qtest/api_wrapper.py +298 -51
- alita_sdk/tools/sharepoint/api_wrapper.py +104 -33
- alita_sdk/tools/sharepoint/authorization_helper.py +175 -1
- alita_sdk/tools/sharepoint/utils.py +8 -2
- alita_sdk/tools/utils/content_parser.py +27 -16
- alita_sdk/tools/vector_adapters/VectorStoreAdapter.py +38 -25
- {alita_sdk-0.3.374.dist-info → alita_sdk-0.3.423.dist-info}/METADATA +1 -1
- {alita_sdk-0.3.374.dist-info → alita_sdk-0.3.423.dist-info}/RECORD +51 -51
- {alita_sdk-0.3.374.dist-info → alita_sdk-0.3.423.dist-info}/WHEEL +0 -0
- {alita_sdk-0.3.374.dist-info → alita_sdk-0.3.423.dist-info}/licenses/LICENSE +0 -0
- {alita_sdk-0.3.374.dist-info → alita_sdk-0.3.423.dist-info}/top_level.txt +0 -0
alita_sdk/runtime/tools/sandbox.py
CHANGED

@@ -2,21 +2,60 @@ import asyncio
 import logging
 import subprocess
 import os
-from typing import Any, Type, Optional, Dict
-from
-from
+from typing import Any, Type, Optional, Dict, List, Literal, Union
+from copy import deepcopy
+from pathlib import Path
+
+from langchain_core.tools import BaseTool, BaseToolkit
+from langchain_core.messages import ToolCall
+from pydantic import BaseModel, create_model, ConfigDict, Field
 from pydantic.fields import FieldInfo
 
 logger = logging.getLogger(__name__)
 
+name = "pyodide"
+
+
+def get_tools(tools_list: list, alita_client=None, llm=None, memory_store=None):
+    """
+    Get sandbox tools for the provided tool configurations.
+
+    Args:
+        tools_list: List of tool configurations
+        alita_client: Alita client instance for sandbox tools
+        llm: LLM client instance (unused for sandbox)
+        memory_store: Optional memory store instance (unused for sandbox)
+
+    Returns:
+        List of sandbox tools
+    """
+    all_tools = []
+
+    for tool in tools_list:
+        if tool.get('type') == 'sandbox' or tool.get('toolkit_name') == 'sandbox':
+            try:
+                toolkit_instance = SandboxToolkit.get_toolkit(
+                    stateful=tool['settings'].get('stateful', False),
+                    allow_net=tool['settings'].get('allow_net', True),
+                    alita_client=alita_client,
+                    toolkit_name=tool.get('toolkit_name', '')
+                )
+                all_tools.extend(toolkit_instance.get_tools())
+            except Exception as e:
+                logger.error(f"Error in sandbox toolkit get_tools: {e}")
+                logger.error(f"Tool config: {tool}")
+                raise
+
+    return all_tools
+
 
 def _is_deno_available() -> bool:
     """Check if Deno is available in the PATH"""
     try:
         result = subprocess.run(
-            ["deno", "--version"],
-            capture_output=True,
-            text=True,
+            ["deno", "--version"],
+            capture_output=True,
+            text=True,
             timeout=10
         )
         return result.returncode == 0

@@ -25,43 +64,17 @@ def _is_deno_available() -> bool:
 
 
 def _setup_pyodide_cache_env() -> None:
-    """Setup Pyodide caching environment variables for performance optimization"""
+    """Setup Pyodide caching environment variables for performance optimization [NO-OP]"""
     try:
-
-
-        if os.path.exists(cache_env_file):
-            with open(cache_env_file, 'r') as f:
-                for line in f:
-                    line = line.strip()
-                    if line.startswith('export ') and '=' in line:
-                        # Parse export VAR=value format
-                        var_assignment = line[7:]  # Remove 'export '
-                        if '=' in var_assignment:
-                            key, value = var_assignment.split('=', 1)
-                            # Remove quotes if present
-                            value = value.strip('"').strip("'")
-                            os.environ[key] = value
-                            logger.debug(f"Set Pyodide cache env: {key}={value}")
-
-        # Set default caching environment variables if not already set
-        cache_defaults = {
-            'PYODIDE_PACKAGES_PATH': os.path.expanduser('~/.cache/pyodide'),
-            'DENO_DIR': os.path.expanduser('~/.cache/deno'),
-            'PYODIDE_CACHE_DIR': os.path.expanduser('~/.cache/pyodide'),
-        }
-
-        for key, default_value in cache_defaults.items():
-            if key not in os.environ:
-                os.environ[key] = default_value
-                logger.debug(f"Set default Pyodide env: {key}={default_value}")
-
+        for key in ["SANDBOX_BASE", "DENO_DIR"]:
+            logger.info("Sandbox env: %s -> %s", key, os.environ.get(key, "n/a"))
     except Exception as e:
         logger.warning(f"Could not setup Pyodide cache environment: {e}")
 
 
 # Create input schema for the sandbox tool
 sandbox_tool_input = create_model(
-    "SandboxToolInput",
+    "SandboxToolInput",
     code=(str, FieldInfo(description="Python code to execute in the sandbox environment"))
 )

@@ -72,7 +85,7 @@ class PyodideSandboxTool(BaseTool):
     This tool leverages langchain-sandbox to provide a safe environment for running untrusted Python code.
     Optimized for performance with caching and stateless execution by default.
     """
-
+
     name: str = "pyodide_sandbox"
     description: str = """Execute Python code in a secure sandbox environment using Pyodide.
     This tool allows safe execution of Python code without access to the host system.

@@ -81,7 +94,7 @@ class PyodideSandboxTool(BaseTool):
     - Perform calculations or data analysis
     - Test Python algorithms
     - Run code that requires isolation from the host system
-
+
     The sandbox supports most Python standard library modules and can install additional packages.
     Note: File access and some system operations are restricted for security.
     Optimized for performance with local caching (stateless by default for faster execution).

@@ -91,14 +104,37 @@ class PyodideSandboxTool(BaseTool):
     allow_net: bool = True
     session_bytes: Optional[bytes] = None
     session_metadata: Optional[Dict] = None
-
+    alita_client: Optional[Any] = None
+
     def __init__(self, **kwargs: Any) -> None:
         super().__init__(**kwargs)
         self._sandbox = None
         # Setup caching environment for optimal performance
         _setup_pyodide_cache_env()
         self._initialize_sandbox()
-
+
+    def _prepare_pyodide_input(self, code: str) -> str:
+        """Prepare input for PyodideSandboxTool by injecting state and alita_client into the code block."""
+        pyodide_predata = ""
+
+        # Add alita_client if available
+        if self.alita_client:
+            try:
+                # Get the directory of the current file and construct the path to sandbox_client.py
+                current_dir = Path(__file__).parent
+                sandbox_client_path = current_dir.parent / 'clients' / 'sandbox_client.py'
+
+                with open(sandbox_client_path, 'r') as f:
+                    sandbox_client_code = f.read()
+                pyodide_predata += f"{sandbox_client_code}\n"
+                pyodide_predata += (f"alita_client = SandboxClient(base_url='{self.alita_client.base_url}',"
+                                    f"project_id={self.alita_client.project_id},"
+                                    f"auth_token='{self.alita_client.auth_token}')\n")
+            except FileNotFoundError:
+                logger.error(f"sandbox_client.py not found. Ensure the file exists.")
+
+        return f"#elitea simplified client\n{pyodide_predata}{code}"
+
     def _initialize_sandbox(self) -> None:
         """Initialize the PyodideSandbox instance with optimized settings"""
         try:

@@ -110,12 +146,22 @@ class PyodideSandboxTool(BaseTool):
             )
             logger.error(error_msg)
             raise RuntimeError(error_msg)
-
+
         from langchain_sandbox import PyodideSandbox
-
+
+        # Air-gapped settings
+        sandbox_base = os.environ.get("SANDBOX_BASE", os.path.expanduser('~/.cache/pyodide'))
+        sandbox_tmp = os.path.join(sandbox_base, "tmp")
+        deno_cache = os.environ.get("DENO_DIR", os.path.expanduser('~/.cache/deno'))
+
         # Configure sandbox with performance optimizations
         self._sandbox = PyodideSandbox(
             stateful=self.stateful,
+            #
+            allow_env=["SANDBOX_BASE"],
+            allow_read=[sandbox_base, sandbox_tmp, deno_cache],
+            allow_write=[sandbox_tmp, deno_cache],
+            #
             allow_net=self.allow_net,
             # Use auto node_modules_dir for better caching
             node_modules_dir="auto"

@@ -135,7 +181,7 @@ class PyodideSandboxTool(BaseTool):
         except Exception as e:
             logger.error(f"Failed to initialize PyodideSandbox: {e}")
             raise
-
+
     def _run(self, code: str) -> str:
         """
         Synchronous version - runs the async method in a new event loop

@@ -144,7 +190,10 @@ class PyodideSandboxTool(BaseTool):
         # Check if sandbox is initialized, if not try to initialize
         if self._sandbox is None:
             self._initialize_sandbox()
-
+
+        # Prepare code with state and client injection
+        prepared_code = self._prepare_pyodide_input(code)
+
         # Check if we're already in an async context
         try:
             loop = asyncio.get_running_loop()

@@ -152,11 +201,11 @@ class PyodideSandboxTool(BaseTool):
             # We'll need to use a different approach
             import concurrent.futures
             with concurrent.futures.ThreadPoolExecutor() as executor:
-                future = executor.submit(asyncio.run, self._arun(
+                future = executor.submit(asyncio.run, self._arun(prepared_code))
                 return future.result()
         except RuntimeError:
             # No running loop, safe to use asyncio.run
-            return asyncio.run(self._arun(
+            return asyncio.run(self._arun(prepared_code))
         except (ImportError, RuntimeError) as e:
             # Handle specific dependency errors gracefully
             error_msg = str(e)

@@ -169,7 +218,7 @@ class PyodideSandboxTool(BaseTool):
         except Exception as e:
             logger.error(f"Error executing code in sandbox: {e}")
             return f"Error executing code: {str(e)}"
-
+
     async def _arun(self, code: str) -> str:
         """
         Execute Python code in the Pyodide sandbox

@@ -177,19 +226,19 @@ class PyodideSandboxTool(BaseTool):
         try:
             if self._sandbox is None:
                 self._initialize_sandbox()
-
+
             # Execute the code with session state if available
             result = await self._sandbox.execute(
                 code,
                 session_bytes=self.session_bytes,
                 session_metadata=self.session_metadata
             )
-
+
             # Update session state for stateful execution
             if self.stateful:
                 self.session_bytes = result.session_bytes
                 self.session_metadata = result.session_metadata
-
+
             result_dict = {}
 
             if result.result is not None:

@@ -212,10 +261,10 @@ class PyodideSandboxTool(BaseTool):
 
             result_dict["execution_info"] = execution_info
             return result_dict
-
+
         except Exception as e:
             logger.error(f"Error executing code in sandbox: {e}")
-            return f"Error executing code: {str(e)}"
+            return {"error": f"Error executing code: {str(e)}"}
 
 
 class StatefulPyodideSandboxTool(PyodideSandboxTool):

@@ -223,7 +272,7 @@ class StatefulPyodideSandboxTool(PyodideSandboxTool):
     A stateful version of the PyodideSandboxTool that maintains state between executions.
     This version preserves variables, imports, and function definitions across multiple tool calls.
     """
-
+
     name: str = "stateful_pyodide_sandbox"
     description: str = """Execute Python code in a stateful sandbox environment using Pyodide.
     This tool maintains state between executions, preserving variables, imports, and function definitions.

@@ -232,41 +281,95 @@ class StatefulPyodideSandboxTool(PyodideSandboxTool):
     - Maintain variables across multiple calls
     - Develop complex programs step by step
     - Preserve imported libraries and defined functions
-
+
     The sandbox supports most Python standard library modules and can install additional packages.
     Note: File access and some system operations are restricted for security.
     """
-
+
     def __init__(self, **kwargs: Any) -> None:
         kwargs['stateful'] = True  # Force stateful mode
         super().__init__(**kwargs)
 
 
 # Factory function for creating sandbox tools
-def create_sandbox_tool(stateful: bool = False, allow_net: bool = True) -> BaseTool:
+def create_sandbox_tool(stateful: bool = False, allow_net: bool = True, alita_client: Optional[Any] = None) -> BaseTool:
     """
     Factory function to create sandbox tools with specified configuration.
-
+
     Note: This tool requires Deno to be installed and available in PATH.
     For installation and optimization, run the bootstrap.sh script.
-
+
     Args:
         stateful: Whether to maintain state between executions (default: False for better performance)
         allow_net: Whether to allow network access (for package installation)
-
+
     Returns:
         Configured sandbox tool instance
-
+
     Raises:
         ImportError: If langchain-sandbox is not installed
         RuntimeError: If Deno is not found in PATH
-
+
     Performance Notes:
     - Stateless mode (default) is faster and avoids session state overhead
    - Run bootstrap.sh script to enable local caching and reduce initialization time
    - Cached wheels reduce package download time from ~4.76s to near-instant
    """
    if stateful:
-        return StatefulPyodideSandboxTool(allow_net=allow_net)
+        return StatefulPyodideSandboxTool(allow_net=allow_net, alita_client=alita_client)
     else:
-        return PyodideSandboxTool(stateful=False, allow_net=allow_net)
+        return PyodideSandboxTool(stateful=False, allow_net=allow_net, alita_client=alita_client)
+
+
+class SandboxToolkit(BaseToolkit):
+    tools: List[BaseTool] = []
+
+    @staticmethod
+    def toolkit_config_schema() -> Type[BaseModel]:
+        # Create sample tools to get their schemas
+        sample_tools = [
+            PyodideSandboxTool(),
+            StatefulPyodideSandboxTool()
+        ]
+        selected_tools = {x.name: x.args_schema.model_json_schema() for x in sample_tools}
+
+        return create_model(
+            'sandbox',
+            stateful=(bool, Field(default=False, description="Whether to maintain state between executions")),
+            allow_net=(bool, Field(default=True, description="Whether to allow network access for package installation")),
+            selected_tools=(List[Literal[tuple(selected_tools)]],
+                            Field(default=[], json_schema_extra={'args_schemas': selected_tools})),
+
+            __config__=ConfigDict(json_schema_extra={
+                'metadata': {
+                    "label": "Python Sandbox",
+                    "icon_url": "sandbox.svg",
+                    "hidden": False,
+                    "categories": ["code", "execution", "internal_tool"],
+                    "extra_categories": ["python", "pyodide", "sandbox", "code execution"],
+                }
+            })
+        )
+
+    @classmethod
+    def get_toolkit(cls, stateful: bool = False, allow_net: bool = True, alita_client=None, **kwargs):
+        """
+        Get toolkit with sandbox tools.
+
+        Args:
+            stateful: Whether to maintain state between executions
+            allow_net: Whether to allow network access
+            alita_client: Alita client instance for sandbox tools
+            **kwargs: Additional arguments
+        """
+        tools = []
+
+        if stateful:
+            tools.append(StatefulPyodideSandboxTool(allow_net=allow_net, alita_client=alita_client))
+        else:
+            tools.append(PyodideSandboxTool(stateful=False, allow_net=allow_net, alita_client=alita_client))
+
+        return cls(tools=tools)
+
+    def get_tools(self):
+        return self.tools
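Note: a minimal usage sketch of the toolkit surface added above, not part of the wheel itself. The import path follows the file location; the settings values are illustrative assumptions based on the get_tools helper and SandboxToolkit.get_toolkit signatures in this diff.

# Hypothetical sketch (not shipped in the package): exercising the new sandbox API.
from alita_sdk.runtime.tools.sandbox import SandboxToolkit, create_sandbox_tool

# Toolkit path, as the runtime's get_tools() helper builds it.
toolkit = SandboxToolkit.get_toolkit(stateful=False, allow_net=True, alita_client=None)
tools = toolkit.get_tools()  # [PyodideSandboxTool]

# Factory path; stateful=True preserves variables between calls.
tool = create_sandbox_tool(stateful=True)
print(tool._run("x = 40"))  # _run is the sync entry point shown above
print(tool._run("x + 2"))   # stateful session: x is still defined

Both paths require Deno on PATH and the langchain-sandbox package, per the checks in _is_deno_available and _initialize_sandbox.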
alita_sdk/runtime/tools/vectorstore.py
CHANGED

@@ -207,9 +207,9 @@ class VectorStoreWrapper(BaseToolApiWrapper):
             tool_name="_remove_collection"
         )
 
-    def _get_indexed_ids(self,
+    def _get_indexed_ids(self, index_name: Optional[str] = '') -> List[str]:
         """Get all indexed document IDs from vectorstore"""
-        return self.vector_adapter.get_indexed_ids(self,
+        return self.vector_adapter.get_indexed_ids(self, index_name)
 
     def list_collections(self) -> Any:
         """List all collections in the vectorstore.

@@ -233,7 +233,7 @@ class VectorStoreWrapper(BaseToolApiWrapper):
             return {"collections": [], "message": "No indexed collections"}
         return cols
 
-    def _clean_collection(self,
+    def _clean_collection(self, index_name: str = ''):
         """
         Clean the vectorstore collection by deleting all indexed data.
         """

@@ -241,15 +241,15 @@ class VectorStoreWrapper(BaseToolApiWrapper):
             f"Cleaning collection '{self.dataset}'",
             tool_name="_clean_collection"
         )
-        self.vector_adapter.clean_collection(self,
+        self.vector_adapter.clean_collection(self, index_name)
         self._log_data(
             f"Collection '{self.dataset}' has been cleaned. ",
             tool_name="_clean_collection"
         )
 
-    def _get_code_indexed_data(self,
+    def _get_code_indexed_data(self, index_name: str) -> Dict[str, Dict[str, Any]]:
         """ Get all indexed data from vectorstore for code content """
-        return self.vector_adapter.get_code_indexed_data(self,
+        return self.vector_adapter.get_code_indexed_data(self, index_name)
 
     def _add_to_collection(self, entry_id, new_collection_value):
         """Add a new collection name to the `collection` key in the `metadata` column."""

@@ -258,7 +258,7 @@ class VectorStoreWrapper(BaseToolApiWrapper):
     def _reduce_duplicates(
         self,
         documents: Generator[Any, None, None],
-
+        index_name: str,
         get_indexed_data: Callable,
         key_fn: Callable,
         compare_fn: Callable,

@@ -267,7 +267,7 @@ class VectorStoreWrapper(BaseToolApiWrapper):
     ) -> List[Any]:
         """Generic duplicate reduction logic for documents."""
         self._log_data(log_msg, tool_name="index_documents")
-        indexed_data = get_indexed_data(
+        indexed_data = get_indexed_data(index_name)
         indexed_keys = set(indexed_data.keys())
         if not indexed_keys:
             self._log_data("Vectorstore is empty, indexing all incoming documents", tool_name="index_documents")

@@ -279,14 +279,14 @@ class VectorStoreWrapper(BaseToolApiWrapper):
         for document in documents:
             key = key_fn(document)
             key = key if isinstance(key, str) else str(key)
-            if key in indexed_keys and
+            if key in indexed_keys and index_name == indexed_data[key]['metadata'].get('collection'):
                 if compare_fn(document, indexed_data[key]):
                     # Disabled addition of new collection to already indexed documents
                     # # check metadata.collection and update if needed
                     # for update_collection_id in remove_ids_fn(indexed_data, key):
                     #     self._add_to_collection(
                     #         update_collection_id,
-                    #
+                    #         index_name
                     #     )
                     continue
             final_docs.append(document)

@@ -303,10 +303,10 @@ class VectorStoreWrapper(BaseToolApiWrapper):
 
         return final_docs
 
-    def _reduce_code_duplicates(self, documents: Generator[Any, None, None],
+    def _reduce_code_duplicates(self, documents: Generator[Any, None, None], index_name: str) -> List[Any]:
         return self._reduce_duplicates(
             documents,
-
+            index_name,
             self._get_code_indexed_data,
             lambda doc: doc.metadata.get('filename'),
             lambda doc, idx: (

@@ -318,7 +318,7 @@ class VectorStoreWrapper(BaseToolApiWrapper):
             log_msg="Verification of code documents to index started"
         )
 
-    def index_documents(self, documents: Generator[Document, None, None],
+    def index_documents(self, documents: Generator[Document, None, None], index_name: str, progress_step: int = 20, clean_index: bool = True, is_code: bool = True):
         """ Index documents in the vectorstore.
 
         Args:

@@ -329,13 +329,13 @@ class VectorStoreWrapper(BaseToolApiWrapper):
 
         from ..langchain.interfaces.llm_processor import add_documents
 
-        self._log_tool_event(message=f"Starting the indexing... Parameters: {
+        self._log_tool_event(message=f"Starting the indexing... Parameters: {index_name=}, {clean_index=}, {is_code}", tool_name="index_documents")
         # pre-process documents if needed (find duplicates, etc.)
         if clean_index:
             logger.info("Cleaning index before re-indexing all documents.")
             self._log_data("Cleaning index before re-indexing all documents. Previous index will be removed", tool_name="index_documents")
             try:
-                self._clean_collection(
+                self._clean_collection(index_name)
                 self.vectoradapter.persist()
                 self.vectoradapter.vacuum()
                 self._log_data("Previous index has been removed",

@@ -349,7 +349,7 @@ class VectorStoreWrapper(BaseToolApiWrapper):
                 message="Filter for duplicates",
                 tool_name="index_documents")
             # remove duplicates based on metadata 'id' and 'updated_on' or 'commit_hash' fields
-            documents = self._reduce_code_duplicates(documents,
+            documents = self._reduce_code_duplicates(documents, index_name)
             self._log_tool_event(
                 message="All the duplicates were filtered out. Proceeding with indexing.",
                 tool_name="index_documents")

@@ -377,13 +377,13 @@ class VectorStoreWrapper(BaseToolApiWrapper):
         self._log_tool_event(message=f"Documents for indexing were processed. Total documents: {len(documents)}",
                              tool_name="index_documents")
 
-        # if
-        if
+        # if index_name is provided, add it to metadata of each document
+        if index_name:
             for doc in documents:
                 if not doc.metadata.get('collection'):
-                    doc.metadata['collection'] =
+                    doc.metadata['collection'] = index_name
                 else:
-                    doc.metadata['collection'] += f";{
+                    doc.metadata['collection'] += f";{index_name}"
 
         total_docs = len(documents)
         documents_count = 0

@@ -414,7 +414,8 @@ class VectorStoreWrapper(BaseToolApiWrapper):
             return {"status": "error", "message": f"Error: {format_exc()}"}
         if _documents:
             add_documents(vectorstore=self.vectorstore, documents=_documents)
-        return {"status": "ok", "message": f"successfully indexed {documents_count} documents"
+        return {"status": "ok", "message": f"successfully indexed {documents_count} documents" if documents_count > 0
+                else "No new documents to index."}
 
     def search_documents(self, query:str, doctype: str = 'code',
                          filter:dict|str={}, cut_off: float=0.5,
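Note: a hedged sketch of the new calling convention, assuming an already-configured VectorStoreWrapper instance (named `wrapper` here for illustration); only the parameter names and the return shape come from the diff.

# Hypothetical sketch: index_name is now threaded through indexing, duplicate
# reduction, and collection cleanup. `wrapper` is an assumed, configured instance.
from langchain_core.documents import Document

def docs():
    yield Document(
        page_content="def add(a, b):\n    return a + b",
        metadata={"filename": "math_utils.py", "commit_hash": "abc123"},  # illustrative
    )

result = wrapper.index_documents(
    docs(),             # generator of Documents
    index_name="demo",  # stamped into metadata['collection'] of each document
    clean_index=True,   # drops the previous index for this name first
    is_code=True,       # routes through _reduce_code_duplicates
)
print(result)  # {"status": "ok", "message": "successfully indexed N documents"}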
alita_sdk/runtime/tools/vectorstore_base.py
CHANGED

@@ -216,13 +216,13 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
             return "No indexed collections"
         return collections
 
-    def get_index_meta(self,
-        index_metas = self.vector_adapter.get_index_meta(self,
+    def get_index_meta(self, index_name: str):
+        index_metas = self.vector_adapter.get_index_meta(self, index_name)
         if len(index_metas) > 1:
             raise RuntimeError(f"Multiple index_meta documents found: {index_metas}")
         return index_metas[0] if index_metas else None
 
-    def _clean_collection(self,
+    def _clean_collection(self, index_name: str = ''):
         """
         Clean the vectorstore collection by deleting all indexed data.
         """

@@ -230,13 +230,13 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
             f"Cleaning collection '{self.dataset}'",
             tool_name="_clean_collection"
         )
-        self.vector_adapter.clean_collection(self,
+        self.vector_adapter.clean_collection(self, index_name)
         self._log_tool_event(
             f"Collection '{self.dataset}' has been cleaned. ",
             tool_name="_clean_collection"
         )
 
-    def index_documents(self, documents: Generator[Document, None, None],
+    def index_documents(self, documents: Generator[Document, None, None], index_name: str, progress_step: int = 20, clean_index: bool = True):
         """ Index documents in the vectorstore.
 
         Args:

@@ -245,21 +245,21 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
             clean_index (bool): If True, clean the index before re-indexing all documents.
         """
         if clean_index:
-            self._clean_index(
+            self._clean_index(index_name)
 
-        return self._save_index(list(documents),
+        return self._save_index(list(documents), index_name, progress_step)
 
-    def _clean_index(self,
+    def _clean_index(self, index_name: str):
         logger.info("Cleaning index before re-indexing all documents.")
         self._log_tool_event("Cleaning index before re-indexing all documents. Previous index will be removed", tool_name="index_documents")
         try:
-            self._clean_collection(
+            self._clean_collection(index_name)
             self._log_tool_event("Previous index has been removed",
                                  tool_name="index_documents")
         except Exception as e:
             logger.warning(f"Failed to clean index: {str(e)}. Continuing with re-indexing.")
 
-    def _save_index(self, documents: list[Document],
+    def _save_index(self, documents: list[Document], index_name: Optional[str] = None, progress_step: int = 20):
         from ..langchain.interfaces.llm_processor import add_documents
         #
         for doc in documents:

@@ -268,13 +268,13 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
 
         logger.debug(f"Indexing documents: {documents}")
 
-        # if
-        if
+        # if index_name is provided, add it to metadata of each document
+        if index_name:
             for doc in documents:
                 if not doc.metadata.get('collection'):
-                    doc.metadata['collection'] =
+                    doc.metadata['collection'] = index_name
                 else:
-                    doc.metadata['collection'] += f";{
+                    doc.metadata['collection'] += f";{index_name}"
 
         total_docs = len(documents)
         documents_count = 0

@@ -308,7 +308,8 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
             return {"status": "error", "message": f"Error: {format_exc()}"}
         if _documents:
             add_documents(vectorstore=self.vectorstore, documents=_documents)
-        return {"status": "ok", "message": f"successfully indexed {documents_count} documents"
+        return {"status": "ok", "message": f"successfully indexed {documents_count} documents" if documents_count > 0
+                else "no documents to index"}
 
     def search_documents(self, query:str, doctype: str = 'code',
                          filter:dict|str={}, cut_off: float=0.5,
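Note: the reworked get_index_meta contract, in the same sketch setup as above; the single-or-None return and the duplicate guard are taken from the hunk.

# Hypothetical continuation of the sketch above: get_index_meta now takes an
# index name, returns one metadata document or None, and raises on duplicates.
meta = wrapper.get_index_meta("demo")
print(meta if meta is not None else "no index_meta recorded for 'demo'")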
alita_sdk/runtime/utils/utils.py
CHANGED

@@ -14,6 +14,7 @@ class IndexerKeywords(Enum):
     INDEX_META_TYPE = 'index_meta'
     INDEX_META_IN_PROGRESS = 'in_progress'
     INDEX_META_COMPLETED = 'completed'
+    INDEX_META_FAILED = 'failed'
 
 # This pattern matches characters that are NOT alphanumeric, underscores, or hyphens
 clean_string_pattern = re.compile(r'[^a-zA-Z0-9_.-]')
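Note: a minimal hedged sketch of consuming the new enum member; the import path follows the file location, and the surrounding indexing flow is assumed.

# Hypothetical sketch: reading the new failure status on IndexerKeywords.
from alita_sdk.runtime.utils.utils import IndexerKeywords

meta = {"status": IndexerKeywords.INDEX_META_FAILED.value}  # illustrative record
if meta["status"] == IndexerKeywords.INDEX_META_FAILED.value:
    print("index build failed; consider re-indexing with clean_index=True")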