alita-sdk 0.3.263__py3-none-any.whl → 0.3.499__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (248)
  1. alita_sdk/cli/__init__.py +10 -0
  2. alita_sdk/cli/__main__.py +17 -0
  3. alita_sdk/cli/agent/__init__.py +5 -0
  4. alita_sdk/cli/agent/default.py +258 -0
  5. alita_sdk/cli/agent_executor.py +155 -0
  6. alita_sdk/cli/agent_loader.py +215 -0
  7. alita_sdk/cli/agent_ui.py +228 -0
  8. alita_sdk/cli/agents.py +3601 -0
  9. alita_sdk/cli/callbacks.py +647 -0
  10. alita_sdk/cli/cli.py +168 -0
  11. alita_sdk/cli/config.py +306 -0
  12. alita_sdk/cli/context/__init__.py +30 -0
  13. alita_sdk/cli/context/cleanup.py +198 -0
  14. alita_sdk/cli/context/manager.py +731 -0
  15. alita_sdk/cli/context/message.py +285 -0
  16. alita_sdk/cli/context/strategies.py +289 -0
  17. alita_sdk/cli/context/token_estimation.py +127 -0
  18. alita_sdk/cli/formatting.py +182 -0
  19. alita_sdk/cli/input_handler.py +419 -0
  20. alita_sdk/cli/inventory.py +1256 -0
  21. alita_sdk/cli/mcp_loader.py +315 -0
  22. alita_sdk/cli/toolkit.py +327 -0
  23. alita_sdk/cli/toolkit_loader.py +85 -0
  24. alita_sdk/cli/tools/__init__.py +43 -0
  25. alita_sdk/cli/tools/approval.py +224 -0
  26. alita_sdk/cli/tools/filesystem.py +1751 -0
  27. alita_sdk/cli/tools/planning.py +389 -0
  28. alita_sdk/cli/tools/terminal.py +414 -0
  29. alita_sdk/community/__init__.py +64 -8
  30. alita_sdk/community/inventory/__init__.py +224 -0
  31. alita_sdk/community/inventory/config.py +257 -0
  32. alita_sdk/community/inventory/enrichment.py +2137 -0
  33. alita_sdk/community/inventory/extractors.py +1469 -0
  34. alita_sdk/community/inventory/ingestion.py +3172 -0
  35. alita_sdk/community/inventory/knowledge_graph.py +1457 -0
  36. alita_sdk/community/inventory/parsers/__init__.py +218 -0
  37. alita_sdk/community/inventory/parsers/base.py +295 -0
  38. alita_sdk/community/inventory/parsers/csharp_parser.py +907 -0
  39. alita_sdk/community/inventory/parsers/go_parser.py +851 -0
  40. alita_sdk/community/inventory/parsers/html_parser.py +389 -0
  41. alita_sdk/community/inventory/parsers/java_parser.py +593 -0
  42. alita_sdk/community/inventory/parsers/javascript_parser.py +629 -0
  43. alita_sdk/community/inventory/parsers/kotlin_parser.py +768 -0
  44. alita_sdk/community/inventory/parsers/markdown_parser.py +362 -0
  45. alita_sdk/community/inventory/parsers/python_parser.py +604 -0
  46. alita_sdk/community/inventory/parsers/rust_parser.py +858 -0
  47. alita_sdk/community/inventory/parsers/swift_parser.py +832 -0
  48. alita_sdk/community/inventory/parsers/text_parser.py +322 -0
  49. alita_sdk/community/inventory/parsers/yaml_parser.py +370 -0
  50. alita_sdk/community/inventory/patterns/__init__.py +61 -0
  51. alita_sdk/community/inventory/patterns/ast_adapter.py +380 -0
  52. alita_sdk/community/inventory/patterns/loader.py +348 -0
  53. alita_sdk/community/inventory/patterns/registry.py +198 -0
  54. alita_sdk/community/inventory/presets.py +535 -0
  55. alita_sdk/community/inventory/retrieval.py +1403 -0
  56. alita_sdk/community/inventory/toolkit.py +173 -0
  57. alita_sdk/community/inventory/visualize.py +1370 -0
  58. alita_sdk/configurations/__init__.py +10 -0
  59. alita_sdk/configurations/ado.py +4 -2
  60. alita_sdk/configurations/azure_search.py +1 -1
  61. alita_sdk/configurations/bigquery.py +1 -1
  62. alita_sdk/configurations/bitbucket.py +94 -2
  63. alita_sdk/configurations/browser.py +18 -0
  64. alita_sdk/configurations/carrier.py +19 -0
  65. alita_sdk/configurations/confluence.py +96 -1
  66. alita_sdk/configurations/delta_lake.py +1 -1
  67. alita_sdk/configurations/figma.py +0 -5
  68. alita_sdk/configurations/github.py +65 -1
  69. alita_sdk/configurations/gitlab.py +79 -0
  70. alita_sdk/configurations/google_places.py +17 -0
  71. alita_sdk/configurations/jira.py +103 -0
  72. alita_sdk/configurations/postman.py +1 -1
  73. alita_sdk/configurations/qtest.py +1 -3
  74. alita_sdk/configurations/report_portal.py +19 -0
  75. alita_sdk/configurations/salesforce.py +19 -0
  76. alita_sdk/configurations/service_now.py +1 -12
  77. alita_sdk/configurations/sharepoint.py +19 -0
  78. alita_sdk/configurations/sonar.py +18 -0
  79. alita_sdk/configurations/sql.py +20 -0
  80. alita_sdk/configurations/testio.py +18 -0
  81. alita_sdk/configurations/testrail.py +88 -0
  82. alita_sdk/configurations/xray.py +94 -1
  83. alita_sdk/configurations/zephyr_enterprise.py +94 -1
  84. alita_sdk/configurations/zephyr_essential.py +95 -0
  85. alita_sdk/runtime/clients/artifact.py +12 -2
  86. alita_sdk/runtime/clients/client.py +235 -66
  87. alita_sdk/runtime/clients/mcp_discovery.py +342 -0
  88. alita_sdk/runtime/clients/mcp_manager.py +262 -0
  89. alita_sdk/runtime/clients/sandbox_client.py +373 -0
  90. alita_sdk/runtime/langchain/assistant.py +123 -17
  91. alita_sdk/runtime/langchain/constants.py +8 -1
  92. alita_sdk/runtime/langchain/document_loaders/AlitaDocxMammothLoader.py +315 -3
  93. alita_sdk/runtime/langchain/document_loaders/AlitaExcelLoader.py +209 -31
  94. alita_sdk/runtime/langchain/document_loaders/AlitaImageLoader.py +1 -1
  95. alita_sdk/runtime/langchain/document_loaders/AlitaJSONLoader.py +8 -2
  96. alita_sdk/runtime/langchain/document_loaders/AlitaMarkdownLoader.py +66 -0
  97. alita_sdk/runtime/langchain/document_loaders/AlitaPDFLoader.py +79 -10
  98. alita_sdk/runtime/langchain/document_loaders/AlitaPowerPointLoader.py +52 -15
  99. alita_sdk/runtime/langchain/document_loaders/AlitaPythonLoader.py +9 -0
  100. alita_sdk/runtime/langchain/document_loaders/AlitaTableLoader.py +1 -4
  101. alita_sdk/runtime/langchain/document_loaders/AlitaTextLoader.py +15 -2
  102. alita_sdk/runtime/langchain/document_loaders/ImageParser.py +30 -0
  103. alita_sdk/runtime/langchain/document_loaders/constants.py +187 -40
  104. alita_sdk/runtime/langchain/interfaces/llm_processor.py +4 -2
  105. alita_sdk/runtime/langchain/langraph_agent.py +406 -91
  106. alita_sdk/runtime/langchain/utils.py +51 -8
  107. alita_sdk/runtime/llms/preloaded.py +2 -6
  108. alita_sdk/runtime/models/mcp_models.py +61 -0
  109. alita_sdk/runtime/toolkits/__init__.py +26 -0
  110. alita_sdk/runtime/toolkits/application.py +9 -2
  111. alita_sdk/runtime/toolkits/artifact.py +19 -7
  112. alita_sdk/runtime/toolkits/datasource.py +13 -6
  113. alita_sdk/runtime/toolkits/mcp.py +780 -0
  114. alita_sdk/runtime/toolkits/planning.py +178 -0
  115. alita_sdk/runtime/toolkits/subgraph.py +11 -6
  116. alita_sdk/runtime/toolkits/tools.py +214 -60
  117. alita_sdk/runtime/toolkits/vectorstore.py +9 -4
  118. alita_sdk/runtime/tools/__init__.py +22 -0
  119. alita_sdk/runtime/tools/application.py +16 -4
  120. alita_sdk/runtime/tools/artifact.py +312 -19
  121. alita_sdk/runtime/tools/function.py +100 -4
  122. alita_sdk/runtime/tools/graph.py +81 -0
  123. alita_sdk/runtime/tools/image_generation.py +212 -0
  124. alita_sdk/runtime/tools/llm.py +539 -180
  125. alita_sdk/runtime/tools/mcp_inspect_tool.py +284 -0
  126. alita_sdk/runtime/tools/mcp_remote_tool.py +181 -0
  127. alita_sdk/runtime/tools/mcp_server_tool.py +3 -1
  128. alita_sdk/runtime/tools/planning/__init__.py +36 -0
  129. alita_sdk/runtime/tools/planning/models.py +246 -0
  130. alita_sdk/runtime/tools/planning/wrapper.py +607 -0
  131. alita_sdk/runtime/tools/router.py +2 -1
  132. alita_sdk/runtime/tools/sandbox.py +375 -0
  133. alita_sdk/runtime/tools/vectorstore.py +62 -63
  134. alita_sdk/runtime/tools/vectorstore_base.py +156 -85
  135. alita_sdk/runtime/utils/AlitaCallback.py +106 -20
  136. alita_sdk/runtime/utils/mcp_client.py +465 -0
  137. alita_sdk/runtime/utils/mcp_oauth.py +244 -0
  138. alita_sdk/runtime/utils/mcp_sse_client.py +405 -0
  139. alita_sdk/runtime/utils/mcp_tools_discovery.py +124 -0
  140. alita_sdk/runtime/utils/streamlit.py +41 -14
  141. alita_sdk/runtime/utils/toolkit_utils.py +28 -9
  142. alita_sdk/runtime/utils/utils.py +14 -0
  143. alita_sdk/tools/__init__.py +78 -35
  144. alita_sdk/tools/ado/__init__.py +0 -1
  145. alita_sdk/tools/ado/repos/__init__.py +10 -6
  146. alita_sdk/tools/ado/repos/repos_wrapper.py +12 -11
  147. alita_sdk/tools/ado/test_plan/__init__.py +10 -7
  148. alita_sdk/tools/ado/test_plan/test_plan_wrapper.py +56 -23
  149. alita_sdk/tools/ado/wiki/__init__.py +10 -11
  150. alita_sdk/tools/ado/wiki/ado_wrapper.py +114 -28
  151. alita_sdk/tools/ado/work_item/__init__.py +10 -11
  152. alita_sdk/tools/ado/work_item/ado_wrapper.py +63 -10
  153. alita_sdk/tools/advanced_jira_mining/__init__.py +10 -7
  154. alita_sdk/tools/aws/delta_lake/__init__.py +13 -11
  155. alita_sdk/tools/azure_ai/search/__init__.py +11 -7
  156. alita_sdk/tools/base_indexer_toolkit.py +392 -86
  157. alita_sdk/tools/bitbucket/__init__.py +18 -11
  158. alita_sdk/tools/bitbucket/api_wrapper.py +52 -9
  159. alita_sdk/tools/bitbucket/cloud_api_wrapper.py +5 -5
  160. alita_sdk/tools/browser/__init__.py +40 -16
  161. alita_sdk/tools/browser/crawler.py +3 -1
  162. alita_sdk/tools/browser/utils.py +15 -6
  163. alita_sdk/tools/carrier/__init__.py +17 -17
  164. alita_sdk/tools/carrier/backend_reports_tool.py +8 -4
  165. alita_sdk/tools/carrier/excel_reporter.py +8 -4
  166. alita_sdk/tools/chunkers/__init__.py +3 -1
  167. alita_sdk/tools/chunkers/code/codeparser.py +1 -1
  168. alita_sdk/tools/chunkers/sematic/json_chunker.py +1 -0
  169. alita_sdk/tools/chunkers/sematic/markdown_chunker.py +97 -6
  170. alita_sdk/tools/chunkers/sematic/proposal_chunker.py +1 -1
  171. alita_sdk/tools/chunkers/universal_chunker.py +270 -0
  172. alita_sdk/tools/cloud/aws/__init__.py +9 -6
  173. alita_sdk/tools/cloud/azure/__init__.py +9 -6
  174. alita_sdk/tools/cloud/gcp/__init__.py +9 -6
  175. alita_sdk/tools/cloud/k8s/__init__.py +9 -6
  176. alita_sdk/tools/code/linter/__init__.py +7 -7
  177. alita_sdk/tools/code/loaders/codesearcher.py +3 -2
  178. alita_sdk/tools/code/sonar/__init__.py +18 -12
  179. alita_sdk/tools/code_indexer_toolkit.py +199 -0
  180. alita_sdk/tools/confluence/__init__.py +14 -11
  181. alita_sdk/tools/confluence/api_wrapper.py +198 -58
  182. alita_sdk/tools/confluence/loader.py +10 -0
  183. alita_sdk/tools/custom_open_api/__init__.py +9 -4
  184. alita_sdk/tools/elastic/__init__.py +8 -7
  185. alita_sdk/tools/elitea_base.py +543 -64
  186. alita_sdk/tools/figma/__init__.py +10 -8
  187. alita_sdk/tools/figma/api_wrapper.py +352 -153
  188. alita_sdk/tools/github/__init__.py +13 -11
  189. alita_sdk/tools/github/api_wrapper.py +9 -26
  190. alita_sdk/tools/github/github_client.py +75 -12
  191. alita_sdk/tools/github/schemas.py +2 -1
  192. alita_sdk/tools/gitlab/__init__.py +11 -10
  193. alita_sdk/tools/gitlab/api_wrapper.py +135 -45
  194. alita_sdk/tools/gitlab_org/__init__.py +11 -9
  195. alita_sdk/tools/google/bigquery/__init__.py +12 -13
  196. alita_sdk/tools/google_places/__init__.py +18 -10
  197. alita_sdk/tools/jira/__init__.py +14 -8
  198. alita_sdk/tools/jira/api_wrapper.py +315 -168
  199. alita_sdk/tools/keycloak/__init__.py +8 -7
  200. alita_sdk/tools/localgit/local_git.py +56 -54
  201. alita_sdk/tools/memory/__init__.py +27 -11
  202. alita_sdk/tools/non_code_indexer_toolkit.py +7 -2
  203. alita_sdk/tools/ocr/__init__.py +8 -7
  204. alita_sdk/tools/openapi/__init__.py +10 -1
  205. alita_sdk/tools/pandas/__init__.py +8 -7
  206. alita_sdk/tools/pandas/api_wrapper.py +7 -25
  207. alita_sdk/tools/postman/__init__.py +8 -10
  208. alita_sdk/tools/postman/api_wrapper.py +19 -8
  209. alita_sdk/tools/postman/postman_analysis.py +8 -1
  210. alita_sdk/tools/pptx/__init__.py +8 -9
  211. alita_sdk/tools/qtest/__init__.py +19 -13
  212. alita_sdk/tools/qtest/api_wrapper.py +1784 -88
  213. alita_sdk/tools/rally/__init__.py +10 -9
  214. alita_sdk/tools/report_portal/__init__.py +20 -15
  215. alita_sdk/tools/salesforce/__init__.py +19 -15
  216. alita_sdk/tools/servicenow/__init__.py +14 -11
  217. alita_sdk/tools/sharepoint/__init__.py +14 -13
  218. alita_sdk/tools/sharepoint/api_wrapper.py +179 -39
  219. alita_sdk/tools/sharepoint/authorization_helper.py +191 -1
  220. alita_sdk/tools/sharepoint/utils.py +8 -2
  221. alita_sdk/tools/slack/__init__.py +10 -7
  222. alita_sdk/tools/sql/__init__.py +19 -18
  223. alita_sdk/tools/sql/api_wrapper.py +71 -23
  224. alita_sdk/tools/testio/__init__.py +18 -12
  225. alita_sdk/tools/testrail/__init__.py +10 -10
  226. alita_sdk/tools/testrail/api_wrapper.py +213 -45
  227. alita_sdk/tools/utils/__init__.py +28 -4
  228. alita_sdk/tools/utils/content_parser.py +181 -61
  229. alita_sdk/tools/utils/text_operations.py +254 -0
  230. alita_sdk/tools/vector_adapters/VectorStoreAdapter.py +83 -27
  231. alita_sdk/tools/xray/__init__.py +12 -7
  232. alita_sdk/tools/xray/api_wrapper.py +58 -113
  233. alita_sdk/tools/zephyr/__init__.py +9 -6
  234. alita_sdk/tools/zephyr_enterprise/__init__.py +13 -8
  235. alita_sdk/tools/zephyr_enterprise/api_wrapper.py +17 -7
  236. alita_sdk/tools/zephyr_essential/__init__.py +13 -9
  237. alita_sdk/tools/zephyr_essential/api_wrapper.py +289 -47
  238. alita_sdk/tools/zephyr_essential/client.py +6 -4
  239. alita_sdk/tools/zephyr_scale/__init__.py +10 -7
  240. alita_sdk/tools/zephyr_scale/api_wrapper.py +6 -2
  241. alita_sdk/tools/zephyr_squad/__init__.py +9 -6
  242. {alita_sdk-0.3.263.dist-info → alita_sdk-0.3.499.dist-info}/METADATA +180 -33
  243. alita_sdk-0.3.499.dist-info/RECORD +433 -0
  244. alita_sdk-0.3.499.dist-info/entry_points.txt +2 -0
  245. alita_sdk-0.3.263.dist-info/RECORD +0 -342
  246. {alita_sdk-0.3.263.dist-info → alita_sdk-0.3.499.dist-info}/WHEEL +0 -0
  247. {alita_sdk-0.3.263.dist-info → alita_sdk-0.3.499.dist-info}/licenses/LICENSE +0 -0
  248. {alita_sdk-0.3.263.dist-info → alita_sdk-0.3.499.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,375 @@
1
+ import asyncio
2
+ import logging
3
+ import subprocess
4
+ import os
5
+ from typing import Any, Type, Optional, Dict, List, Literal, Union
6
+ from copy import deepcopy
7
+ from pathlib import Path
8
+
9
+ from langchain_core.tools import BaseTool, BaseToolkit
10
+ from langchain_core.messages import ToolCall
11
+ from pydantic import BaseModel, create_model, ConfigDict, Field
12
+ from pydantic.fields import FieldInfo
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+ name = "pyodide"
17
+
18
+
19
def get_tools(tools_list: list, alita_client=None, llm=None, memory_store=None):
    """
    Build sandbox tools for every matching tool configuration.

    Args:
        tools_list: List of tool configurations
        alita_client: Alita client instance for sandbox tools
        llm: LLM client instance (unused for sandbox)
        memory_store: Optional memory store instance (unused for sandbox)

    Returns:
        List of sandbox tools
    """
    collected = []

    for config in tools_list:
        # Only configurations explicitly marked as sandbox are handled here.
        if config.get('type') != 'sandbox' and config.get('toolkit_name') != 'sandbox':
            continue
        try:
            settings = config['settings']
            toolkit = SandboxToolkit.get_toolkit(
                stateful=settings.get('stateful', False),
                allow_net=settings.get('allow_net', True),
                alita_client=alita_client,
                toolkit_name=config.get('toolkit_name', '')
            )
            collected.extend(toolkit.get_tools())
        except Exception as e:
            logger.error(f"Error in sandbox toolkit get_tools: {e}")
            logger.error(f"Tool config: {config}")
            raise

    return collected
50
+
51
+
52
+ def _is_deno_available() -> bool:
53
+ """Check if Deno is available in the PATH"""
54
+ try:
55
+ result = subprocess.run(
56
+ ["deno", "--version"],
57
+ capture_output=True,
58
+ text=True,
59
+ timeout=10
60
+ )
61
+ return result.returncode == 0
62
+ except (subprocess.TimeoutExpired, subprocess.CalledProcessError, FileNotFoundError):
63
+ return False
64
+
65
+
66
def _setup_pyodide_cache_env() -> None:
    """Setup Pyodide caching environment variables for performance optimization [NO-OP]"""
    # Diagnostic only: reports the cache-related env vars, mutates nothing.
    try:
        for env_name in ("SANDBOX_BASE", "DENO_DIR"):
            logger.info("Sandbox env: %s -> %s", env_name, os.environ.get(env_name, "n/a"))
    except Exception as e:
        logger.warning(f"Could not setup Pyodide cache environment: {e}")
73
+
74
+
75
# Input schema shared by the sandbox tools: a single required `code` string.
sandbox_tool_input = create_model(
    "SandboxToolInput",
    code=(str, Field(description="Python code to execute in the sandbox environment")),
)
80
+
81
+
82
class PyodideSandboxTool(BaseTool):
    """
    A tool that provides secure Python code execution using Pyodide (Python compiled to WebAssembly).

    Leverages langchain-sandbox to provide a safe environment for running untrusted
    Python code. Stateless by default for better performance; use
    StatefulPyodideSandboxTool to preserve state between executions.
    """

    name: str = "pyodide_sandbox"
    description: str = """Execute Python code in a secure sandbox environment using Pyodide.
    This tool allows safe execution of Python code without access to the host system.
    Use this tool when you need to:
    - Execute Python code snippets
    - Perform calculations or data analysis
    - Test Python algorithms
    - Run code that requires isolation from the host system

    The sandbox supports most Python standard library modules and can install additional packages.
    Note: File access and some system operations are restricted for security.
    Optimized for performance with local caching (stateless by default for faster execution).
    """
    args_schema: Type[BaseModel] = sandbox_tool_input
    stateful: bool = False  # Default to stateless for better performance
    allow_net: bool = True
    # Opaque session state returned by PyodideSandbox; only populated when stateful=True.
    session_bytes: Optional[bytes] = None
    session_metadata: Optional[Dict] = None
    alita_client: Optional[Any] = None

    def __init__(self, **kwargs: Any) -> None:
        super().__init__(**kwargs)
        self._sandbox = None
        # Setup caching environment for optimal performance
        _setup_pyodide_cache_env()
        # NOTE: raises RuntimeError/ImportError if Deno or langchain-sandbox is missing.
        self._initialize_sandbox()

    def _prepare_pyodide_input(self, code: str) -> str:
        """Prepare input for PyodideSandboxTool by injecting state and alita_client into the code block."""
        pyodide_predata = ""

        # Add alita_client if available
        if self.alita_client:
            try:
                # Get the directory of the current file and construct the path to sandbox_client.py
                current_dir = Path(__file__).parent
                sandbox_client_path = current_dir.parent / 'clients' / 'sandbox_client.py'

                with open(sandbox_client_path, 'r') as f:
                    sandbox_client_code = f.read()
                pyodide_predata += f"{sandbox_client_code}\n"
                # NOTE(review): the auth token is interpolated in clear text into the
                # code executed inside the sandbox, so sandboxed code can read it —
                # confirm this matches the sandbox trust model.
                pyodide_predata += (f"alita_client = SandboxClient(base_url='{self.alita_client.base_url}',"
                                    f"project_id={self.alita_client.project_id},"
                                    f"auth_token='{self.alita_client.auth_token}')\n")
            except FileNotFoundError:
                # Fixed: was an f-string with no placeholders.
                logger.error("sandbox_client.py not found. Ensure the file exists.")

        return f"#elitea simplified client\n{pyodide_predata}{code}"

    def _initialize_sandbox(self) -> None:
        """Initialize the PyodideSandbox instance with optimized settings.

        Raises:
            RuntimeError: If Deno is not found in PATH.
            ImportError: If langchain-sandbox is not installed.
        """
        try:
            # Check if Deno is available
            if not _is_deno_available():
                error_msg = (
                    "Deno is required for PyodideSandbox but is not installed. "
                    "Please run the bootstrap.sh script or install Deno manually."
                )
                logger.error(error_msg)
                raise RuntimeError(error_msg)

            from langchain_sandbox import PyodideSandbox

            # Air-gapped settings
            sandbox_base = os.environ.get("SANDBOX_BASE", os.path.expanduser('~/.cache/pyodide'))
            sandbox_tmp = os.path.join(sandbox_base, "tmp")
            deno_cache = os.environ.get("DENO_DIR", os.path.expanduser('~/.cache/deno'))

            # Configure sandbox with performance optimizations
            self._sandbox = PyodideSandbox(
                stateful=self.stateful,
                # Filesystem/env permissions are restricted to the cache directories.
                allow_env=["SANDBOX_BASE"],
                allow_read=[sandbox_base, sandbox_tmp, deno_cache],
                allow_write=[sandbox_tmp, deno_cache],
                # Network access enables in-sandbox package installation.
                allow_net=self.allow_net,
                # Use auto node_modules_dir for better caching
                node_modules_dir="auto"
            )
            logger.info(f"PyodideSandbox initialized successfully (stateful={self.stateful})")
        except ImportError as e:
            if "langchain_sandbox" in str(e):
                error_msg = (
                    "langchain-sandbox is required for the PyodideSandboxTool. "
                    "Please install it with: pip install langchain-sandbox"
                )
                logger.error(error_msg)
                raise ImportError(error_msg) from e
            else:
                logger.error(f"Failed to import required module: {e}")
                raise
        except Exception as e:
            logger.error(f"Failed to initialize PyodideSandbox: {e}")
            raise

    def _run(self, code: str) -> Union[str, Dict[str, Any]]:
        """
        Synchronous version - runs the async method in a new event loop.

        Returns the result dict from _arun on success, or an error string
        when dependencies are missing or execution fails.
        """
        try:
            # Check if sandbox is initialized, if not try to initialize
            if self._sandbox is None:
                self._initialize_sandbox()

            # Prepare code with state and client injection
            prepared_code = self._prepare_pyodide_input(code)

            # Check if we're already in an async context
            try:
                asyncio.get_running_loop()  # fixed: result was bound to an unused local
                # We're in an async context, but _run is supposed to be sync:
                # run the coroutine on a fresh loop in a worker thread.
                import concurrent.futures
                with concurrent.futures.ThreadPoolExecutor() as executor:
                    future = executor.submit(asyncio.run, self._arun(prepared_code))
                    return future.result()
            except RuntimeError:
                # No running loop, safe to use asyncio.run
                return asyncio.run(self._arun(prepared_code))
        except (ImportError, RuntimeError) as e:
            # Handle specific dependency errors gracefully
            error_msg = str(e)
            if "langchain-sandbox" in error_msg:
                return "❌ PyodideSandboxTool requires langchain-sandbox. Install with: pip install langchain-sandbox"
            elif "Deno" in error_msg:
                return "❌ PyodideSandboxTool requires Deno. Install from: https://docs.deno.com/runtime/getting_started/installation/"
            else:
                return f"❌ PyodideSandboxTool initialization failed: {error_msg}"
        except Exception as e:
            logger.error(f"Error executing code in sandbox: {e}")
            return f"Error executing code: {str(e)}"

    async def _arun(self, code: str) -> Dict[str, Any]:
        """
        Execute Python code in the Pyodide sandbox.

        Returns:
            Dict that may contain "result", "output" (stdout), "error"
            (stderr or execution error), "status" (on failure) and
            "execution_info" (timing / installed packages).
            (Annotation fixed: the method has always returned a dict,
            never the previously-declared str.)
        """
        try:
            if self._sandbox is None:
                self._initialize_sandbox()

            # Execute the code with session state if available
            result = await self._sandbox.execute(
                code,
                session_bytes=self.session_bytes,
                session_metadata=self.session_metadata
            )

            # Update session state for stateful execution
            if self.stateful:
                self.session_bytes = result.session_bytes
                self.session_metadata = result.session_metadata

            result_dict = {}

            if result.result is not None:
                result_dict["result"] = result.result

            if result.stdout:
                result_dict["output"] = result.stdout

            if result.stderr:
                result_dict["error"] = result.stderr

            if result.status == 'error':
                result_dict["status"] = "Execution failed"

            execution_info = f"Execution time: {result.execution_time:.2f}s"
            if result.session_metadata and 'packages' in result.session_metadata:
                packages = result.session_metadata.get('packages', [])
                if packages:
                    execution_info += f", Packages: {', '.join(packages)}"

            result_dict["execution_info"] = execution_info
            return result_dict

        except Exception as e:
            logger.error(f"Error executing code in sandbox: {e}")
            return {"error": f"Error executing code: {str(e)}"}
268
+
269
+
270
class StatefulPyodideSandboxTool(PyodideSandboxTool):
    """
    Stateful variant of PyodideSandboxTool.

    Preserves variables, imports, and function definitions between
    successive tool calls by carrying session state across executions.
    """

    name: str = "stateful_pyodide_sandbox"
    description: str = """Execute Python code in a stateful sandbox environment using Pyodide.
    This tool maintains state between executions, preserving variables, imports, and function definitions.
    Use this tool when you need to:
    - Build upon previous code executions
    - Maintain variables across multiple calls
    - Develop complex programs step by step
    - Preserve imported libraries and defined functions

    The sandbox supports most Python standard library modules and can install additional packages.
    Note: File access and some system operations are restricted for security.
    """

    def __init__(self, **kwargs: Any) -> None:
        # Stateful mode is mandatory for this subclass, regardless of caller input.
        kwargs['stateful'] = True
        super().__init__(**kwargs)
292
+
293
+
294
# Factory function for creating sandbox tools
def create_sandbox_tool(stateful: bool = False, allow_net: bool = True, alita_client: Optional[Any] = None) -> BaseTool:
    """
    Factory function to create sandbox tools with specified configuration.

    Note: This tool requires Deno to be installed and available in PATH.
    For installation and optimization, run the bootstrap.sh script.

    Args:
        stateful: Whether to maintain state between executions (default: False for better performance)
        allow_net: Whether to allow network access (for package installation)
        alita_client: Optional Alita client exposed to sandboxed code as ``alita_client``
            (was previously undocumented)

    Returns:
        Configured sandbox tool instance

    Raises:
        ImportError: If langchain-sandbox is not installed
        RuntimeError: If Deno is not found in PATH

    Performance Notes:
        - Stateless mode (default) is faster and avoids session state overhead
        - Run bootstrap.sh script to enable local caching and reduce initialization time
        - Cached wheels reduce package download time from ~4.76s to near-instant
    """
    if stateful:
        # StatefulPyodideSandboxTool forces stateful=True internally.
        return StatefulPyodideSandboxTool(allow_net=allow_net, alita_client=alita_client)
    # Explicit stateful=False keeps intent obvious even though it is the default.
    return PyodideSandboxTool(stateful=False, allow_net=allow_net, alita_client=alita_client)
322
+
323
+
324
class SandboxToolkit(BaseToolkit):
    """Toolkit exposing the Pyodide sandbox tools."""

    tools: List[BaseTool] = []

    @staticmethod
    def toolkit_config_schema() -> Type[BaseModel]:
        """Return the pydantic model describing this toolkit's configuration.

        Tool schemas are derived from the shared input model instead of
        instantiating sample tools: constructing a tool runs sandbox
        initialization, which raises RuntimeError/ImportError when Deno or
        langchain-sandbox is missing — and that made the config schema
        itself unavailable on machines without those dependencies.
        """
        # Both tools share the same input schema (a single `code` string).
        # Names mirror the `name` field defaults on the tool classes.
        args_schema = sandbox_tool_input.model_json_schema()
        selected_tools = {
            "pyodide_sandbox": args_schema,
            "stateful_pyodide_sandbox": args_schema,
        }

        return create_model(
            'sandbox',
            stateful=(bool, Field(default=False, description="Whether to maintain state between executions")),
            allow_net=(bool, Field(default=True, description="Whether to allow network access for package installation")),
            selected_tools=(List[Literal[tuple(selected_tools)]],
                            Field(default=[], json_schema_extra={'args_schemas': selected_tools})),

            __config__=ConfigDict(json_schema_extra={
                'metadata': {
                    "label": "Python Sandbox",
                    "icon_url": "sandbox.svg",
                    "hidden": False,
                    "categories": ["code", "execution", "internal_tool"],
                    "extra_categories": ["python", "pyodide", "sandbox", "code execution"],
                }
            })
        )

    @classmethod
    def get_toolkit(cls, stateful: bool = False, allow_net: bool = True, alita_client=None, **kwargs):
        """
        Get toolkit with sandbox tools.

        Args:
            stateful: Whether to maintain state between executions
            allow_net: Whether to allow network access
            alita_client: Alita client instance for sandbox tools
            **kwargs: Additional arguments (accepted for config compatibility, ignored)
        """
        if stateful:
            tool = StatefulPyodideSandboxTool(allow_net=allow_net, alita_client=alita_client)
        else:
            tool = PyodideSandboxTool(stateful=False, allow_net=allow_net, alita_client=alita_client)

        return cls(tools=[tool])

    def get_tools(self):
        """Return the configured sandbox tool instances."""
        return self.tools
@@ -1,7 +1,7 @@
1
1
  import json
2
2
  import math
3
3
  import types
4
- from typing import Any, Optional, List, Dict, Callable, Generator
4
+ from typing import Any, Optional, List, Dict, Callable, Generator, OrderedDict
5
5
 
6
6
  from langchain_core.documents import Document
7
7
  from pydantic import BaseModel, model_validator, Field
@@ -12,7 +12,6 @@ from alita_sdk.tools.vector_adapters.VectorStoreAdapter import VectorStoreAdapte
12
12
  from logging import getLogger
13
13
 
14
14
  from ..utils.logging import dispatch_custom_event
15
- from ..utils.utils import IndexerKeywords
16
15
 
17
16
  logger = getLogger(__name__)
18
17
 
@@ -73,6 +72,10 @@ class StepBackSearchDocumentsModel(BaseModel):
73
72
  }""",
74
73
  default=None
75
74
  )
75
+ extended_search: Optional[List[str]] = Field(
76
+ description="List of chunk types to search for (title, summary, propositions, keywords, documents)",
77
+ default=None
78
+ )
76
79
  reranking_config: Optional[Dict[str, Dict[str, Any]]] = Field(
77
80
  description="""Reranking configuration. Example:
78
81
  {
@@ -87,10 +90,6 @@ class StepBackSearchDocumentsModel(BaseModel):
87
90
  }""",
88
91
  default=None
89
92
  )
90
- extended_search: Optional[List[str]] = Field(
91
- description="List of chunk types to search for (title, summary, propositions, keywords, documents)",
92
- default=None
93
- )
94
93
 
95
94
  STEPBACK_PROMPT = """Your task is to convert provided question into a more generic question that will be used for similarity search.
96
95
  Remove all not important words, question words, but save all names, dates and acronym as in original question.
@@ -138,7 +137,7 @@ class VectorStoreWrapper(BaseToolApiWrapper):
138
137
  embedding_model_params: dict
139
138
  vectorstore_type: str
140
139
  vectorstore_params: dict
141
- max_docs_per_add: int = 100
140
+ max_docs_per_add: int = 20
142
141
  dataset: str = None
143
142
  embedding: Any = None
144
143
  vectorstore: Any = None
@@ -208,16 +207,33 @@ class VectorStoreWrapper(BaseToolApiWrapper):
208
207
  tool_name="_remove_collection"
209
208
  )
210
209
 
211
- def _get_indexed_ids(self, collection_suffix: Optional[str] = '') -> List[str]:
210
+ def _get_indexed_ids(self, index_name: Optional[str] = '') -> List[str]:
212
211
  """Get all indexed document IDs from vectorstore"""
213
- return self.vector_adapter.get_indexed_ids(self, collection_suffix)
214
-
215
- def list_collections(self) -> List[str]:
216
- """List all collections in the vectorstore."""
217
-
218
- return self.vector_adapter.list_collections(self)
212
+ return self.vector_adapter.get_indexed_ids(self, index_name)
213
+
214
+ def list_collections(self) -> Any:
215
+ """List all collections in the vectorstore.
216
+ Returns a list of collection names, or if no collections exist,
217
+ returns a dict with an empty list and a message."""
218
+ raw = self.vector_adapter.list_collections(self)
219
+ # Normalize raw result to a list of names
220
+ if not raw:
221
+ # No collections found
222
+ return {"collections": [], "message": "No indexed collections"}
223
+ if isinstance(raw, str):
224
+ # e.g., Chroma adapter returns comma-separated string
225
+ cols = [c for c in raw.split(',') if c]
226
+ else:
227
+ try:
228
+ cols = list(raw)
229
+ except Exception:
230
+ # Unexpected type, return raw directly
231
+ return raw
232
+ if not cols:
233
+ return {"collections": [], "message": "No indexed collections"}
234
+ return cols
219
235
 
220
- def _clean_collection(self, collection_suffix: str = ''):
236
+ def _clean_collection(self, index_name: str = ''):
221
237
  """
222
238
  Clean the vectorstore collection by deleting all indexed data.
223
239
  """
@@ -225,19 +241,15 @@ class VectorStoreWrapper(BaseToolApiWrapper):
225
241
  f"Cleaning collection '{self.dataset}'",
226
242
  tool_name="_clean_collection"
227
243
  )
228
- self.vector_adapter.clean_collection(self, collection_suffix)
244
+ self.vector_adapter.clean_collection(self, index_name)
229
245
  self._log_data(
230
246
  f"Collection '{self.dataset}' has been cleaned. ",
231
247
  tool_name="_clean_collection"
232
248
  )
233
249
 
234
- def _get_indexed_data(self, collection_name: str):
235
- """ Get all indexed data from vectorstore for non-code content """
236
- return self.vector_adapter.get_indexed_data(self, collection_name)
237
-
238
- def _get_code_indexed_data(self, collection_suffix: str) -> Dict[str, Dict[str, Any]]:
250
+ def _get_code_indexed_data(self, index_name: str) -> Dict[str, Dict[str, Any]]:
239
251
  """ Get all indexed data from vectorstore for code content """
240
- return self.vector_adapter.get_code_indexed_data(self, collection_suffix)
252
+ return self.vector_adapter.get_code_indexed_data(self, index_name)
241
253
 
242
254
  def _add_to_collection(self, entry_id, new_collection_value):
243
255
  """Add a new collection name to the `collection` key in the `metadata` column."""
@@ -246,7 +258,7 @@ class VectorStoreWrapper(BaseToolApiWrapper):
246
258
  def _reduce_duplicates(
247
259
  self,
248
260
  documents: Generator[Any, None, None],
249
- collection_suffix: str,
261
+ index_name: str,
250
262
  get_indexed_data: Callable,
251
263
  key_fn: Callable,
252
264
  compare_fn: Callable,
@@ -255,7 +267,7 @@ class VectorStoreWrapper(BaseToolApiWrapper):
255
267
  ) -> List[Any]:
256
268
  """Generic duplicate reduction logic for documents."""
257
269
  self._log_data(log_msg, tool_name="index_documents")
258
- indexed_data = get_indexed_data(collection_suffix)
270
+ indexed_data = get_indexed_data(index_name)
259
271
  indexed_keys = set(indexed_data.keys())
260
272
  if not indexed_keys:
261
273
  self._log_data("Vectorstore is empty, indexing all incoming documents", tool_name="index_documents")
@@ -266,14 +278,15 @@ class VectorStoreWrapper(BaseToolApiWrapper):
266
278
 
267
279
  for document in documents:
268
280
  key = key_fn(document)
269
- if key in indexed_keys and collection_suffix == indexed_data[key]['metadata'].get('collection'):
281
+ key = key if isinstance(key, str) else str(key)
282
+ if key in indexed_keys and index_name == indexed_data[key]['metadata'].get('collection'):
270
283
  if compare_fn(document, indexed_data[key]):
271
284
  # Disabled addition of new collection to already indexed documents
272
285
  # # check metadata.collection and update if needed
273
286
  # for update_collection_id in remove_ids_fn(indexed_data, key):
274
287
  # self._add_to_collection(
275
288
  # update_collection_id,
276
- # collection_suffix
289
+ # index_name
277
290
  # )
278
291
  continue
279
292
  final_docs.append(document)
@@ -290,30 +303,10 @@ class VectorStoreWrapper(BaseToolApiWrapper):
290
303
 
291
304
  return final_docs
292
305
 
293
- def _reduce_non_code_duplicates(self, documents: Generator[Any, None, None], collection_suffix: str) -> List[Any]:
294
- return self._reduce_duplicates(
295
- documents,
296
- collection_suffix,
297
- self._get_indexed_data,
298
- lambda doc: doc.metadata.get('id'),
299
- lambda doc, idx: (
300
- doc.metadata.get('updated_on') and
301
- idx['metadata'].get('updated_on') and
302
- doc.metadata.get('updated_on') == idx['metadata'].get('updated_on')
303
- ),
304
- lambda idx_data, key: (
305
- idx_data[key]['all_chunks'] +
306
- [idx_data[dep_id]['id'] for dep_id in idx_data[key][IndexerKeywords.DEPENDENT_DOCS.value]] +
307
- [chunk_db_id for dep_id in idx_data[key][IndexerKeywords.DEPENDENT_DOCS.value]
308
- for chunk_db_id in idx_data[dep_id]['all_chunks']]
309
- ),
310
- log_msg="Verification of documents to index started"
311
- )
312
-
313
- def _reduce_code_duplicates(self, documents: Generator[Any, None, None], collection_suffix: str) -> List[Any]:
306
+ def _reduce_code_duplicates(self, documents: Generator[Any, None, None], index_name: str) -> List[Any]:
314
307
  return self._reduce_duplicates(
315
308
  documents,
316
- collection_suffix,
309
+ index_name,
317
310
  self._get_code_indexed_data,
318
311
  lambda doc: doc.metadata.get('filename'),
319
312
  lambda doc, idx: (
@@ -325,7 +318,7 @@ class VectorStoreWrapper(BaseToolApiWrapper):
325
318
  log_msg="Verification of code documents to index started"
326
319
  )
327
320
 
328
- def index_documents(self, documents: Generator[Document, None, None], collection_suffix: str, progress_step: int = 20, clean_index: bool = True, is_code: bool = False):
321
+ def index_documents(self, documents: Generator[Document, None, None], index_name: str, progress_step: int = 20, clean_index: bool = True, is_code: bool = True):
329
322
  """ Index documents in the vectorstore.
330
323
 
331
324
  Args:
@@ -336,13 +329,13 @@ class VectorStoreWrapper(BaseToolApiWrapper):
336
329
 
337
330
  from ..langchain.interfaces.llm_processor import add_documents
338
331
 
339
- self._log_tool_event(message=f"Starting the indexing... Parameters: {collection_suffix=}, {clean_index=}, {is_code}", tool_name="index_documents")
332
+ self._log_tool_event(message=f"Starting the indexing... Parameters: {index_name=}, {clean_index=}, {is_code}", tool_name="index_documents")
340
333
  # pre-process documents if needed (find duplicates, etc.)
341
334
  if clean_index:
342
335
  logger.info("Cleaning index before re-indexing all documents.")
343
336
  self._log_data("Cleaning index before re-indexing all documents. Previous index will be removed", tool_name="index_documents")
344
337
  try:
345
- self._clean_collection(collection_suffix)
338
+ self._clean_collection(index_name)
346
339
  self.vectoradapter.persist()
347
340
  self.vectoradapter.vacuum()
348
341
  self._log_data("Previous index has been removed",
@@ -356,8 +349,7 @@ class VectorStoreWrapper(BaseToolApiWrapper):
356
349
  message="Filter for duplicates",
357
350
  tool_name="index_documents")
358
351
  # remove duplicates based on metadata 'id' and 'updated_on' or 'commit_hash' fields
359
- documents = self._reduce_code_duplicates(documents, collection_suffix) if is_code \
360
- else self._reduce_non_code_duplicates(documents, collection_suffix)
352
+ documents = self._reduce_code_duplicates(documents, index_name)
361
353
  self._log_tool_event(
362
354
  message="All the duplicates were filtered out. Proceeding with indexing.",
363
355
  tool_name="index_documents")
@@ -385,13 +377,13 @@ class VectorStoreWrapper(BaseToolApiWrapper):
385
377
  self._log_tool_event(message=f"Documents for indexing were processed. Total documents: {len(documents)}",
386
378
  tool_name="index_documents")
387
379
 
388
- # if collection_suffix is provided, add it to metadata of each document
389
- if collection_suffix:
380
+ # if index_name is provided, add it to metadata of each document
381
+ if index_name:
390
382
  for doc in documents:
391
383
  if not doc.metadata.get('collection'):
392
- doc.metadata['collection'] = collection_suffix
384
+ doc.metadata['collection'] = index_name
393
385
  else:
394
- doc.metadata['collection'] += f";{collection_suffix}"
386
+ doc.metadata['collection'] += f";{index_name}"
395
387
 
396
388
  total_docs = len(documents)
397
389
  documents_count = 0
@@ -422,7 +414,8 @@ class VectorStoreWrapper(BaseToolApiWrapper):
422
414
  return {"status": "error", "message": f"Error: {format_exc()}"}
423
415
  if _documents:
424
416
  add_documents(vectorstore=self.vectorstore, documents=_documents)
425
- return {"status": "ok", "message": f"successfully indexed {documents_count} documents"}
417
+ return {"status": "ok", "message": f"successfully indexed {documents_count} documents" if documents_count > 0
418
+ else "No new documents to index."}
426
419
 
427
420
  def search_documents(self, query:str, doctype: str = 'code',
428
421
  filter:dict|str={}, cut_off: float=0.5,
@@ -542,11 +535,18 @@ class VectorStoreWrapper(BaseToolApiWrapper):
542
535
 
543
536
  # Initialize document map for tracking by ID
544
537
  doc_map = {
545
- f"{doc.metadata.get('id', f'idx_{i}')}_{doc.metadata['chunk_id']}"
546
- if 'chunk_id' in doc.metadata
547
- else doc.metadata.get('id', f"idx_{i}"): (doc, score)
538
+ (
539
+ f"{doc.metadata.get('id', f'idx_{i}')}_{doc.metadata['chunk_id']}"
540
+ if 'chunk_id' in doc.metadata
541
+ else doc.metadata.get('id', f"idx_{i}")
542
+ ): (doc, 1 - score)
548
543
  for i, (doc, score) in enumerate(vector_items)
549
544
  }
545
+
546
+ # Sort the items by the new score in descending order
547
+ doc_map = OrderedDict(
548
+ sorted(doc_map.items(), key=lambda x: x[1][1], reverse=True)
549
+ )
550
550
 
551
551
  # Process full-text search if configured
552
552
  if full_text_search and full_text_search.get('enabled') and full_text_search.get('fields'):
@@ -597,7 +597,7 @@ class VectorStoreWrapper(BaseToolApiWrapper):
597
597
  # Apply cutoff filter
598
598
  if cut_off:
599
599
  # Filter out items above the cutoff score (since the lower the score, the better)
600
- combined_items = [item for item in combined_items if abs(item[1]) <= cut_off]
600
+ combined_items = [item for item in combined_items if abs(item[1]) >= cut_off]
601
601
 
602
602
  # Sort by score and limit results
603
603
  # DISABLED: for chroma we want ascending order (lower score is better), for others descending
@@ -758,4 +758,3 @@ class VectorStoreWrapper(BaseToolApiWrapper):
758
758
  "args_schema": StepBackSearchDocumentsModel
759
759
  }
760
760
  ]
761
-