alita-sdk 0.3.379__py3-none-any.whl → 0.3.462__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of alita-sdk might be problematic. Click here for more details.

Files changed (110)
  1. alita_sdk/cli/__init__.py +10 -0
  2. alita_sdk/cli/__main__.py +17 -0
  3. alita_sdk/cli/agent_executor.py +144 -0
  4. alita_sdk/cli/agent_loader.py +197 -0
  5. alita_sdk/cli/agent_ui.py +166 -0
  6. alita_sdk/cli/agents.py +1069 -0
  7. alita_sdk/cli/callbacks.py +576 -0
  8. alita_sdk/cli/cli.py +159 -0
  9. alita_sdk/cli/config.py +153 -0
  10. alita_sdk/cli/formatting.py +182 -0
  11. alita_sdk/cli/mcp_loader.py +315 -0
  12. alita_sdk/cli/toolkit.py +330 -0
  13. alita_sdk/cli/toolkit_loader.py +55 -0
  14. alita_sdk/cli/tools/__init__.py +9 -0
  15. alita_sdk/cli/tools/filesystem.py +905 -0
  16. alita_sdk/configurations/bitbucket.py +95 -0
  17. alita_sdk/configurations/confluence.py +96 -1
  18. alita_sdk/configurations/gitlab.py +79 -0
  19. alita_sdk/configurations/jira.py +103 -0
  20. alita_sdk/configurations/testrail.py +88 -0
  21. alita_sdk/configurations/xray.py +93 -0
  22. alita_sdk/configurations/zephyr_enterprise.py +93 -0
  23. alita_sdk/configurations/zephyr_essential.py +75 -0
  24. alita_sdk/runtime/clients/client.py +47 -10
  25. alita_sdk/runtime/clients/mcp_discovery.py +342 -0
  26. alita_sdk/runtime/clients/mcp_manager.py +262 -0
  27. alita_sdk/runtime/clients/sandbox_client.py +8 -0
  28. alita_sdk/runtime/langchain/assistant.py +37 -16
  29. alita_sdk/runtime/langchain/constants.py +6 -1
  30. alita_sdk/runtime/langchain/document_loaders/AlitaDocxMammothLoader.py +315 -3
  31. alita_sdk/runtime/langchain/document_loaders/AlitaJSONLoader.py +4 -1
  32. alita_sdk/runtime/langchain/document_loaders/constants.py +28 -12
  33. alita_sdk/runtime/langchain/langraph_agent.py +146 -31
  34. alita_sdk/runtime/langchain/utils.py +39 -7
  35. alita_sdk/runtime/models/mcp_models.py +61 -0
  36. alita_sdk/runtime/toolkits/__init__.py +24 -0
  37. alita_sdk/runtime/toolkits/application.py +8 -1
  38. alita_sdk/runtime/toolkits/artifact.py +5 -6
  39. alita_sdk/runtime/toolkits/mcp.py +895 -0
  40. alita_sdk/runtime/toolkits/tools.py +137 -56
  41. alita_sdk/runtime/tools/__init__.py +7 -2
  42. alita_sdk/runtime/tools/application.py +7 -0
  43. alita_sdk/runtime/tools/function.py +29 -25
  44. alita_sdk/runtime/tools/graph.py +10 -4
  45. alita_sdk/runtime/tools/image_generation.py +104 -8
  46. alita_sdk/runtime/tools/llm.py +204 -114
  47. alita_sdk/runtime/tools/mcp_inspect_tool.py +284 -0
  48. alita_sdk/runtime/tools/mcp_remote_tool.py +166 -0
  49. alita_sdk/runtime/tools/mcp_server_tool.py +3 -1
  50. alita_sdk/runtime/tools/sandbox.py +57 -43
  51. alita_sdk/runtime/tools/vectorstore.py +2 -1
  52. alita_sdk/runtime/tools/vectorstore_base.py +19 -3
  53. alita_sdk/runtime/utils/mcp_oauth.py +164 -0
  54. alita_sdk/runtime/utils/mcp_sse_client.py +405 -0
  55. alita_sdk/runtime/utils/streamlit.py +34 -3
  56. alita_sdk/runtime/utils/toolkit_utils.py +14 -4
  57. alita_sdk/tools/__init__.py +46 -31
  58. alita_sdk/tools/ado/repos/__init__.py +1 -0
  59. alita_sdk/tools/ado/test_plan/__init__.py +1 -1
  60. alita_sdk/tools/ado/wiki/__init__.py +1 -5
  61. alita_sdk/tools/ado/work_item/__init__.py +1 -5
  62. alita_sdk/tools/ado/work_item/ado_wrapper.py +17 -8
  63. alita_sdk/tools/base_indexer_toolkit.py +105 -43
  64. alita_sdk/tools/bitbucket/__init__.py +1 -0
  65. alita_sdk/tools/chunkers/sematic/proposal_chunker.py +1 -1
  66. alita_sdk/tools/code/sonar/__init__.py +1 -1
  67. alita_sdk/tools/code_indexer_toolkit.py +13 -3
  68. alita_sdk/tools/confluence/__init__.py +2 -2
  69. alita_sdk/tools/confluence/api_wrapper.py +29 -7
  70. alita_sdk/tools/confluence/loader.py +10 -0
  71. alita_sdk/tools/github/__init__.py +2 -2
  72. alita_sdk/tools/gitlab/__init__.py +2 -1
  73. alita_sdk/tools/gitlab/api_wrapper.py +11 -7
  74. alita_sdk/tools/gitlab_org/__init__.py +1 -2
  75. alita_sdk/tools/google_places/__init__.py +2 -1
  76. alita_sdk/tools/jira/__init__.py +1 -0
  77. alita_sdk/tools/jira/api_wrapper.py +1 -1
  78. alita_sdk/tools/memory/__init__.py +1 -1
  79. alita_sdk/tools/openapi/__init__.py +10 -1
  80. alita_sdk/tools/pandas/__init__.py +1 -1
  81. alita_sdk/tools/postman/__init__.py +2 -1
  82. alita_sdk/tools/pptx/__init__.py +2 -2
  83. alita_sdk/tools/qtest/__init__.py +3 -3
  84. alita_sdk/tools/qtest/api_wrapper.py +1708 -76
  85. alita_sdk/tools/rally/__init__.py +1 -2
  86. alita_sdk/tools/report_portal/__init__.py +1 -0
  87. alita_sdk/tools/salesforce/__init__.py +1 -0
  88. alita_sdk/tools/servicenow/__init__.py +2 -3
  89. alita_sdk/tools/sharepoint/__init__.py +1 -0
  90. alita_sdk/tools/sharepoint/api_wrapper.py +125 -34
  91. alita_sdk/tools/sharepoint/authorization_helper.py +191 -1
  92. alita_sdk/tools/sharepoint/utils.py +8 -2
  93. alita_sdk/tools/slack/__init__.py +1 -0
  94. alita_sdk/tools/sql/__init__.py +2 -1
  95. alita_sdk/tools/testio/__init__.py +1 -0
  96. alita_sdk/tools/testrail/__init__.py +1 -3
  97. alita_sdk/tools/utils/content_parser.py +27 -16
  98. alita_sdk/tools/vector_adapters/VectorStoreAdapter.py +18 -5
  99. alita_sdk/tools/xray/__init__.py +2 -1
  100. alita_sdk/tools/zephyr/__init__.py +2 -1
  101. alita_sdk/tools/zephyr_enterprise/__init__.py +1 -0
  102. alita_sdk/tools/zephyr_essential/__init__.py +1 -0
  103. alita_sdk/tools/zephyr_scale/__init__.py +1 -0
  104. alita_sdk/tools/zephyr_squad/__init__.py +1 -0
  105. {alita_sdk-0.3.379.dist-info → alita_sdk-0.3.462.dist-info}/METADATA +8 -2
  106. {alita_sdk-0.3.379.dist-info → alita_sdk-0.3.462.dist-info}/RECORD +110 -86
  107. alita_sdk-0.3.462.dist-info/entry_points.txt +2 -0
  108. {alita_sdk-0.3.379.dist-info → alita_sdk-0.3.462.dist-info}/WHEEL +0 -0
  109. {alita_sdk-0.3.379.dist-info → alita_sdk-0.3.462.dist-info}/licenses/LICENSE +0 -0
  110. {alita_sdk-0.3.379.dist-info → alita_sdk-0.3.462.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,166 @@
1
+ """
2
+ MCP Remote Tool for direct HTTP/SSE invocation.
3
+ This tool is used for remote MCP servers accessed via HTTP/SSE.
4
+ """
5
+
6
+ import asyncio
7
+ import json
8
+ import logging
9
+ import time
10
+ import uuid
11
+ from concurrent.futures import ThreadPoolExecutor
12
+ from typing import Any, Dict, Optional
13
+
14
+ from .mcp_server_tool import McpServerTool
15
+ from pydantic import Field
16
+ from ..utils.mcp_oauth import (
17
+ McpAuthorizationRequired,
18
+ canonical_resource,
19
+ extract_resource_metadata_url,
20
+ fetch_resource_metadata_async,
21
+ infer_authorization_servers_from_realm,
22
+ )
23
+ from ..utils.mcp_sse_client import McpSseClient
24
+
25
+ logger = logging.getLogger(__name__)
26
+
27
+
28
+ class McpRemoteTool(McpServerTool):
29
+ """
30
+ Tool for invoking remote MCP server tools via HTTP/SSE.
31
+ Extends McpServerTool and overrides _run to use direct HTTP calls instead of client.mcp_tool_call.
32
+ """
33
+
34
+ # Remote MCP connection details
35
+ server_url: str = Field(..., description="URL of the remote MCP server")
36
+ server_headers: Optional[Dict[str, str]] = Field(default=None, description="HTTP headers for authentication")
37
+ original_tool_name: Optional[str] = Field(default=None, description="Original tool name from MCP server (before optimization)")
38
+ is_prompt: bool = False # Flag to indicate if this is a prompt tool
39
+ prompt_name: Optional[str] = None # Original prompt name if this is a prompt
40
+ session_id: Optional[str] = Field(default=None, description="MCP session ID for stateful SSE servers")
41
+
42
+ def model_post_init(self, __context: Any) -> None:
43
+ """Update metadata with session info after model initialization."""
44
+ super().model_post_init(__context)
45
+ self._update_metadata_with_session()
46
+
47
+ def _update_metadata_with_session(self):
48
+ """Update the metadata dict with current session information."""
49
+ if self.session_id:
50
+ if self.metadata is None:
51
+ self.metadata = {}
52
+ self.metadata.update({
53
+ 'mcp_session_id': self.session_id,
54
+ 'mcp_server_url': canonical_resource(self.server_url)
55
+ })
56
+
57
+ def __getstate__(self):
58
+ """Custom serialization for pickle compatibility."""
59
+ state = super().__getstate__()
60
+ # Ensure headers are serializable
61
+ if 'server_headers' in state and state['server_headers'] is not None:
62
+ state['server_headers'] = dict(state['server_headers'])
63
+ return state
64
+
65
+ def _run(self, *args, **kwargs):
66
+ """
67
+ Execute the MCP tool via direct HTTP/SSE call to the remote server.
68
+ Overrides the parent method to avoid using client.mcp_tool_call.
69
+ """
70
+ try:
71
+ # Always create a new event loop for sync context
72
+ with ThreadPoolExecutor() as executor:
73
+ future = executor.submit(self._run_in_new_loop, kwargs)
74
+ return future.result(timeout=self.tool_timeout_sec)
75
+ except McpAuthorizationRequired:
76
+ # Bubble up so LangChain can surface a tool error with useful metadata
77
+ raise
78
+ except Exception as e:
79
+ logger.error(f"Error executing remote MCP tool '{self.name}': {e}")
80
+ return f"Error executing tool: {e}"
81
+
82
+ def _run_in_new_loop(self, kwargs: Dict[str, Any]) -> str:
83
+ """Run the async tool invocation in a new event loop."""
84
+ return asyncio.run(self._execute_remote_tool(kwargs))
85
+
86
+ async def _execute_remote_tool(self, kwargs: Dict[str, Any]) -> str:
87
+ """Execute the actual remote MCP tool call using SSE client."""
88
+ from ...tools.utils import TOOLKIT_SPLITTER
89
+
90
+ # Check for session_id requirement
91
+ if not self.session_id:
92
+ logger.error(f"[MCP Session] Missing session_id for tool '{self.name}'")
93
+ raise Exception("sessionId required. Frontend must generate UUID and send with mcp_tokens.")
94
+
95
+ # Use the original tool name from discovery for MCP server invocation
96
+ tool_name_for_server = self.original_tool_name
97
+ if not tool_name_for_server:
98
+ tool_name_for_server = self.name.rsplit(TOOLKIT_SPLITTER, 1)[-1] if TOOLKIT_SPLITTER in self.name else self.name
99
+ logger.warning(f"original_tool_name not set for '{self.name}', using extracted: {tool_name_for_server}")
100
+
101
+ logger.info(f"[MCP SSE] Executing tool '{tool_name_for_server}' with session {self.session_id}")
102
+
103
+ try:
104
+ # Prepare headers
105
+ headers = {}
106
+ if self.server_headers:
107
+ headers.update(self.server_headers)
108
+
109
+ # Create SSE client
110
+ client = McpSseClient(
111
+ url=self.server_url,
112
+ session_id=self.session_id,
113
+ headers=headers,
114
+ timeout=self.tool_timeout_sec
115
+ )
116
+
117
+ # Execute tool call via SSE
118
+ result = await client.call_tool(tool_name_for_server, kwargs)
119
+
120
+ # Format the result
121
+ if isinstance(result, dict):
122
+ # Check for content array (common in MCP responses)
123
+ if "content" in result:
124
+ content_items = result["content"]
125
+ if isinstance(content_items, list):
126
+ # Extract text from content items
127
+ text_parts = []
128
+ for item in content_items:
129
+ if isinstance(item, dict):
130
+ if item.get("type") == "text" and "text" in item:
131
+ text_parts.append(item["text"])
132
+ elif "text" in item:
133
+ text_parts.append(item["text"])
134
+ else:
135
+ text_parts.append(json.dumps(item))
136
+ else:
137
+ text_parts.append(str(item))
138
+ return "\n".join(text_parts)
139
+
140
+ # Return formatted JSON if no content field
141
+ return json.dumps(result, indent=2)
142
+
143
+ # Return as string for other types
144
+ return str(result)
145
+
146
+ except Exception as e:
147
+ logger.error(f"[MCP SSE] Tool execution failed: {e}", exc_info=True)
148
+ raise
149
+
150
+ def _parse_sse(self, text: str) -> Dict[str, Any]:
151
+ """Parse Server-Sent Events (SSE) format response."""
152
+ for line in text.split('\n'):
153
+ line = line.strip()
154
+ if line.startswith('data:'):
155
+ json_str = line[5:].strip()
156
+ return json.loads(json_str)
157
+ raise ValueError("No data found in SSE response")
158
+
159
+ def get_session_metadata(self) -> dict:
160
+ """Return session metadata to be included in tool responses."""
161
+ if self.session_id:
162
+ return {
163
+ 'mcp_session_id': self.session_id,
164
+ 'mcp_server_url': canonical_resource(self.server_url)
165
+ }
166
+ return {}
@@ -3,7 +3,7 @@ from logging import getLogger
3
3
  from typing import Any, Type, Literal, Optional, Union, List
4
4
 
5
5
  from langchain_core.tools import BaseTool
6
- from pydantic import BaseModel, Field, create_model, EmailStr, constr
6
+ from pydantic import BaseModel, Field, create_model, EmailStr, constr, ConfigDict
7
7
 
8
8
  from ...tools.utils import TOOLKIT_SPLITTER
9
9
 
@@ -19,6 +19,7 @@ class McpServerTool(BaseTool):
19
19
  server: str
20
20
  tool_timeout_sec: int = 60
21
21
 
22
+ model_config = ConfigDict(arbitrary_types_allowed=True)
22
23
 
23
24
  @staticmethod
24
25
  def create_pydantic_model_from_schema(schema: dict, model_name: str = "ArgsSchema"):
@@ -90,6 +91,7 @@ class McpServerTool(BaseTool):
90
91
  return create_model(model_name, **fields)
91
92
 
92
93
  def _run(self, *args, **kwargs):
94
+ # Extract the actual tool/prompt name (remove toolkit prefix)
93
95
  call_data = {
94
96
  "server": self.server,
95
97
  "tool_timeout_sec": self.tool_timeout_sec,
@@ -2,9 +2,12 @@ import asyncio
2
2
  import logging
3
3
  import subprocess
4
4
  import os
5
- from typing import Any, Type, Optional, Dict, List, Literal
5
+ from typing import Any, Type, Optional, Dict, List, Literal, Union
6
+ from copy import deepcopy
7
+ from pathlib import Path
6
8
 
7
9
  from langchain_core.tools import BaseTool, BaseToolkit
10
+ from langchain_core.messages import ToolCall
8
11
  from pydantic import BaseModel, create_model, ConfigDict, Field
9
12
  from pydantic.fields import FieldInfo
10
13
 
@@ -19,7 +22,7 @@ def get_tools(tools_list: list, alita_client=None, llm=None, memory_store=None):
19
22
 
20
23
  Args:
21
24
  tools_list: List of tool configurations
22
- alita_client: Alita client instance (unused for sandbox)
25
+ alita_client: Alita client instance for sandbox tools
23
26
  llm: LLM client instance (unused for sandbox)
24
27
  memory_store: Optional memory store instance (unused for sandbox)
25
28
 
@@ -34,6 +37,7 @@ def get_tools(tools_list: list, alita_client=None, llm=None, memory_store=None):
34
37
  toolkit_instance = SandboxToolkit.get_toolkit(
35
38
  stateful=tool['settings'].get('stateful', False),
36
39
  allow_net=tool['settings'].get('allow_net', True),
40
+ alita_client=alita_client,
37
41
  toolkit_name=tool.get('toolkit_name', '')
38
42
  )
39
43
  all_tools.extend(toolkit_instance.get_tools())
@@ -60,36 +64,10 @@ def _is_deno_available() -> bool:
60
64
 
61
65
 
62
66
  def _setup_pyodide_cache_env() -> None:
63
- """Setup Pyodide caching environment variables for performance optimization"""
67
+ """Setup Pyodide caching environment variables for performance optimization [NO-OP]"""
64
68
  try:
65
- # Check if cache environment file exists and source it
66
- cache_env_file = os.path.expanduser("~/.pyodide_cache_env")
67
- if os.path.exists(cache_env_file):
68
- with open(cache_env_file, 'r') as f:
69
- for line in f:
70
- line = line.strip()
71
- if line.startswith('export ') and '=' in line:
72
- # Parse export VAR=value format
73
- var_assignment = line[7:] # Remove 'export '
74
- if '=' in var_assignment:
75
- key, value = var_assignment.split('=', 1)
76
- # Remove quotes if present
77
- value = value.strip('"').strip("'")
78
- os.environ[key] = value
79
- logger.debug(f"Set Pyodide cache env: {key}={value}")
80
-
81
- # Set default caching environment variables if not already set
82
- cache_defaults = {
83
- 'PYODIDE_PACKAGES_PATH': os.path.expanduser('~/.cache/pyodide'),
84
- 'DENO_DIR': os.path.expanduser('~/.cache/deno'),
85
- 'PYODIDE_CACHE_DIR': os.path.expanduser('~/.cache/pyodide'),
86
- }
87
-
88
- for key, default_value in cache_defaults.items():
89
- if key not in os.environ:
90
- os.environ[key] = default_value
91
- logger.debug(f"Set default Pyodide env: {key}={default_value}")
92
-
69
+ for key in ["SANDBOX_BASE", "DENO_DIR"]:
70
+ logger.info("Sandbox env: %s -> %s", key, os.environ.get(key, "n/a"))
93
71
  except Exception as e:
94
72
  logger.warning(f"Could not setup Pyodide cache environment: {e}")
95
73
 
@@ -126,6 +104,7 @@ class PyodideSandboxTool(BaseTool):
126
104
  allow_net: bool = True
127
105
  session_bytes: Optional[bytes] = None
128
106
  session_metadata: Optional[Dict] = None
107
+ alita_client: Optional[Any] = None
129
108
 
130
109
  def __init__(self, **kwargs: Any) -> None:
131
110
  super().__init__(**kwargs)
@@ -134,6 +113,28 @@ class PyodideSandboxTool(BaseTool):
134
113
  _setup_pyodide_cache_env()
135
114
  self._initialize_sandbox()
136
115
 
116
+ def _prepare_pyodide_input(self, code: str) -> str:
117
+ """Prepare input for PyodideSandboxTool by injecting state and alita_client into the code block."""
118
+ pyodide_predata = ""
119
+
120
+ # Add alita_client if available
121
+ if self.alita_client:
122
+ try:
123
+ # Get the directory of the current file and construct the path to sandbox_client.py
124
+ current_dir = Path(__file__).parent
125
+ sandbox_client_path = current_dir.parent / 'clients' / 'sandbox_client.py'
126
+
127
+ with open(sandbox_client_path, 'r') as f:
128
+ sandbox_client_code = f.read()
129
+ pyodide_predata += f"{sandbox_client_code}\n"
130
+ pyodide_predata += (f"alita_client = SandboxClient(base_url='{self.alita_client.base_url}',"
131
+ f"project_id={self.alita_client.project_id},"
132
+ f"auth_token='{self.alita_client.auth_token}')\n")
133
+ except FileNotFoundError:
134
+ logger.error(f"sandbox_client.py not found. Ensure the file exists.")
135
+
136
+ return f"#elitea simplified client\n{pyodide_predata}{code}"
137
+
137
138
  def _initialize_sandbox(self) -> None:
138
139
  """Initialize the PyodideSandbox instance with optimized settings"""
139
140
  try:
@@ -148,9 +149,19 @@ class PyodideSandboxTool(BaseTool):
148
149
 
149
150
  from langchain_sandbox import PyodideSandbox
150
151
 
152
+ # Air-gapped settings
153
+ sandbox_base = os.environ.get("SANDBOX_BASE", os.path.expanduser('~/.cache/pyodide'))
154
+ sandbox_tmp = os.path.join(sandbox_base, "tmp")
155
+ deno_cache = os.environ.get("DENO_DIR", os.path.expanduser('~/.cache/deno'))
156
+
151
157
  # Configure sandbox with performance optimizations
152
158
  self._sandbox = PyodideSandbox(
153
159
  stateful=self.stateful,
160
+ #
161
+ allow_env=["SANDBOX_BASE"],
162
+ allow_read=[sandbox_base, sandbox_tmp, deno_cache],
163
+ allow_write=[sandbox_tmp, deno_cache],
164
+ #
154
165
  allow_net=self.allow_net,
155
166
  # Use auto node_modules_dir for better caching
156
167
  node_modules_dir="auto"
@@ -180,6 +191,9 @@ class PyodideSandboxTool(BaseTool):
180
191
  if self._sandbox is None:
181
192
  self._initialize_sandbox()
182
193
 
194
+ # Prepare code with state and client injection
195
+ prepared_code = self._prepare_pyodide_input(code)
196
+
183
197
  # Check if we're already in an async context
184
198
  try:
185
199
  loop = asyncio.get_running_loop()
@@ -187,11 +201,11 @@ class PyodideSandboxTool(BaseTool):
187
201
  # We'll need to use a different approach
188
202
  import concurrent.futures
189
203
  with concurrent.futures.ThreadPoolExecutor() as executor:
190
- future = executor.submit(asyncio.run, self._arun(code))
204
+ future = executor.submit(asyncio.run, self._arun(prepared_code))
191
205
  return future.result()
192
206
  except RuntimeError:
193
207
  # No running loop, safe to use asyncio.run
194
- return asyncio.run(self._arun(code))
208
+ return asyncio.run(self._arun(prepared_code))
195
209
  except (ImportError, RuntimeError) as e:
196
210
  # Handle specific dependency errors gracefully
197
211
  error_msg = str(e)
@@ -250,7 +264,7 @@ class PyodideSandboxTool(BaseTool):
250
264
 
251
265
  except Exception as e:
252
266
  logger.error(f"Error executing code in sandbox: {e}")
253
- return f"Error executing code: {str(e)}"
267
+ return {"error": f"Error executing code: {str(e)}"}
254
268
 
255
269
 
256
270
  class StatefulPyodideSandboxTool(PyodideSandboxTool):
@@ -278,7 +292,7 @@ class StatefulPyodideSandboxTool(PyodideSandboxTool):
278
292
 
279
293
 
280
294
  # Factory function for creating sandbox tools
281
- def create_sandbox_tool(stateful: bool = False, allow_net: bool = True) -> BaseTool:
295
+ def create_sandbox_tool(stateful: bool = False, allow_net: bool = True, alita_client: Optional[Any] = None) -> BaseTool:
282
296
  """
283
297
  Factory function to create sandbox tools with specified configuration.
284
298
 
@@ -302,22 +316,22 @@ def create_sandbox_tool(stateful: bool = False, allow_net: bool = True) -> BaseT
302
316
  - Cached wheels reduce package download time from ~4.76s to near-instant
303
317
  """
304
318
  if stateful:
305
- return StatefulPyodideSandboxTool(allow_net=allow_net)
319
+ return StatefulPyodideSandboxTool(allow_net=allow_net, alita_client=alita_client)
306
320
  else:
307
- return PyodideSandboxTool(stateful=False, allow_net=allow_net)
321
+ return PyodideSandboxTool(stateful=False, allow_net=allow_net, alita_client=alita_client)
308
322
 
309
323
 
310
324
  class SandboxToolkit(BaseToolkit):
311
325
  tools: List[BaseTool] = []
312
326
 
313
327
  @staticmethod
314
- def toolkit_config_schema() -> BaseModel:
328
+ def toolkit_config_schema() -> Type[BaseModel]:
315
329
  # Create sample tools to get their schemas
316
330
  sample_tools = [
317
331
  PyodideSandboxTool(),
318
332
  StatefulPyodideSandboxTool()
319
333
  ]
320
- selected_tools = {x.name: x.args_schema.schema() for x in sample_tools}
334
+ selected_tools = {x.name: x.args_schema.model_json_schema() for x in sample_tools}
321
335
 
322
336
  return create_model(
323
337
  'sandbox',
@@ -338,24 +352,24 @@ class SandboxToolkit(BaseToolkit):
338
352
  )
339
353
 
340
354
  @classmethod
341
- def get_toolkit(cls, stateful: bool = False, allow_net: bool = True, **kwargs):
355
+ def get_toolkit(cls, stateful: bool = False, allow_net: bool = True, alita_client=None, **kwargs):
342
356
  """
343
357
  Get toolkit with sandbox tools.
344
358
 
345
359
  Args:
346
360
  stateful: Whether to maintain state between executions
347
361
  allow_net: Whether to allow network access
362
+ alita_client: Alita client instance for sandbox tools
348
363
  **kwargs: Additional arguments
349
364
  """
350
365
  tools = []
351
366
 
352
367
  if stateful:
353
- tools.append(StatefulPyodideSandboxTool(allow_net=allow_net))
368
+ tools.append(StatefulPyodideSandboxTool(allow_net=allow_net, alita_client=alita_client))
354
369
  else:
355
- tools.append(PyodideSandboxTool(stateful=False, allow_net=allow_net))
370
+ tools.append(PyodideSandboxTool(stateful=False, allow_net=allow_net, alita_client=alita_client))
356
371
 
357
372
  return cls(tools=tools)
358
373
 
359
374
  def get_tools(self):
360
375
  return self.tools
361
-
@@ -414,7 +414,8 @@ class VectorStoreWrapper(BaseToolApiWrapper):
414
414
  return {"status": "error", "message": f"Error: {format_exc()}"}
415
415
  if _documents:
416
416
  add_documents(vectorstore=self.vectorstore, documents=_documents)
417
- return {"status": "ok", "message": f"successfully indexed {documents_count} documents"}
417
+ return {"status": "ok", "message": f"successfully indexed {documents_count} documents" if documents_count > 0
418
+ else "No new documents to index."}
418
419
 
419
420
  def search_documents(self, query:str, doctype: str = 'code',
420
421
  filter:dict|str={}, cut_off: float=0.5,
@@ -1,9 +1,9 @@
1
1
  import json
2
- import math
3
2
  from collections import OrderedDict
4
3
  from logging import getLogger
5
4
  from typing import Any, Optional, List, Dict, Generator
6
5
 
6
+ import math
7
7
  from langchain_core.documents import Document
8
8
  from langchain_core.messages import HumanMessage
9
9
  from langchain_core.tools import ToolException
@@ -12,7 +12,7 @@ from pydantic import BaseModel, model_validator, Field
12
12
 
13
13
  from alita_sdk.tools.elitea_base import BaseToolApiWrapper
14
14
  from alita_sdk.tools.vector_adapters.VectorStoreAdapter import VectorStoreAdapterFactory
15
- from ..utils.logging import dispatch_custom_event
15
+ from ...runtime.utils.utils import IndexerKeywords
16
16
 
17
17
  logger = getLogger(__name__)
18
18
 
@@ -222,6 +222,21 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
222
222
  raise RuntimeError(f"Multiple index_meta documents found: {index_metas}")
223
223
  return index_metas[0] if index_metas else None
224
224
 
225
def get_indexed_count(self, index_name: str) -> int:
    """Count stored embeddings whose metadata 'collection' equals `index_name`.

    Rows whose metadata 'type' equals the index-meta marker are left out;
    rows without a 'type' are counted.
    """
    from sqlalchemy.orm import Session
    from sqlalchemy import func, or_

    with Session(self.vectorstore.session_maker.bind) as session:
        metadata_column = self.vectorstore.EmbeddingStore.cmetadata
        collection_value = func.jsonb_extract_path_text(metadata_column, 'collection')
        type_value = func.jsonb_extract_path_text(metadata_column, 'type')

        # Keep everything that is not the index-meta bookkeeping record.
        is_regular_document = or_(
            type_value.is_(None),
            type_value != IndexerKeywords.INDEX_META_TYPE.value,
        )

        matching_rows = session.query(self.vectorstore.EmbeddingStore.id).filter(
            collection_value == index_name,
            is_regular_document,
        )
        return matching_rows.count()
239
+
225
240
  def _clean_collection(self, index_name: str = ''):
226
241
  """
227
242
  Clean the vectorstore collection by deleting all indexed data.
@@ -308,7 +323,8 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
308
323
  return {"status": "error", "message": f"Error: {format_exc()}"}
309
324
  if _documents:
310
325
  add_documents(vectorstore=self.vectorstore, documents=_documents)
311
- return {"status": "ok", "message": f"successfully indexed {documents_count} documents"}
326
+ return {"status": "ok", "message": f"successfully indexed {documents_count} documents" if documents_count > 0
327
+ else "no documents to index"}
312
328
 
313
329
  def search_documents(self, query:str, doctype: str = 'code',
314
330
  filter:dict|str={}, cut_off: float=0.5,
@@ -0,0 +1,164 @@
1
+ import json
2
+ import logging
3
+ import re
4
+ from typing import Any, Dict, Optional
5
+ from urllib.parse import urlparse
6
+
7
+ import requests
8
+ from langchain_core.tools import ToolException
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
+
13
class McpAuthorizationRequired(ToolException):
    """Error signalling that an MCP server wants OAuth authorization first.

    Besides the message, the exception stores the details of the challenge
    (server URL, metadata URL/document, WWW-Authenticate value, HTTP status,
    tool name) as attributes; `to_dict` returns them as a plain dictionary.
    """

    def __init__(
        self,
        message: str,
        server_url: str,
        resource_metadata_url: Optional[str] = None,
        www_authenticate: Optional[str] = None,
        resource_metadata: Optional[Dict[str, Any]] = None,
        status: Optional[int] = None,
        tool_name: Optional[str] = None,
    ):
        super().__init__(message)
        # Where the challenge came from.
        self.server_url = server_url
        self.tool_name = tool_name
        self.status = status
        # What the server said about how to authorize.
        self.www_authenticate = www_authenticate
        self.resource_metadata_url = resource_metadata_url
        self.resource_metadata = resource_metadata

    def to_dict(self) -> Dict[str, Any]:
        """Return the message and all stored challenge details as a dict."""
        detail_names = (
            "server_url",
            "resource_metadata_url",
            "www_authenticate",
            "resource_metadata",
            "status",
            "tool_name",
        )
        payload: Dict[str, Any] = {"message": str(self)}
        for attribute in detail_names:
            payload[attribute] = getattr(self, attribute)
        return payload
44
+
45
+
46
def extract_resource_metadata_url(www_authenticate: Optional[str], server_url: Optional[str] = None) -> Optional[str]:
    """
    Return the `resource_metadata` URL found in a WWW-Authenticate header value.

    The value may be quoted or bare (RFC 9728 style `resource_metadata="<url>"`).
    `server_url` is accepted but no URL is derived from it: when the header is
    missing or carries no `resource_metadata` parameter the result is None, and
    callers are expected to fall back to other discovery mechanisms.
    """
    if not www_authenticate:
        return None

    found = re.search(r'resource_metadata\s*=\s*\"?([^\", ]+)\"?', www_authenticate)
    return found.group(1) if found else None
64
+
65
+
66
def fetch_oauth_authorization_server_metadata(base_url: str, timeout: int = 10) -> Optional[Dict[str, Any]]:
    """
    Fetch OAuth authorization server metadata from well-known endpoints.

    Tries `/.well-known/oauth-authorization-server` first, then
    `/.well-known/openid-configuration`, and returns the JSON body of the
    first endpoint that answers with HTTP 200.

    Args:
        base_url: Authorization server base URL; a trailing slash is tolerated.
        timeout: Per-request timeout in seconds.

    Returns:
        The parsed metadata document, or None when neither endpoint yields one.
    """
    # Avoid "//.well-known/..." when the caller passes a URL ending in "/".
    root = base_url.rstrip("/")

    for document_name in ("oauth-authorization-server", "openid-configuration"):
        endpoint = f"{root}/.well-known/{document_name}"
        try:
            resp = requests.get(endpoint, timeout=timeout)
            if resp.status_code == 200:
                return resp.json()
        except Exception as exc:
            # Best effort: a failing endpoint just means we try the next one.
            logger.debug("Failed to fetch OAuth metadata from %s: %s", endpoint, exc)

    return None
86
+
87
+
88
def infer_authorization_servers_from_realm(www_authenticate: Optional[str], server_url: str) -> Optional[list]:
    """
    Infer authorization server URLs from the MCP server URL.

    This is used when the server doesn't provide a resource_metadata endpoint.
    The result is the server's origin (`scheme://host[:port]`); the client is
    expected to append the `.well-known` discovery paths itself.

    Args:
        www_authenticate: WWW-Authenticate header value. Kept for interface
            compatibility; the result is derived from `server_url` only.
        server_url: URL of the MCP server.

    Returns:
        A one-element list with the origin, or None when `server_url` is empty
        or has no scheme/host to build an origin from.
    """
    if not server_url:
        return None

    parsed = urlparse(server_url)
    if not parsed.scheme or not parsed.netloc:
        # Without both parts the "origin" would be a meaningless "://".
        return None

    return [f"{parsed.scheme}://{parsed.netloc}"]
114
+
115
+
116
def fetch_resource_metadata(resource_metadata_url: str, timeout: int = 10) -> Optional[Dict[str, Any]]:
    """Download the protected resource metadata document and return it parsed.

    Any failure (network error, non-success status, invalid JSON) is logged as
    a warning and reported as None, so the caller can still surface the
    authorization requirement.
    """
    try:
        response = requests.get(resource_metadata_url, timeout=timeout)
        response.raise_for_status()
        document = response.json()
    except Exception as error:  # deliberately broad, see docstring
        logger.warning("Failed to fetch resource metadata from %s: %s", resource_metadata_url, error)
        document = None
    return document
125
+
126
+
127
async def fetch_resource_metadata_async(resource_metadata_url: str, session=None, timeout: int = 10) -> Optional[Dict[str, Any]]:
    """Asynchronously download and parse the protected resource metadata.

    Uses the given `session` when one is supplied, otherwise opens a temporary
    aiohttp session for the single request. Returns None (after logging a
    warning) when the request fails or the body is not valid JSON.
    """
    try:
        import aiohttp

        request_timeout = aiohttp.ClientTimeout(total=timeout)
        if not session:
            # No session from the caller: use a short-lived one of our own.
            async with aiohttp.ClientSession(timeout=request_timeout) as own_session:
                async with own_session.get(resource_metadata_url) as response:
                    body = await response.text()
        else:
            async with session.get(resource_metadata_url, timeout=request_timeout) as response:
                body = await response.text()

        try:
            document = json.loads(body)
        except json.JSONDecodeError:
            logger.warning("Resource metadata at %s is not valid JSON: %s", resource_metadata_url, body[:200])
            document = None
        return document
    except Exception as error:
        logger.warning("Failed to fetch resource metadata from %s: %s", resource_metadata_url, error)
        return None
149
+
150
+
151
def canonical_resource(server_url: str) -> str:
    """Produce a canonical resource identifier for the MCP server.

    The scheme and host are lower-cased (userinfo, path, query and fragment
    keep their casing), and a bare root path "/" is dropped so that
    "https://Host/" and "https://host" map to the same identifier.

    Args:
        server_url: URL of the MCP server.

    Returns:
        The normalized URL string.
    """
    parsed = urlparse(server_url)

    # Only the host is case-insensitive; "user:password@" must be left alone.
    userinfo, at_sign, host_port = parsed.netloc.rpartition("@")
    normalized = parsed._replace(
        scheme=parsed.scheme.lower(),
        netloc=f"{userinfo}{at_sign}{host_port.lower()}",
    )
    resource = normalized.geturl()

    # Prefer form without trailing slash unless path is meaningful. The slash
    # is only stripped when it really is the path: with params, a query or a
    # fragment present, the final "/" may belong to one of those instead.
    has_suffix = bool(parsed.params or parsed.query or parsed.fragment)
    if resource.endswith("/") and parsed.path in ("", "/") and not has_suffix:
        resource = resource[:-1]
    return resource