alita-sdk 0.3.379__py3-none-any.whl → 0.3.462__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of alita-sdk might be problematic.
- alita_sdk/cli/__init__.py +10 -0
- alita_sdk/cli/__main__.py +17 -0
- alita_sdk/cli/agent_executor.py +144 -0
- alita_sdk/cli/agent_loader.py +197 -0
- alita_sdk/cli/agent_ui.py +166 -0
- alita_sdk/cli/agents.py +1069 -0
- alita_sdk/cli/callbacks.py +576 -0
- alita_sdk/cli/cli.py +159 -0
- alita_sdk/cli/config.py +153 -0
- alita_sdk/cli/formatting.py +182 -0
- alita_sdk/cli/mcp_loader.py +315 -0
- alita_sdk/cli/toolkit.py +330 -0
- alita_sdk/cli/toolkit_loader.py +55 -0
- alita_sdk/cli/tools/__init__.py +9 -0
- alita_sdk/cli/tools/filesystem.py +905 -0
- alita_sdk/configurations/bitbucket.py +95 -0
- alita_sdk/configurations/confluence.py +96 -1
- alita_sdk/configurations/gitlab.py +79 -0
- alita_sdk/configurations/jira.py +103 -0
- alita_sdk/configurations/testrail.py +88 -0
- alita_sdk/configurations/xray.py +93 -0
- alita_sdk/configurations/zephyr_enterprise.py +93 -0
- alita_sdk/configurations/zephyr_essential.py +75 -0
- alita_sdk/runtime/clients/client.py +47 -10
- alita_sdk/runtime/clients/mcp_discovery.py +342 -0
- alita_sdk/runtime/clients/mcp_manager.py +262 -0
- alita_sdk/runtime/clients/sandbox_client.py +8 -0
- alita_sdk/runtime/langchain/assistant.py +37 -16
- alita_sdk/runtime/langchain/constants.py +6 -1
- alita_sdk/runtime/langchain/document_loaders/AlitaDocxMammothLoader.py +315 -3
- alita_sdk/runtime/langchain/document_loaders/AlitaJSONLoader.py +4 -1
- alita_sdk/runtime/langchain/document_loaders/constants.py +28 -12
- alita_sdk/runtime/langchain/langraph_agent.py +146 -31
- alita_sdk/runtime/langchain/utils.py +39 -7
- alita_sdk/runtime/models/mcp_models.py +61 -0
- alita_sdk/runtime/toolkits/__init__.py +24 -0
- alita_sdk/runtime/toolkits/application.py +8 -1
- alita_sdk/runtime/toolkits/artifact.py +5 -6
- alita_sdk/runtime/toolkits/mcp.py +895 -0
- alita_sdk/runtime/toolkits/tools.py +137 -56
- alita_sdk/runtime/tools/__init__.py +7 -2
- alita_sdk/runtime/tools/application.py +7 -0
- alita_sdk/runtime/tools/function.py +29 -25
- alita_sdk/runtime/tools/graph.py +10 -4
- alita_sdk/runtime/tools/image_generation.py +104 -8
- alita_sdk/runtime/tools/llm.py +204 -114
- alita_sdk/runtime/tools/mcp_inspect_tool.py +284 -0
- alita_sdk/runtime/tools/mcp_remote_tool.py +166 -0
- alita_sdk/runtime/tools/mcp_server_tool.py +3 -1
- alita_sdk/runtime/tools/sandbox.py +57 -43
- alita_sdk/runtime/tools/vectorstore.py +2 -1
- alita_sdk/runtime/tools/vectorstore_base.py +19 -3
- alita_sdk/runtime/utils/mcp_oauth.py +164 -0
- alita_sdk/runtime/utils/mcp_sse_client.py +405 -0
- alita_sdk/runtime/utils/streamlit.py +34 -3
- alita_sdk/runtime/utils/toolkit_utils.py +14 -4
- alita_sdk/tools/__init__.py +46 -31
- alita_sdk/tools/ado/repos/__init__.py +1 -0
- alita_sdk/tools/ado/test_plan/__init__.py +1 -1
- alita_sdk/tools/ado/wiki/__init__.py +1 -5
- alita_sdk/tools/ado/work_item/__init__.py +1 -5
- alita_sdk/tools/ado/work_item/ado_wrapper.py +17 -8
- alita_sdk/tools/base_indexer_toolkit.py +105 -43
- alita_sdk/tools/bitbucket/__init__.py +1 -0
- alita_sdk/tools/chunkers/sematic/proposal_chunker.py +1 -1
- alita_sdk/tools/code/sonar/__init__.py +1 -1
- alita_sdk/tools/code_indexer_toolkit.py +13 -3
- alita_sdk/tools/confluence/__init__.py +2 -2
- alita_sdk/tools/confluence/api_wrapper.py +29 -7
- alita_sdk/tools/confluence/loader.py +10 -0
- alita_sdk/tools/github/__init__.py +2 -2
- alita_sdk/tools/gitlab/__init__.py +2 -1
- alita_sdk/tools/gitlab/api_wrapper.py +11 -7
- alita_sdk/tools/gitlab_org/__init__.py +1 -2
- alita_sdk/tools/google_places/__init__.py +2 -1
- alita_sdk/tools/jira/__init__.py +1 -0
- alita_sdk/tools/jira/api_wrapper.py +1 -1
- alita_sdk/tools/memory/__init__.py +1 -1
- alita_sdk/tools/openapi/__init__.py +10 -1
- alita_sdk/tools/pandas/__init__.py +1 -1
- alita_sdk/tools/postman/__init__.py +2 -1
- alita_sdk/tools/pptx/__init__.py +2 -2
- alita_sdk/tools/qtest/__init__.py +3 -3
- alita_sdk/tools/qtest/api_wrapper.py +1708 -76
- alita_sdk/tools/rally/__init__.py +1 -2
- alita_sdk/tools/report_portal/__init__.py +1 -0
- alita_sdk/tools/salesforce/__init__.py +1 -0
- alita_sdk/tools/servicenow/__init__.py +2 -3
- alita_sdk/tools/sharepoint/__init__.py +1 -0
- alita_sdk/tools/sharepoint/api_wrapper.py +125 -34
- alita_sdk/tools/sharepoint/authorization_helper.py +191 -1
- alita_sdk/tools/sharepoint/utils.py +8 -2
- alita_sdk/tools/slack/__init__.py +1 -0
- alita_sdk/tools/sql/__init__.py +2 -1
- alita_sdk/tools/testio/__init__.py +1 -0
- alita_sdk/tools/testrail/__init__.py +1 -3
- alita_sdk/tools/utils/content_parser.py +27 -16
- alita_sdk/tools/vector_adapters/VectorStoreAdapter.py +18 -5
- alita_sdk/tools/xray/__init__.py +2 -1
- alita_sdk/tools/zephyr/__init__.py +2 -1
- alita_sdk/tools/zephyr_enterprise/__init__.py +1 -0
- alita_sdk/tools/zephyr_essential/__init__.py +1 -0
- alita_sdk/tools/zephyr_scale/__init__.py +1 -0
- alita_sdk/tools/zephyr_squad/__init__.py +1 -0
- {alita_sdk-0.3.379.dist-info → alita_sdk-0.3.462.dist-info}/METADATA +8 -2
- {alita_sdk-0.3.379.dist-info → alita_sdk-0.3.462.dist-info}/RECORD +110 -86
- alita_sdk-0.3.462.dist-info/entry_points.txt +2 -0
- {alita_sdk-0.3.379.dist-info → alita_sdk-0.3.462.dist-info}/WHEEL +0 -0
- {alita_sdk-0.3.379.dist-info → alita_sdk-0.3.462.dist-info}/licenses/LICENSE +0 -0
- {alita_sdk-0.3.379.dist-info → alita_sdk-0.3.462.dist-info}/top_level.txt +0 -0
alita_sdk/runtime/tools/mcp_remote_tool.py (new file)
@@ -0,0 +1,166 @@
+"""
+MCP Remote Tool for direct HTTP/SSE invocation.
+This tool is used for remote MCP servers accessed via HTTP/SSE.
+"""
+
+import asyncio
+import json
+import logging
+import time
+import uuid
+from concurrent.futures import ThreadPoolExecutor
+from typing import Any, Dict, Optional
+
+from .mcp_server_tool import McpServerTool
+from pydantic import Field
+from ..utils.mcp_oauth import (
+    McpAuthorizationRequired,
+    canonical_resource,
+    extract_resource_metadata_url,
+    fetch_resource_metadata_async,
+    infer_authorization_servers_from_realm,
+)
+from ..utils.mcp_sse_client import McpSseClient
+
+logger = logging.getLogger(__name__)
+
+
+class McpRemoteTool(McpServerTool):
+    """
+    Tool for invoking remote MCP server tools via HTTP/SSE.
+    Extends McpServerTool and overrides _run to use direct HTTP calls instead of client.mcp_tool_call.
+    """
+
+    # Remote MCP connection details
+    server_url: str = Field(..., description="URL of the remote MCP server")
+    server_headers: Optional[Dict[str, str]] = Field(default=None, description="HTTP headers for authentication")
+    original_tool_name: Optional[str] = Field(default=None, description="Original tool name from MCP server (before optimization)")
+    is_prompt: bool = False  # Flag to indicate if this is a prompt tool
+    prompt_name: Optional[str] = None  # Original prompt name if this is a prompt
+    session_id: Optional[str] = Field(default=None, description="MCP session ID for stateful SSE servers")
+
+    def model_post_init(self, __context: Any) -> None:
+        """Update metadata with session info after model initialization."""
+        super().model_post_init(__context)
+        self._update_metadata_with_session()
+
+    def _update_metadata_with_session(self):
+        """Update the metadata dict with current session information."""
+        if self.session_id:
+            if self.metadata is None:
+                self.metadata = {}
+            self.metadata.update({
+                'mcp_session_id': self.session_id,
+                'mcp_server_url': canonical_resource(self.server_url)
+            })
+
+    def __getstate__(self):
+        """Custom serialization for pickle compatibility."""
+        state = super().__getstate__()
+        # Ensure headers are serializable
+        if 'server_headers' in state and state['server_headers'] is not None:
+            state['server_headers'] = dict(state['server_headers'])
+        return state
+
+    def _run(self, *args, **kwargs):
+        """
+        Execute the MCP tool via direct HTTP/SSE call to the remote server.
+        Overrides the parent method to avoid using client.mcp_tool_call.
+        """
+        try:
+            # Always create a new event loop for sync context
+            with ThreadPoolExecutor() as executor:
+                future = executor.submit(self._run_in_new_loop, kwargs)
+                return future.result(timeout=self.tool_timeout_sec)
+        except McpAuthorizationRequired:
+            # Bubble up so LangChain can surface a tool error with useful metadata
+            raise
+        except Exception as e:
+            logger.error(f"Error executing remote MCP tool '{self.name}': {e}")
+            return f"Error executing tool: {e}"
+
+    def _run_in_new_loop(self, kwargs: Dict[str, Any]) -> str:
+        """Run the async tool invocation in a new event loop."""
+        return asyncio.run(self._execute_remote_tool(kwargs))
+
+    async def _execute_remote_tool(self, kwargs: Dict[str, Any]) -> str:
+        """Execute the actual remote MCP tool call using SSE client."""
+        from ...tools.utils import TOOLKIT_SPLITTER
+
+        # Check for session_id requirement
+        if not self.session_id:
+            logger.error(f"[MCP Session] Missing session_id for tool '{self.name}'")
+            raise Exception("sessionId required. Frontend must generate UUID and send with mcp_tokens.")
+
+        # Use the original tool name from discovery for MCP server invocation
+        tool_name_for_server = self.original_tool_name
+        if not tool_name_for_server:
+            tool_name_for_server = self.name.rsplit(TOOLKIT_SPLITTER, 1)[-1] if TOOLKIT_SPLITTER in self.name else self.name
+            logger.warning(f"original_tool_name not set for '{self.name}', using extracted: {tool_name_for_server}")
+
+        logger.info(f"[MCP SSE] Executing tool '{tool_name_for_server}' with session {self.session_id}")
+
+        try:
+            # Prepare headers
+            headers = {}
+            if self.server_headers:
+                headers.update(self.server_headers)
+
+            # Create SSE client
+            client = McpSseClient(
+                url=self.server_url,
+                session_id=self.session_id,
+                headers=headers,
+                timeout=self.tool_timeout_sec
+            )
+
+            # Execute tool call via SSE
+            result = await client.call_tool(tool_name_for_server, kwargs)
+
+            # Format the result
+            if isinstance(result, dict):
+                # Check for content array (common in MCP responses)
+                if "content" in result:
+                    content_items = result["content"]
+                    if isinstance(content_items, list):
+                        # Extract text from content items
+                        text_parts = []
+                        for item in content_items:
+                            if isinstance(item, dict):
+                                if item.get("type") == "text" and "text" in item:
+                                    text_parts.append(item["text"])
+                                elif "text" in item:
+                                    text_parts.append(item["text"])
+                                else:
+                                    text_parts.append(json.dumps(item))
+                            else:
+                                text_parts.append(str(item))
+                        return "\n".join(text_parts)
+
+                # Return formatted JSON if no content field
+                return json.dumps(result, indent=2)
+
+            # Return as string for other types
+            return str(result)
+
+        except Exception as e:
+            logger.error(f"[MCP SSE] Tool execution failed: {e}", exc_info=True)
+            raise
+
+    def _parse_sse(self, text: str) -> Dict[str, Any]:
+        """Parse Server-Sent Events (SSE) format response."""
+        for line in text.split('\n'):
+            line = line.strip()
+            if line.startswith('data:'):
+                json_str = line[5:].strip()
+                return json.loads(json_str)
+        raise ValueError("No data found in SSE response")
+
+    def get_session_metadata(self) -> dict:
+        """Return session metadata to be included in tool responses."""
+        if self.session_id:
+            return {
+                'mcp_session_id': self.session_id,
+                'mcp_server_url': canonical_resource(self.server_url)
+            }
+        return {}
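For readers skimming the new file above: the result-formatting branch of _execute_remote_tool flattens an MCP-style "content" array into plain text for the LLM. A minimal standalone sketch of that behavior (the result payload here is illustrative, not taken from a real server):

    import json

    result = {"content": [
        {"type": "text", "text": "first chunk"},
        {"type": "image", "url": "https://example.com/x.png"},  # non-text items get JSON-dumped
    ]}

    text_parts = []
    for item in result["content"]:
        if isinstance(item, dict) and "text" in item:
            text_parts.append(item["text"])
        elif isinstance(item, dict):
            text_parts.append(json.dumps(item))
        else:
            text_parts.append(str(item))

    print("\n".join(text_parts))
    # first chunk
    # {"type": "image", "url": "https://example.com/x.png"}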
alita_sdk/runtime/tools/mcp_server_tool.py
@@ -3,7 +3,7 @@ from logging import getLogger
 from typing import Any, Type, Literal, Optional, Union, List
 
 from langchain_core.tools import BaseTool
-from pydantic import BaseModel, Field, create_model, EmailStr, constr
+from pydantic import BaseModel, Field, create_model, EmailStr, constr, ConfigDict
 
 from ...tools.utils import TOOLKIT_SPLITTER
 
@@ -19,6 +19,7 @@ class McpServerTool(BaseTool):
     server: str
     tool_timeout_sec: int = 60
 
+    model_config = ConfigDict(arbitrary_types_allowed=True)
 
     @staticmethod
     def create_pydantic_model_from_schema(schema: dict, model_name: str = "ArgsSchema"):
@@ -90,6 +91,7 @@ class McpServerTool(BaseTool):
         return create_model(model_name, **fields)
 
     def _run(self, *args, **kwargs):
+        # Extract the actual tool/prompt name (remove toolkit prefix)
         call_data = {
             "server": self.server,
             "tool_timeout_sec": self.tool_timeout_sec,
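The new model_config = ConfigDict(arbitrary_types_allowed=True) line matters because pydantic v2 refuses to build a validation schema for field types it does not recognize. A minimal sketch of the failure mode it avoids (class names here are hypothetical, not from the SDK):

    from typing import Optional
    from pydantic import BaseModel, ConfigDict

    class RawHandle:  # any plain class pydantic cannot generate a schema for
        pass

    class ToolState(BaseModel):
        model_config = ConfigDict(arbitrary_types_allowed=True)
        # Without the config line above, this field raises
        # PydanticSchemaGenerationError at class-definition time.
        handle: Optional[RawHandle] = None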
alita_sdk/runtime/tools/sandbox.py
@@ -2,9 +2,12 @@ import asyncio
 import logging
 import subprocess
 import os
-from typing import Any, Type, Optional, Dict, List, Literal
+from typing import Any, Type, Optional, Dict, List, Literal, Union
+from copy import deepcopy
+from pathlib import Path
 
 from langchain_core.tools import BaseTool, BaseToolkit
+from langchain_core.messages import ToolCall
 from pydantic import BaseModel, create_model, ConfigDict, Field
 from pydantic.fields import FieldInfo
 
@@ -19,7 +22,7 @@ def get_tools(tools_list: list, alita_client=None, llm=None, memory_store=None):
 
     Args:
         tools_list: List of tool configurations
-        alita_client: Alita client instance
+        alita_client: Alita client instance for sandbox tools
         llm: LLM client instance (unused for sandbox)
         memory_store: Optional memory store instance (unused for sandbox)
 
@@ -34,6 +37,7 @@ def get_tools(tools_list: list, alita_client=None, llm=None, memory_store=None):
             toolkit_instance = SandboxToolkit.get_toolkit(
                 stateful=tool['settings'].get('stateful', False),
                 allow_net=tool['settings'].get('allow_net', True),
+                alita_client=alita_client,
                 toolkit_name=tool.get('toolkit_name', '')
             )
             all_tools.extend(toolkit_instance.get_tools())
@@ -60,36 +64,10 @@ def _is_deno_available() -> bool:
 
 
 def _setup_pyodide_cache_env() -> None:
-    """Setup Pyodide caching environment variables for performance optimization"""
+    """Setup Pyodide caching environment variables for performance optimization [NO-OP]"""
     try:
-
-
-        if os.path.exists(cache_env_file):
-            with open(cache_env_file, 'r') as f:
-                for line in f:
-                    line = line.strip()
-                    if line.startswith('export ') and '=' in line:
-                        # Parse export VAR=value format
-                        var_assignment = line[7:]  # Remove 'export '
-                        if '=' in var_assignment:
-                            key, value = var_assignment.split('=', 1)
-                            # Remove quotes if present
-                            value = value.strip('"').strip("'")
-                            os.environ[key] = value
-                            logger.debug(f"Set Pyodide cache env: {key}={value}")
-
-        # Set default caching environment variables if not already set
-        cache_defaults = {
-            'PYODIDE_PACKAGES_PATH': os.path.expanduser('~/.cache/pyodide'),
-            'DENO_DIR': os.path.expanduser('~/.cache/deno'),
-            'PYODIDE_CACHE_DIR': os.path.expanduser('~/.cache/pyodide'),
-        }
-
-        for key, default_value in cache_defaults.items():
-            if key not in os.environ:
-                os.environ[key] = default_value
-                logger.debug(f"Set default Pyodide env: {key}={default_value}")
-
+        for key in ["SANDBOX_BASE", "DENO_DIR"]:
+            logger.info("Sandbox env: %s -> %s", key, os.environ.get(key, "n/a"))
     except Exception as e:
         logger.warning(f"Could not setup Pyodide cache environment: {e}")
 
@@ -126,6 +104,7 @@ class PyodideSandboxTool(BaseTool):
     allow_net: bool = True
     session_bytes: Optional[bytes] = None
     session_metadata: Optional[Dict] = None
+    alita_client: Optional[Any] = None
 
     def __init__(self, **kwargs: Any) -> None:
         super().__init__(**kwargs)
@@ -134,6 +113,28 @@ class PyodideSandboxTool(BaseTool):
         _setup_pyodide_cache_env()
         self._initialize_sandbox()
 
+    def _prepare_pyodide_input(self, code: str) -> str:
+        """Prepare input for PyodideSandboxTool by injecting state and alita_client into the code block."""
+        pyodide_predata = ""
+
+        # Add alita_client if available
+        if self.alita_client:
+            try:
+                # Get the directory of the current file and construct the path to sandbox_client.py
+                current_dir = Path(__file__).parent
+                sandbox_client_path = current_dir.parent / 'clients' / 'sandbox_client.py'
+
+                with open(sandbox_client_path, 'r') as f:
+                    sandbox_client_code = f.read()
+                pyodide_predata += f"{sandbox_client_code}\n"
+                pyodide_predata += (f"alita_client = SandboxClient(base_url='{self.alita_client.base_url}',"
+                                    f"project_id={self.alita_client.project_id},"
+                                    f"auth_token='{self.alita_client.auth_token}')\n")
+            except FileNotFoundError:
+                logger.error(f"sandbox_client.py not found. Ensure the file exists.")
+
+        return f"#elitea simplified client\n{pyodide_predata}{code}"
+
     def _initialize_sandbox(self) -> None:
         """Initialize the PyodideSandbox instance with optimized settings"""
         try:
@@ -148,9 +149,19 @@ class PyodideSandboxTool(BaseTool):
 
             from langchain_sandbox import PyodideSandbox
 
+            # Air-gapped settings
+            sandbox_base = os.environ.get("SANDBOX_BASE", os.path.expanduser('~/.cache/pyodide'))
+            sandbox_tmp = os.path.join(sandbox_base, "tmp")
+            deno_cache = os.environ.get("DENO_DIR", os.path.expanduser('~/.cache/deno'))
+
             # Configure sandbox with performance optimizations
             self._sandbox = PyodideSandbox(
                 stateful=self.stateful,
+                #
+                allow_env=["SANDBOX_BASE"],
+                allow_read=[sandbox_base, sandbox_tmp, deno_cache],
+                allow_write=[sandbox_tmp, deno_cache],
+                #
                 allow_net=self.allow_net,
                 # Use auto node_modules_dir for better caching
                 node_modules_dir="auto"
@@ -180,6 +191,9 @@ class PyodideSandboxTool(BaseTool):
         if self._sandbox is None:
             self._initialize_sandbox()
 
+        # Prepare code with state and client injection
+        prepared_code = self._prepare_pyodide_input(code)
+
         # Check if we're already in an async context
         try:
             loop = asyncio.get_running_loop()
@@ -187,11 +201,11 @@ class PyodideSandboxTool(BaseTool):
             # We'll need to use a different approach
             import concurrent.futures
             with concurrent.futures.ThreadPoolExecutor() as executor:
-                future = executor.submit(asyncio.run, self._arun(code))
+                future = executor.submit(asyncio.run, self._arun(prepared_code))
                 return future.result()
         except RuntimeError:
             # No running loop, safe to use asyncio.run
-            return asyncio.run(self._arun(code))
+            return asyncio.run(self._arun(prepared_code))
         except (ImportError, RuntimeError) as e:
             # Handle specific dependency errors gracefully
             error_msg = str(e)
@@ -250,7 +264,7 @@ class PyodideSandboxTool(BaseTool):
 
         except Exception as e:
             logger.error(f"Error executing code in sandbox: {e}")
-            return f"Error executing code: {str(e)}"
+            return {"error": f"Error executing code: {str(e)}"}
 
 
 class StatefulPyodideSandboxTool(PyodideSandboxTool):
@@ -278,7 +292,7 @@ class StatefulPyodideSandboxTool(PyodideSandboxTool):
 
 
 # Factory function for creating sandbox tools
-def create_sandbox_tool(stateful: bool = False, allow_net: bool = True) -> BaseTool:
+def create_sandbox_tool(stateful: bool = False, allow_net: bool = True, alita_client: Optional[Any] = None) -> BaseTool:
     """
     Factory function to create sandbox tools with specified configuration.
 
@@ -302,22 +316,22 @@ def create_sandbox_tool(stateful: bool = False, allow_net: bool = True) -> BaseT
     - Cached wheels reduce package download time from ~4.76s to near-instant
     """
     if stateful:
-        return StatefulPyodideSandboxTool(allow_net=allow_net)
+        return StatefulPyodideSandboxTool(allow_net=allow_net, alita_client=alita_client)
     else:
-        return PyodideSandboxTool(stateful=False, allow_net=allow_net)
+        return PyodideSandboxTool(stateful=False, allow_net=allow_net, alita_client=alita_client)
 
 
 class SandboxToolkit(BaseToolkit):
     tools: List[BaseTool] = []
 
     @staticmethod
-    def toolkit_config_schema() -> BaseModel:
+    def toolkit_config_schema() -> Type[BaseModel]:
         # Create sample tools to get their schemas
         sample_tools = [
             PyodideSandboxTool(),
             StatefulPyodideSandboxTool()
         ]
-        selected_tools = {x.name: x.args_schema.
+        selected_tools = {x.name: x.args_schema.model_json_schema() for x in sample_tools}
 
         return create_model(
             'sandbox',
@@ -338,24 +352,24 @@ class SandboxToolkit(BaseToolkit):
         )
 
     @classmethod
-    def get_toolkit(cls, stateful: bool = False, allow_net: bool = True, **kwargs):
+    def get_toolkit(cls, stateful: bool = False, allow_net: bool = True, alita_client=None, **kwargs):
         """
         Get toolkit with sandbox tools.
 
         Args:
             stateful: Whether to maintain state between executions
            allow_net: Whether to allow network access
+            alita_client: Alita client instance for sandbox tools
             **kwargs: Additional arguments
         """
         tools = []
 
         if stateful:
-            tools.append(StatefulPyodideSandboxTool(allow_net=allow_net))
+            tools.append(StatefulPyodideSandboxTool(allow_net=allow_net, alita_client=alita_client))
         else:
-            tools.append(PyodideSandboxTool(stateful=False, allow_net=allow_net))
+            tools.append(PyodideSandboxTool(stateful=False, allow_net=allow_net, alita_client=alita_client))
 
         return cls(tools=tools)
 
     def get_tools(self):
         return self.tools
-
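Taken together, the sandbox changes thread an alita_client from the toolkit config down to the tool, so that _prepare_pyodide_input can prepend a SandboxClient bootstrap to whatever code the agent submits. A hedged usage sketch based on the factory signature shown in the diff (assuming the tool accepts a single code string as its input, which the _run/_arun changes suggest):

    from alita_sdk.runtime.tools.sandbox import create_sandbox_tool

    # With alita_client=None no client bootstrap is injected;
    # the submitted code runs in the Pyodide/Deno sandbox as-is.
    tool = create_sandbox_tool(stateful=False, allow_net=True, alita_client=None)
    print(tool.run("print(1 + 1)"))

Note the tightened Deno permissions in _initialize_sandbox: reads are limited to the SANDBOX_BASE and DENO_DIR trees and writes to their tmp/cache subdirectories, which is what makes the "air-gapped" deployment described in the comments workable.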
alita_sdk/runtime/tools/vectorstore.py
@@ -414,7 +414,8 @@ class VectorStoreWrapper(BaseToolApiWrapper):
             return {"status": "error", "message": f"Error: {format_exc()}"}
         if _documents:
             add_documents(vectorstore=self.vectorstore, documents=_documents)
-        return {"status": "ok", "message": f"successfully indexed {documents_count} documents"}
+        return {"status": "ok", "message": f"successfully indexed {documents_count} documents" if documents_count > 0
+                else "No new documents to index."}
 
     def search_documents(self, query:str, doctype: str = 'code',
                          filter:dict|str={}, cut_off: float=0.5,
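The hunk above turns the unconditional success message into one that is honest about empty batches; the pattern is an ordinary conditional expression used as a dict value:

    def index_status(documents_count: int) -> dict:
        # Conditional expression selects the message; the dict shape stays fixed.
        return {
            "status": "ok",
            "message": f"successfully indexed {documents_count} documents"
                       if documents_count > 0 else "No new documents to index.",
        }

    index_status(0)  # {'status': 'ok', 'message': 'No new documents to index.'}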
alita_sdk/runtime/tools/vectorstore_base.py
@@ -1,9 +1,9 @@
 import json
-import math
 from collections import OrderedDict
 from logging import getLogger
 from typing import Any, Optional, List, Dict, Generator
 
+import math
 from langchain_core.documents import Document
 from langchain_core.messages import HumanMessage
 from langchain_core.tools import ToolException
@@ -12,7 +12,7 @@ from pydantic import BaseModel, model_validator, Field
 
 from alita_sdk.tools.elitea_base import BaseToolApiWrapper
 from alita_sdk.tools.vector_adapters.VectorStoreAdapter import VectorStoreAdapterFactory
-from 
+from ...runtime.utils.utils import IndexerKeywords
 
 logger = getLogger(__name__)
 
@@ -222,6 +222,21 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
             raise RuntimeError(f"Multiple index_meta documents found: {index_metas}")
         return index_metas[0] if index_metas else None
 
+    def get_indexed_count(self, index_name: str) -> int:
+        from sqlalchemy.orm import Session
+        from sqlalchemy import func, or_
+
+        with Session(self.vectorstore.session_maker.bind) as session:
+            return session.query(
+                self.vectorstore.EmbeddingStore.id,
+            ).filter(
+                func.jsonb_extract_path_text(self.vectorstore.EmbeddingStore.cmetadata, 'collection') == index_name,
+                or_(
+                    func.jsonb_extract_path_text(self.vectorstore.EmbeddingStore.cmetadata, 'type').is_(None),
+                    func.jsonb_extract_path_text(self.vectorstore.EmbeddingStore.cmetadata, 'type') != IndexerKeywords.INDEX_META_TYPE.value
+                )
+            ).count()
+
     def _clean_collection(self, index_name: str = ''):
         """
         Clean the vectorstore collection by deleting all indexed data.
@@ -308,7 +323,8 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
             return {"status": "error", "message": f"Error: {format_exc()}"}
         if _documents:
             add_documents(vectorstore=self.vectorstore, documents=_documents)
-        return {"status": "ok", "message": f"successfully indexed {documents_count} documents"}
+        return {"status": "ok", "message": f"successfully indexed {documents_count} documents" if documents_count > 0
+                else "no documents to index"}
 
     def search_documents(self, query:str, doctype: str = 'code',
                          filter:dict|str={}, cut_off: float=0.5,
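The new get_indexed_count counts embedding rows belonging to one logical collection while excluding the index-meta bookkeeping row. Roughly the SQL that query compiles to, as a hedged sketch (the table and column names assume langchain-postgres' default EmbeddingStore layout, which the cmetadata JSONB column suggests):

    from sqlalchemy import text

    # Illustrative equivalent of the ORM query above; parameter names are mine.
    equivalent_sql = text("""
        SELECT count(*)
        FROM langchain_pg_embedding
        WHERE jsonb_extract_path_text(cmetadata, 'collection') = :index_name
          AND (jsonb_extract_path_text(cmetadata, 'type') IS NULL
               OR jsonb_extract_path_text(cmetadata, 'type') != :index_meta_type)
    """)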
alita_sdk/runtime/utils/mcp_oauth.py (new file)
@@ -0,0 +1,164 @@
+import json
+import logging
+import re
+from typing import Any, Dict, Optional
+from urllib.parse import urlparse
+
+import requests
+from langchain_core.tools import ToolException
+
+logger = logging.getLogger(__name__)
+
+
+class McpAuthorizationRequired(ToolException):
+    """Raised when an MCP server requires OAuth authorization before use."""
+
+    def __init__(
+        self,
+        message: str,
+        server_url: str,
+        resource_metadata_url: Optional[str] = None,
+        www_authenticate: Optional[str] = None,
+        resource_metadata: Optional[Dict[str, Any]] = None,
+        status: Optional[int] = None,
+        tool_name: Optional[str] = None,
+    ):
+        super().__init__(message)
+        self.server_url = server_url
+        self.resource_metadata_url = resource_metadata_url
+        self.www_authenticate = www_authenticate
+        self.resource_metadata = resource_metadata
+        self.status = status
+        self.tool_name = tool_name
+
+    def to_dict(self) -> Dict[str, Any]:
+        return {
+            "message": str(self),
+            "server_url": self.server_url,
+            "resource_metadata_url": self.resource_metadata_url,
+            "www_authenticate": self.www_authenticate,
+            "resource_metadata": self.resource_metadata,
+            "status": self.status,
+            "tool_name": self.tool_name,
+        }
+
+
+def extract_resource_metadata_url(www_authenticate: Optional[str], server_url: Optional[str] = None) -> Optional[str]:
+    """
+    Pull the resource_metadata URL from a WWW-Authenticate header if present.
+    If not found and server_url is provided, try to construct resource metadata URLs.
+    """
+    if not www_authenticate and not server_url:
+        return None
+
+    # RFC9728 returns `resource_metadata="<url>"` inside the header value
+    if www_authenticate:
+        match = re.search(r'resource_metadata\s*=\s*\"?([^\", ]+)\"?', www_authenticate)
+        if match:
+            return match.group(1)
+
+    # For servers that don't provide resource_metadata in WWW-Authenticate,
+    # we'll return None and rely on inferring authorization servers from the realm
+    # or using well-known OAuth discovery endpoints directly
+    return None
+
+
+def fetch_oauth_authorization_server_metadata(base_url: str, timeout: int = 10) -> Optional[Dict[str, Any]]:
+    """
+    Fetch OAuth authorization server metadata from well-known endpoints.
+    Tries both oauth-authorization-server and openid-configuration discovery endpoints.
+    """
+    discovery_endpoints = [
+        f"{base_url}/.well-known/oauth-authorization-server",
+        f"{base_url}/.well-known/openid-configuration",
+    ]
+
+    for endpoint in discovery_endpoints:
+        try:
+            resp = requests.get(endpoint, timeout=timeout)
+            if resp.status_code == 200:
+                return resp.json()
+        except Exception as exc:
+            logger.debug(f"Failed to fetch OAuth metadata from {endpoint}: {exc}")
+            continue
+
+    return None
+
+
+def infer_authorization_servers_from_realm(www_authenticate: Optional[str], server_url: str) -> Optional[list]:
+    """
+    Infer authorization server URLs from WWW-Authenticate realm or server URL.
+    This is used when the server doesn't provide resource_metadata endpoint.
+    """
+    if not www_authenticate and not server_url:
+        return None
+
+    authorization_servers = []
+
+    # Try to extract realm from WWW-Authenticate header
+    realm = None
+    if www_authenticate:
+        realm_match = re.search(r'realm\s*=\s*\"([^\"]+)\"', www_authenticate)
+        if realm_match:
+            realm = realm_match.group(1)
+
+    # Parse the server URL to get base domain
+    parsed = urlparse(server_url)
+    base_url = f"{parsed.scheme}://{parsed.netloc}"
+
+    # Return the base authorization server URL (not the discovery endpoint)
+    # The client will append .well-known paths when fetching metadata
+    authorization_servers.append(base_url)
+
+    return authorization_servers if authorization_servers else None
+
+
+def fetch_resource_metadata(resource_metadata_url: str, timeout: int = 10) -> Optional[Dict[str, Any]]:
+    """Fetch and parse the protected resource metadata document."""
+    try:
+        resp = requests.get(resource_metadata_url, timeout=timeout)
+        resp.raise_for_status()
+        return resp.json()
+    except Exception as exc:  # broad catch – we want to surface auth requirement even if this fails
+        logger.warning("Failed to fetch resource metadata from %s: %s", resource_metadata_url, exc)
+        return None
+
+
+async def fetch_resource_metadata_async(resource_metadata_url: str, session=None, timeout: int = 10) -> Optional[Dict[str, Any]]:
+    """Async variant for fetching protected resource metadata."""
+    try:
+        import aiohttp
+
+        client_timeout = aiohttp.ClientTimeout(total=timeout)
+        if session:
+            async with session.get(resource_metadata_url, timeout=client_timeout) as resp:
+                text = await resp.text()
+        else:
+            async with aiohttp.ClientSession(timeout=client_timeout) as local_session:
+                async with local_session.get(resource_metadata_url) as resp:
+                    text = await resp.text()
+
+        try:
+            return json.loads(text)
+        except json.JSONDecodeError:
+            logger.warning("Resource metadata at %s is not valid JSON: %s", resource_metadata_url, text[:200])
+            return None
+    except Exception as exc:
+        logger.warning("Failed to fetch resource metadata from %s: %s", resource_metadata_url, exc)
+        return None
+
+
+def canonical_resource(server_url: str) -> str:
+    """Produce a canonical resource identifier for the MCP server."""
+    parsed = urlparse(server_url)
+    # Normalize scheme/host casing per RFC guidance
+    normalized = parsed._replace(
+        scheme=parsed.scheme.lower(),
+        netloc=parsed.netloc.lower(),
+    )
+    resource = normalized.geturl()
+
+    # Prefer form without trailing slash unless path is meaningful
+    if resource.endswith("/") and parsed.path in ("", "/"):
+        resource = resource[:-1]
+    return resource
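A quick sanity-check of the two helpers most callers of this module will touch, with hypothetical values:

    from alita_sdk.runtime.utils.mcp_oauth import (
        canonical_resource,
        extract_resource_metadata_url,
    )

    # RFC 9728-style challenge header; the URL here is made up for illustration.
    header = 'Bearer resource_metadata="https://mcp.example.com/.well-known/oauth-protected-resource"'
    extract_resource_metadata_url(header)
    # -> 'https://mcp.example.com/.well-known/oauth-protected-resource'

    # Scheme/host are lowercased and a bare trailing slash is dropped.
    canonical_resource("HTTPS://MCP.Example.COM/")
    # -> 'https://mcp.example.com'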