alita-sdk 0.3.486__py3-none-any.whl → 0.3.515__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of alita-sdk might be problematic.
- alita_sdk/cli/agent_loader.py +27 -6
- alita_sdk/cli/agents.py +10 -1
- alita_sdk/cli/inventory.py +12 -195
- alita_sdk/cli/tools/filesystem.py +95 -9
- alita_sdk/community/inventory/__init__.py +12 -0
- alita_sdk/community/inventory/toolkit.py +9 -5
- alita_sdk/community/inventory/toolkit_utils.py +176 -0
- alita_sdk/configurations/ado.py +144 -0
- alita_sdk/configurations/confluence.py +76 -42
- alita_sdk/configurations/figma.py +76 -0
- alita_sdk/configurations/gitlab.py +2 -0
- alita_sdk/configurations/qtest.py +72 -1
- alita_sdk/configurations/report_portal.py +96 -0
- alita_sdk/configurations/sharepoint.py +148 -0
- alita_sdk/configurations/testio.py +83 -0
- alita_sdk/runtime/clients/artifact.py +2 -2
- alita_sdk/runtime/clients/client.py +64 -40
- alita_sdk/runtime/clients/sandbox_client.py +14 -0
- alita_sdk/runtime/langchain/assistant.py +48 -2
- alita_sdk/runtime/langchain/constants.py +3 -1
- alita_sdk/runtime/langchain/document_loaders/AlitaExcelLoader.py +103 -60
- alita_sdk/runtime/langchain/document_loaders/AlitaJSONLinesLoader.py +77 -0
- alita_sdk/runtime/langchain/document_loaders/AlitaJSONLoader.py +2 -1
- alita_sdk/runtime/langchain/document_loaders/constants.py +12 -7
- alita_sdk/runtime/langchain/langraph_agent.py +10 -10
- alita_sdk/runtime/langchain/utils.py +6 -1
- alita_sdk/runtime/toolkits/artifact.py +14 -5
- alita_sdk/runtime/toolkits/datasource.py +13 -6
- alita_sdk/runtime/toolkits/mcp.py +94 -219
- alita_sdk/runtime/toolkits/planning.py +13 -6
- alita_sdk/runtime/toolkits/tools.py +60 -25
- alita_sdk/runtime/toolkits/vectorstore.py +11 -5
- alita_sdk/runtime/tools/artifact.py +185 -23
- alita_sdk/runtime/tools/function.py +2 -1
- alita_sdk/runtime/tools/llm.py +155 -34
- alita_sdk/runtime/tools/mcp_remote_tool.py +25 -10
- alita_sdk/runtime/tools/mcp_server_tool.py +2 -4
- alita_sdk/runtime/tools/vectorstore_base.py +3 -3
- alita_sdk/runtime/utils/AlitaCallback.py +136 -21
- alita_sdk/runtime/utils/mcp_client.py +492 -0
- alita_sdk/runtime/utils/mcp_oauth.py +125 -8
- alita_sdk/runtime/utils/mcp_sse_client.py +35 -6
- alita_sdk/runtime/utils/mcp_tools_discovery.py +124 -0
- alita_sdk/runtime/utils/toolkit_utils.py +7 -13
- alita_sdk/runtime/utils/utils.py +2 -0
- alita_sdk/tools/__init__.py +15 -0
- alita_sdk/tools/ado/repos/__init__.py +10 -12
- alita_sdk/tools/ado/test_plan/__init__.py +23 -8
- alita_sdk/tools/ado/wiki/__init__.py +24 -8
- alita_sdk/tools/ado/wiki/ado_wrapper.py +21 -7
- alita_sdk/tools/ado/work_item/__init__.py +24 -8
- alita_sdk/tools/advanced_jira_mining/__init__.py +10 -8
- alita_sdk/tools/aws/delta_lake/__init__.py +12 -9
- alita_sdk/tools/aws/delta_lake/tool.py +5 -1
- alita_sdk/tools/azure_ai/search/__init__.py +9 -7
- alita_sdk/tools/base/tool.py +5 -1
- alita_sdk/tools/base_indexer_toolkit.py +26 -1
- alita_sdk/tools/bitbucket/__init__.py +14 -10
- alita_sdk/tools/bitbucket/api_wrapper.py +50 -2
- alita_sdk/tools/browser/__init__.py +5 -4
- alita_sdk/tools/carrier/__init__.py +5 -6
- alita_sdk/tools/chunkers/sematic/json_chunker.py +1 -0
- alita_sdk/tools/chunkers/sematic/markdown_chunker.py +2 -0
- alita_sdk/tools/chunkers/universal_chunker.py +1 -0
- alita_sdk/tools/cloud/aws/__init__.py +9 -7
- alita_sdk/tools/cloud/azure/__init__.py +9 -7
- alita_sdk/tools/cloud/gcp/__init__.py +9 -7
- alita_sdk/tools/cloud/k8s/__init__.py +9 -7
- alita_sdk/tools/code/linter/__init__.py +9 -8
- alita_sdk/tools/code/loaders/codesearcher.py +3 -2
- alita_sdk/tools/code/sonar/__init__.py +9 -7
- alita_sdk/tools/confluence/__init__.py +15 -10
- alita_sdk/tools/confluence/api_wrapper.py +63 -14
- alita_sdk/tools/custom_open_api/__init__.py +11 -5
- alita_sdk/tools/elastic/__init__.py +10 -8
- alita_sdk/tools/elitea_base.py +387 -9
- alita_sdk/tools/figma/__init__.py +8 -7
- alita_sdk/tools/github/__init__.py +12 -14
- alita_sdk/tools/github/github_client.py +68 -2
- alita_sdk/tools/github/tool.py +5 -1
- alita_sdk/tools/gitlab/__init__.py +14 -11
- alita_sdk/tools/gitlab/api_wrapper.py +81 -1
- alita_sdk/tools/gitlab_org/__init__.py +9 -8
- alita_sdk/tools/google/bigquery/__init__.py +12 -12
- alita_sdk/tools/google/bigquery/tool.py +5 -1
- alita_sdk/tools/google_places/__init__.py +9 -8
- alita_sdk/tools/jira/__init__.py +15 -10
- alita_sdk/tools/keycloak/__init__.py +10 -8
- alita_sdk/tools/localgit/__init__.py +8 -3
- alita_sdk/tools/localgit/local_git.py +62 -54
- alita_sdk/tools/localgit/tool.py +5 -1
- alita_sdk/tools/memory/__init__.py +11 -3
- alita_sdk/tools/ocr/__init__.py +10 -8
- alita_sdk/tools/openapi/__init__.py +6 -2
- alita_sdk/tools/pandas/__init__.py +9 -7
- alita_sdk/tools/postman/__init__.py +10 -11
- alita_sdk/tools/pptx/__init__.py +9 -9
- alita_sdk/tools/qtest/__init__.py +9 -8
- alita_sdk/tools/rally/__init__.py +9 -8
- alita_sdk/tools/report_portal/__init__.py +11 -9
- alita_sdk/tools/salesforce/__init__.py +9 -9
- alita_sdk/tools/servicenow/__init__.py +10 -8
- alita_sdk/tools/sharepoint/__init__.py +9 -8
- alita_sdk/tools/sharepoint/api_wrapper.py +2 -2
- alita_sdk/tools/slack/__init__.py +8 -7
- alita_sdk/tools/sql/__init__.py +9 -8
- alita_sdk/tools/testio/__init__.py +9 -8
- alita_sdk/tools/testrail/__init__.py +10 -8
- alita_sdk/tools/utils/__init__.py +9 -4
- alita_sdk/tools/utils/text_operations.py +254 -0
- alita_sdk/tools/vector_adapters/VectorStoreAdapter.py +16 -18
- alita_sdk/tools/xray/__init__.py +10 -8
- alita_sdk/tools/yagmail/__init__.py +8 -3
- alita_sdk/tools/zephyr/__init__.py +8 -7
- alita_sdk/tools/zephyr_enterprise/__init__.py +10 -8
- alita_sdk/tools/zephyr_essential/__init__.py +9 -8
- alita_sdk/tools/zephyr_scale/__init__.py +9 -8
- alita_sdk/tools/zephyr_squad/__init__.py +9 -8
- {alita_sdk-0.3.486.dist-info → alita_sdk-0.3.515.dist-info}/METADATA +1 -1
- {alita_sdk-0.3.486.dist-info → alita_sdk-0.3.515.dist-info}/RECORD +124 -119
- {alita_sdk-0.3.486.dist-info → alita_sdk-0.3.515.dist-info}/WHEEL +0 -0
- {alita_sdk-0.3.486.dist-info → alita_sdk-0.3.515.dist-info}/entry_points.txt +0 -0
- {alita_sdk-0.3.486.dist-info → alita_sdk-0.3.515.dist-info}/licenses/LICENSE +0 -0
- {alita_sdk-0.3.486.dist-info → alita_sdk-0.3.515.dist-info}/top_level.txt +0 -0
--- a/alita_sdk/runtime/toolkits/tools.py
+++ b/alita_sdk/runtime/toolkits/tools.py
@@ -21,7 +21,7 @@ from ..tools.image_generation import ImageGenerationToolkit
 from ...community import get_toolkits as community_toolkits, get_tools as community_tools
 from ...tools.memory import MemoryToolkit
 from ..utils.mcp_oauth import canonical_resource, McpAuthorizationRequired
-from ...tools.utils import
+from ...tools.utils import clean_string
 from alita_sdk.tools import _inject_toolkit_id
 
 logger = logging.getLogger(__name__)
@@ -41,7 +41,7 @@ def get_toolkits():
     return core_toolkits + community_toolkits() + alita_toolkits()
 
 
-def get_tools(tools_list: list, alita_client=None, llm=None, memory_store: BaseStore = None, debug_mode: Optional[bool] = False, mcp_tokens: Optional[dict] = None, conversation_id: Optional[str] = None) -> list:
+def get_tools(tools_list: list, alita_client=None, llm=None, memory_store: BaseStore = None, debug_mode: Optional[bool] = False, mcp_tokens: Optional[dict] = None, conversation_id: Optional[str] = None, ignored_mcp_servers: Optional[list] = None) -> list:
     prompts = []
     tools = []
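Note: the new ignored_mcp_servers parameter is matched later in this file against both the raw server URL and its canonical form from canonical_resource. A runnable sketch of that matching; canonical() below is a simplified stand-in, not the SDK's canonical_resource:

from urllib.parse import urlsplit, urlunsplit

def canonical(url: str) -> str:
    # Simplified stand-in for canonical_resource(): lowercase scheme and host,
    # drop query/fragment, strip the trailing slash from the path.
    parts = urlsplit(url)
    return urlunsplit((parts.scheme.lower(), parts.netloc.lower(),
                       parts.path.rstrip('/'), '', ''))

ignored_mcp_servers = ['https://mcp.example.com/sse']
url = 'HTTPS://MCP.example.com/sse/'
# Same check shape as the diff: canonical form first, raw URL as fallback.
print(canonical(url) in ignored_mcp_servers or url in ignored_mcp_servers)  # True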
@@ -94,6 +94,11 @@ def get_tools(tools_list: list, alita_client=None, llm=None, memory_store: BaseS
             else:
                 logger.warning("Image generation internal tool requested "
                                "but no image generation model configured")
+        elif tool['name'] == 'planner':
+            tools += PlanningToolkit.get_toolkit(
+                pgvector_configuration=tool.get('settings', {}).get('pgvector_configuration'),
+                conversation_id=conversation_id,
+            ).get_tools()
         elif tool['type'] == 'artifact':
             toolkit_tools = ArtifactToolkit.get_toolkit(
                 client=alita_client,
@@ -105,10 +110,11 @@ def get_tools(tools_list: list, alita_client=None, llm=None, memory_store: BaseS
                 pgvector_configuration=tool['settings'].get('pgvector_configuration', {}),
                 embedding_model=tool['settings'].get('embedding_model'),
                 collection_name=f"{tool.get('toolkit_name')}",
-                collection_schema=str(tool['id'
+                collection_schema=str(tool['settings'].get('id', tool.get('id', ''))),
             ).get_tools()
             # Inject toolkit_id for artifact tools as well
-
+            # Pass settings as the tool config since that's where the id field is
+            _inject_toolkit_id(tool['settings'], toolkit_tools)
             tools.extend(toolkit_tools)
 
         elif tool['type'] == 'vectorstore':
@@ -118,27 +124,55 @@ def get_tools(tools_list: list, alita_client=None, llm=None, memory_store: BaseS
                 **tool['settings']).get_tools())
         elif tool['type'] == 'planning':
             # Planning toolkit for multi-step task tracking
-
-
-            if
-
-
-
-
-
-
+            settings = tool.get('settings', {})
+
+            # Check if local mode is enabled (uses filesystem storage, ignores pgvector)
+            use_local = settings.get('local', False)
+
+            if use_local:
+                # Local mode - use filesystem storage
+                logger.info("Planning toolkit using local filesystem storage (local=true)")
+                pgvector_config = {}
+            else:
+                # Check if explicit connection_string is provided in pgvector_configuration
+                explicit_pgvector_config = settings.get('pgvector_configuration', {})
+                explicit_connstr = explicit_pgvector_config.get('connection_string') if explicit_pgvector_config else None
+
+                if explicit_connstr:
+                    # Use explicitly provided connection string (overrides project secrets)
+                    logger.info("Using explicit connection_string for planning toolkit")
+                    pgvector_config = explicit_pgvector_config
+                else:
+                    # Try to fetch pgvector_project_connstr from project secrets
+                    pgvector_connstr = None
+                    if alita_client:
+                        try:
+                            pgvector_connstr = alita_client.unsecret('pgvector_project_connstr')
+                            if pgvector_connstr:
+                                logger.info("Using pgvector_project_connstr for planning toolkit")
+                        except Exception as e:
+                            logger.debug(f"pgvector_project_connstr not available: {e}")
+
+                    pgvector_config = {'connection_string': pgvector_connstr} if pgvector_connstr else {}
 
-            pgvector_config = {'connection_string': pgvector_connstr} if pgvector_connstr else {}
             tools.extend(PlanningToolkit.get_toolkit(
                 toolkit_name=tool.get('toolkit_name', ''),
-                selected_tools=
+                selected_tools=settings.get('selected_tools', []),
                 pgvector_configuration=pgvector_config,
-                conversation_id=conversation_id or
+                conversation_id=conversation_id or settings.get('conversation_id'),
             ).get_tools())
         elif tool['type'] == 'mcp':
             # remote mcp tool initialization with token injection
             settings = dict(tool['settings'])
             url = settings.get('url')
+
+            # Check if this MCP server should be ignored (user chose to continue without auth)
+            if ignored_mcp_servers and url:
+                canonical_url = canonical_resource(url)
+                if canonical_url in ignored_mcp_servers or url in ignored_mcp_servers:
+                    logger.info(f"[MCP Auth] Skipping ignored MCP server: {url}")
+                    continue
+
             headers = settings.get('headers')
             token_data = None
             session_id = None
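Note: the planning branch resolves its pgvector configuration in a fixed order — local mode wins, then an explicit connection_string, then the pgvector_project_connstr project secret. A standalone restatement of that order; resolve_pgvector_config and the unsecret callable are illustrative, not SDK API:

import logging

logger = logging.getLogger(__name__)

def resolve_pgvector_config(settings: dict, unsecret=None) -> dict:
    # 1) local=true -> filesystem storage, empty pgvector config
    if settings.get('local', False):
        return {}
    # 2) explicit connection_string in settings wins over project secrets
    explicit = settings.get('pgvector_configuration', {}) or {}
    if explicit.get('connection_string'):
        return explicit
    # 3) fall back to the project secret, tolerating its absence
    connstr = None
    if unsecret is not None:
        try:
            connstr = unsecret('pgvector_project_connstr')
        except Exception as e:
            logger.debug(f"pgvector_project_connstr not available: {e}")
    return {'connection_string': connstr} if connstr else {}

# Explicit connection string takes precedence over the secret:
print(resolve_pgvector_config(
    {'pgvector_configuration': {'connection_string': 'postgresql://localhost/planning'}},
    unsecret=lambda name: 'postgresql://secret-host/planning',
))  # {'connection_string': 'postgresql://localhost/planning'}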
@@ -183,12 +217,6 @@ def get_tools(tools_list: list, alita_client=None, llm=None, memory_store: BaseS
             # Re-raise auth required exceptions directly
             raise
         except Exception as e:
-            # Check for wrapped McpAuthorizationRequired
-            if hasattr(e, '__cause__') and isinstance(e.__cause__, McpAuthorizationRequired):
-                raise e.__cause__
-            # Check exception class name as fallback
-            if e.__class__.__name__ == 'McpAuthorizationRequired':
-                raise
             logger.error(f"Error initializing toolkit for tool '{tool.get('name', 'unknown')}': {e}", exc_info=True)
             if debug_mode:
                 logger.info("Skipping tool initialization error due to debug mode.")
@@ -300,11 +328,18 @@ def _mcp_tools(tools_list, alita):
 
 def _init_single_mcp_tool(server_toolkit_name, toolkit_name, available_tool, alita, toolkit_settings):
     try:
-
-        tool_name =
+        # Use clean tool name without prefix
+        tool_name = available_tool["name"]
+        # Add toolkit context to description (max 1000 chars)
+        toolkit_context = f" [Toolkit: {clean_string(toolkit_name)}]" if toolkit_name else ''
+        base_description = f"MCP for a tool '{tool_name}': {available_tool.get('description', '')}"
+        description = base_description
+        if toolkit_context and len(base_description + toolkit_context) <= 1000:
+            description = base_description + toolkit_context
+
         return McpServerTool(
             name=tool_name,
-            description=
+            description=description,
             args_schema=McpServerTool.create_pydantic_model_from_schema(
                 available_tool.get("inputSchema", {})
             ),
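Note: this hunk and the vectorstore toolkit changes below apply the same rule — append a "[Toolkit: ...]" suffix to a tool description only when the combined string stays within 1000 characters. A minimal sketch of the capping rule (the SDK also passes the name through clean_string, omitted here):

def with_toolkit_context(description: str, toolkit_name: str, limit: int = 1000) -> str:
    # Append the suffix only if the result still fits within the limit.
    context = f" [Toolkit: {toolkit_name}]" if toolkit_name else ''
    if context and len(description + context) <= limit:
        return description + context
    return description

print(with_toolkit_context("Searches the index.", "my-kb"))
# Searches the index. [Toolkit: my-kb]
print(len(with_toolkit_context("x" * 995, "long-toolkit-name")))
# 995 -- suffix dropped, it would push the description past 1000 chars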
--- a/alita_sdk/runtime/toolkits/vectorstore.py
+++ b/alita_sdk/runtime/toolkits/vectorstore.py
@@ -1,7 +1,7 @@
 from logging import getLogger
 from typing import Any, List, Literal, Optional
 
-from alita_sdk.tools.utils import clean_string
+from alita_sdk.tools.utils import clean_string
 from pydantic import BaseModel, create_model, Field, ConfigDict
 from langchain_core.tools import BaseToolkit, BaseTool
 from alita_sdk.tools.base.tool import BaseAction
@@ -31,7 +31,8 @@ class VectorStoreToolkit(BaseToolkit):
                     toolkit_name: Optional[str] = None,
                     selected_tools: list[str] = []):
         logger.info("Selected tools: %s", selected_tools)
-
+        # Use clean toolkit name for context (max 1000 chars in description)
+        toolkit_context = f" [Toolkit: {clean_string(toolkit_name)}]" if toolkit_name else ''
         if selected_tools is None:
             selected_tools = []
         tools = []
@@ -46,11 +47,16 @@ class VectorStoreToolkit(BaseToolkit):
             # if selected_tools:
             #     if tool["name"] not in selected_tools:
             #         continue
+            # Add toolkit context to description with character limit
+            description = tool["description"]
+            if toolkit_context and len(description + toolkit_context) <= 1000:
+                description = description + toolkit_context
             tools.append(BaseAction(
                 api_wrapper=vectorstore_wrapper,
-                name=
-                description=
-                args_schema=tool["args_schema"]
+                name=tool["name"],
+                description=description,
+                args_schema=tool["args_schema"],
+                metadata={"toolkit_name": toolkit_name} if toolkit_name else {}
             ))
         return cls(tools=tools)
--- a/alita_sdk/runtime/tools/artifact.py
+++ b/alita_sdk/runtime/tools/artifact.py
@@ -13,6 +13,7 @@ from pydantic import create_model, Field, model_validator
 
 from ...tools.non_code_indexer_toolkit import NonCodeIndexerToolkit
 from ...tools.utils.available_tools_decorator import extend_with_parent_available_tools
+from ...tools.elitea_base import extend_with_file_operations, BaseCodeToolApiWrapper
 from ...runtime.utils.utils import IndexerKeywords
 
 
@@ -20,6 +21,12 @@ class ArtifactWrapper(NonCodeIndexerToolkit):
     bucket: str
     artifact: Optional[Any] = None
 
+    # Import file operation methods from BaseCodeToolApiWrapper
+    read_file_chunk = BaseCodeToolApiWrapper.read_file_chunk
+    read_multiple_files = BaseCodeToolApiWrapper.read_multiple_files
+    search_file = BaseCodeToolApiWrapper.search_file
+    edit_file = BaseCodeToolApiWrapper.edit_file
+
     @model_validator(mode='before')
     @classmethod
     def validate_toolkit(cls, values):
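Note: assigning BaseCodeToolApiWrapper functions as class attributes shares their implementation without inheritance — in Python 3, SomeClass.method is a plain function, so it binds to ArtifactWrapper instances as long as they provide whatever attributes the method touches. A toy illustration of the pattern:

class Reader:
    def head(self, n: int = 1):
        # Depends only on self.lines, so any class exposing .lines can borrow it.
        return self.lines[:n]

class Doc:
    head = Reader.head  # borrowed implementation, no inheritance involved

    def __init__(self, text: str):
        self.lines = text.splitlines()

print(Doc("a\nb\nc").head(2))  # ['a', 'b']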
@@ -31,31 +38,77 @@ class ArtifactWrapper(NonCodeIndexerToolkit):
         return super().validate_toolkit(values)
 
     def list_files(self, bucket_name = None, return_as_string = True):
-
+        """List all files in the artifact bucket with API download links."""
+        result = self.artifact.list(bucket_name, return_as_string=False)
+
+        # Add API download link to each file
+        if isinstance(result, dict) and 'rows' in result:
+            bucket = bucket_name or self.bucket
+
+            # Get base_url and project_id from alita client
+            base_url = getattr(self.alita, 'base_url', '').rstrip('/')
+            project_id = getattr(self.alita, 'project_id', '')
+
+            for file_info in result['rows']:
+                if 'name' in file_info:
+                    # Generate API download link
+                    file_name = file_info['name']
+                    file_info['link'] = f"{base_url}/api/v2/artifacts/artifact/default/{project_id}/{bucket}/{file_name}"
+
+        return str(result) if return_as_string else result
 
     def create_file(self, filename: str, filedata: str, bucket_name = None):
-
+        # Sanitize filename to prevent regex errors during indexing
+        sanitized_filename, was_modified = self._sanitize_filename(filename)
+        if was_modified:
+            logging.warning(f"Filename sanitized: '{filename}' -> '{sanitized_filename}'")
+
+        if sanitized_filename.endswith(".xlsx"):
             data = json.loads(filedata)
             filedata = self.create_xlsx_filedata(data)
 
-        result = self.artifact.create(
+        result = self.artifact.create(sanitized_filename, filedata, bucket_name)
 
         # Dispatch custom event for file creation
-
-
-
-
-
-
-
-        "
-
-
-            "bucket": bucket_name or self.bucket
-            }
-        }""")
+        dispatch_custom_event("file_modified", {
+            "message": f"File '{filename}' created successfully",
+            "filename": filename,
+            "tool_name": "createFile",
+            "toolkit": "artifact",
+            "operation_type": "create",
+            "meta": {
+                "bucket": bucket_name or self.bucket
+            }
+        })
 
         return result
+
+    @staticmethod
+    def _sanitize_filename(filename: str) -> tuple:
+        """Sanitize filename for safe storage and regex pattern matching."""
+        from pathlib import Path
+
+        if not filename or not filename.strip():
+            return "unnamed_file", True
+
+        original = filename
+        path_obj = Path(filename)
+        name = path_obj.stem
+        extension = path_obj.suffix
+
+        # Whitelist: alphanumeric, underscore, hyphen, space, Unicode letters/digits
+        sanitized_name = re.sub(r'[^\w\s-]', '', name, flags=re.UNICODE)
+        sanitized_name = re.sub(r'[-\s]+', '-', sanitized_name)
+        sanitized_name = sanitized_name.strip('-').strip()
+
+        if not sanitized_name:
+            sanitized_name = "file"
+
+        if extension:
+            extension = re.sub(r'[^\w.-]', '', extension, flags=re.UNICODE)
+
+        sanitized = sanitized_name + extension
+        return sanitized, (sanitized != original)
 
     def create_xlsx_filedata(self, data: dict[str, list[list]]) -> bytes:
         try:
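Note: _sanitize_filename whitelists word characters, spaces, and hyphens in the stem, collapses runs of hyphens and whitespace into a single hyphen, and strips unsafe characters from the extension. A runnable copy of the same logic, showing its effect on a couple of inputs:

import re
from pathlib import Path

def sanitize_filename(filename: str) -> tuple:
    # Same whitelist approach as _sanitize_filename in the diff above.
    if not filename or not filename.strip():
        return "unnamed_file", True
    original = filename
    p = Path(filename)
    name, extension = p.stem, p.suffix
    name = re.sub(r'[^\w\s-]', '', name, flags=re.UNICODE)   # drop unsafe chars
    name = re.sub(r'[-\s]+', '-', name).strip('-').strip()   # collapse runs
    if not name:
        name = "file"
    if extension:
        extension = re.sub(r'[^\w.-]', '', extension, flags=re.UNICODE)
    sanitized = name + extension
    return sanitized, (sanitized != original)

print(sanitize_filename("Q3 report (final)?.xlsx"))  # ('Q3-report-final.xlsx', True)
print(sanitize_filename("notes.md"))                 # ('notes.md', False)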
@@ -99,6 +152,94 @@ class ArtifactWrapper(NonCodeIndexerToolkit):
                                      sheet_name=sheet_name,
                                      excel_by_sheets=excel_by_sheets,
                                      llm=self.llm)
+
+    def _read_file(
+        self,
+        file_path: str,
+        branch: str = None,
+        bucket_name: str = None,
+        **kwargs
+    ) -> str:
+        """
+        Read a file from artifact bucket with optional partial read support.
+
+        Parameters:
+            file_path: Name of the file in the bucket
+            branch: Not used for artifacts (kept for API consistency)
+            bucket_name: Name of the bucket (uses default if None)
+            **kwargs: Additional parameters (offset, limit, head, tail) - currently ignored,
+                      partial read handled client-side by base class methods
+
+        Returns:
+            File content as string
+        """
+        return self.read_file(filename=file_path, bucket_name=bucket_name)
+
+    def _write_file(
+        self,
+        file_path: str,
+        content: str,
+        branch: str = None,
+        commit_message: str = None,
+        bucket_name: str = None
+    ) -> str:
+        """
+        Write content to a file (create or overwrite).
+
+        Parameters:
+            file_path: Name of the file in the bucket
+            content: New file content
+            branch: Not used for artifacts (kept for API consistency)
+            commit_message: Not used for artifacts (kept for API consistency)
+            bucket_name: Name of the bucket (uses default if None)
+
+        Returns:
+            Success message
+        """
+        try:
+            # Sanitize filename
+            sanitized_filename, was_modified = self._sanitize_filename(file_path)
+            if was_modified:
+                logging.warning(f"Filename sanitized: '{file_path}' -> '{sanitized_filename}'")
+
+            # Check if file exists
+            try:
+                self.artifact.get(artifact_name=sanitized_filename, bucket_name=bucket_name, llm=self.llm)
+                # File exists, overwrite it
+                result = self.artifact.overwrite(sanitized_filename, content, bucket_name)
+
+                # Dispatch custom event
+                dispatch_custom_event("file_modified", {
+                    "message": f"File '{sanitized_filename}' updated successfully",
+                    "filename": sanitized_filename,
+                    "tool_name": "edit_file",
+                    "toolkit": "artifact",
+                    "operation_type": "modify",
+                    "meta": {
+                        "bucket": bucket_name or self.bucket
+                    }
+                })
+
+                return f"Updated file {sanitized_filename}"
+            except:
+                # File doesn't exist, create it
+                result = self.artifact.create(sanitized_filename, content, bucket_name)
+
+                # Dispatch custom event
+                dispatch_custom_event("file_modified", {
+                    "message": f"File '{sanitized_filename}' created successfully",
+                    "filename": sanitized_filename,
+                    "tool_name": "edit_file",
+                    "toolkit": "artifact",
+                    "operation_type": "create",
+                    "meta": {
+                        "bucket": bucket_name or self.bucket
+                    }
+                })
+
+                return f"Created file {sanitized_filename}"
+        except Exception as e:
+            raise ToolException(f"Unable to write file {file_path}: {str(e)}")
 
     def delete_file(self, filename: str, bucket_name = None):
         return self.artifact.delete(filename, bucket_name)
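Note: _write_file probes for the file with artifact.get and treats any exception as "not found", falling back to create; otherwise it overwrites. The same create-or-overwrite flow with an in-memory dict standing in for the artifact client (illustrative only):

store = {}

def write_file(name: str, content: str) -> str:
    if name in store:          # in the SDK: artifact.get(...) raising on a miss
        store[name] = content  # artifact.overwrite(...)
        return f"Updated file {name}"
    store[name] = content      # artifact.create(...)
    return f"Created file {name}"

print(write_file("plan.md", "v1"))  # Created file plan.md
print(write_file("plan.md", "v2"))  # Updated file plan.md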
@@ -138,7 +279,11 @@ class ArtifactWrapper(NonCodeIndexerToolkit):
         return result
 
     def create_new_bucket(self, bucket_name: str, expiration_measure = "weeks", expiration_value = 1):
-
+        # Sanitize bucket name: replace underscores with hyphens and ensure lowercase
+        sanitized_name = bucket_name.replace('_', '-').lower()
+        if sanitized_name != bucket_name:
+            logging.warning(f"Bucket name '{bucket_name}' was sanitized to '{sanitized_name}' (underscores replaced with hyphens, converted to lowercase)")
+        return self.artifact.client.create_bucket(sanitized_name, expiration_measure, expiration_value)
 
     def _index_tool_params(self):
         return {
@@ -173,13 +318,13 @@ class ArtifactWrapper(NonCodeIndexerToolkit):
             file_name = file['name']
 
             # Check if file should be skipped based on skip_extensions
-            if any(re.match(pattern.replace('
+            if any(re.match(re.escape(pattern).replace(r'\*', '.*') + '$', file_name, re.IGNORECASE)
                    for pattern in skip_extensions):
                 continue
 
             # Check if file should be included based on include_extensions
             # If include_extensions is empty, process all files (that weren't skipped)
-            if include_extensions and not (any(re.match(pattern.replace('
+            if include_extensions and not (any(re.match(re.escape(pattern).replace(r'\*', '.*') + '$', file_name, re.IGNORECASE)
                                                for pattern in include_extensions)):
                 continue
 
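Note: the filter previously built regexes from glob-style patterns without escaping (the removed lines are truncated above), so regex metacharacters leaked through — a leading "*" is not even a valid regex. The fixed conversion escapes the pattern first, then restores the escaped "*" wildcard as ".*":

import re

def glob_to_regex(pattern: str) -> str:
    # Escape metacharacters, then turn the escaped '*' back into '.*'
    # and anchor the end -- the same expression the diff now uses.
    return re.escape(pattern).replace(r'\*', '.*') + '$'

print(bool(re.match(glob_to_regex('*.pdf'), 'report.pdf', re.IGNORECASE)))  # True
print(bool(re.match(glob_to_regex('*.pdf'), 'reportxpdf', re.IGNORECASE)))  # False: '.' is literal now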
@@ -207,14 +352,17 @@ class ArtifactWrapper(NonCodeIndexerToolkit):
                 logging.error(f"Failed while parsing the file '{document.metadata['name']}': {e}")
             yield document
 
-    @
+    @extend_with_file_operations
     def get_available_tools(self):
+        """Get available tools. Returns all tools for schema; filtering happens at toolkit level."""
         bucket_name = (Optional[str], Field(description="Name of the bucket to work with."
                        "If bucket is not specified by user directly, the name should be taken from chat history."
                        "If bucket never mentioned in chat, the name will be taken from tool configuration."
                        " ***IMPORTANT*** Underscore `_` is prohibited in bucket name and should be replaced by `-`",
                        default=None))
-
+
+        # Basic artifact tools (always available)
+        basic_tools = [
             {
                 "ref": self.list_files,
                 "name": "listFiles",
@@ -299,11 +447,25 @@ class ArtifactWrapper(NonCodeIndexerToolkit):
                 "description": "Creates new bucket specified by user.",
                 "args_schema": create_model(
                     "createNewBucket",
-                    bucket_name=(str, Field(
+                    bucket_name=(str, Field(
+                        description="Bucket name to create. Must start with lowercase letter and contain only lowercase letters, numbers, and hyphens. Underscores will be automatically converted to hyphens.",
+                        pattern=r'^[a-z][a-z0-9_-]*$'  # Allow underscores in input, will be sanitized
+                    )),
                     expiration_measure=(Optional[str], Field(description="Measure of expiration time for bucket configuration."
                                                              "Possible values: `days`, `weeks`, `months`, `years`.",
                                                              default="weeks")),
                     expiration_value=(Optional[int], Field(description="Expiration time values.", default=1))
                 )
             }
-        ]
+        ]
+
+        # Always include indexing tools in available tools list
+        # Filtering based on vector store config happens at toolkit level via decorator
+        try:
+            # Get indexing tools from parent class
+            indexing_tools = super(ArtifactWrapper, self).get_available_tools()
+            return indexing_tools + basic_tools
+        except Exception as e:
+            # If getting parent tools fails, log warning and return basic tools only
+            logging.warning(f"Failed to load indexing tools: {e}. Only basic artifact tools will be available.")
+            return basic_tools
--- a/alita_sdk/runtime/tools/function.py
+++ b/alita_sdk/runtime/tools/function.py
@@ -107,7 +107,8 @@ class FunctionTool(BaseTool):
 
         # special handler for PyodideSandboxTool
         if self._is_pyodide_tool():
-
+            # replace new lines in strings in code block
+            code = func_args['code'].replace('\\n', '\\\\n')
             func_args['code'] = f"{self._prepare_pyodide_input(state)}\n{code}"
         try:
             tool_result = self.tool.invoke(func_args, config, **kwargs)