alita-sdk 0.3.528__py3-none-any.whl → 0.3.554__py3-none-any.whl

This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registries.

Potentially problematic release: this version of alita-sdk has been flagged as potentially problematic.

Files changed (46)
  1. alita_sdk/community/__init__.py +8 -4
  2. alita_sdk/configurations/__init__.py +1 -0
  3. alita_sdk/configurations/openapi.py +111 -0
  4. alita_sdk/runtime/clients/client.py +185 -10
  5. alita_sdk/runtime/langchain/langraph_agent.py +2 -2
  6. alita_sdk/runtime/langchain/utils.py +46 -0
  7. alita_sdk/runtime/skills/__init__.py +91 -0
  8. alita_sdk/runtime/skills/callbacks.py +498 -0
  9. alita_sdk/runtime/skills/discovery.py +540 -0
  10. alita_sdk/runtime/skills/executor.py +610 -0
  11. alita_sdk/runtime/skills/input_builder.py +371 -0
  12. alita_sdk/runtime/skills/models.py +330 -0
  13. alita_sdk/runtime/skills/registry.py +355 -0
  14. alita_sdk/runtime/skills/skill_runner.py +330 -0
  15. alita_sdk/runtime/toolkits/__init__.py +2 -0
  16. alita_sdk/runtime/toolkits/skill_router.py +238 -0
  17. alita_sdk/runtime/toolkits/tools.py +76 -9
  18. alita_sdk/runtime/tools/__init__.py +3 -1
  19. alita_sdk/runtime/tools/artifact.py +70 -21
  20. alita_sdk/runtime/tools/image_generation.py +50 -44
  21. alita_sdk/runtime/tools/llm.py +363 -44
  22. alita_sdk/runtime/tools/loop.py +3 -1
  23. alita_sdk/runtime/tools/loop_output.py +3 -1
  24. alita_sdk/runtime/tools/skill_router.py +776 -0
  25. alita_sdk/runtime/tools/tool.py +3 -1
  26. alita_sdk/runtime/tools/vectorstore.py +7 -2
  27. alita_sdk/runtime/tools/vectorstore_base.py +7 -2
  28. alita_sdk/runtime/utils/AlitaCallback.py +2 -1
  29. alita_sdk/runtime/utils/utils.py +34 -0
  30. alita_sdk/tools/__init__.py +41 -1
  31. alita_sdk/tools/ado/work_item/ado_wrapper.py +33 -2
  32. alita_sdk/tools/base_indexer_toolkit.py +36 -24
  33. alita_sdk/tools/confluence/api_wrapper.py +5 -6
  34. alita_sdk/tools/confluence/loader.py +4 -2
  35. alita_sdk/tools/openapi/__init__.py +280 -120
  36. alita_sdk/tools/openapi/api_wrapper.py +883 -0
  37. alita_sdk/tools/openapi/tool.py +20 -0
  38. alita_sdk/tools/pandas/dataframe/generator/base.py +3 -1
  39. alita_sdk/tools/servicenow/__init__.py +9 -9
  40. alita_sdk/tools/servicenow/api_wrapper.py +1 -1
  41. {alita_sdk-0.3.528.dist-info → alita_sdk-0.3.554.dist-info}/METADATA +2 -2
  42. {alita_sdk-0.3.528.dist-info → alita_sdk-0.3.554.dist-info}/RECORD +46 -33
  43. {alita_sdk-0.3.528.dist-info → alita_sdk-0.3.554.dist-info}/WHEEL +0 -0
  44. {alita_sdk-0.3.528.dist-info → alita_sdk-0.3.554.dist-info}/entry_points.txt +0 -0
  45. {alita_sdk-0.3.528.dist-info → alita_sdk-0.3.554.dist-info}/licenses/LICENSE +0 -0
  46. {alita_sdk-0.3.528.dist-info → alita_sdk-0.3.554.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,238 @@
+"""
+SkillRouter Toolkit for configuring and accessing specialized skills.
+
+This toolkit provides a configurable way to set up the skill router with
+specific skills from filesystem or platform-hosted agents/pipelines.
+"""
+
+from typing import List, Optional, TYPE_CHECKING
+from pydantic import create_model, BaseModel, Field, ConfigDict
+from langchain_community.agent_toolkits.base import BaseToolkit
+from langchain_core.tools import BaseTool
+
+if TYPE_CHECKING:
+    from alita_sdk.clients import AlitaClient
+
+from alita_sdk.tools.base.tool import BaseAction
+from alita_sdk.tools.utils import clean_string
+from ..skills import SkillsRegistry, SkillMetadata, SkillType, SkillSource
+from ..tools.skill_router import SkillRouterWrapper
+
+
+class SkillConfig(BaseModel):
+    """Configuration for a single skill."""
+
+    # Platform skill fields (type is implicit from parent field: agents or pipelines)
+    id: int = Field(description="Platform ID (for agent/pipeline skills)")
+    version_id: int = Field(description="Platform version ID (for agent/pipeline skills)")
+    name: Optional[str] = Field(default=None, description="Skill name (optional override)")
+
+
+class SkillRouterToolkit(BaseToolkit):
+    """Toolkit for configuring skill router with specific skills."""
+
+    tools: List[BaseTool] = []
+
+    @staticmethod
+    def toolkit_config_schema() -> BaseModel:
+        """Define the configuration schema for the skill router toolkit."""
+        # Get available tools for selected_tools field
+        selected_tools_options = {x['name']: x['args_schema'].schema() for x in SkillRouterWrapper.model_construct().get_available_tools()}
+
+        return create_model(
+            "skill_router",
+            # Separate fields for agents and pipelines - optional but default to empty lists
+            agents=(Optional[List[SkillConfig]], Field(
+                description="List of agents to make available as skills",
+                default=[],
+                json_schema_extra={
+                    "agent_tags": ["skill"]
+                }
+            )),
+            pipelines=(Optional[List[SkillConfig]], Field(
+                description="List of pipelines to make available as skills",
+                default=[],
+                json_schema_extra={
+                    "pipeline_tags": ["skill"]
+                }
+            )),
+            prompt=(Optional[str], Field(
+                description="Custom system prompt for skill routing",
+                default="",
+                json_schema_extra={"lines": 4}
+            )),
+            timeout=(Optional[int], Field(description="Default timeout in seconds for skill execution", default=300)),
+            execution_mode=(Optional[str], Field(
+                description="Default execution mode for skills",
+                default=None,
+                json_schema_extra={"enum": ["subprocess", "remote"]}
+            )),
+            selected_tools=(List[str], Field(
+                description="List of tools to enable",
+                default=list(selected_tools_options.keys()),
+                json_schema_extra={'args_schemas': selected_tools_options}
+            )),
+            __config__=ConfigDict(json_schema_extra={'metadata': {"label": "Skill Router", "icon_url": None}})
+        )
+
+    @classmethod
+    def get_toolkit(
+        cls,
+        client: 'AlitaClient',
+        llm=None,
+        toolkit_name: Optional[str] = None,
+        selected_tools: List[str] = None,
+        agents: List[SkillConfig] = None,
+        pipelines: List[SkillConfig] = None,
+        prompt: Optional[str] = None,
+        timeout: Optional[int] = None,
+        execution_mode: Optional[str] = None
+    ):
+        """Create a skill router toolkit with configured skills."""
+
+        if selected_tools is None:
+            selected_tools = []
+
+        # Create a custom registry for this toolkit
+        registry = SkillsRegistry(search_paths=[])
+
+        # Helper function to process skill configs
+        def add_skills_to_registry(skill_configs, skill_type):
+            if skill_configs:
+                for skill_config_dict in skill_configs:
+                    # Convert dict to SkillConfig object
+                    skill_config = SkillConfig(**skill_config_dict)
+                    skill_metadata = cls._create_skill_from_config(skill_config, client, skill_type)
+                    if skill_metadata:
+                        # Add skill to registry manually
+                        registry.discovery.cache[skill_metadata.name] = skill_metadata
+
+        # Add configured agents (if provided)
+        add_skills_to_registry(agents or [], "agent")
+
+        # Add configured pipelines (if provided)
+        add_skills_to_registry(pipelines or [], "pipeline")
+
+        # Create skill router wrapper with custom configuration
+        wrapper = SkillRouterWrapper(
+            registry=registry,
+            alita_client=client,
+            llm=llm,
+            enable_callbacks=True,
+            default_timeout=timeout,
+            default_execution_mode=execution_mode,
+            custom_prompt=prompt
+        )
+
+        # Get available tools from wrapper
+        available_tools = wrapper.get_available_tools()
+
+        # Filter by selected_tools if provided
+        tools = []
+        toolkit_context = f" [Toolkit: {clean_string(toolkit_name, 0)}]" if toolkit_name else ''
+
+        for tool in available_tools:
+            if selected_tools:
+                if tool["name"] not in selected_tools:
+                    continue
+
+            # Add toolkit context to description with character limit
+            description = tool["description"]
+            if toolkit_context and len(description + toolkit_context) <= 1000:
+                description = description + toolkit_context
+
+            # Wrap in BaseAction
+            tools.append(BaseAction(
+                api_wrapper=wrapper,
+                name=tool["name"],
+                description=description,
+                args_schema=tool["args_schema"],
+                metadata={"toolkit_name": toolkit_name} if toolkit_name else {}
+            ))
+
+        return cls(tools=tools)
+
+    @classmethod
+    def _create_skill_from_config(cls, config: SkillConfig, client: 'AlitaClient', skill_type: str) -> Optional[SkillMetadata]:
+        """Create SkillMetadata from SkillConfig.
+
+        Args:
+            config: SkillConfig with id, version_id, and optional name
+            client: AlitaClient for fetching skill details
+            skill_type: Either "agent" or "pipeline" (from parent field)
+        """
+        try:
+            # Get skill details from platform
+            if skill_type == "agent":
+                skill_details = cls._get_agent_details(client, config.id, config.version_id)
+                metadata_type = SkillType.AGENT
+            else:  # pipeline
+                skill_details = cls._get_pipeline_details(client, config.id, config.version_id)
+                metadata_type = SkillType.PIPELINE
+
+            # Create SkillMetadata for platform skill
+            return SkillMetadata(
+                name=config.name or skill_details.get('name', f"{skill_type}_{config.id}"),
+                skill_type=metadata_type,
+                source=SkillSource.PLATFORM,
+                id=config.id,
+                version_id=config.version_id,
+                description=skill_details.get('description', ''),
+                capabilities=skill_details.get('capabilities', []),
+                tags=skill_details.get('tags', []),
+                version=skill_details.get('version', '1.0.0'),
+                # Set default execution config - platform skills run remotely
+                execution={"mode": "remote", "timeout": 300},
+                results={"format": "text_with_links"},
+                inputs={},
+                outputs={}
+            )
+
+        except Exception as e:
+            import logging
+            logging.getLogger(__name__).error(f"Failed to create skill from config {config}: {e}")
+            return None
+
+    @classmethod
+    def _get_agent_details(cls, client: 'AlitaClient', agent_id: int, version_id: int) -> dict:
+        """Get agent details from platform."""
+        try:
+            app_details = client.get_app_details(agent_id)
+            version_details = client.get_app_version_details(agent_id, version_id)
+
+            return {
+                'name': app_details.get('name', f'agent_{agent_id}'),
+                'description': app_details.get('description', ''),
+                'capabilities': [],  # Could be extracted from app metadata
+                'tags': [],  # Could be extracted from app metadata
+                'version': version_details.get('version', '1.0.0')
+            }
+        except Exception as e:
+            import logging
+            logging.getLogger(__name__).error(f"Failed to get agent details for {agent_id}/{version_id}: {e}")
+            return {'name': f'agent_{agent_id}', 'description': 'Platform-hosted agent'}
+
+    @classmethod
+    def _get_pipeline_details(cls, client: 'AlitaClient', pipeline_id: int, version_id: int) -> dict:
+        """Get pipeline details from platform."""
+        try:
+            # For now, use the same method as agents since they use the same API
+            # In the future, this might use a different endpoint for pipelines
+            app_details = client.get_app_details(pipeline_id)
+            version_details = client.get_app_version_details(pipeline_id, version_id)
+
+            return {
+                'name': app_details.get('name', f'pipeline_{pipeline_id}'),
+                'description': app_details.get('description', ''),
+                'capabilities': [],  # Could be extracted from pipeline metadata
+                'tags': [],  # Could be extracted from pipeline metadata
+                'version': version_details.get('version', '1.0.0')
+            }
+        except Exception as e:
+            import logging
+            logging.getLogger(__name__).error(f"Failed to get pipeline details for {pipeline_id}/{version_id}: {e}")
+            return {'name': f'pipeline_{pipeline_id}', 'description': 'Platform-hosted pipeline'}
+
+    def get_tools(self):
+        """Get the configured tools."""
+        return self.tools
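For orientation, here is a minimal usage sketch of the new toolkit. This is a hypothetical example, not code from the release: the alita_client variable, platform IDs, and skill names are invented, and plain dicts are passed for agents/pipelines because get_toolkit re-parses each entry through SkillConfig(**...).

    # Hypothetical wiring of SkillRouterToolkit (IDs and client are assumptions).
    from alita_sdk.runtime.toolkits.skill_router import SkillRouterToolkit

    toolkit = SkillRouterToolkit.get_toolkit(
        client=alita_client,  # an existing AlitaClient instance (assumed)
        llm=llm,              # optional LLM used for skill routing
        toolkit_name="my_skills",
        agents=[{"id": 42, "version_id": 7, "name": "triage_agent"}],
        pipelines=[{"id": 13, "version_id": 2}],
        timeout=120,
        execution_mode="remote",
    )
    tools = toolkit.get_tools()  # BaseAction-wrapped tools, ready for an agent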
@@ -14,6 +14,7 @@ from .prompt import PromptToolkit
 from .subgraph import SubgraphToolkit
 from .vectorstore import VectorStoreToolkit
 from .mcp import McpToolkit
+from .skill_router import SkillRouterToolkit
 from ..tools.mcp_server_tool import McpServerTool
 from ..tools.sandbox import SandboxToolkit
 from ..tools.image_generation import ImageGenerationToolkit
@@ -35,19 +36,50 @@ def get_toolkits():
         VectorStoreToolkit.toolkit_config_schema(),
         SandboxToolkit.toolkit_config_schema(),
         ImageGenerationToolkit.toolkit_config_schema(),
-        McpToolkit.toolkit_config_schema()
+        McpToolkit.toolkit_config_schema(),
+        SkillRouterToolkit.toolkit_config_schema()
     ]

     return core_toolkits + community_toolkits() + alita_toolkits()


 def get_tools(tools_list: list, alita_client=None, llm=None, memory_store: BaseStore = None, debug_mode: Optional[bool] = False, mcp_tokens: Optional[dict] = None, conversation_id: Optional[str] = None, ignored_mcp_servers: Optional[list] = None) -> list:
+    # Sanitize tools_list to handle corrupted tool configurations
+    sanitized_tools = []
+    for tool in tools_list:
+        if isinstance(tool, dict):
+            # Check for corrupted structure where 'type' and 'name' contain the full tool config
+            if 'type' in tool and isinstance(tool['type'], dict):
+                # This is a corrupted tool - use the inner dict instead
+                logger.warning(f"Detected corrupted tool configuration (type=dict), fixing: {tool}")
+                actual_tool = tool['type']  # or tool['name'], they should be the same
+                sanitized_tools.append(actual_tool)
+            elif 'name' in tool and isinstance(tool['name'], dict):
+                # Another corruption pattern where name contains the full config
+                logger.warning(f"Detected corrupted tool configuration (name=dict), fixing: {tool}")
+                actual_tool = tool['name']
+                sanitized_tools.append(actual_tool)
+            elif 'type' in tool and isinstance(tool['type'], str):
+                # Valid tool configuration
+                sanitized_tools.append(tool)
+            else:
+                # Skip invalid/corrupted tools that can't be fixed
+                logger.warning(f"Skipping invalid tool configuration: {tool}")
+        else:
+            logger.warning(f"Skipping non-dict tool: {tool}")
+            # Skip non-dict tools
+
     prompts = []
     tools = []
+    unhandled_tools = []  # Track tools not handled by main processing

-    for tool in tools_list:
+    for tool in sanitized_tools:
+        # Flag to track if this tool was processed by the main loop
+        # Used to prevent double processing by fallback systems
+        tool_handled = False
         try:
             if tool['type'] == 'datasource':
+                tool_handled = True
                 tools.extend(DatasourcesToolkit.get_toolkit(
                     alita_client,
                     datasource_ids=[int(tool['settings']['datasource_id'])],
@@ -55,6 +87,7 @@ def get_tools(tools_list: list, alita_client=None, llm=None, memory_store: BaseS
                     toolkit_name=tool.get('toolkit_name', '') or tool.get('name', '')
                 ).get_tools())
             elif tool['type'] == 'application':
+                tool_handled = True
                 tools.extend(ApplicationToolkit.get_toolkit(
                     alita_client,
                     application_id=int(tool['settings']['application_id']),
@@ -74,6 +107,7 @@ def get_tools(tools_list: list, alita_client=None, llm=None, memory_store: BaseS
                     llm=llm
                 ))
             elif tool['type'] == 'memory':
+                tool_handled = True
                 tools += MemoryToolkit.get_toolkit(
                     namespace=tool['settings'].get('namespace', str(tool['id'])),
                     pgvector_configuration=tool['settings'].get('pgvector_configuration', {}),
@@ -81,6 +115,7 @@ def get_tools(tools_list: list, alita_client=None, llm=None, memory_store: BaseS
                 ).get_tools()
             # TODO: update configuration of internal tools
             elif tool['type'] == 'internal_tool':
+                tool_handled = True
                 if tool['name'] == 'pyodide':
                     tools += SandboxToolkit.get_toolkit(
                         stateful=False,
@@ -101,6 +136,7 @@ def get_tools(tools_list: list, alita_client=None, llm=None, memory_store: BaseS
                         conversation_id=conversation_id,
                     ).get_tools()
             elif tool['type'] == 'artifact':
+                tool_handled = True
                 toolkit_tools = ArtifactToolkit.get_toolkit(
                     client=alita_client,
                     bucket=tool['settings']['bucket'],
@@ -119,11 +155,13 @@ def get_tools(tools_list: list, alita_client=None, llm=None, memory_store: BaseS
                 tools.extend(toolkit_tools)

             elif tool['type'] == 'vectorstore':
+                tool_handled = True
                 tools.extend(VectorStoreToolkit.get_toolkit(
                     llm=llm,
                     toolkit_name=tool.get('toolkit_name', ''),
                     **tool['settings']).get_tools())
             elif tool['type'] == 'planning':
+                tool_handled = True
                 # Planning toolkit for multi-step task tracking
                 settings = tool.get('settings', {})

@@ -163,6 +201,7 @@ def get_tools(tools_list: list, alita_client=None, llm=None, memory_store: BaseS
                     conversation_id=conversation_id or settings.get('conversation_id'),
                 ).get_tools())
             elif tool['type'] == 'mcp':
+                tool_handled = True
                 # remote mcp tool initialization with token injection
                 settings = dict(tool['settings'])
                 url = settings.get('url')
@@ -214,6 +253,28 @@ def get_tools(tools_list: list, alita_client=None, llm=None, memory_store: BaseS
                     toolkit_name=tool.get('toolkit_name', ''),
                     client=alita_client,
                     **settings).get_tools())
+            elif tool['type'] == 'skill_router':
+                tool_handled = True
+                # Skills Registry Router Toolkit
+                logger.info(f"Processing skill_router toolkit: {tool}")
+                try:
+                    settings = tool.get('settings', {})
+                    toolkit_name = tool.get('toolkit_name', '')
+                    selected_tools = settings.get('selected_tools', [])
+
+                    toolkit_tools = SkillRouterToolkit.get_toolkit(
+                        client=alita_client,
+                        llm=llm,
+                        toolkit_name=toolkit_name,
+                        selected_tools=selected_tools,
+                        **settings
+                    ).get_tools()
+
+                    tools.extend(toolkit_tools)
+                    logger.info(f"✅ Successfully added {len(toolkit_tools)} tools from SkillRouterToolkit")
+                except Exception as e:
+                    logger.error(f"❌ Failed to initialize SkillRouterToolkit: {e}")
+                    raise
         except McpAuthorizationRequired:
             # Re-raise auth required exceptions directly
             raise
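For reference, a hypothetical tools_list entry that this new 'skill_router' branch would consume (all field values invented for illustration; selected_tools is read from settings, and the remaining settings are forwarded to SkillRouterToolkit.get_toolkit as keyword arguments):

    # Hypothetical 'skill_router' entry in tools_list (values invented):
    tool = {
        "type": "skill_router",
        "toolkit_name": "skills",
        "settings": {
            "agents": [{"id": 42, "version_id": 7}],
            "pipelines": [],
            "timeout": 120,
            "execution_mode": "remote",
        },
    }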
@@ -224,17 +285,23 @@ def get_tools(tools_list: list, alita_client=None, llm=None, memory_store: BaseS
                 continue
             else:
                 raise ToolException(f"Error initializing toolkit for tool '{tool.get('name', 'unknown')}': {e}")
-
+
+        # Track unhandled tools (make a copy to avoid reference issues)
+        if not tool_handled:
+            # Ensure we only add valid tool configurations to unhandled_tools
+            if isinstance(tool, dict) and 'type' in tool and isinstance(tool['type'], str):
+                unhandled_tools.append(dict(tool))
+
     if len(prompts) > 0:
         tools += PromptToolkit.get_toolkit(alita_client, prompts).get_tools()
-
-    # Add community tools
-    tools += community_tools(tools_list, alita_client, llm)
-    # Add alita tools
-    tools += alita_tools(tools_list, alita_client, llm, memory_store)
+
+    # Add community tools (only for unhandled tools)
+    tools += community_tools(unhandled_tools, alita_client, llm)
+    # Add alita tools (only for unhandled tools)
+    tools += alita_tools(unhandled_tools, alita_client, llm, memory_store)
     # Add MCP tools registered via alita-mcp CLI (static registry)
     # Note: Tools with type='mcp' are already handled in main loop above
-    tools += _mcp_tools(tools_list, alita_client)
+    tools += _mcp_tools(unhandled_tools, alita_client)

     # Sanitize tool names to meet OpenAI's function naming requirements
     # tools = _sanitize_tool_names(tools)
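To make the new sanitization pass concrete, here is an illustration of the input shapes it now repairs or drops before the main loop runs (all dicts invented for illustration):

    # Illustration only (values invented): shapes the sanitization pass handles.
    corrupted_by_type = {"type": {"type": "artifact", "settings": {"bucket": "b1"}}}  # inner dict is unwrapped and used
    corrupted_by_name = {"name": {"type": "artifact", "settings": {"bucket": "b1"}}}  # inner dict is unwrapped and used
    valid_entry = {"type": "artifact", "settings": {"bucket": "b1"}}                  # passed through unchanged
    unfixable = {"settings": {"bucket": "b1"}}  # no usable 'type': skipped with a warning
    non_dict = "artifact"                       # non-dict entry: skipped with a warning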
@@ -10,6 +10,7 @@ from .image_generation import (
     create_image_generation_tool,
     ImageGenerationToolkit
 )
+from .skill_router import SkillRouterWrapper

 __all__ = [
     "PyodideSandboxTool",
@@ -18,5 +19,6 @@ __all__ = [
     "EchoTool",
     "ImageGenerationTool",
     "ImageGenerationToolkit",
-    "create_image_generation_tool"
+    "create_image_generation_tool",
+    "SkillRouterWrapper"
 ]
@@ -1,3 +1,4 @@
+import base64
 import hashlib
 import io
 import json
@@ -14,7 +15,7 @@ from pydantic import create_model, Field, model_validator
 from ...tools.non_code_indexer_toolkit import NonCodeIndexerToolkit
 from ...tools.utils.available_tools_decorator import extend_with_parent_available_tools
 from ...tools.elitea_base import extend_with_file_operations, BaseCodeToolApiWrapper
-from ...runtime.utils.utils import IndexerKeywords
+from ...runtime.utils.utils import IndexerKeywords, resolve_image_from_cache


 class ArtifactWrapper(NonCodeIndexerToolkit):
@@ -63,23 +64,30 @@ class ArtifactWrapper(NonCodeIndexerToolkit):
         if was_modified:
             logging.warning(f"Filename sanitized: '{filename}' -> '{sanitized_filename}'")

+        # Auto-detect and extract base64 from image_url structures (from image_generation tool)
+        # Returns tuple: (processed_data, is_from_image_generation)
+        filedata, is_from_image_generation = self._extract_base64_if_needed(filedata)
+
         if sanitized_filename.endswith(".xlsx"):
             data = json.loads(filedata)
             filedata = self.create_xlsx_filedata(data)

         result = self.artifact.create(sanitized_filename, filedata, bucket_name)

-        # Dispatch custom event for file creation
-        dispatch_custom_event("file_modified", {
-            "message": f"File '{filename}' created successfully",
-            "filename": filename,
-            "tool_name": "createFile",
-            "toolkit": "artifact",
-            "operation_type": "create",
-            "meta": {
-                "bucket": bucket_name or self.bucket
-            }
-        })
+        # Skip file_modified event for images from image_generation tool
+        # These are already tracked in the tool output and don't need duplicate events
+        if not is_from_image_generation:
+            # Dispatch custom event for file creation
+            dispatch_custom_event("file_modified", {
+                "message": f"File '{filename}' created successfully",
+                "filename": filename,
+                "tool_name": "createFile",
+                "toolkit": "artifact",
+                "operation_type": "create",
+                "meta": {
+                    "bucket": bucket_name or self.bucket
+                }
+            })

         return result
@@ -109,6 +117,43 @@ class ArtifactWrapper(NonCodeIndexerToolkit):

         sanitized = sanitized_name + extension
         return sanitized, (sanitized != original)
+
+    def _extract_base64_if_needed(self, filedata: str) -> tuple[str | bytes, bool]:
+        """
+        Resolve cached_image_id references from cache and decode to binary data.
+
+        Requires JSON format with cached_image_id field: {"cached_image_id": "img_xxx"}
+        LLM must extract specific cached_image_id from generate_image response.
+
+        Returns:
+            tuple: (processed_data, is_from_image_generation)
+                - processed_data: Original filedata or resolved binary image data
+                - is_from_image_generation: True if data came from image_generation cache
+        """
+        if not filedata or not isinstance(filedata, str):
+            return filedata, False
+
+        # Require JSON format - fail fast if not JSON
+        if '{' not in filedata:
+            return filedata, False
+
+        try:
+            data = json.loads(filedata)
+        except json.JSONDecodeError:
+            # Not valid JSON, return as-is (regular file content)
+            return filedata, False
+
+        if not isinstance(data, dict):
+            return filedata, False
+
+        # Only accept direct cached_image_id format: {"cached_image_id": "img_xxx"}
+        # LLM must parse generate_image response and extract specific cached_image_id
+        if 'cached_image_id' in data:
+            binary_data = resolve_image_from_cache(self.alita, data['cached_image_id'])
+            return binary_data, True  # Mark as from image_generation
+
+        # If JSON doesn't have cached_image_id, treat as regular file content
+        return filedata, False

     def create_xlsx_filedata(self, data: dict[str, list[list]]) -> bytes:
         try:
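A sketch of the round trip this enables, assuming the wrapper method backing the createFile tool is named create_file and that generate_image returns a cached_image_id (both the method name and the id value here are assumptions, not confirmed by the diff):

    # Hypothetical flow (method name and id are assumptions):
    # 1. generate_image returns a cache reference such as {"cached_image_id": "img_a1b2c3"}
    # 2. the LLM forwards that JSON verbatim as filedata
    wrapper.create_file("logo.png", '{"cached_image_id": "img_a1b2c3"}')  # resolved to binary via the cache
    wrapper.create_file("notes.txt", "plain text is stored as-is")        # no cached_image_id -> untouched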
@@ -377,15 +422,19 @@ class ArtifactWrapper(NonCodeIndexerToolkit):
             "createFile",
             filename=(str, Field(description="Filename")),
             filedata=(str, Field(description="""Stringified content of the file.
-            Example for .xlsx filedata format:
-            {
-                "Sheet1":[
-                    ["Name", "Age", "City"],
-                    ["Alice", 25, "New York"],
-                    ["Bob", 30, "San Francisco"],
-                    ["Charlie", 35, "Los Angeles"]
-                ]
-            }
+
+            Supports three input formats:
+
+            1. CACHED IMAGE REFERENCE (for generated/cached images):
+               Pass JSON with cached_image_id field: {"cached_image_id": "img_xxx"}
+               The tool will automatically resolve and decode the image from cache.
+               This is typically used when another tool returns an image reference.
+
+            2. EXCEL FILES (.xlsx extension):
+               Pass JSON with sheet structure: {"Sheet1": [["Name", "Age"], ["Alice", 25], ["Bob", 30]]}
+
+            3. TEXT/OTHER FILES:
+               Pass the plain text string directly.
             """)),
             bucket_name=bucket_name
         )
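Finally, a worked example of format 2, again assuming the backing method is create_file (hypothetical name; the registered tool is createFile). The JSON string is parsed by create_file and converted to a workbook via create_xlsx_filedata:

    import json

    # Format 2: .xlsx filedata is a JSON mapping of sheet name -> rows.
    sheets = {
        "Sheet1": [
            ["Name", "Age", "City"],
            ["Alice", 25, "New York"],
            ["Bob", 30, "San Francisco"],
        ]
    }
    wrapper.create_file("people.xlsx", json.dumps(sheets))  # routed through create_xlsx_filedata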