rossum-agent 1.0.0rc0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. rossum_agent/__init__.py +9 -0
  2. rossum_agent/agent/__init__.py +32 -0
  3. rossum_agent/agent/core.py +932 -0
  4. rossum_agent/agent/memory.py +176 -0
  5. rossum_agent/agent/models.py +160 -0
  6. rossum_agent/agent/request_classifier.py +152 -0
  7. rossum_agent/agent/skills.py +132 -0
  8. rossum_agent/agent/types.py +5 -0
  9. rossum_agent/agent_logging.py +56 -0
  10. rossum_agent/api/__init__.py +1 -0
  11. rossum_agent/api/cli.py +51 -0
  12. rossum_agent/api/dependencies.py +190 -0
  13. rossum_agent/api/main.py +180 -0
  14. rossum_agent/api/models/__init__.py +1 -0
  15. rossum_agent/api/models/schemas.py +301 -0
  16. rossum_agent/api/routes/__init__.py +1 -0
  17. rossum_agent/api/routes/chats.py +95 -0
  18. rossum_agent/api/routes/files.py +113 -0
  19. rossum_agent/api/routes/health.py +44 -0
  20. rossum_agent/api/routes/messages.py +218 -0
  21. rossum_agent/api/services/__init__.py +1 -0
  22. rossum_agent/api/services/agent_service.py +451 -0
  23. rossum_agent/api/services/chat_service.py +197 -0
  24. rossum_agent/api/services/file_service.py +65 -0
  25. rossum_agent/assets/Primary_light_logo.png +0 -0
  26. rossum_agent/bedrock_client.py +64 -0
  27. rossum_agent/prompts/__init__.py +27 -0
  28. rossum_agent/prompts/base_prompt.py +80 -0
  29. rossum_agent/prompts/system_prompt.py +24 -0
  30. rossum_agent/py.typed +0 -0
  31. rossum_agent/redis_storage.py +482 -0
  32. rossum_agent/rossum_mcp_integration.py +123 -0
  33. rossum_agent/skills/hook-debugging.md +31 -0
  34. rossum_agent/skills/organization-setup.md +60 -0
  35. rossum_agent/skills/rossum-deployment.md +102 -0
  36. rossum_agent/skills/schema-patching.md +61 -0
  37. rossum_agent/skills/schema-pruning.md +23 -0
  38. rossum_agent/skills/ui-settings.md +45 -0
  39. rossum_agent/streamlit_app/__init__.py +1 -0
  40. rossum_agent/streamlit_app/app.py +646 -0
  41. rossum_agent/streamlit_app/beep_sound.py +36 -0
  42. rossum_agent/streamlit_app/cli.py +17 -0
  43. rossum_agent/streamlit_app/render_modules.py +123 -0
  44. rossum_agent/streamlit_app/response_formatting.py +305 -0
  45. rossum_agent/tools/__init__.py +214 -0
  46. rossum_agent/tools/core.py +173 -0
  47. rossum_agent/tools/deploy.py +404 -0
  48. rossum_agent/tools/dynamic_tools.py +365 -0
  49. rossum_agent/tools/file_tools.py +62 -0
  50. rossum_agent/tools/formula.py +187 -0
  51. rossum_agent/tools/skills.py +31 -0
  52. rossum_agent/tools/spawn_mcp.py +227 -0
  53. rossum_agent/tools/subagents/__init__.py +31 -0
  54. rossum_agent/tools/subagents/base.py +303 -0
  55. rossum_agent/tools/subagents/hook_debug.py +591 -0
  56. rossum_agent/tools/subagents/knowledge_base.py +305 -0
  57. rossum_agent/tools/subagents/mcp_helpers.py +47 -0
  58. rossum_agent/tools/subagents/schema_patching.py +471 -0
  59. rossum_agent/url_context.py +167 -0
  60. rossum_agent/user_detection.py +100 -0
  61. rossum_agent/utils.py +128 -0
  62. rossum_agent-1.0.0rc0.dist-info/METADATA +311 -0
  63. rossum_agent-1.0.0rc0.dist-info/RECORD +67 -0
  64. rossum_agent-1.0.0rc0.dist-info/WHEEL +5 -0
  65. rossum_agent-1.0.0rc0.dist-info/entry_points.txt +3 -0
  66. rossum_agent-1.0.0rc0.dist-info/licenses/LICENSE +21 -0
  67. rossum_agent-1.0.0rc0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,218 @@
1
+ """Message endpoints with SSE streaming."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+ from collections.abc import Callable # noqa: TC003 - Required at runtime for service getter type hints
7
+ from dataclasses import dataclass
8
+ from typing import TYPE_CHECKING, Annotated
9
+
10
+ from fastapi import APIRouter, Depends, HTTPException, Request, status
11
+ from fastapi.responses import StreamingResponse
12
+ from slowapi import Limiter
13
+ from slowapi.util import get_remote_address
14
+
15
+ from rossum_agent.api.dependencies import RossumCredentials, get_validated_credentials
16
+ from rossum_agent.api.models.schemas import (
17
+ DocumentContent,
18
+ FileCreatedEvent,
19
+ ImageContent,
20
+ MessageRequest,
21
+ StepEvent,
22
+ StreamDoneEvent,
23
+ SubAgentProgressEvent,
24
+ SubAgentTextEvent,
25
+ )
26
+ from rossum_agent.api.services.agent_service import (
27
+ AgentService, # noqa: TC001 - Required at runtime for FastAPI Depends()
28
+ )
29
+ from rossum_agent.api.services.chat_service import (
30
+ ChatService, # noqa: TC001 - Required at runtime for FastAPI Depends()
31
+ )
32
+
33
+ if TYPE_CHECKING:
34
+ from collections.abc import Iterator
35
+ from pathlib import Path
36
+
37
# Logger named after this module.
logger = logging.getLogger(__name__)

# slowapi limiter; request keys come from get_remote_address.
limiter = Limiter(key_func=get_remote_address)

# Every route registered on this router lives under the /chats prefix.
router = APIRouter(prefix="/chats", tags=["messages"])

# Service factories; both stay None until assigned through the
# set_chat_service_getter / set_agent_service_getter functions below.
_get_chat_service: Callable[[], ChatService] | None = None
_get_agent_service: Callable[[], AgentService] | None = None
45
+
46
+
47
def set_chat_service_getter(getter: Callable[[], ChatService]) -> None:
    """Register the zero-argument factory that supplies the ChatService.

    Args:
        getter: Callable stored in the module-level ``_get_chat_service`` slot.
    """
    global _get_chat_service
    _get_chat_service = getter
50
+
51
+
52
def set_agent_service_getter(getter: Callable[[], AgentService]) -> None:
    """Register the zero-argument factory that supplies the AgentService.

    Args:
        getter: Callable stored in the module-level ``_get_agent_service`` slot.
    """
    global _get_agent_service
    _get_agent_service = getter
55
+
56
+
57
def get_chat_service_dep() -> ChatService:
    """Return the ChatService produced by the registered getter.

    Raises:
        RuntimeError: If no getter has been registered yet.
    """
    getter = _get_chat_service
    if getter is not None:
        return getter()
    raise RuntimeError("Chat service getter not configured")
61
+
62
+
63
def get_agent_service_dep() -> AgentService:
    """Return the AgentService produced by the registered getter.

    Raises:
        RuntimeError: If no getter has been registered yet.
    """
    getter = _get_agent_service
    if getter is not None:
        return getter()
    raise RuntimeError("Agent service getter not configured")
67
+
68
+
69
+ def _format_sse_event(event_type: str, data: str) -> str:
70
+ """Format an SSE event string."""
71
+ return f"event: {event_type}\ndata: {data}\n\n"
72
+
73
+
74
+ type AgentEvent = StreamDoneEvent | SubAgentProgressEvent | SubAgentTextEvent | StepEvent
75
+
76
+
77
@dataclass
class ProcessedEvent:
    """Outcome of handling one agent event.

    Every field defaults to None; a producer fills in only the parts that
    apply to the event it handled.
    """

    # Ready-to-send SSE text for this event, if any should be streamed.
    sse_event: str | None = None
    # The terminal event, when the handled event was a StreamDoneEvent.
    done_event: StreamDoneEvent | None = None
    # Replacement value for the caller's running final-response text.
    final_response_update: str | None = None
84
+
85
+
86
def _process_agent_event(event: AgentEvent) -> ProcessedEvent:
    """Translate one agent event into a ProcessedEvent.

    A StreamDoneEvent is returned as ``done_event`` with no SSE text. Sub-agent
    events become "sub_agent_progress" / "sub_agent_text" SSE events. Anything
    else is sent as a "step" SSE event, and its content is additionally
    reported as ``final_response_update`` when it is streaming "text" or a
    non-empty "final_answer".
    """
    if isinstance(event, StreamDoneEvent):
        return ProcessedEvent(done_event=event)

    # Sub-agent events are passed through under their own SSE event names.
    sub_agent_routes = (
        (SubAgentProgressEvent, "sub_agent_progress"),
        (SubAgentTextEvent, "sub_agent_text"),
    )
    for event_cls, sse_name in sub_agent_routes:
        if isinstance(event, event_cls):
            return ProcessedEvent(sse_event=_format_sse_event(sse_name, event.model_dump_json()))

    payload = _format_sse_event("step", event.model_dump_json())
    if event.type == "text" and event.is_streaming:
        update = event.content
    elif event.type == "final_answer" and event.content:
        update = event.content
    else:
        update = None
    return ProcessedEvent(sse_event=payload, final_response_update=update)
99
+
100
+
101
+ def _yield_file_events(output_dir: Path | None, chat_id: str) -> Iterator[str]:
102
+ """Yield SSE events for created files in the output directory."""
103
+ logger.info(f"_yield_file_events called with output_dir={output_dir}, chat_id={chat_id}")
104
+ if output_dir is None:
105
+ logger.info("output_dir is None, returning")
106
+ return
107
+ if output_dir.exists():
108
+ logger.info(f"output_dir exists, listing files: {list(output_dir.iterdir())}")
109
+ for file_path in output_dir.iterdir():
110
+ if file_path.is_file():
111
+ logger.info(f"Yielding file_created event for {file_path.name}")
112
+ file_event = FileCreatedEvent(
113
+ filename=file_path.name, url=f"/api/v1/chats/{chat_id}/files/{file_path.name}"
114
+ )
115
+ yield _format_sse_event("file_created", file_event.model_dump_json())
116
+ else:
117
+ logger.info(f"output_dir {output_dir} does not exist")
118
+
119
+
120
@router.post(
    "/{chat_id}/messages",
    response_class=StreamingResponse,
    responses={
        200: {"description": "SSE stream of agent step events", "content": {"text/event-stream": {}}},
        404: {"description": "Chat not found"},
        429: {"description": "Rate limit exceeded"},
    },
)
@limiter.limit("10/minute")
async def send_message(
    request: Request,
    chat_id: str,
    message: MessageRequest,
    credentials: Annotated[RossumCredentials, Depends(get_validated_credentials)] = None,  # type: ignore[assignment]
    chat_service: Annotated[ChatService, Depends(get_chat_service_dep)] = None,  # type: ignore[assignment]
    agent_service: Annotated[AgentService, Depends(get_agent_service_dep)] = None,  # type: ignore[assignment]
) -> StreamingResponse:
    """Post a user message to a chat and stream the agent's reply as SSE.

    The stream carries the agent's events while it runs. After a successful
    run the updated history is saved, one "file_created" event is sent per
    file in the agent's output directory, and a closing "done" event follows
    if the agent produced one. If the run raises, a single "error" event is
    sent and nothing is saved.

    Args:
        request: FastAPI request object (required for rate limiting).
        chat_id: Chat session identifier.
        message: Message request with content.
        credentials: Validated Rossum credentials.
        chat_service: Chat service instance.
        agent_service: Agent service instance.

    Returns:
        StreamingResponse with SSE events.

    Raises:
        HTTPException: If chat not found.
    """
    chat_data = chat_service.get_chat_data(credentials.user_id, chat_id)
    if chat_data is None:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=f"Chat {chat_id} not found")

    # Snapshot the request inputs before the response starts streaming.
    prior_messages = chat_data.messages
    chat_mcp_mode = chat_data.metadata.mcp_mode
    prompt_text = message.content
    attached_images: list[ImageContent] | None = message.images
    attached_documents: list[DocumentContent] | None = message.documents

    async def event_generator() -> Iterator[str]:  # type: ignore[misc]
        latest_answer: str | None = None
        completion: StreamDoneEvent | None = None

        try:
            async for agent_event in agent_service.run_agent(
                prompt=prompt_text,
                images=attached_images,
                documents=attached_documents,
                conversation_history=prior_messages,
                rossum_api_token=credentials.token,
                rossum_api_base_url=credentials.api_url,
                rossum_url=message.rossum_url,
                mcp_mode=chat_mcp_mode,
            ):
                outcome = _process_agent_event(agent_event)
                completion = outcome.done_event or completion
                latest_answer = outcome.final_response_update or latest_answer
                if outcome.sse_event:
                    yield outcome.sse_event
        except Exception as e:
            logger.exception(f"Error during agent execution: {e}")
            failure = StepEvent(type="error", step_number=0, content=str(e), is_final=True)
            yield _format_sse_event("error", failure.model_dump_json())
            return

        # The run finished: persist the conversation before announcing files and completion.
        chat_service.save_messages(
            user_id=credentials.user_id,
            chat_id=chat_id,
            messages=agent_service.build_updated_history(
                existing_history=prior_messages,
                user_prompt=prompt_text,
                final_response=latest_answer,
                images=attached_images,
                documents=attached_documents,
            ),
            output_dir=agent_service.output_dir,
            metadata=chat_data.metadata,
        )

        for file_sse in _yield_file_events(agent_service.output_dir, chat_id):
            yield file_sse

        if completion:
            yield _format_sse_event("done", completion.model_dump_json())

    sse_headers = {"Cache-Control": "no-cache", "Connection": "keep-alive", "X-Accel-Buffering": "no"}
    return StreamingResponse(event_generator(), media_type="text/event-stream", headers=sse_headers)
@@ -0,0 +1 @@
1
+ """Service layer for business logic."""
@@ -0,0 +1,451 @@
1
+ """Agent service for running the Rossum Agent."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ import logging
7
+ from typing import TYPE_CHECKING, Any, Literal
8
+
9
+ from rossum_agent.agent.core import RossumAgent, create_agent
10
+ from rossum_agent.agent.memory import AgentMemory
11
+ from rossum_agent.agent.models import AgentConfig, AgentStep, StepType
12
+ from rossum_agent.api.models.schemas import (
13
+ DocumentContent,
14
+ ImageContent,
15
+ StepEvent,
16
+ StreamDoneEvent,
17
+ SubAgentProgressEvent,
18
+ SubAgentTextEvent,
19
+ )
20
+ from rossum_agent.prompts import get_system_prompt
21
+ from rossum_agent.rossum_mcp_integration import connect_mcp_server
22
+ from rossum_agent.streamlit_app.response_formatting import get_display_tool_name
23
+ from rossum_agent.tools import (
24
+ SubAgentProgress,
25
+ SubAgentText,
26
+ set_mcp_connection,
27
+ set_output_dir,
28
+ set_progress_callback,
29
+ set_rossum_credentials,
30
+ set_text_callback,
31
+ )
32
+ from rossum_agent.url_context import extract_url_context, format_context_for_prompt
33
+ from rossum_agent.utils import create_session_output_dir, set_session_output_dir
34
+
35
+ if TYPE_CHECKING:
36
+ from collections.abc import AsyncIterator
37
+ from pathlib import Path
38
+
39
+ from anthropic.types import ImageBlockParam, TextBlockParam
40
+
41
+ from rossum_agent.agent.types import UserContent
42
+
43
+ logger = logging.getLogger(__name__)
44
+
45
+
46
def convert_sub_agent_progress_to_event(progress: SubAgentProgress) -> SubAgentProgressEvent:
    """Build the SSE-facing progress event from an internal SubAgentProgress.

    The six fields listed below are copied across unchanged.

    Args:
        progress: The SubAgentProgress from the internal tool.

    Returns:
        SubAgentProgressEvent suitable for SSE transmission.
    """
    copied_fields = ("tool_name", "iteration", "max_iterations", "current_tool", "tool_calls", "status")
    return SubAgentProgressEvent(**{name: getattr(progress, name) for name in copied_fields})
63
+
64
+
65
def _create_tool_start_event(step: AgentStep, current_tool: str) -> StepEvent:
    """Build the "tool_start" event for the tool the step is currently running.

    The arguments are taken from the first entry in ``step.tool_calls`` whose
    name equals ``current_tool``; they are None when no entry matches.
    """
    arguments = next((call.arguments for call in step.tool_calls if call.name == current_tool), None)
    shown_name = get_display_tool_name(current_tool, arguments)
    return StepEvent(
        type="tool_start",
        step_number=step.step_number,
        tool_name=shown_name,
        tool_arguments=arguments,
        tool_progress=step.tool_progress,
    )
80
+
81
+
82
def _create_tool_result_event(step: AgentStep) -> StepEvent:
    """Build the "tool_result" event from the last entry in ``step.tool_results``.

    The caller must make sure ``step.tool_results`` is non-empty.
    """
    latest = step.tool_results[-1]
    return StepEvent(
        type="tool_result",
        step_number=step.step_number,
        tool_name=latest.name,
        result=latest.content,
        is_error=latest.is_error,
    )
92
+
93
+
94
def convert_step_to_event(step: AgentStep) -> StepEvent:
    """Convert an AgentStep to a StepEvent for SSE streaming.

    The first matching rule wins, in this order:

    1. the step carries an error -> "error" (final)
    2. the step is final with a final answer -> "final_answer" (final)
    3. INTERMEDIATE / FINAL_ANSWER step with accumulated text ->
       streaming "intermediate" / "final_answer"
    4. a tool is running and reports progress -> "tool_start"
    5. tool results are present and the step is not streaming -> "tool_result"
    6. anything else -> "thinking" with the step's thinking text (may be None)

    The chosen event's type, step number and streaming flag are logged.
    """

    def _select() -> StepEvent:
        number = step.step_number
        if step.error:
            return StepEvent(type="error", step_number=number, content=step.error, is_final=True)
        if step.is_final and step.final_answer:
            return StepEvent(type="final_answer", step_number=number, content=step.final_answer, is_final=True)

        # Text streamed so far, labelled by the kind of step that produced it.
        streamed_kinds = ((StepType.INTERMEDIATE, "intermediate"), (StepType.FINAL_ANSWER, "final_answer"))
        for step_type, label in streamed_kinds:
            if step.step_type == step_type and step.accumulated_text is not None:
                return StepEvent(type=label, step_number=number, content=step.accumulated_text, is_streaming=True)

        if step.current_tool and step.tool_progress:
            return _create_tool_start_event(step, step.current_tool)
        if step.tool_results and not step.is_streaming:
            return _create_tool_result_event(step)

        # Covers both explicit THINKING steps and the fallback case; when no
        # thinking text exists the content is simply None.
        return StepEvent(type="thinking", step_number=number, content=step.thinking, is_streaming=step.is_streaming)

    event = _select()
    logger.info(f"StepEvent: type={event.type}, step={event.step_number}, is_streaming={event.is_streaming}")
    return event
130
+
131
+
132
class AgentService:
    """Service for running the Rossum Agent.

    Manages MCP connection lifecycle and agent execution for API requests.

    NOTE(review): per-run state (output directory, sub-agent queue, last
    memory) is kept on the instance, so a single instance presumably must not
    serve overlapping runs - confirm against how the service is instantiated.
    """

    def __init__(self) -> None:
        """Initialize agent service."""
        self._output_dir: Path | None = None
        self._sub_agent_queue: asyncio.Queue[SubAgentProgressEvent | SubAgentTextEvent] | None = None
        self._last_memory: AgentMemory | None = None

    @property
    def output_dir(self) -> Path | None:
        """Get the output directory for the current run."""
        return self._output_dir

    @staticmethod
    def _safe_document_name(filename: str) -> str | None:
        """Reduce a client-supplied filename to a single path component.

        Directory parts (with either ``/`` or ``\\`` separators) are dropped so
        the name cannot point outside the output directory.

        Returns:
            The bare file name, or None when nothing usable remains
            (empty, ``.`` or ``..``).
        """
        name = filename.replace("\\", "/").rsplit("/", 1)[-1]
        return None if name in ("", ".", "..") else name

    def _enqueue_sub_agent_event(self, event: SubAgentProgressEvent | SubAgentTextEvent, kind: str) -> None:
        """Put a sub-agent event on the queue, dropping it with a warning if the queue is full."""
        queue = self._sub_agent_queue
        if queue is None:
            return
        try:
            queue.put_nowait(event)
        except asyncio.QueueFull:
            logger.warning(f"Sub-agent {kind} queue full, dropping event")

    def _on_sub_agent_progress(self, progress: SubAgentProgress) -> None:
        """Callback for sub-agent progress updates.

        Converts the progress to an event and puts it on the queue for streaming.
        Does nothing when no run is active (no queue).
        """
        if self._sub_agent_queue is not None:
            self._enqueue_sub_agent_event(convert_sub_agent_progress_to_event(progress), "progress")

    def _on_sub_agent_text(self, text: SubAgentText) -> None:
        """Callback for sub-agent text streaming.

        Converts the text to an event and puts it on the queue for streaming.
        Does nothing when no run is active (no queue).
        """
        if self._sub_agent_queue is not None:
            event = SubAgentTextEvent(tool_name=text.tool_name, text=text.text, is_final=text.is_final)
            self._enqueue_sub_agent_event(event, "text")

    async def run_agent(
        self,
        prompt: str,
        conversation_history: list[dict[str, Any]],
        rossum_api_token: str,
        rossum_api_base_url: str,
        mcp_mode: Literal["read-only", "read-write"] = "read-only",
        rossum_url: str | None = None,
        images: list[ImageContent] | None = None,
        documents: list[DocumentContent] | None = None,
    ) -> AsyncIterator[StepEvent | StreamDoneEvent | SubAgentProgressEvent | SubAgentTextEvent]:
        """Run the agent with a new prompt.

        Creates a fresh MCP connection, initializes the agent with conversation
        history, and streams step events. Errors raised while the agent is
        running are reported as an "error" StepEvent; errors raised earlier
        (setup, MCP connection, agent creation) propagate to the caller. In
        both cases the global callbacks, output dir and credentials installed
        for the run are cleared again.

        Yields:
            StepEvent objects during execution, SubAgentProgressEvent for sub-agent progress,
            SubAgentTextEvent for sub-agent text streaming, StreamDoneEvent at the end.
        """
        logger.info(f"Starting agent run with {len(conversation_history)} history messages")
        if images:
            logger.info(f"Including {len(images)} images in the prompt")
        if documents:
            logger.info(f"Including {len(documents)} documents in the prompt")

        # Forget the previous run's memory so that a failed run can never make
        # build_updated_history() return another run's conversation.
        self._last_memory = None

        self._output_dir = create_session_output_dir()
        set_session_output_dir(self._output_dir)
        logger.info(f"Created session output directory: {self._output_dir}")

        sub_agent_queue: asyncio.Queue[SubAgentProgressEvent | SubAgentTextEvent] = asyncio.Queue(maxsize=100)

        # All global state is installed inside the try so the finally below
        # also undoes it when setup itself fails.
        try:
            set_output_dir(self._output_dir)
            set_rossum_credentials(rossum_api_base_url, rossum_api_token)

            if documents:
                self._save_documents_to_output_dir(documents)

            self._sub_agent_queue = sub_agent_queue
            set_progress_callback(self._on_sub_agent_progress)
            set_text_callback(self._on_sub_agent_text)

            system_prompt = get_system_prompt()
            url_context = extract_url_context(rossum_url)
            if not url_context.is_empty():
                context_section = format_context_for_prompt(url_context)
                system_prompt = system_prompt + "\n\n---\n" + context_section

            async with connect_mcp_server(
                rossum_api_token=rossum_api_token,
                rossum_api_base_url=rossum_api_base_url,
                mcp_mode=mcp_mode,
            ) as mcp_connection:
                agent = await create_agent(
                    mcp_connection=mcp_connection, system_prompt=system_prompt, config=AgentConfig()
                )

                # We are inside a coroutine, so the running loop is the one to hand over.
                set_mcp_connection(mcp_connection, asyncio.get_running_loop())

                self._restore_conversation_history(agent, conversation_history)

                total_steps = 0
                total_input_tokens = 0
                total_output_tokens = 0

                user_content = self._build_user_content(prompt, images, documents)

                try:
                    async for step in agent.run(user_content):
                        # Flush sub-agent events queued since the previous step first.
                        while not sub_agent_queue.empty():
                            try:
                                yield sub_agent_queue.get_nowait()
                            except asyncio.QueueEmpty:
                                break

                        yield convert_step_to_event(step)

                        if not step.is_streaming:
                            total_steps = step.step_number
                            total_input_tokens = agent._total_input_tokens
                            total_output_tokens = agent._total_output_tokens

                    # Flush whatever arrived after the last step.
                    while not sub_agent_queue.empty():
                        try:
                            yield sub_agent_queue.get_nowait()
                        except asyncio.QueueEmpty:
                            break

                    self._last_memory = agent.memory

                    yield StreamDoneEvent(
                        total_steps=total_steps,
                        input_tokens=total_input_tokens,
                        output_tokens=total_output_tokens,
                        token_usage_breakdown=agent.get_token_usage_breakdown(),
                    )
                    agent.log_token_usage_summary()

                except Exception as e:
                    logger.error(f"Agent execution failed: {e}", exc_info=True)
                    yield StepEvent(
                        type="error",
                        step_number=total_steps + 1,
                        content=f"Agent execution failed: {e}",
                        is_final=True,
                    )
        finally:
            set_progress_callback(None)
            set_text_callback(None)
            set_output_dir(None)
            set_rossum_credentials(None, None)
            self._sub_agent_queue = None

    def _save_documents_to_output_dir(self, documents: list[DocumentContent]) -> None:
        """Save uploaded documents to the output directory.

        File names come from the client, so only their final path component is
        used; documents whose name reduces to nothing usable are skipped.
        Failures for one document are logged and do not stop the others.

        Args:
            documents: List of documents to save.
        """
        import base64  # noqa: PLC0415 - only needed when documents are uploaded

        if self._output_dir is None:
            logger.warning("Cannot save documents: output directory not set")
            return

        for doc in documents:
            safe_name = self._safe_document_name(doc.filename)
            if safe_name is None:
                logger.error(f"Refusing to save document with unsafe filename: {doc.filename!r}")
                continue
            file_path = self._output_dir / safe_name
            try:
                file_data = base64.b64decode(doc.data)
                file_path.write_bytes(file_data)
                logger.info(f"Saved document to {file_path}")
            except Exception as e:
                logger.error(f"Failed to save document {doc.filename}: {e}")

    def _build_user_content(
        self, prompt: str, images: list[ImageContent] | None, documents: list[DocumentContent] | None = None
    ) -> UserContent:
        """Build user content for the agent, optionally including images and documents.

        Document paths are built from the same sanitised names that
        ``_save_documents_to_output_dir`` writes to.

        Args:
            prompt: The user's text prompt.
            images: Optional list of images to include.
            documents: Optional list of documents (paths are included in prompt).

        Returns:
            Either a plain string (text-only) or a list of content blocks (multimodal).
        """
        if not images and not documents:
            return prompt

        content: list[ImageBlockParam | TextBlockParam] = [
            {
                "type": "image",
                "source": {"type": "base64", "media_type": img.media_type, "data": img.data},
            }
            for img in images or []
        ]
        if documents and self._output_dir:
            doc_paths = [
                str(self._output_dir / safe_name)
                for doc in documents
                if (safe_name := self._safe_document_name(doc.filename)) is not None
            ]
            if doc_paths:
                doc_info = "\n".join(f"- {path}" for path in doc_paths)
                content.append({"type": "text", "text": f"[Uploaded documents available for processing:\n{doc_info}]"})
        content.append({"type": "text", "text": prompt})
        return content

    def _restore_conversation_history(self, agent: RossumAgent, history: list[dict[str, Any]]) -> None:
        """Restore conversation history to the agent.

        Args:
            agent: The RossumAgent instance.
            history: List of step dicts with 'type' key indicating step type.
                Supports both new format (with 'type') and legacy format (with 'role').
        """
        if not history:
            return

        # The first item decides the format: step dicts vs. legacy role/content messages.
        if history[0].get("type") in ("task_step", "memory_step"):
            agent.memory = AgentMemory.from_dict(history)
            return

        for msg in history:
            role = msg.get("role")
            content = msg.get("content", "")
            if role == "user":
                agent.add_user_message(self._parse_stored_content(content))
            elif role == "assistant":
                agent.add_assistant_message(content)

    def _parse_stored_content(self, content: str | list[dict[str, Any]]) -> UserContent:
        """Parse stored content back into UserContent format.

        Only "image" and "text" blocks are kept; other block types are dropped.

        Args:
            content: Either a string or a list of content block dicts.

        Returns:
            UserContent suitable for the agent (an empty string when no block survives).
        """
        if isinstance(content, str):
            return content

        result: list[ImageBlockParam | TextBlockParam] = []
        for block in content:
            block_type = block.get("type")
            if block_type == "image":
                source = block.get("source", {})
                result.append(
                    {
                        "type": "image",
                        "source": {
                            "type": source.get("type", "base64"),
                            "media_type": source.get("media_type", "image/png"),
                            "data": source.get("data", ""),
                        },
                    }
                )
            elif block_type == "text":
                result.append({"type": "text", "text": block.get("text", "")})

        return result if result else ""

    def build_updated_history(
        self,
        existing_history: list[dict[str, Any]],
        user_prompt: str,
        final_response: str | None,
        images: list[ImageContent] | None = None,
        documents: list[DocumentContent] | None = None,
    ) -> list[dict[str, Any]]:
        """Build updated conversation history after agent execution.

        Stores task steps and assistant text responses, but strips out tool calls
        and tool results to keep context lean for multi-turn conversations.

        Args:
            existing_history: Previous conversation history (ignored if memory available).
            user_prompt: The user's prompt that was just processed.
            final_response: The agent's final response, if any.
            images: Optional list of images included with the user prompt.
            documents: Optional list of documents included with the user prompt.

        Returns:
            The lean step list derived from the last run's memory when one is
            available; otherwise ``existing_history`` extended with the user
            message and, if present, the assistant response.
        """
        if self._last_memory is not None:
            lean_history: list[dict[str, Any]] = []
            for step_dict in self._last_memory.to_dict():
                step_kind = step_dict.get("type")
                if step_kind == "task_step":
                    lean_history.append(step_dict)
                elif step_kind == "memory_step":
                    text = step_dict.get("text")
                    thinking_blocks = step_dict.get("thinking_blocks", [])
                    # Steps that consisted only of tool traffic are dropped entirely.
                    if text or thinking_blocks:
                        lean_history.append(
                            {
                                "type": "memory_step",
                                "step_number": step_dict.get("step_number", 0),
                                "text": text,
                                "tool_calls": [],
                                "tool_results": [],
                                "thinking_blocks": thinking_blocks,
                            }
                        )
            return lean_history

        updated = list(existing_history)
        user_content = self._build_user_content(user_prompt, images)
        if documents:
            doc_names = ", ".join(doc.filename for doc in documents)
            if isinstance(user_content, str):
                user_content = f"[Uploaded documents: {doc_names}]\n\n{user_content}"
            else:
                user_content.insert(0, {"type": "text", "text": f"[Uploaded documents: {doc_names}]"})
        updated.append({"role": "user", "content": user_content})
        if final_response:
            updated.append({"role": "assistant", "content": final_response})
        return updated