rossum-agent 1.0.0rc0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rossum_agent/__init__.py +9 -0
- rossum_agent/agent/__init__.py +32 -0
- rossum_agent/agent/core.py +932 -0
- rossum_agent/agent/memory.py +176 -0
- rossum_agent/agent/models.py +160 -0
- rossum_agent/agent/request_classifier.py +152 -0
- rossum_agent/agent/skills.py +132 -0
- rossum_agent/agent/types.py +5 -0
- rossum_agent/agent_logging.py +56 -0
- rossum_agent/api/__init__.py +1 -0
- rossum_agent/api/cli.py +51 -0
- rossum_agent/api/dependencies.py +190 -0
- rossum_agent/api/main.py +180 -0
- rossum_agent/api/models/__init__.py +1 -0
- rossum_agent/api/models/schemas.py +301 -0
- rossum_agent/api/routes/__init__.py +1 -0
- rossum_agent/api/routes/chats.py +95 -0
- rossum_agent/api/routes/files.py +113 -0
- rossum_agent/api/routes/health.py +44 -0
- rossum_agent/api/routes/messages.py +218 -0
- rossum_agent/api/services/__init__.py +1 -0
- rossum_agent/api/services/agent_service.py +451 -0
- rossum_agent/api/services/chat_service.py +197 -0
- rossum_agent/api/services/file_service.py +65 -0
- rossum_agent/assets/Primary_light_logo.png +0 -0
- rossum_agent/bedrock_client.py +64 -0
- rossum_agent/prompts/__init__.py +27 -0
- rossum_agent/prompts/base_prompt.py +80 -0
- rossum_agent/prompts/system_prompt.py +24 -0
- rossum_agent/py.typed +0 -0
- rossum_agent/redis_storage.py +482 -0
- rossum_agent/rossum_mcp_integration.py +123 -0
- rossum_agent/skills/hook-debugging.md +31 -0
- rossum_agent/skills/organization-setup.md +60 -0
- rossum_agent/skills/rossum-deployment.md +102 -0
- rossum_agent/skills/schema-patching.md +61 -0
- rossum_agent/skills/schema-pruning.md +23 -0
- rossum_agent/skills/ui-settings.md +45 -0
- rossum_agent/streamlit_app/__init__.py +1 -0
- rossum_agent/streamlit_app/app.py +646 -0
- rossum_agent/streamlit_app/beep_sound.py +36 -0
- rossum_agent/streamlit_app/cli.py +17 -0
- rossum_agent/streamlit_app/render_modules.py +123 -0
- rossum_agent/streamlit_app/response_formatting.py +305 -0
- rossum_agent/tools/__init__.py +214 -0
- rossum_agent/tools/core.py +173 -0
- rossum_agent/tools/deploy.py +404 -0
- rossum_agent/tools/dynamic_tools.py +365 -0
- rossum_agent/tools/file_tools.py +62 -0
- rossum_agent/tools/formula.py +187 -0
- rossum_agent/tools/skills.py +31 -0
- rossum_agent/tools/spawn_mcp.py +227 -0
- rossum_agent/tools/subagents/__init__.py +31 -0
- rossum_agent/tools/subagents/base.py +303 -0
- rossum_agent/tools/subagents/hook_debug.py +591 -0
- rossum_agent/tools/subagents/knowledge_base.py +305 -0
- rossum_agent/tools/subagents/mcp_helpers.py +47 -0
- rossum_agent/tools/subagents/schema_patching.py +471 -0
- rossum_agent/url_context.py +167 -0
- rossum_agent/user_detection.py +100 -0
- rossum_agent/utils.py +128 -0
- rossum_agent-1.0.0rc0.dist-info/METADATA +311 -0
- rossum_agent-1.0.0rc0.dist-info/RECORD +67 -0
- rossum_agent-1.0.0rc0.dist-info/WHEEL +5 -0
- rossum_agent-1.0.0rc0.dist-info/entry_points.txt +3 -0
- rossum_agent-1.0.0rc0.dist-info/licenses/LICENSE +21 -0
- rossum_agent-1.0.0rc0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,218 @@
|
|
|
1
|
+
"""Message endpoints with SSE streaming."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
from collections.abc import Callable # noqa: TC003 - Required at runtime for service getter type hints
|
|
7
|
+
from dataclasses import dataclass
|
|
8
|
+
from typing import TYPE_CHECKING, Annotated
|
|
9
|
+
|
|
10
|
+
from fastapi import APIRouter, Depends, HTTPException, Request, status
|
|
11
|
+
from fastapi.responses import StreamingResponse
|
|
12
|
+
from slowapi import Limiter
|
|
13
|
+
from slowapi.util import get_remote_address
|
|
14
|
+
|
|
15
|
+
from rossum_agent.api.dependencies import RossumCredentials, get_validated_credentials
|
|
16
|
+
from rossum_agent.api.models.schemas import (
|
|
17
|
+
DocumentContent,
|
|
18
|
+
FileCreatedEvent,
|
|
19
|
+
ImageContent,
|
|
20
|
+
MessageRequest,
|
|
21
|
+
StepEvent,
|
|
22
|
+
StreamDoneEvent,
|
|
23
|
+
SubAgentProgressEvent,
|
|
24
|
+
SubAgentTextEvent,
|
|
25
|
+
)
|
|
26
|
+
from rossum_agent.api.services.agent_service import (
|
|
27
|
+
AgentService, # noqa: TC001 - Required at runtime for FastAPI Depends()
|
|
28
|
+
)
|
|
29
|
+
from rossum_agent.api.services.chat_service import (
|
|
30
|
+
ChatService, # noqa: TC001 - Required at runtime for FastAPI Depends()
|
|
31
|
+
)
|
|
32
|
+
|
|
33
|
+
if TYPE_CHECKING:
|
|
34
|
+
from collections.abc import Iterator
|
|
35
|
+
from pathlib import Path
|
|
36
|
+
|
|
37
|
+
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Rate limiter whose bucket key is whatever slowapi's get_remote_address returns for the request.
limiter = Limiter(key_func=get_remote_address)

# All routes in this module are mounted under /chats and tagged "messages".
router = APIRouter(prefix="/chats", tags=["messages"])

# Service factories; they stay None until set_chat_service_getter /
# set_agent_service_getter are called.
_get_chat_service: Callable[[], ChatService] | None = None
_get_agent_service: Callable[[], AgentService] | None = None
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def set_chat_service_getter(getter: Callable[[], ChatService]) -> None:
    """Register the factory used by ``get_chat_service_dep`` to obtain a ChatService.

    Args:
        getter: Zero-argument callable returning a ChatService.
    """
    # Rebinds the module-level slot; the last registered getter wins.
    global _get_chat_service
    _get_chat_service = getter
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def set_agent_service_getter(getter: Callable[[], AgentService]) -> None:
    """Register the factory used by ``get_agent_service_dep`` to obtain an AgentService.

    Args:
        getter: Zero-argument callable returning an AgentService.
    """
    # Rebinds the module-level slot; the last registered getter wins.
    global _get_agent_service
    _get_agent_service = getter
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def get_chat_service_dep() -> ChatService:
    """Return the ChatService produced by the registered getter.

    Raises:
        RuntimeError: If no chat service getter has been registered.
    """
    getter = _get_chat_service
    if getter is not None:
        return getter()
    raise RuntimeError("Chat service getter not configured")
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def get_agent_service_dep() -> AgentService:
    """Return the AgentService produced by the registered getter.

    Raises:
        RuntimeError: If no agent service getter has been registered.
    """
    getter = _get_agent_service
    if getter is not None:
        return getter()
    raise RuntimeError("Agent service getter not configured")
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def _format_sse_event(event_type: str, data: str) -> str:
|
|
70
|
+
"""Format an SSE event string."""
|
|
71
|
+
return f"event: {event_type}\ndata: {data}\n\n"
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
type AgentEvent = StreamDoneEvent | SubAgentProgressEvent | SubAgentTextEvent | StepEvent
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
@dataclass
class ProcessedEvent:
    """Outcome of handling one agent event; every field defaults to None."""

    # Ready-to-send SSE frame, if the event should be forwarded to the client.
    sse_event: str | None = None
    # The StreamDoneEvent itself, when that is what was processed.
    done_event: StreamDoneEvent | None = None
    # Text the caller should record as the latest final response, if any.
    final_response_update: str | None = None
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def _process_agent_event(event: AgentEvent) -> ProcessedEvent:
    """Translate one agent event into a ProcessedEvent.

    A StreamDoneEvent is passed back untouched (no SSE frame). Sub-agent
    events become ``sub_agent_progress`` / ``sub_agent_text`` frames. Anything
    else is serialised as a ``step`` frame, and its content is reported as a
    final-response update when it is streaming text or a non-empty final answer.
    """
    if isinstance(event, StreamDoneEvent):
        return ProcessedEvent(done_event=event)

    # Checked in the same order as the event classes are listed here.
    sub_agent_frames = (
        (SubAgentProgressEvent, "sub_agent_progress"),
        (SubAgentTextEvent, "sub_agent_text"),
    )
    for event_cls, frame_name in sub_agent_frames:
        if isinstance(event, event_cls):
            return ProcessedEvent(sse_event=_format_sse_event(frame_name, event.model_dump_json()))

    step_frame = _format_sse_event("step", event.model_dump_json())
    if event.type == "text" and event.is_streaming:
        # Streaming text is forwarded as-is, even when content is empty/None.
        response_update = event.content
    elif event.type == "final_answer" and event.content:
        response_update = event.content
    else:
        response_update = None
    return ProcessedEvent(sse_event=step_frame, final_response_update=response_update)
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def _yield_file_events(output_dir: Path | None, chat_id: str) -> Iterator[str]:
    """Yield SSE events for created files in the output directory.

    Args:
        output_dir: Directory to scan, or None when the run produced no directory.
        chat_id: Chat identifier embedded in each file URL.

    Yields:
        One ``file_created`` SSE frame per regular file directly inside
        ``output_dir``, in sorted path order. Nothing is yielded when
        ``output_dir`` is None or does not exist.
    """
    from urllib.parse import quote  # noqa: PLC0415 - only needed here

    logger.info(f"_yield_file_events called with output_dir={output_dir}, chat_id={chat_id}")
    if output_dir is None:
        logger.info("output_dir is None, returning")
        return
    if not output_dir.exists():
        logger.info(f"output_dir {output_dir} does not exist")
        return

    # Scan the directory once and reuse the listing for both the log line and
    # the loop; sorting makes the event order deterministic.
    entries = sorted(output_dir.iterdir())
    logger.info(f"output_dir exists, listing files: {entries}")
    for file_path in entries:
        if not file_path.is_file():
            continue
        logger.info(f"Yielding file_created event for {file_path.name}")
        # Percent-encode the name so spaces, '#', '?' etc. cannot corrupt the URL.
        file_event = FileCreatedEvent(
            filename=file_path.name, url=f"/api/v1/chats/{chat_id}/files/{quote(file_path.name, safe='')}"
        )
        yield _format_sse_event("file_created", file_event.model_dump_json())
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
@router.post(
    "/{chat_id}/messages",
    response_class=StreamingResponse,
    responses={
        200: {"description": "SSE stream of agent step events", "content": {"text/event-stream": {}}},
        404: {"description": "Chat not found"},
        429: {"description": "Rate limit exceeded"},
    },
)
@limiter.limit("10/minute")
async def send_message(
    request: Request,
    chat_id: str,
    message: MessageRequest,
    credentials: Annotated[RossumCredentials, Depends(get_validated_credentials)] = None,  # type: ignore[assignment]
    chat_service: Annotated[ChatService, Depends(get_chat_service_dep)] = None,  # type: ignore[assignment]
    agent_service: Annotated[AgentService, Depends(get_agent_service_dep)] = None,  # type: ignore[assignment]
) -> StreamingResponse:
    """Send a message and stream the agent's response via SSE.

    The stream carries the agent's events as they are produced. Once the agent
    finishes, the updated history is saved, one ``file_created`` event is sent
    per output file, and a closing ``done`` event is sent if the agent produced
    one. A failure while running the agent or while saving the history is
    reported as an ``error`` event and ends the stream.

    Args:
        request: FastAPI request object (required for rate limiting).
        chat_id: Chat session identifier.
        message: Message request with content.
        credentials: Validated Rossum credentials.
        chat_service: Chat service instance.
        agent_service: Agent service instance.

    Returns:
        StreamingResponse with SSE events.

    Raises:
        HTTPException: If chat not found.
    """
    chat_data = chat_service.get_chat_data(credentials.user_id, chat_id)
    if chat_data is None:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=f"Chat {chat_id} not found")

    history = chat_data.messages
    mcp_mode = chat_data.metadata.mcp_mode
    user_prompt = message.content
    images: list[ImageContent] | None = message.images
    documents: list[DocumentContent] | None = message.documents

    async def event_generator() -> Iterator[str]:  # type: ignore[misc]
        final_response: str | None = None
        done_event: StreamDoneEvent | None = None

        try:
            async for event in agent_service.run_agent(
                prompt=user_prompt,
                images=images,
                documents=documents,
                conversation_history=history,
                rossum_api_token=credentials.token,
                rossum_api_base_url=credentials.api_url,
                rossum_url=message.rossum_url,
                mcp_mode=mcp_mode,
            ):
                result = _process_agent_event(event)
                if result.done_event:
                    done_event = result.done_event
                if result.final_response_update:
                    final_response = result.final_response_update
                if result.sse_event:
                    yield result.sse_event

        except Exception as e:
            logger.exception(f"Error during agent execution: {e}")
            error_event = StepEvent(type="error", step_number=0, content=str(e), is_final=True)
            yield _format_sse_event("error", error_event.model_dump_json())
            return

        # Persisting happens after the response has already been streamed. If it
        # fails, tell the client with a terminal error event instead of letting
        # the exception tear the stream down without any closing event.
        try:
            updated_history = agent_service.build_updated_history(
                existing_history=history,
                user_prompt=user_prompt,
                final_response=final_response,
                images=images,
                documents=documents,
            )
            chat_service.save_messages(
                user_id=credentials.user_id,
                chat_id=chat_id,
                messages=updated_history,
                output_dir=agent_service.output_dir,
                metadata=chat_data.metadata,
            )
        except Exception as e:
            logger.exception(f"Failed to save history for chat {chat_id}: {e}")
            error_event = StepEvent(
                type="error", step_number=0, content="Failed to save conversation history", is_final=True
            )
            yield _format_sse_event("error", error_event.model_dump_json())
            return

        for file_event in _yield_file_events(agent_service.output_dir, chat_id):
            yield file_event

        if done_event:
            yield _format_sse_event("done", done_event.model_dump_json())

    return StreamingResponse(
        event_generator(),
        media_type="text/event-stream",
        headers={"Cache-Control": "no-cache", "Connection": "keep-alive", "X-Accel-Buffering": "no"},
    )
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Service layer for business logic."""
|
|
@@ -0,0 +1,451 @@
|
|
|
1
|
+
"""Agent service for running the Rossum Agent."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
import logging
|
|
7
|
+
from typing import TYPE_CHECKING, Any, Literal
|
|
8
|
+
|
|
9
|
+
from rossum_agent.agent.core import RossumAgent, create_agent
|
|
10
|
+
from rossum_agent.agent.memory import AgentMemory
|
|
11
|
+
from rossum_agent.agent.models import AgentConfig, AgentStep, StepType
|
|
12
|
+
from rossum_agent.api.models.schemas import (
|
|
13
|
+
DocumentContent,
|
|
14
|
+
ImageContent,
|
|
15
|
+
StepEvent,
|
|
16
|
+
StreamDoneEvent,
|
|
17
|
+
SubAgentProgressEvent,
|
|
18
|
+
SubAgentTextEvent,
|
|
19
|
+
)
|
|
20
|
+
from rossum_agent.prompts import get_system_prompt
|
|
21
|
+
from rossum_agent.rossum_mcp_integration import connect_mcp_server
|
|
22
|
+
from rossum_agent.streamlit_app.response_formatting import get_display_tool_name
|
|
23
|
+
from rossum_agent.tools import (
|
|
24
|
+
SubAgentProgress,
|
|
25
|
+
SubAgentText,
|
|
26
|
+
set_mcp_connection,
|
|
27
|
+
set_output_dir,
|
|
28
|
+
set_progress_callback,
|
|
29
|
+
set_rossum_credentials,
|
|
30
|
+
set_text_callback,
|
|
31
|
+
)
|
|
32
|
+
from rossum_agent.url_context import extract_url_context, format_context_for_prompt
|
|
33
|
+
from rossum_agent.utils import create_session_output_dir, set_session_output_dir
|
|
34
|
+
|
|
35
|
+
if TYPE_CHECKING:
|
|
36
|
+
from collections.abc import AsyncIterator
|
|
37
|
+
from pathlib import Path
|
|
38
|
+
|
|
39
|
+
from anthropic.types import ImageBlockParam, TextBlockParam
|
|
40
|
+
|
|
41
|
+
from rossum_agent.agent.types import UserContent
|
|
42
|
+
|
|
43
|
+
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def convert_sub_agent_progress_to_event(progress: SubAgentProgress) -> SubAgentProgressEvent:
    """Build the SSE-facing SubAgentProgressEvent from an internal SubAgentProgress.

    Args:
        progress: The SubAgentProgress from the internal tool.

    Returns:
        SubAgentProgressEvent carrying the same six fields, copied one-to-one.
    """
    copied_fields = ("tool_name", "iteration", "max_iterations", "current_tool", "tool_calls", "status")
    return SubAgentProgressEvent(**{name: getattr(progress, name) for name in copied_fields})
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def _create_tool_start_event(step: AgentStep, current_tool: str) -> StepEvent:
    """Build a ``tool_start`` StepEvent for the tool currently running in ``step``.

    The arguments are taken from the first tool call in ``step.tool_calls``
    whose name equals ``current_tool``; they are None when no call matches.
    """
    matching_args = next((call.arguments for call in step.tool_calls if call.name == current_tool), None)
    return StepEvent(
        type="tool_start",
        step_number=step.step_number,
        tool_name=get_display_tool_name(current_tool, matching_args),
        tool_arguments=matching_args,
        tool_progress=step.tool_progress,
    )
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def _create_tool_result_event(step: AgentStep) -> StepEvent:
    """Build a ``tool_result`` StepEvent from the last entry of ``step.tool_results``.

    Raises:
        IndexError: If ``step.tool_results`` is empty.
    """
    latest = step.tool_results[-1]
    payload = {
        "type": "tool_result",
        "step_number": step.step_number,
        "tool_name": latest.name,
        "result": latest.content,
        "is_error": latest.is_error,
    }
    return StepEvent(**payload)
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def convert_step_to_event(step: AgentStep) -> StepEvent:
    """Map an AgentStep onto the StepEvent that is streamed to the client.

    The first matching rule wins:

    1. ``step.error`` set -> final ``error`` event.
    2. final step with a ``final_answer`` -> final ``final_answer`` event.
    3. INTERMEDIATE step with accumulated text -> streaming ``intermediate`` event.
    4. FINAL_ANSWER step with accumulated text -> streaming ``final_answer`` event.
    5. a current tool with progress -> ``tool_start`` event.
    6. tool results on a non-streaming step -> ``tool_result`` event.
    7. anything else -> ``thinking`` event carrying ``step.thinking`` (may be None).

    The chosen event is logged at INFO level before being returned.
    """
    number = step.step_number

    if step.error:
        event = StepEvent(type="error", step_number=number, content=step.error, is_final=True)
    elif step.is_final and step.final_answer:
        event = StepEvent(type="final_answer", step_number=number, content=step.final_answer, is_final=True)
    elif step.step_type == StepType.INTERMEDIATE and step.accumulated_text is not None:
        event = StepEvent(type="intermediate", step_number=number, content=step.accumulated_text, is_streaming=True)
    elif step.step_type == StepType.FINAL_ANSWER and step.accumulated_text is not None:
        event = StepEvent(type="final_answer", step_number=number, content=step.accumulated_text, is_streaming=True)
    elif step.current_tool and step.tool_progress:
        event = _create_tool_start_event(step, step.current_tool)
    elif step.tool_results and not step.is_streaming:
        event = _create_tool_result_event(step)
    else:
        # Covers both THINKING steps and the catch-all case: when nothing above
        # matched and the step is not a thinking step, step.thinking is None, so
        # the content is None exactly as a dedicated fallback would produce.
        event = StepEvent(type="thinking", step_number=number, content=step.thinking, is_streaming=step.is_streaming)

    logger.info(f"StepEvent: type={event.type}, step={event.step_number}, is_streaming={event.is_streaming}")
    return event
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
class AgentService:
    """Service for running the Rossum Agent.

    Manages MCP connection lifecycle and agent execution for API requests.

    The instance keeps per-run state (output directory, sub-agent event queue,
    memory of the last completed run), so one instance must not execute two
    runs concurrently.
    """

    def __init__(self) -> None:
        """Initialize agent service with no active run."""
        self._output_dir: Path | None = None
        self._sub_agent_queue: asyncio.Queue[SubAgentProgressEvent | SubAgentTextEvent] | None = None
        self._last_memory: AgentMemory | None = None

    @property
    def output_dir(self) -> Path | None:
        """Get the output directory for the current run."""
        return self._output_dir

    @staticmethod
    def _safe_document_name(filename: str) -> str | None:
        """Reduce a client-supplied filename to its final path component.

        Both ``/`` and ``\\`` are treated as separators so that neither
        ``../x`` nor an absolute path can point outside the output directory.

        Returns:
            The bare file name, or None when nothing usable remains
            (empty name, ``.`` or ``..``).
        """
        name = filename.replace("\\", "/").rsplit("/", 1)[-1]
        if name in ("", ".", ".."):
            return None
        return name

    def _on_sub_agent_progress(self, progress: SubAgentProgress) -> None:
        """Callback for sub-agent progress updates.

        Converts the progress to an event and puts it on the queue for streaming.
        The event is dropped (with a warning) when the queue is full, and ignored
        when no run is active.
        """
        if self._sub_agent_queue is not None:
            event = convert_sub_agent_progress_to_event(progress)
            try:
                self._sub_agent_queue.put_nowait(event)
            except asyncio.QueueFull:
                logger.warning("Sub-agent progress queue full, dropping event")

    def _on_sub_agent_text(self, text: SubAgentText) -> None:
        """Callback for sub-agent text streaming.

        Converts the text to an event and puts it on the queue for streaming.
        The event is dropped (with a warning) when the queue is full, and ignored
        when no run is active.
        """
        if self._sub_agent_queue is not None:
            event = SubAgentTextEvent(tool_name=text.tool_name, text=text.text, is_final=text.is_final)
            try:
                self._sub_agent_queue.put_nowait(event)
            except asyncio.QueueFull:
                logger.warning("Sub-agent text queue full, dropping event")

    async def run_agent(
        self,
        prompt: str,
        conversation_history: list[dict[str, Any]],
        rossum_api_token: str,
        rossum_api_base_url: str,
        mcp_mode: Literal["read-only", "read-write"] = "read-only",
        rossum_url: str | None = None,
        images: list[ImageContent] | None = None,
        documents: list[DocumentContent] | None = None,
    ) -> AsyncIterator[StepEvent | StreamDoneEvent | SubAgentProgressEvent | SubAgentTextEvent]:
        """Run the agent with a new prompt.

        Creates a fresh MCP connection, initializes the agent with conversation
        history, and streams step events.

        Args:
            prompt: The user's text prompt.
            conversation_history: Stored history to restore before running.
            rossum_api_token: Token passed to the MCP connection and tools.
            rossum_api_base_url: API base URL passed to the MCP connection and tools.
            mcp_mode: MCP access mode.
            rossum_url: Optional URL from which extra prompt context is extracted.
            images: Optional images to include in the prompt.
            documents: Optional documents; they are saved to the output directory.

        Yields:
            StepEvent objects during execution, SubAgentProgressEvent for sub-agent progress,
            SubAgentTextEvent for sub-agent text streaming, StreamDoneEvent at the end.
            A failure while the agent is running is reported as a final ``error`` StepEvent.
        """
        logger.info(f"Starting agent run with {len(conversation_history)} history messages")
        if images:
            logger.info(f"Including {len(images)} images in the prompt")
        if documents:
            logger.info(f"Including {len(documents)} documents in the prompt")

        # Forget the memory of any earlier run: if this run fails before
        # producing its own memory, build_updated_history must not fall back
        # to history that belongs to a previous (possibly unrelated) run.
        self._last_memory = None

        self._output_dir = create_session_output_dir()

        # Everything that installs process-wide state lives inside the try so
        # the finally clause clears it even when setup itself fails.
        try:
            set_session_output_dir(self._output_dir)
            set_output_dir(self._output_dir)
            set_rossum_credentials(rossum_api_base_url, rossum_api_token)
            logger.info(f"Created session output directory: {self._output_dir}")

            if documents:
                self._save_documents_to_output_dir(documents)

            self._sub_agent_queue = asyncio.Queue(maxsize=100)
            set_progress_callback(self._on_sub_agent_progress)
            set_text_callback(self._on_sub_agent_text)

            system_prompt = get_system_prompt()
            url_context = extract_url_context(rossum_url)
            if not url_context.is_empty():
                context_section = format_context_for_prompt(url_context)
                system_prompt = system_prompt + "\n\n---\n" + context_section

            async with connect_mcp_server(
                rossum_api_token=rossum_api_token,
                rossum_api_base_url=rossum_api_base_url,
                mcp_mode=mcp_mode,
            ) as mcp_connection:
                agent = await create_agent(
                    mcp_connection=mcp_connection, system_prompt=system_prompt, config=AgentConfig()
                )

                # We are inside a coroutine, so the running loop is the one to hand over.
                set_mcp_connection(mcp_connection, asyncio.get_running_loop())

                self._restore_conversation_history(agent, conversation_history)

                total_steps = 0
                total_input_tokens = 0
                total_output_tokens = 0

                user_content = self._build_user_content(prompt, images, documents)

                try:
                    async for step in agent.run(user_content):
                        # Flush sub-agent events queued since the previous step
                        # so they reach the client before the step itself.
                        while not self._sub_agent_queue.empty():
                            try:
                                sub_event = self._sub_agent_queue.get_nowait()
                                yield sub_event
                            except asyncio.QueueEmpty:
                                break

                        yield convert_step_to_event(step)

                        # Totals are only refreshed on completed (non-streaming) steps.
                        if not step.is_streaming:
                            total_steps = step.step_number
                            total_input_tokens = agent._total_input_tokens
                            total_output_tokens = agent._total_output_tokens

                    # Flush whatever the last step left on the queue.
                    while not self._sub_agent_queue.empty():
                        try:
                            sub_event = self._sub_agent_queue.get_nowait()
                            yield sub_event
                        except asyncio.QueueEmpty:
                            break

                    self._last_memory = agent.memory

                    yield StreamDoneEvent(
                        total_steps=total_steps,
                        input_tokens=total_input_tokens,
                        output_tokens=total_output_tokens,
                        token_usage_breakdown=agent.get_token_usage_breakdown(),
                    )
                    agent.log_token_usage_summary()

                except Exception as e:
                    logger.error(f"Agent execution failed: {e}", exc_info=True)
                    yield StepEvent(
                        type="error",
                        step_number=total_steps + 1,
                        content=f"Agent execution failed: {e}",
                        is_final=True,
                    )
        finally:
            set_progress_callback(None)
            set_text_callback(None)
            set_output_dir(None)
            set_rossum_credentials(None, None)
            self._sub_agent_queue = None

    def _save_documents_to_output_dir(self, documents: list[DocumentContent]) -> None:
        """Save uploaded documents to the output directory.

        Each document's base64 ``data`` is decoded and written under its file
        name inside the output directory. Directory components in the supplied
        name are discarded. Failures are logged per document and do not stop
        the remaining documents from being saved.

        Args:
            documents: List of documents to save.
        """
        import base64  # noqa: PLC0415 - kept as a function-local import

        if self._output_dir is None:
            logger.warning("Cannot save documents: output directory not set")
            return

        for doc in documents:
            # The filename comes from the request; never let it choose the directory.
            safe_name = self._safe_document_name(doc.filename)
            if safe_name is None:
                logger.error(f"Failed to save document {doc.filename}: invalid filename")
                continue
            file_path = self._output_dir / safe_name
            try:
                file_data = base64.b64decode(doc.data)
                file_path.write_bytes(file_data)
                logger.info(f"Saved document to {file_path}")
            except Exception as e:
                logger.error(f"Failed to save document {doc.filename}: {e}")

    def _build_user_content(
        self, prompt: str, images: list[ImageContent] | None, documents: list[DocumentContent] | None = None
    ) -> UserContent:
        """Build user content for the agent, optionally including images and documents.

        Args:
            prompt: The user's text prompt.
            images: Optional list of images to include.
            documents: Optional list of documents (paths are included in prompt).

        Returns:
            Either a plain string (text-only) or a list of content blocks (multimodal).
            Blocks are ordered: images, then the document-path note, then the prompt.
        """
        if not images and not documents:
            return prompt

        content: list[ImageBlockParam | TextBlockParam] = []
        if images:
            for img in images:
                content.append(
                    {
                        "type": "image",
                        "source": {
                            "type": "base64",
                            "media_type": img.media_type,
                            "data": img.data,
                        },
                    }
                )
        if documents and self._output_dir:
            # Use the same sanitised names as _save_documents_to_output_dir so
            # the advertised paths match the files actually written.
            doc_paths = [
                str(self._output_dir / safe_name)
                for doc in documents
                if (safe_name := self._safe_document_name(doc.filename)) is not None
            ]
            if doc_paths:
                doc_info = "\n".join(f"- {path}" for path in doc_paths)
                content.append(
                    {"type": "text", "text": f"[Uploaded documents available for processing:\n{doc_info}]"}
                )
        content.append({"type": "text", "text": prompt})
        return content

    def _restore_conversation_history(self, agent: RossumAgent, history: list[dict[str, Any]]) -> None:
        """Restore conversation history to the agent.

        The format is decided by the first item only: a ``type`` of
        ``task_step`` or ``memory_step`` selects the new format, anything else
        is treated as the legacy role-based format.

        Args:
            agent: The RossumAgent instance.
            history: List of step dicts with 'type' key indicating step type.
                Supports both new format (with 'type') and legacy format (with 'role').
        """
        if not history:
            return

        first_item = history[0]
        if "type" in first_item and first_item["type"] in ("task_step", "memory_step"):
            agent.memory = AgentMemory.from_dict(history)
        else:
            for msg in history:
                role = msg.get("role")
                content = msg.get("content", "")
                if role == "user":
                    user_content = self._parse_stored_content(content)
                    agent.add_user_message(user_content)
                elif role == "assistant":
                    agent.add_assistant_message(content)
                # Messages with any other role are skipped.

    def _parse_stored_content(self, content: str | list[dict[str, Any]]) -> UserContent:
        """Parse stored content back into UserContent format.

        Args:
            content: Either a string or a list of content block dicts.

        Returns:
            The string unchanged, or a list holding the ``image`` and ``text``
            blocks (other block types are dropped). An empty string is returned
            when no block survives.
        """
        if isinstance(content, str):
            return content

        result: list[ImageBlockParam | TextBlockParam] = []
        for block in content:
            block_type = block.get("type")
            if block_type == "image":
                source = block.get("source", {})
                result.append(
                    {
                        "type": "image",
                        "source": {
                            "type": source.get("type", "base64"),
                            "media_type": source.get("media_type", "image/png"),
                            "data": source.get("data", ""),
                        },
                    }
                )
            elif block_type == "text":
                result.append({"type": "text", "text": block.get("text", "")})

        return result if result else ""

    def build_updated_history(
        self,
        existing_history: list[dict[str, Any]],
        user_prompt: str,
        final_response: str | None,
        images: list[ImageContent] | None = None,
        documents: list[DocumentContent] | None = None,
    ) -> list[dict[str, Any]]:
        """Build updated conversation history after agent execution.

        Stores task steps and assistant text responses, but strips out tool calls
        and tool results to keep context lean for multi-turn conversations.

        When no memory from a completed run is available, the legacy role-based
        format is used instead: the user message (and the final response, if any)
        is appended to a copy of ``existing_history``.

        Args:
            existing_history: Previous conversation history (ignored if memory available).
            user_prompt: The user's prompt that was just processed.
            final_response: The agent's final response, if any.
            images: Optional list of images included with the user prompt.
            documents: Optional list of documents included with the user prompt.

        Returns:
            The history to persist for the chat.
        """
        if self._last_memory is not None:
            lean_history: list[dict[str, Any]] = []
            for step_dict in self._last_memory.to_dict():
                if step_dict.get("type") == "task_step":
                    lean_history.append(step_dict)
                elif step_dict.get("type") == "memory_step":
                    text = step_dict.get("text")
                    thinking_blocks = step_dict.get("thinking_blocks", [])
                    # Steps that carried only tool traffic are dropped entirely.
                    if text or thinking_blocks:
                        lean_history.append(
                            {
                                "type": "memory_step",
                                "step_number": step_dict.get("step_number", 0),
                                "text": text,
                                "tool_calls": [],
                                "tool_results": [],
                                "thinking_blocks": thinking_blocks,
                            }
                        )
            return lean_history

        updated = list(existing_history)
        user_content = self._build_user_content(user_prompt, images)
        if documents:
            # Only the names are stored in history, not the on-disk paths.
            doc_names = ", ".join(doc.filename for doc in documents)
            if isinstance(user_content, str):
                user_content = f"[Uploaded documents: {doc_names}]\n\n{user_content}"
            else:
                user_content.insert(0, {"type": "text", "text": f"[Uploaded documents: {doc_names}]"})
        updated.append({"role": "user", "content": user_content})
        if final_response:
            updated.append({"role": "assistant", "content": final_response})
        return updated
|