rossum-agent 1.0.0rc0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. rossum_agent/__init__.py +9 -0
  2. rossum_agent/agent/__init__.py +32 -0
  3. rossum_agent/agent/core.py +932 -0
  4. rossum_agent/agent/memory.py +176 -0
  5. rossum_agent/agent/models.py +160 -0
  6. rossum_agent/agent/request_classifier.py +152 -0
  7. rossum_agent/agent/skills.py +132 -0
  8. rossum_agent/agent/types.py +5 -0
  9. rossum_agent/agent_logging.py +56 -0
  10. rossum_agent/api/__init__.py +1 -0
  11. rossum_agent/api/cli.py +51 -0
  12. rossum_agent/api/dependencies.py +190 -0
  13. rossum_agent/api/main.py +180 -0
  14. rossum_agent/api/models/__init__.py +1 -0
  15. rossum_agent/api/models/schemas.py +301 -0
  16. rossum_agent/api/routes/__init__.py +1 -0
  17. rossum_agent/api/routes/chats.py +95 -0
  18. rossum_agent/api/routes/files.py +113 -0
  19. rossum_agent/api/routes/health.py +44 -0
  20. rossum_agent/api/routes/messages.py +218 -0
  21. rossum_agent/api/services/__init__.py +1 -0
  22. rossum_agent/api/services/agent_service.py +451 -0
  23. rossum_agent/api/services/chat_service.py +197 -0
  24. rossum_agent/api/services/file_service.py +65 -0
  25. rossum_agent/assets/Primary_light_logo.png +0 -0
  26. rossum_agent/bedrock_client.py +64 -0
  27. rossum_agent/prompts/__init__.py +27 -0
  28. rossum_agent/prompts/base_prompt.py +80 -0
  29. rossum_agent/prompts/system_prompt.py +24 -0
  30. rossum_agent/py.typed +0 -0
  31. rossum_agent/redis_storage.py +482 -0
  32. rossum_agent/rossum_mcp_integration.py +123 -0
  33. rossum_agent/skills/hook-debugging.md +31 -0
  34. rossum_agent/skills/organization-setup.md +60 -0
  35. rossum_agent/skills/rossum-deployment.md +102 -0
  36. rossum_agent/skills/schema-patching.md +61 -0
  37. rossum_agent/skills/schema-pruning.md +23 -0
  38. rossum_agent/skills/ui-settings.md +45 -0
  39. rossum_agent/streamlit_app/__init__.py +1 -0
  40. rossum_agent/streamlit_app/app.py +646 -0
  41. rossum_agent/streamlit_app/beep_sound.py +36 -0
  42. rossum_agent/streamlit_app/cli.py +17 -0
  43. rossum_agent/streamlit_app/render_modules.py +123 -0
  44. rossum_agent/streamlit_app/response_formatting.py +305 -0
  45. rossum_agent/tools/__init__.py +214 -0
  46. rossum_agent/tools/core.py +173 -0
  47. rossum_agent/tools/deploy.py +404 -0
  48. rossum_agent/tools/dynamic_tools.py +365 -0
  49. rossum_agent/tools/file_tools.py +62 -0
  50. rossum_agent/tools/formula.py +187 -0
  51. rossum_agent/tools/skills.py +31 -0
  52. rossum_agent/tools/spawn_mcp.py +227 -0
  53. rossum_agent/tools/subagents/__init__.py +31 -0
  54. rossum_agent/tools/subagents/base.py +303 -0
  55. rossum_agent/tools/subagents/hook_debug.py +591 -0
  56. rossum_agent/tools/subagents/knowledge_base.py +305 -0
  57. rossum_agent/tools/subagents/mcp_helpers.py +47 -0
  58. rossum_agent/tools/subagents/schema_patching.py +471 -0
  59. rossum_agent/url_context.py +167 -0
  60. rossum_agent/user_detection.py +100 -0
  61. rossum_agent/utils.py +128 -0
  62. rossum_agent-1.0.0rc0.dist-info/METADATA +311 -0
  63. rossum_agent-1.0.0rc0.dist-info/RECORD +67 -0
  64. rossum_agent-1.0.0rc0.dist-info/WHEEL +5 -0
  65. rossum_agent-1.0.0rc0.dist-info/entry_points.txt +3 -0
  66. rossum_agent-1.0.0rc0.dist-info/licenses/LICENSE +21 -0
  67. rossum_agent-1.0.0rc0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,646 @@
1
+ """Rossum Streamlit Test Bed App.
2
+
3
+ Web interface for testing the Rossum Document Processing Agent using Streamlit.
4
+
5
+ Usage:
6
+ streamlit run rossum_agent/streamlit_app/app.py
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import asyncio
12
+ import base64
13
+ import logging
14
+ import os
15
+ import pathlib
16
+ import time
17
+ from typing import TYPE_CHECKING, Literal
18
+
19
+ import streamlit as st
20
+ from rossum_mcp.logging_config import setup_logging
21
+
22
+ from rossum_agent.agent import AgentConfig, create_agent
23
+ from rossum_agent.agent_logging import log_agent_result
24
+ from rossum_agent.prompts.system_prompt import get_system_prompt
25
+ from rossum_agent.redis_storage import ChatMetadata, RedisStorage, get_commit_sha
26
+ from rossum_agent.rossum_mcp_integration import connect_mcp_server
27
+ from rossum_agent.streamlit_app.beep_sound import generate_beep_wav
28
+ from rossum_agent.streamlit_app.render_modules import (
29
+ MERMAID_BLOCK_PATTERN,
30
+ render_chat_history,
31
+ render_markdown_with_mermaid,
32
+ )
33
+ from rossum_agent.streamlit_app.response_formatting import ChatResponse, parse_and_format_final_answer
34
+ from rossum_agent.tools import set_mcp_connection, set_output_dir
35
+ from rossum_agent.url_context import RossumUrlContext, extract_url_context, format_context_for_prompt
36
+ from rossum_agent.user_detection import get_user_from_jwt, normalize_user_id
37
+ from rossum_agent.utils import (
38
+ cleanup_session_output_dir,
39
+ create_session_output_dir,
40
+ generate_chat_id,
41
+ get_generated_files,
42
+ get_generated_files_with_metadata,
43
+ is_valid_chat_id,
44
+ set_session_output_dir,
45
+ )
46
+
47
+ if TYPE_CHECKING:
48
+ from collections.abc import Callable
49
+
50
+ from anthropic.types import ImageBlockParam, TextBlockParam
51
+
52
+ from rossum_agent.agent import AgentStep
53
+ from rossum_agent.agent.types import UserContent
54
+
55
# Generate the notification beep once at import time and embed it as a base64 data URL
# inside an autoplaying <audio> element.
_beep_wav = generate_beep_wav(frequency=440, duration=0.33)
_beep_b64 = base64.b64encode(_beep_wav).decode("ascii")
BEEP_HTML = f'<audio src="data:audio/wav;base64,{_beep_b64}" autoplay></audio>'

# Logo file: "assets/Primary_light_logo.png" under the parent of this module's directory.
LOGO_PATH = pathlib.Path(__file__).parent.parent / "assets" / "Primary_light_logo.png"

# Configure logging with Redis integration; the level is read from LOG_LEVEL (default "INFO").
setup_logging(app_name="rossum-agent", log_level=os.getenv("LOG_LEVEL", "INFO"))
logger = logging.getLogger(__name__)


# Page config - must be first Streamlit command and at module level
st.set_page_config(page_title="Rossum Agent", page_icon="🤖", layout="wide", initial_sidebar_state="expanded")
69
+
70
+
71
async def run_agent_turn(
    rossum_api_token: str,
    rossum_api_base_url: str,
    mcp_mode: Literal["read-only", "read-write"],
    prompt: UserContent,
    conversation_history: list[dict[str, str]],
    on_step: Callable[[AgentStep], None],
    rossum_url: str | None = None,
) -> None:
    """Run a single agent turn with proper MCP connection lifecycle.

    Opens the MCP connection, creates the agent, replays the previous conversation into it,
    streams the steps produced for ``prompt`` to ``on_step`` and closes the connection again,
    all within the event loop that runs this coroutine.

    Args:
        rossum_api_token: Rossum API token.
        rossum_api_base_url: Rossum API base URL.
        mcp_mode: MCP mode ('read-only' or 'read-write').
        prompt: User's input prompt (text or multimodal content).
        conversation_history: Previous messages for context. Each entry is read through its
            "role" and "content" keys; roles other than "user" and "assistant" are skipped.
        on_step: Callback function called for each step as it completes.
        rossum_url: Optional Rossum app URL for context extraction.
    """
    system_prompt = get_system_prompt()

    # Append the URL-derived context to the system prompt only when something was extracted.
    url_context = extract_url_context(rossum_url)
    if not url_context.is_empty():
        system_prompt = system_prompt + "\n\n---\n" + format_context_for_prompt(url_context)

    async with connect_mcp_server(
        rossum_api_token=rossum_api_token, rossum_api_base_url=rossum_api_base_url, mcp_mode=mcp_mode
    ) as mcp_connection:
        # Inside a coroutine the running loop is the one we want; get_running_loop() is the
        # documented way to obtain it and never creates a loop implicitly.
        set_mcp_connection(mcp_connection, asyncio.get_running_loop())

        agent = await create_agent(mcp_connection=mcp_connection, system_prompt=system_prompt, config=AgentConfig())

        # Replay earlier turns so the agent sees the conversation so far.
        for msg in conversation_history:
            role = msg["role"]
            if role == "user":
                agent.add_user_message(msg["content"])
            elif role == "assistant":
                agent.add_assistant_message(msg["content"])

        async for step in agent.run(prompt):
            on_step(step)

        agent.log_token_usage_summary()
117
+
118
+
119
def _initialize_user_and_storage() -> None:
    """Populate the user identity and the Redis storage handle in session state.

    ``user_isolation_enabled`` is refreshed on every call from the presence of the
    TELEPORT_JWT_JWKS_URL environment variable; ``user_id`` and ``redis_storage`` are only
    created when they are not in the session yet.
    """
    state = st.session_state
    state.user_isolation_enabled = bool(os.getenv("TELEPORT_JWT_JWKS_URL"))

    if "user_id" not in state:
        jwt_token = None
        if hasattr(st.context, "headers"):
            request_headers = dict(st.context.headers)
            if request_headers:
                jwt_token = request_headers.get("Teleport-Jwt-Assertion")
        state.user_id = normalize_user_id(get_user_from_jwt(jwt_token))

    if "redis_storage" not in state:
        state.redis_storage = RedisStorage()
132
+
133
+
134
def _initialize_chat_id() -> None:
    """Pick the chat ID for this session from the URL, or create a fresh one.

    A valid ``chat_id`` query parameter that differs from the session's current chat switches
    the session to that chat: the ``user_id`` query parameter is remembered as
    ``shared_user_id``, cached messages and the output directory are dropped, pending uploads
    are emptied and the uploader key is bumped. Without a usable ``chat_id`` parameter a new
    ID is generated (once) and written back to the URL.
    """
    state = st.session_state
    requested_chat_id = st.query_params.get("chat_id")
    requested_owner_id = st.query_params.get("user_id")

    if not (requested_chat_id and is_valid_chat_id(requested_chat_id)):
        if "chat_id" not in state:
            state.chat_id = generate_chat_id()
            st.query_params["chat_id"] = state.chat_id
            logger.info(f"Generated new chat ID: {st.session_state.chat_id}")
        return

    if "chat_id" in state and state.chat_id == requested_chat_id:
        # Already on the requested chat, nothing to reset.
        return

    state.chat_id = requested_chat_id
    state.shared_user_id = requested_owner_id

    # Drop per-chat state so it is rebuilt for the newly selected chat.
    for stale_key in ("messages", "output_dir"):
        if stale_key in state:
            del state[stale_key]
    for upload_key in ("uploaded_images", "uploaded_documents"):
        if upload_key in state:
            state[upload_key] = []
    if "uploader_key_counter" in state:
        state.uploader_key_counter += 1

    logger.info(f"Loaded chat ID from URL: {requested_chat_id}, shared_user_id: {requested_owner_id}")
157
+
158
+
159
def _initialize_session_defaults() -> None:
    """Fill in every session-state entry the app relies on, leaving existing values untouched.

    Credentials are pre-filled from ROSSUM_API_TOKEN / ROSSUM_API_BASE_URL only when the DEBUG
    environment variable is set; otherwise they start out empty.
    """
    state = st.session_state

    if "output_dir" not in state:
        state.output_dir = create_session_output_dir()
    # NOTE(review): the directory is registered with both helpers here; confirm whether this
    # is meant to happen on every run or only when the directory is first created.
    set_session_output_dir(state.output_dir)
    set_output_dir(state.output_dir)

    debug_mode = os.getenv("DEBUG")
    credential_sources = (
        ("rossum_api_token", "ROSSUM_API_TOKEN"),
        ("rossum_api_base_url", "ROSSUM_API_BASE_URL"),
    )
    for state_key, env_name in credential_sources:
        if state_key not in state:
            state[state_key] = os.getenv(env_name, "") if debug_mode else ""

    if "credentials_saved" not in state:
        # Credentials count as saved only when both values are non-empty.
        state.credentials_saved = bool(state.rossum_api_token and state.rossum_api_base_url)

    if "read_write_disabled" not in state:
        flag = os.getenv("ROSSUM_DISABLE_READ_WRITE", "").lower()
        state.read_write_disabled = flag in ("true", "1", "yes")

    # Remaining defaults are built lazily so nothing is constructed for keys that already exist.
    lazy_defaults = (
        ("mcp_mode", lambda: "read-write"),
        ("rossum_url_context", RossumUrlContext),
        ("uploaded_images", list),
        ("uploaded_documents", list),
        ("uploader_key_counter", int),
    )
    for state_key, make_default in lazy_defaults:
        if state_key not in state:
            state[state_key] = make_default()
195
+
196
+
197
def _load_messages_from_redis() -> None:
    """Ensure ``st.session_state.messages`` exists, loading it from Redis when possible.

    Does nothing if messages are already in the session. If Redis is not connected, or holds
    nothing for the current chat, the message list starts empty. When a ``shared_user_id`` is
    present in the session the chat is looked up under that user instead of the current one;
    with user isolation disabled the lookup uses no user ID at all.
    """
    state = st.session_state
    if "messages" in state:
        return

    storage = state.redis_storage
    if not storage.is_connected():
        state.messages = []
        return

    shared_user_id = state.get("shared_user_id")
    if shared_user_id:
        owner_id = shared_user_id if state.user_isolation_enabled else None
        logger.info(f"Loading shared conversation from user: {shared_user_id}")
    else:
        owner_id = state.user_id if state.user_isolation_enabled else None

    chat_data = storage.load_chat(owner_id, state.chat_id, state.output_dir)
    if not chat_data:
        state.messages = []
        logger.info(f"No messages found in Redis for chat {st.session_state.chat_id}, starting fresh")
        return

    state.messages = chat_data.messages
    logger.info(f"Loaded {len(chat_data.messages)} messages from Redis for chat {st.session_state.chat_id}")
222
+
223
+
224
def _render_credentials_section() -> None:
    """Render the Rossum credentials form (or its read-only summary) in the sidebar.

    While no credentials are saved, an editable form is shown and both fields must be filled
    before they are stored. Once saved, the base URL and a shortened token are displayed along
    with a button to edit them again. When the DEBUG environment variable is set, an MCP mode
    selector is rendered as well.
    """
    state = st.session_state
    st.markdown("---")
    st.subheader("Rossum API Credentials")

    if state.credentials_saved:
        st.success("✅ Credentials configured")
        with st.expander("View Credentials"):
            st.text_input("API Base URL", value=state.rossum_api_base_url, disabled=True)
            # Only the first 8 characters of a longer token are ever displayed.
            token = state.rossum_api_token
            token_display = token[:8] + "..." if len(token) > 8 else token
            st.text_input("API Token", value=token_display, disabled=True)
        if st.button("Update Credentials"):
            state.credentials_saved = False
            st.rerun()
    else:
        st.warning("⚠️ Please enter your Rossum API credentials")
        entered_url = st.text_input(
            "API Base URL",
            value=state.rossum_api_base_url,
            placeholder="https://your-instance.rossum.app",
            type="default",
        )
        entered_token = st.text_input(
            "API Token",
            value=state.rossum_api_token,
            placeholder="Your Rossum API token",
            type="password",
        )
        if st.button("Save Credentials", type="primary"):
            if not (entered_url and entered_token):
                st.error("Both fields are required")
            else:
                state.rossum_api_token = entered_token
                state.rossum_api_base_url = entered_url
                state.credentials_saved = True
                st.rerun()

    if os.getenv("DEBUG"):
        st.markdown("---")
        st.subheader("MCP Mode")
        mode_options = ["read-only", "read-write"]
        try:
            current_index = mode_options.index(state.mcp_mode)
        except ValueError:
            # Unknown stored mode: preselect the first option.
            current_index = 0
        selected_mode = st.radio(
            "Select mode:",
            options=mode_options,
            index=current_index,
            horizontal=True,
            disabled=state.read_write_disabled,
        )
        if selected_mode != state.mcp_mode:
            state.mcp_mode = selected_mode
281
+
282
+
283
def _render_url_context_section() -> None:
    """Render the sidebar field for pasting a Rossum URL and show what was extracted from it.

    The stored context is re-extracted only when the text in the field differs from the URL
    the current context was built from.
    """
    st.markdown("---")
    st.subheader("Current Context")

    stored_url = st.session_state.rossum_url_context.raw_url or ""
    current_url = st.text_input(
        "Rossum URL",
        value=stored_url,
        placeholder="Paste Rossum app URL here",
        help="Paste a Rossum application URL to provide context (queue, annotation, etc.)",
    )

    if current_url != stored_url:
        st.session_state.rossum_url_context = extract_url_context(current_url)

    context = st.session_state.rossum_url_context
    if not context.is_empty():
        st.success(f"✅ {context.to_context_string()}")
    elif current_url:
        st.warning("⚠️ No context extracted from URL")
303
+
304
+
305
def _render_quick_actions() -> None:
    """Render quick actions section in sidebar.

    Two actions are offered:

    * "Get Shareable Link" (hidden while viewing a conversation shared by someone else) shows
      a URL carrying the current chat ID and user ID as query parameters. The base comes from
      the PUBLIC_URL environment variable, or is derived from the request's Host header.
    * "Reset Conversation" clears messages and pending attachments, replaces the output
      directory, switches to a freshly generated chat ID and reruns the script.
    """
    st.subheader("Quick Actions")

    if not st.session_state.get("shared_user_id") and st.button("🔗 Get Shareable Link"):
        from urllib.parse import urlencode

        public_url = os.getenv("PUBLIC_URL")
        if public_url:
            base_url = public_url.rstrip("/")
        else:
            host = st.context.headers.get("host", "localhost:8501")
            protocol = "https" if "localhost" not in host else "http"
            base_url = f"{protocol}://{host}"

        # Encode the values so IDs containing reserved characters survive the round trip.
        query = urlencode({"chat_id": st.session_state.chat_id, "user_id": st.session_state.user_id})
        share_url = f"{base_url}/?{query}"
        st.code(share_url, language=None)

    if st.button("🔄 Reset Conversation"):
        st.session_state.messages = []
        # Drop both kinds of pending attachments; bumping the key gives the uploader a new
        # widget identity so it starts out empty as well.
        st.session_state.uploaded_images = []
        st.session_state.uploaded_documents = []
        st.session_state.uploader_key_counter += 1
        if "output_dir" in st.session_state:
            cleanup_session_output_dir(st.session_state.output_dir)
        st.session_state.output_dir = create_session_output_dir()
        # Register the new directory with both helpers, as is done when it is first created.
        set_session_output_dir(st.session_state.output_dir)
        set_output_dir(st.session_state.output_dir)
        st.session_state.chat_id = generate_chat_id()
        st.query_params["chat_id"] = st.session_state.chat_id
        logger.info(f"Reset conversation with new chat ID: {st.session_state.chat_id}")
        st.rerun()
333
+
334
+
335
def _render_generated_files() -> dict[str, float]:
    """List the session's generated files with download buttons.

    Returns:
        The value of ``get_generated_files_with_metadata`` for the session output directory,
        captured before the list is rendered.
    """
    st.markdown("---")
    st.subheader("Generated Files")
    output_dir = st.session_state.output_dir
    file_paths = get_generated_files(output_dir)
    files_metadata = get_generated_files_with_metadata(output_dir)

    if not file_paths:
        st.info("No files generated yet")
        return files_metadata

    st.write(f"📁 {len(file_paths)} file(s) generated:")
    for file_path in file_paths:
        file_name = pathlib.Path(file_path).name
        try:
            payload = pathlib.Path(file_path).read_bytes()
            name_col, button_col = st.columns([3, 1])
            with name_col:
                st.text(file_name)
            with button_col:
                st.download_button(label="⬇️", data=payload, file_name=file_name, key=f"download_{file_path}")
        except Exception as e:
            # One unreadable file must not prevent the others from being listed.
            st.error(f"Error loading {file_name}: {e}")

    return files_metadata
360
+
361
+
362
def _render_sidebar() -> dict[str, float]:
    """Draw the whole sidebar and hand back the generated-files metadata.

    Returns:
        Whatever ``_render_generated_files`` returned while the sidebar was rendered.
    """
    state = st.session_state
    with st.sidebar:
        st.image(str(LOGO_PATH), width=200)
        _render_credentials_section()
        _render_url_context_section()
        _render_quick_actions()
        files_metadata = _render_generated_files()

        # Chat history is scoped to the user only when user isolation is enabled.
        history_user_id = state.user_id if state.user_isolation_enabled else None
        render_chat_history(state.redis_storage, state.chat_id, history_user_id)

    st.sidebar.divider()
    st.sidebar.caption(f"User ID: {st.session_state.user_id}")

    return files_metadata
378
+
379
+
380
+ def _build_agent_prompt(
381
+ prompt: str, uploaded_images: list[dict], uploaded_documents: list[dict]
382
+ ) -> tuple[UserContent, int, int]:
383
+ """Build agent prompt from text and optional images/documents.
384
+
385
+ Returns:
386
+ Tuple of (agent_prompt, num_images, num_documents)
387
+ """
388
+ if uploaded_images or uploaded_documents:
389
+ content_blocks: list[ImageBlockParam | TextBlockParam] = []
390
+ for img_data in uploaded_images:
391
+ content_blocks.append(
392
+ {
393
+ "type": "image",
394
+ "source": {"type": "base64", "media_type": img_data["media_type"], "data": img_data["data"]},
395
+ }
396
+ )
397
+ if uploaded_documents:
398
+ output_dir = st.session_state.output_dir
399
+ doc_paths = [str(output_dir / doc["name"]) for doc in uploaded_documents]
400
+ doc_info = "\n".join(f"- {path}" for path in doc_paths)
401
+ content_blocks.append(
402
+ {"type": "text", "text": f"[Uploaded documents available for processing:\n{doc_info}]"}
403
+ )
404
+ content_blocks.append({"type": "text", "text": prompt})
405
+ st.session_state.uploaded_images = []
406
+ st.session_state.uploaded_documents = []
407
+ return content_blocks, len(uploaded_images), len(uploaded_documents)
408
+ return prompt, 0, 0
409
+
410
+
411
def _render_file_upload() -> None:
    """Render file upload section for images and PDFs.

    The left column holds a popover with the uploader. Whenever the uploader holds files, the
    pending attachments in session state are rebuilt from it: PDFs become documents, files
    with an ``image/*`` type become images, anything else is ignored, and at most five files
    of each kind are kept. The right column shows a thumbnail per pending attachment plus a
    button that clears them.
    """
    max_per_type = 5
    upload_col, preview_col = st.columns([1, 15])

    with upload_col:
        with st.popover("+", help="Attach files"):
            uploaded_files = st.file_uploader(
                "Upload files",
                type=["png", "jpg", "jpeg", "gif", "webp", "pdf"],
                accept_multiple_files=True,
                key=f"file_uploader_{st.session_state.uploader_key_counter}",
                label_visibility="collapsed",
            )
            if uploaded_files:
                images: list[dict] = []
                documents: list[dict] = []
                skipped = 0
                for uploaded_file in uploaded_files:
                    mime_type = uploaded_file.type or ""
                    if mime_type == "application/pdf":
                        bucket = documents
                    elif mime_type.startswith("image/"):
                        bucket = images
                    else:
                        continue
                    if len(bucket) >= max_per_type:
                        # Over the limit: count it, but do not read or encode the file.
                        skipped += 1
                        continue
                    file_bytes = uploaded_file.read()
                    # Rewind so the same upload can be read again on the next script run.
                    uploaded_file.seek(0)
                    bucket.append(
                        {
                            "name": uploaded_file.name,
                            "media_type": mime_type,
                            "data": base64.b64encode(file_bytes).decode("utf-8"),
                        }
                    )
                st.session_state.uploaded_images = images
                st.session_state.uploaded_documents = documents
                # Warn only when something was actually left out, not when the limit is just met.
                if skipped:
                    st.warning("Max 5 files per type allowed.")

    with preview_col:
        pending_images = st.session_state.uploaded_images
        pending_documents = st.session_state.uploaded_documents
        if pending_images or pending_documents:
            # One column per attachment plus a trailing column for the clear button.
            thumb_cols = st.columns(len(pending_images) + len(pending_documents) + 1)
            for column, img_data in zip(thumb_cols, pending_images):
                with column:
                    st.image(f"data:{img_data['media_type']};base64,{img_data['data']}", width=50)
            for column, doc_data in zip(thumb_cols[len(pending_images) :], pending_documents):
                with column:
                    st.markdown(f"📄 {doc_data['name'][:10]}...")
            with thumb_cols[-1]:
                if st.button("✕", key="clear_files", help="Clear files"):
                    st.session_state.uploaded_images = []
                    st.session_state.uploaded_documents = []
                    # A new uploader key is required as well: the widget would otherwise still
                    # hold the files and refill the lists on the rerun.
                    st.session_state.uploader_key_counter += 1
                    st.rerun()
465
+
466
+
467
def _save_documents_to_output_dir(uploaded_documents: list[dict]) -> None:
    """Save uploaded documents to the output directory.

    Each document is written under the final component of its name, so a name containing
    directory parts or an absolute path cannot place the file outside the output directory.
    Saving is best effort: a failure is logged and the remaining documents are still processed.

    Args:
        uploaded_documents: Documents to store; each entry is read through "name" (file name)
            and "data" (base64-encoded content).
    """
    output_dir = st.session_state.output_dir
    for doc in uploaded_documents:
        # The name originates from the upload and is not trusted as a path.
        safe_name = pathlib.Path(doc["name"]).name
        if safe_name in ("", ".", ".."):
            logger.error(f"Failed to save document {doc['name']}: invalid file name")
            continue

        file_path = output_dir / safe_name
        try:
            file_path.write_bytes(base64.b64decode(doc["data"]))
            logger.info(f"Saved document to {file_path}")
        except Exception as e:
            logger.error(f"Failed to save document {doc['name']}: {e}")
478
+
479
+
480
def _process_user_input(generated_files_metadata: dict[str, float]) -> None:
    """Read the chat input and, if the user submitted something, run one agent turn.

    Pending documents are written to the output directory, the prompt is combined with any
    attachments, the user's message is recorded (and saved to Redis when connected) and the
    agent's answer is rendered in an assistant chat bubble.

    Args:
        generated_files_metadata: File metadata captured while rendering the sidebar; passed
            on to ``_run_agent_and_display``.
    """
    prompt = st.chat_input("Enter your instruction...")
    if not prompt:
        return

    logger.info(f"User prompt received: {prompt[:100]}...")

    state = st.session_state
    pending_images = state.uploaded_images
    pending_documents = state.uploaded_documents
    if pending_documents:
        _save_documents_to_output_dir(pending_documents)

    agent_prompt, num_images, num_documents = _build_agent_prompt(prompt, pending_images, pending_documents)

    # The stored/displayed message only mentions how many attachments were sent.
    counted_labels = (
        (num_images, f"{num_images} image(s)"),
        (num_documents, f"{num_documents} document(s)"),
    )
    attachment_parts = [label for count, label in counted_labels if count > 0]
    if attachment_parts:
        display_content = f"[{', '.join(attachment_parts)} attached]\n\n{prompt}"
    else:
        display_content = prompt
    state.messages.append({"role": "user", "content": display_content})

    storage = state.redis_storage
    if storage.is_connected():
        owner_id = state.user_id if state.user_isolation_enabled else None
        storage.save_chat(owner_id, state.chat_id, state.messages, str(state.output_dir))

    with st.chat_message("user"):
        st.markdown(display_content)

    with st.chat_message("assistant"):
        _run_agent_and_display(prompt, agent_prompt, num_images, num_documents, generated_files_metadata)
515
+
516
+
517
def _run_agent_and_display(
    prompt: str,
    agent_prompt: UserContent,
    num_images: int,
    num_documents: int,
    generated_files_metadata: dict[str, float],
) -> None:
    """Run the agent and display results.

    Streams the agent's steps into a ``ChatResponse``, stores the final answer (or the
    agent-reported error) as the assistant message, saves it to Redis, logs the result, and
    reruns the script when the generated files changed or the answer contains a Mermaid block.
    Any exception is logged, shown to the user and recorded as the assistant message.

    Args:
        prompt: The text the user typed; used for logging and for the ``ChatResponse``.
        agent_prompt: The content actually sent to the agent (text or content blocks).
        num_images: Number of attached images (logged only).
        num_documents: Number of attached documents (logged only).
        generated_files_metadata: File metadata captured before the run, compared against the
            metadata after the run to decide whether a rerun is needed.
    """
    final_answer_text = None
    final_error_text = None

    try:
        # Monotonic clock: the measured duration cannot be distorted by system clock changes.
        start_time = time.monotonic()
        chat_response = ChatResponse(prompt, output_placeholder=st.empty())
        # Everything except the last message, i.e. the conversation before the current entry.
        conversation_history = st.session_state.messages[:-1]

        # Anything other than an explicit "read-write" falls back to read-only.
        mcp_mode: Literal["read-only", "read-write"] = (
            "read-write" if st.session_state.mcp_mode == "read-write" else "read-only"
        )

        def process_step(step: AgentStep) -> None:
            """Forward the step to the response renderer and capture the final outcome."""
            nonlocal final_answer_text, final_error_text
            chat_response.process_step(step)
            if step.is_final:
                if step.final_answer:
                    final_answer_text = parse_and_format_final_answer(step.final_answer)
                elif step.error:
                    final_error_text = f"❌ Error: {step.error}"

        logger.info(
            f"Agent input context:\n"
            f" - Prompt: {prompt[:500]}{'...' if len(prompt) > 500 else ''}\n"
            f" - Num images: {num_images}\n"
            f" - Num documents: {num_documents}\n"
            f" - Conversation history length: {len(conversation_history)}\n"
            f" - MCP mode: {mcp_mode}\n"
            f" - Rossum URL context: {st.session_state.rossum_url_context.raw_url}"
        )

        asyncio.run(
            run_agent_turn(
                rossum_api_token=st.session_state.rossum_api_token,
                rossum_api_base_url=st.session_state.rossum_api_base_url,
                mcp_mode=mcp_mode,
                prompt=agent_prompt,
                conversation_history=conversation_history,
                on_step=process_step,
                rossum_url=st.session_state.rossum_url_context.raw_url,
            )
        )

        final_content = final_answer_text or final_error_text
        if final_content:
            st.session_state.messages.append({"role": "assistant", "content": final_content})
            _save_response_to_redis(chat_response)

        duration = time.monotonic() - start_time
        if chat_response.result:
            log_agent_result(
                chat_response.result,
                prompt,
                duration,
                total_input_tokens=chat_response.total_input_tokens,
                total_output_tokens=chat_response.total_output_tokens,
            )
        logger.info("Agent response generated successfully")

        # Audible notification only for a real answer, not for an error.
        if final_answer_text:
            st.components.v1.html(BEEP_HTML, height=0)

        current_files_metadata = get_generated_files_with_metadata(st.session_state.output_dir)
        has_mermaid = final_answer_text and MERMAID_BLOCK_PATTERN.search(final_answer_text)
        if current_files_metadata != generated_files_metadata or has_mermaid:
            st.rerun()

    except Exception as e:
        logger.error(f"Error processing user request: {e}", exc_info=True)
        error_msg = f"❌ Error: {e!s}"
        st.error(error_msg)
        st.session_state.messages.append({"role": "assistant", "content": error_msg})
597
+
598
+
599
def _save_response_to_redis(chat_response: ChatResponse) -> None:
    """Persist the current conversation to Redis together with run statistics.

    Does nothing when Redis is not connected. The chat is stored under the session's user ID
    only when user isolation is enabled; otherwise no user ID is passed.

    Args:
        chat_response: Source of the token, tool-call and step totals stored as metadata.
    """
    state = st.session_state
    storage = state.redis_storage
    if not storage.is_connected():
        return

    owner_id = None
    if state.get("user_isolation_enabled", False):
        owner_id = state.get("user_id")

    run_metadata = ChatMetadata(
        commit_sha=get_commit_sha(),
        total_input_tokens=chat_response.total_input_tokens,
        total_output_tokens=chat_response.total_output_tokens,
        total_tool_calls=chat_response.total_tool_calls,
        total_steps=chat_response.total_steps,
    )
    storage.save_chat(owner_id, state.chat_id, state.messages, str(state.output_dir), metadata=run_metadata)
619
+
620
+
621
def main() -> None:
    """Run one pass of the Streamlit script: set up state, draw the page, handle input.

    Until credentials are saved only a disabled chat input is shown, so no prompt can be sent.
    """
    # Session state is prepared in this order before anything is rendered.
    for initialize in (
        _initialize_user_and_storage,
        _initialize_chat_id,
        _initialize_session_defaults,
        _load_messages_from_redis,
    ):
        initialize()

    generated_files_metadata = _render_sidebar()

    st.title("Rossum Agent")
    st.markdown("Test-bed agent for automating Rossum setup processes.")

    # Replay the conversation so far.
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            render_markdown_with_mermaid(message["content"])

    if st.session_state.credentials_saved:
        _render_file_upload()
        _process_user_input(generated_files_metadata)
    else:
        st.chat_input("👈 Please enter your Rossum API credentials in the sidebar", disabled=True)


if __name__ == "__main__":
    main()
@@ -0,0 +1,36 @@
1
+ """Beep sound for UI notifications."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import io
6
+ import wave
7
+
8
+ import numpy as np
9
+
10
+
11
def generate_beep_wav(frequency: int = 460, duration: float = 0.33, sample_rate: int = 16000) -> bytes:
    """Synthesize a sine-wave beep and return it as an in-memory WAV file.

    Args:
        frequency: Frequency of the beep in Hz
        duration: Duration of the beep in seconds
        sample_rate: Sample rate in Hz

    Returns:
        WAV file as bytes (mono, 16-bit samples)
    """
    sample_count = int(sample_rate * duration)
    timeline = np.linspace(0, duration, sample_count, endpoint=False)

    # Full-scale sine, scaled to the signed 16-bit range and converted to int16.
    pcm_samples = (np.sin(2 * np.pi * frequency * timeline) * 32767).astype(np.int16)

    wav_buffer = io.BytesIO()
    with wave.open(wav_buffer, "wb") as writer:
        # (channels=1 mono, sample width=2 bytes, frame rate, frame count, compression type/name)
        writer.setparams((1, 2, sample_rate, 0, "NONE", "not compressed"))
        writer.writeframes(pcm_samples.tobytes())

    return wav_buffer.getvalue()