rossum-agent 1.0.0rc0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67)
  1. rossum_agent/__init__.py +9 -0
  2. rossum_agent/agent/__init__.py +32 -0
  3. rossum_agent/agent/core.py +932 -0
  4. rossum_agent/agent/memory.py +176 -0
  5. rossum_agent/agent/models.py +160 -0
  6. rossum_agent/agent/request_classifier.py +152 -0
  7. rossum_agent/agent/skills.py +132 -0
  8. rossum_agent/agent/types.py +5 -0
  9. rossum_agent/agent_logging.py +56 -0
  10. rossum_agent/api/__init__.py +1 -0
  11. rossum_agent/api/cli.py +51 -0
  12. rossum_agent/api/dependencies.py +190 -0
  13. rossum_agent/api/main.py +180 -0
  14. rossum_agent/api/models/__init__.py +1 -0
  15. rossum_agent/api/models/schemas.py +301 -0
  16. rossum_agent/api/routes/__init__.py +1 -0
  17. rossum_agent/api/routes/chats.py +95 -0
  18. rossum_agent/api/routes/files.py +113 -0
  19. rossum_agent/api/routes/health.py +44 -0
  20. rossum_agent/api/routes/messages.py +218 -0
  21. rossum_agent/api/services/__init__.py +1 -0
  22. rossum_agent/api/services/agent_service.py +451 -0
  23. rossum_agent/api/services/chat_service.py +197 -0
  24. rossum_agent/api/services/file_service.py +65 -0
  25. rossum_agent/assets/Primary_light_logo.png +0 -0
  26. rossum_agent/bedrock_client.py +64 -0
  27. rossum_agent/prompts/__init__.py +27 -0
  28. rossum_agent/prompts/base_prompt.py +80 -0
  29. rossum_agent/prompts/system_prompt.py +24 -0
  30. rossum_agent/py.typed +0 -0
  31. rossum_agent/redis_storage.py +482 -0
  32. rossum_agent/rossum_mcp_integration.py +123 -0
  33. rossum_agent/skills/hook-debugging.md +31 -0
  34. rossum_agent/skills/organization-setup.md +60 -0
  35. rossum_agent/skills/rossum-deployment.md +102 -0
  36. rossum_agent/skills/schema-patching.md +61 -0
  37. rossum_agent/skills/schema-pruning.md +23 -0
  38. rossum_agent/skills/ui-settings.md +45 -0
  39. rossum_agent/streamlit_app/__init__.py +1 -0
  40. rossum_agent/streamlit_app/app.py +646 -0
  41. rossum_agent/streamlit_app/beep_sound.py +36 -0
  42. rossum_agent/streamlit_app/cli.py +17 -0
  43. rossum_agent/streamlit_app/render_modules.py +123 -0
  44. rossum_agent/streamlit_app/response_formatting.py +305 -0
  45. rossum_agent/tools/__init__.py +214 -0
  46. rossum_agent/tools/core.py +173 -0
  47. rossum_agent/tools/deploy.py +404 -0
  48. rossum_agent/tools/dynamic_tools.py +365 -0
  49. rossum_agent/tools/file_tools.py +62 -0
  50. rossum_agent/tools/formula.py +187 -0
  51. rossum_agent/tools/skills.py +31 -0
  52. rossum_agent/tools/spawn_mcp.py +227 -0
  53. rossum_agent/tools/subagents/__init__.py +31 -0
  54. rossum_agent/tools/subagents/base.py +303 -0
  55. rossum_agent/tools/subagents/hook_debug.py +591 -0
  56. rossum_agent/tools/subagents/knowledge_base.py +305 -0
  57. rossum_agent/tools/subagents/mcp_helpers.py +47 -0
  58. rossum_agent/tools/subagents/schema_patching.py +471 -0
  59. rossum_agent/url_context.py +167 -0
  60. rossum_agent/user_detection.py +100 -0
  61. rossum_agent/utils.py +128 -0
  62. rossum_agent-1.0.0rc0.dist-info/METADATA +311 -0
  63. rossum_agent-1.0.0rc0.dist-info/RECORD +67 -0
  64. rossum_agent-1.0.0rc0.dist-info/WHEEL +5 -0
  65. rossum_agent-1.0.0rc0.dist-info/entry_points.txt +3 -0
  66. rossum_agent-1.0.0rc0.dist-info/licenses/LICENSE +21 -0
  67. rossum_agent-1.0.0rc0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,305 @@
1
+ """Knowledge base search sub-agent.
2
+
3
+ Provides tools for searching and analyzing the Rossum Knowledge Base.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ import asyncio
9
+ import concurrent.futures
10
+ import json
11
+ import logging
12
+ from typing import TYPE_CHECKING
13
+
14
+ import httpx
15
+ from anthropic import beta_tool
16
+ from ddgs import DDGS
17
+ from ddgs.exceptions import DDGSException
18
+
19
+ from rossum_agent.bedrock_client import create_bedrock_client, get_model_id
20
+ from rossum_agent.tools.core import (
21
+ SubAgentProgress,
22
+ SubAgentText,
23
+ SubAgentTokenUsage,
24
+ report_progress,
25
+ report_text,
26
+ report_token_usage,
27
+ )
28
+
29
+ if TYPE_CHECKING:
30
+ from collections.abc import Coroutine
31
+
32
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# System prompt for the analysis LLM call; passed as `system=` by
# `_call_opus_for_web_search_analysis`. The text is sent to the model verbatim.
_WEB_SEARCH_ANALYSIS_SYSTEM_PROMPT = """Goal: Extract actionable technical information from Rossum Knowledge Base search results.

## Output Format

| Section | Content |
|---------|---------|
| Key Information | Facts, JSON configs, code examples answering the query |
| Implementation | Steps, code patterns if applicable |
| Configuration | Data types, singlevalue vs multivalue (bold) |
| Related Topics | Brief mention of related docs |

## AI Feature Requirements

For document splitting, AI predictions, or field-based automation:

| Requirement | Rule |
|-------------|------|
| hidden | Must be `false`. Hidden datapoints invisible to AI—features fail silently. |
| Multivalue parent | Required for splitting (one value per split document). |

## JSON Schema Examples

For AI features, always use `"hidden": false`:
```json
{"hidden": false, "type": "string", "id": "invoice_id"}
```

Never `"hidden": true`—causes silent failures."""

# Domain used to scope the search (`site:` operator) and to filter result URLs.
_KNOWLEDGE_BASE_DOMAIN = "knowledge-base.rossum.ai"
# Upper bound passed to the DDGS text search as `max_results`.
_MAX_SEARCH_RESULTS = 5
# Passed as the per-request `timeout` to httpx when fetching a page (seconds).
_WEBPAGE_FETCH_TIMEOUT = 30
# Prefix prepended to a page URL so the request goes through Jina Reader.
_JINA_READER_PREFIX = "https://r.jina.ai/"
67
+
68
+
69
class WebSearchError(Exception):
    """Signal that the knowledge base web search could not be completed."""
71
+
72
+
73
+ def _run_async[T](coro: Coroutine[None, None, T]) -> T:
74
+ """Run a coroutine, handling both sync and async caller contexts."""
75
+ try:
76
+ asyncio.get_running_loop()
77
+ except RuntimeError:
78
+ return asyncio.run(coro)
79
+
80
+ # In async context: run in new thread with its own event loop
81
+ with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
82
+ return pool.submit(asyncio.run, coro).result() # type: ignore[return-value] - Future.result() returns Any
83
+
84
+
85
def _call_opus_for_web_search_analysis(
    query: str, search_results: str, user_query: str | None = None
) -> tuple[str, int, int]:
    """Ask the configured Bedrock model to distill knowledge base search results.

    Progress, token usage and the final text are reported through the
    ``report_*`` helpers under the tool name ``search_knowledge_base``.

    Args:
        query: Search query the results were retrieved for.
        search_results: Pre-formatted text of the search results to analyze.
        user_query: Original user question. It is added to the prompt only when
            it is non-empty and differs from ``query``.

    Returns:
        ``(analysis_text, input_tokens, output_tokens)``. If anything raises, the
        text is an error message followed by the raw results and both counts are 0.
    """
    tool = "search_knowledge_base"
    try:
        report_progress(SubAgentProgress(tool_name=tool, iteration=1, max_iterations=1, status="analyzing"))

        bedrock = create_bedrock_client()

        # Only mention the user's question when it adds something beyond the query.
        if user_query and user_query != query:
            original_question = f"""
## User's Original Question

The user asked: "{user_query}"

Keep this context in mind when analyzing the search results and tailor your response to address the user's specific question.

"""
        else:
            original_question = ""

        prompt = f"""Analyze these Rossum Knowledge Base search results for the query: "{query}"
{original_question}
## Search Results

{search_results}

## Goal

Extract actionable technical information a developer can use immediately.

| Priority | Content |
|----------|---------|
| Schema definitions | Data types, singlevalue vs multivalue |
| Code examples | JSON configs, API payloads |
| Implementation steps | When present |

## AI Feature Topics (document splitting, AI predictions, field automation)

When the topic involves document splitting, beware of the bug in knowledge base, emphasize below:

⛔ CRITICAL SCHEMA REQUIREMENTS
| Property | Constraint |
|----------|------------|
| hidden | false |
| Multivalue parent | Required for splitting (one value per split document)."""

        reply = bedrock.messages.create(
            model=get_model_id(),
            max_tokens=4096,
            temperature=0.25,
            system=_WEB_SEARCH_ANALYSIS_SYSTEM_PROMPT,
            messages=[{"role": "user", "content": prompt}],
        )

        tokens_in = reply.usage.input_tokens
        tokens_out = reply.usage.output_tokens

        logger.info(f"search_knowledge_base: LLM analysis, tokens in={tokens_in} out={tokens_out}")

        report_token_usage(
            SubAgentTokenUsage(tool_name=tool, input_tokens=tokens_in, output_tokens=tokens_out, iteration=1)
        )

        # Keep only the content blocks that expose a `text` attribute.
        texts = []
        for part in reply.content:
            if hasattr(part, "text"):
                texts.append(part.text)

        if texts:
            analysis = "\n".join(texts)
        else:
            analysis = "No analysis provided"

        report_progress(SubAgentProgress(tool_name=tool, iteration=1, max_iterations=1, status="completed"))

        report_text(SubAgentText(tool_name=tool, text=analysis, is_final=True))

        return analysis, tokens_in, tokens_out

    except Exception as exc:
        # Best-effort fallback: hand the raw results back instead of failing the tool call.
        logger.exception("Error calling Opus for web search analysis")
        return f"Error analyzing search results: {exc}\n\nRaw results:\n{search_results}", 0, 0
166
+
167
+
168
async def _fetch_webpage_content(client: httpx.AsyncClient, url: str) -> str:
    """Download a page through Jina Reader and return its text.

    The target URL is appended to the Jina Reader prefix and requested with the
    shared client; the module docs describe this as a way to get rendered
    content from JS-heavy pages such as the Rossum knowledge base.

    Args:
        client: Async HTTP client used to issue the request.
        url: Address of the page to read.

    Returns:
        The response text truncated to 50000 characters, or a bracketed failure
        message when the request raises ``httpx.HTTPError``.
    """
    max_chars = 50000
    reader_url = f"{_JINA_READER_PREFIX}{url}"
    try:
        reply = await client.get(reader_url, timeout=_WEBPAGE_FETCH_TIMEOUT)
        reply.raise_for_status()
        body = reply.text
        return body[:max_chars]
    except httpx.HTTPError as exc:
        logger.warning(f"Failed to fetch webpage {url} via Jina Reader: {exc}")
        return f"[Failed to fetch content: {exc}]"
185
+
186
+
187
async def _search_knowledge_base(query: str) -> list[dict[str, str]]:
    """Search Rossum Knowledge Base using DDGS metasearch library.

    The query is restricted to the knowledge base domain with a ``site:``
    operator. Results whose ``href`` does not contain that domain are dropped,
    and only the first two remaining results have their page content fetched
    (concurrently) through ``_fetch_webpage_content``.

    Note: the DDGS call is synchronous, so it blocks the running event loop
    for the duration of the search.

    Args:
        query: Search query string.

    Returns:
        List of search result dicts with title, url, and content. A page that
        could not be fetched carries a bracketed failure message as content.

    Raises:
        WebSearchError: If the DDGS search raises ``DDGSException``; the
            original exception is attached as ``__cause__``.
    """
    report_progress(
        SubAgentProgress(tool_name="search_knowledge_base", iteration=0, max_iterations=0, status="searching")
    )

    site_query = f"site:{_KNOWLEDGE_BASE_DOMAIN} {query}"
    logger.info(f"Searching knowledge base: {site_query}")

    try:
        with DDGS() as ddgs:
            raw_results = ddgs.text(site_query, max_results=_MAX_SEARCH_RESULTS)
    except DDGSException as e:
        logger.error(f"Knowledge base search failed: {e}")
        # Chain the cause so the underlying DDGS traceback is not lost.
        raise WebSearchError(f"Search failed: {e}") from e

    # Keep only knowledge base hits and cap the number of pages fetched in full.
    filtered_results = [r for r in raw_results if _KNOWLEDGE_BASE_DOMAIN in r.get("href", "")][:2]

    async with httpx.AsyncClient() as client:
        fetch_tasks = []
        for r in filtered_results:
            url = r.get("href", "")
            logger.info(f"Fetching full content from: {url}")
            fetch_tasks.append(_fetch_webpage_content(client, url))

        # return_exceptions=True: one failing fetch must not discard the others.
        contents = await asyncio.gather(*fetch_tasks, return_exceptions=True)

    results = []
    for r, content in zip(filtered_results, contents):
        if isinstance(content, Exception):
            logger.warning(f"Failed to fetch {r.get('href', '')}: {content}")
            content = f"[Failed to fetch content: {content}]"
        results.append({"title": r.get("title", ""), "url": r.get("href", ""), "content": content})

    logger.info(f"Found {len(results)} results for query: {query}")
    return results
233
+
234
+
235
async def _search_and_analyze_knowledge_base(query: str, user_query: str | None = None) -> str:
    """Run a knowledge base search and have the LLM sub-agent summarize the hits.

    Args:
        query: Search query string.
        user_query: Original user question, forwarded to the analysis step (optional).

    Returns:
        JSON string. With no hits it has status ``no_results`` and a hint message;
        otherwise status ``success`` with the analysis, the source URLs and the
        token counts reported by the analysis step.

    Raises:
        WebSearchError: Propagated from the search step when the search fails.
    """
    hits = await _search_knowledge_base(query)

    if not hits:
        logger.warning(f"No results found for query: {query}")
        hint = (
            f"No results found in Rossum Knowledge Base for: '{query}'. "
            "Try different keywords or check the extension/hook name spelling."
        )
        empty_payload = {
            "status": "no_results",
            "query": query,
            "message": hint,
            "input_tokens": 0,
            "output_tokens": 0,
        }
        return json.dumps(empty_payload)

    # One markdown section per hit, separated by horizontal rules.
    sections = [f"## {hit['title']}\nURL: {hit['url']}\n\n{hit['content']}" for hit in hits]
    combined_text = "\n\n---\n\n".join(sections)

    logger.info("Analyzing knowledge base results with Opus sub-agent")
    analysis, tokens_in, tokens_out = _call_opus_for_web_search_analysis(
        query, combined_text, user_query=user_query
    )

    logger.info(f"search_knowledge_base: completed, tokens in={tokens_in} out={tokens_out}")

    payload = {
        "status": "success",
        "query": query,
        "analysis": analysis,
        "source_urls": [hit["url"] for hit in hits],
        "input_tokens": tokens_in,
        "output_tokens": tokens_out,
    }
    return json.dumps(payload)
283
+
284
+
285
@beta_tool
def search_knowledge_base(query: str, user_query: str | None = None) -> str:
    """Search the Rossum Knowledge Base for documentation about extensions, hooks, and configurations.

    Use this tool to find information about Rossum features, troubleshoot errors,
    and understand extension configurations. The search is performed against
    https://knowledge-base.rossum.ai/docs.

    Args:
        query: Search query. Be specific - include extension names, error messages,
            or feature names. Examples: 'document splitting extension',
            'duplicate handling configuration', 'webhook timeout error'.
        user_query: The original user question for context. Pass the user's full
            question here so Opus can tailor the analysis to address their specific needs.

    Returns:
        JSON with search results containing title, URL, snippet, and token usage.
    """
    # NOTE(review): the docstring is left untouched because @beta_tool presumably
    # exposes it to the model as the tool description - confirm before rewording.
    if query:
        # Bridge into the async pipeline from this synchronous entry point.
        return _run_async(_search_and_analyze_knowledge_base(query, user_query=user_query))

    missing_query = {"status": "error", "message": "Query is required", "input_tokens": 0, "output_tokens": 0}
    return json.dumps(missing_query)
@@ -0,0 +1,47 @@
1
+ """Shared MCP helper functions for sub-agents.
2
+
3
+ Provides common utilities for calling MCP tools from synchronous thread contexts.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ import asyncio
9
+ import logging
10
+ import time
11
+ from typing import TYPE_CHECKING
12
+
13
+ if TYPE_CHECKING:
14
+ from typing import Any
15
+
16
+ from rossum_agent.tools.core import get_mcp_connection, get_mcp_event_loop
17
+
18
# Module-level logger named after this module; used to log MCP call timings.
logger = logging.getLogger(__name__)
19
+
20
+
21
def call_mcp_tool(name: str, arguments: dict[str, Any], timeout: int = 60) -> Any:
    """Call an MCP tool synchronously from within a thread.

    Uses run_coroutine_threadsafe to schedule the async MCP call on the MCP
    event loop and blocks the calling thread until it completes or times out.

    Args:
        name: MCP tool name to call.
        arguments: Arguments to pass to the tool.
        timeout: Timeout in seconds (default 60).

    Returns:
        The result from the MCP tool.

    Raises:
        RuntimeError: If MCP connection is not set.
        TimeoutError: If no result arrives within ``timeout`` seconds. The
            pending call is cancelled before the exception is re-raised.
    """
    mcp_connection = get_mcp_connection()
    mcp_event_loop = get_mcp_event_loop()
    if mcp_connection is None or mcp_event_loop is None:
        raise RuntimeError("MCP connection not set. Call set_mcp_connection first.")

    start = time.perf_counter()
    future = asyncio.run_coroutine_threadsafe(mcp_connection.call_tool(name, arguments), mcp_event_loop)
    try:
        result = future.result(timeout=timeout)
    except TimeoutError:
        # result() only stops waiting; without cancel() the coroutine would keep
        # running on the MCP loop. cancel() returns False when the call already
        # finished (i.e. the TimeoutError came from the tool itself).
        if future.cancel():
            logger.warning(f"MCP call '{name}' timed out after {timeout}s and was cancelled")
        raise
    elapsed_ms = (time.perf_counter() - start) * 1000
    logger.info(f"MCP call '{name}' completed in {elapsed_ms:.1f}ms")
    return result