rossum-agent 1.0.0rc0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. rossum_agent/__init__.py +9 -0
  2. rossum_agent/agent/__init__.py +32 -0
  3. rossum_agent/agent/core.py +932 -0
  4. rossum_agent/agent/memory.py +176 -0
  5. rossum_agent/agent/models.py +160 -0
  6. rossum_agent/agent/request_classifier.py +152 -0
  7. rossum_agent/agent/skills.py +132 -0
  8. rossum_agent/agent/types.py +5 -0
  9. rossum_agent/agent_logging.py +56 -0
  10. rossum_agent/api/__init__.py +1 -0
  11. rossum_agent/api/cli.py +51 -0
  12. rossum_agent/api/dependencies.py +190 -0
  13. rossum_agent/api/main.py +180 -0
  14. rossum_agent/api/models/__init__.py +1 -0
  15. rossum_agent/api/models/schemas.py +301 -0
  16. rossum_agent/api/routes/__init__.py +1 -0
  17. rossum_agent/api/routes/chats.py +95 -0
  18. rossum_agent/api/routes/files.py +113 -0
  19. rossum_agent/api/routes/health.py +44 -0
  20. rossum_agent/api/routes/messages.py +218 -0
  21. rossum_agent/api/services/__init__.py +1 -0
  22. rossum_agent/api/services/agent_service.py +451 -0
  23. rossum_agent/api/services/chat_service.py +197 -0
  24. rossum_agent/api/services/file_service.py +65 -0
  25. rossum_agent/assets/Primary_light_logo.png +0 -0
  26. rossum_agent/bedrock_client.py +64 -0
  27. rossum_agent/prompts/__init__.py +27 -0
  28. rossum_agent/prompts/base_prompt.py +80 -0
  29. rossum_agent/prompts/system_prompt.py +24 -0
  30. rossum_agent/py.typed +0 -0
  31. rossum_agent/redis_storage.py +482 -0
  32. rossum_agent/rossum_mcp_integration.py +123 -0
  33. rossum_agent/skills/hook-debugging.md +31 -0
  34. rossum_agent/skills/organization-setup.md +60 -0
  35. rossum_agent/skills/rossum-deployment.md +102 -0
  36. rossum_agent/skills/schema-patching.md +61 -0
  37. rossum_agent/skills/schema-pruning.md +23 -0
  38. rossum_agent/skills/ui-settings.md +45 -0
  39. rossum_agent/streamlit_app/__init__.py +1 -0
  40. rossum_agent/streamlit_app/app.py +646 -0
  41. rossum_agent/streamlit_app/beep_sound.py +36 -0
  42. rossum_agent/streamlit_app/cli.py +17 -0
  43. rossum_agent/streamlit_app/render_modules.py +123 -0
  44. rossum_agent/streamlit_app/response_formatting.py +305 -0
  45. rossum_agent/tools/__init__.py +214 -0
  46. rossum_agent/tools/core.py +173 -0
  47. rossum_agent/tools/deploy.py +404 -0
  48. rossum_agent/tools/dynamic_tools.py +365 -0
  49. rossum_agent/tools/file_tools.py +62 -0
  50. rossum_agent/tools/formula.py +187 -0
  51. rossum_agent/tools/skills.py +31 -0
  52. rossum_agent/tools/spawn_mcp.py +227 -0
  53. rossum_agent/tools/subagents/__init__.py +31 -0
  54. rossum_agent/tools/subagents/base.py +303 -0
  55. rossum_agent/tools/subagents/hook_debug.py +591 -0
  56. rossum_agent/tools/subagents/knowledge_base.py +305 -0
  57. rossum_agent/tools/subagents/mcp_helpers.py +47 -0
  58. rossum_agent/tools/subagents/schema_patching.py +471 -0
  59. rossum_agent/url_context.py +167 -0
  60. rossum_agent/user_detection.py +100 -0
  61. rossum_agent/utils.py +128 -0
  62. rossum_agent-1.0.0rc0.dist-info/METADATA +311 -0
  63. rossum_agent-1.0.0rc0.dist-info/RECORD +67 -0
  64. rossum_agent-1.0.0rc0.dist-info/WHEEL +5 -0
  65. rossum_agent-1.0.0rc0.dist-info/entry_points.txt +3 -0
  66. rossum_agent-1.0.0rc0.dist-info/licenses/LICENSE +21 -0
  67. rossum_agent-1.0.0rc0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,471 @@
1
+ """Schema patching sub-agent.
2
+
3
+ Provides deterministic programmatic schema manipulation. The workflow:
4
+ 1. Get schema tree structure (lightweight view)
5
+ 2. Get full schema content
6
+ 3. LLM instructs which fields to keep/add based on user requirements
7
+ 4. Programmatic filtering/modification of schema content
8
+ 5. Single PUT to update schema
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import copy
14
+ import json
15
+ import logging
16
+ import time
17
+ from typing import TYPE_CHECKING
18
+
19
+ if TYPE_CHECKING:
20
+ from typing import Any
21
+
22
+ from anthropic import beta_tool
23
+
24
+ from rossum_agent.tools.subagents.base import (
25
+ SubAgent,
26
+ SubAgentConfig,
27
+ SubAgentResult,
28
+ )
29
+ from rossum_agent.tools.subagents.mcp_helpers import call_mcp_tool
30
+
31
+ logger = logging.getLogger(__name__)
32
+
33
+ _SCHEMA_PATCHING_SYSTEM_PROMPT = """Goal: Update schema to match EXACTLY the requested fields—programmatically.
34
+
35
+ ## Workflow
36
+
37
+ 1. get_schema_tree_structure → see current field IDs
38
+ 2. get_full_schema → get complete schema con
39
+ 3. Analyze current vs requested fields
40
+ 4. Call apply_schema_changes with:
41
+ - fields_to_keep: list of field IDs to retain
42
+ - fields_to_add: list of new field specifications
43
+ 5. Return summary of changes
44
+
45
+ ## Field Specification Format (for fields_to_add)
46
+
47
+ | Property | Required | Notes |
48
+ |----------|----------|-------|
49
+ | id | Yes | Unique identifier |
50
+ | label | Yes | Display name |
51
+ | parent_section | Yes | Section ID to add field to |
52
+ | type | Yes | string, number, date, enum |
53
+ | table_id | If table | Multivalue ID for table columns |
54
+
55
+ Optional: format, options (for enum), rir_field_names, hidden, can_export, ui_configuration
56
+
57
+ ## Constraints
58
+
59
+ - Field `id` must be valid identifier (lowercase, underscores, no spaces)
60
+ - Do NOT set `rir_field_names` unless user explicitly provides engine field names
61
+ - If user mentions extraction/AI capture, check existing schema for rir_field_names patterns first
62
+ - `ui_configuration.type` must be one of: captured, data, manual, formula, reasoning
63
+ - `ui_configuration.edit` must be one of: enabled, enabled_without_warning, disabled
64
+
65
+ ## Type Mappings
66
+
67
+ | User Request | Schema Config |
68
+ |--------------|---------------|
69
+ | String | type: "string" |
70
+ | Float/Number | type: "number" |
71
+ | Integer | type: "number", format: "#" |
72
+ | Date | type: "date" |
73
+ | Enum | type: "enum", options: [...] |
74
+
75
+ Not supported: multiline fields. Use regular string type instead.
76
+
77
+ Return: Summary of fields kept, added, removed."""
78
+
79
# Tool definition (name / description / JSON input schema) for the lightweight
# schema tree lookup. `_execute_opus_tool` forwards calls with this name to the
# MCP tool of the same name, passing the tool input through unchanged.
_GET_SCHEMA_TREE_STRUCTURE_TOOL: dict[str, Any] = {
    "name": "get_schema_tree_structure",
    "description": "Get lightweight tree view with field IDs, labels, categories, types. Call first.",
    "input_schema": {
        "type": "object",
        "properties": {"schema_id": {"type": "integer", "description": "Schema ID"}},
        "required": ["schema_id"],
    },
}
88
+
89
# Tool definition for loading the complete schema. `_execute_opus_tool` maps this
# name onto the MCP tool "get_schema" and stores the returned "content" in
# `_schema_content_cache` so a later apply_schema_changes call can modify it.
_GET_FULL_SCHEMA_TOOL: dict[str, Any] = {
    "name": "get_full_schema",
    "description": "Get complete schema content for modification.",
    "input_schema": {
        "type": "object",
        "properties": {"schema_id": {"type": "integer", "description": "Schema ID"}},
        "required": ["schema_id"],
    },
}
98
+
99
# Tool definition for the single write step of the workflow. Only `schema_id` is
# required; `fields_to_keep` and `fields_to_add` are both optional.
_APPLY_SCHEMA_CHANGES_TOOL: dict[str, Any] = {
    "name": "apply_schema_changes",
    "description": "Programmatically filter schema and add new fields, then PUT in one call.",
    "input_schema": {
        "type": "object",
        "properties": {
            "schema_id": {"type": "integer", "description": "Schema ID"},
            # Omitting this key means "no filtering" (see _apply_schema_changes).
            "fields_to_keep": {
                "type": "array",
                "items": {"type": "string"},
                "description": "Field IDs to retain. Sections always kept. Omit to keep all.",
            },
            # Each item is a field specification consumed by _build_field_node /
            # _add_fields_to_content.
            "fields_to_add": {
                "type": "array",
                "items": {
                    "type": "object",
                    "properties": {
                        "id": {"type": "string"},
                        "label": {"type": "string"},
                        "parent_section": {"type": "string"},
                        "type": {"type": "string"},
                        "table_id": {"type": "string"},
                        "format": {"type": "string"},
                        "options": {"type": "array"},
                        "rir_field_names": {"type": "array"},
                        "hidden": {"type": "boolean"},
                        "can_export": {"type": "boolean"},
                        "ui_configuration": {
                            "type": "object",
                            "properties": {
                                "type": {
                                    "type": "string",
                                    "enum": ["captured", "data", "manual", "formula", "reasoning"],
                                    "description": "Field value source type",
                                },
                                "edit": {
                                    "type": "string",
                                    "enum": ["enabled", "enabled_without_warning", "disabled"],
                                    "description": "Edit behavior in UI",
                                },
                            },
                        },
                    },
                    "required": ["id", "label", "parent_section", "type"],
                },
                "description": "New fields to add to schema.",
            },
        },
        "required": ["schema_id"],
    },
}
150
+
151
# Full tool set given to the sub-agent (passed as `tools=` in SchemaPatchingSubAgent).
_OPUS_TOOLS: list[dict[str, Any]] = [
    _GET_SCHEMA_TREE_STRUCTURE_TOOL,
    _GET_FULL_SCHEMA_TOOL,
    _APPLY_SCHEMA_CHANGES_TOOL,
]
156
+
157
+
158
+ def _collect_field_ids(content: list[dict[str, Any]]) -> set[str]:
159
+ """Collect all field IDs from schema content recursively."""
160
+ ids: set[str] = set()
161
+ for node in content:
162
+ if node_id := node.get("id"):
163
+ ids.add(node_id)
164
+ if children := node.get("children"):
165
+ if isinstance(children, list):
166
+ ids.update(_collect_field_ids(children))
167
+ elif isinstance(children, dict):
168
+ if child_id := children.get("id"):
169
+ ids.add(child_id)
170
+ nested = children.get("children")
171
+ if nested and isinstance(nested, list):
172
+ ids.update(_collect_field_ids(nested))
173
+ return ids
174
+
175
+
176
+ def _filter_content(
177
+ content: list[dict[str, Any]],
178
+ fields_to_keep: set[str],
179
+ ) -> tuple[list[dict[str, Any]], list[str]]:
180
+ """Filter schema content to keep only specified fields. Sections always preserved."""
181
+ filtered: list[dict[str, Any]] = []
182
+ removed: list[str] = []
183
+
184
+ for node in content:
185
+ node_id = node.get("id", "")
186
+ category = node.get("category", "")
187
+
188
+ if category == "section":
189
+ new_section = copy.deepcopy(node)
190
+ if "children" in new_section and isinstance(new_section["children"], list):
191
+ new_children, section_removed = _filter_content(new_section["children"], fields_to_keep)
192
+ new_section["children"] = new_children
193
+ removed.extend(section_removed)
194
+ filtered.append(new_section)
195
+
196
+ elif category == "multivalue":
197
+ new_mv = copy.deepcopy(node)
198
+ mv_children_removed: list[str] = []
199
+
200
+ if "children" in new_mv and isinstance(new_mv["children"], dict):
201
+ tuple_node = new_mv["children"]
202
+ if "children" in tuple_node and isinstance(tuple_node["children"], list):
203
+ tuple_children, mv_children_removed = _filter_content(tuple_node["children"], fields_to_keep)
204
+ tuple_node["children"] = tuple_children
205
+
206
+ has_remaining_children = bool(new_mv.get("children", {}).get("children", []))
207
+
208
+ if node_id in fields_to_keep or has_remaining_children:
209
+ filtered.append(new_mv)
210
+ removed.extend(mv_children_removed)
211
+ else:
212
+ removed.append(node_id)
213
+ removed.extend(_collect_field_ids([node]) - {node_id})
214
+
215
+ else:
216
+ if node_id in fields_to_keep:
217
+ filtered.append(copy.deepcopy(node))
218
+ elif node_id:
219
+ removed.append(node_id)
220
+
221
+ return filtered, removed
222
+
223
+
224
+ def _build_field_node(spec: dict[str, Any]) -> dict[str, Any]:
225
+ """Build a schema field node from specification."""
226
+ field_type = spec.get("type", "string")
227
+ node: dict[str, Any] = {
228
+ "id": spec["id"],
229
+ "label": spec.get("label", spec["id"]),
230
+ "category": "datapoint",
231
+ "type": field_type,
232
+ }
233
+
234
+ if field_type == "enum" and spec.get("options"):
235
+ node["options"] = spec["options"]
236
+
237
+ if spec.get("format"):
238
+ node["format"] = spec["format"]
239
+
240
+ if spec.get("rir_field_names"):
241
+ node["rir_field_names"] = spec["rir_field_names"]
242
+
243
+ if spec.get("hidden") is not None:
244
+ node["hidden"] = spec["hidden"]
245
+
246
+ if spec.get("can_export") is not None:
247
+ node["can_export"] = spec["can_export"]
248
+
249
+ if spec.get("ui_configuration"):
250
+ node["ui_configuration"] = spec["ui_configuration"]
251
+
252
+ return node
253
+
254
+
255
+ def _add_fields_to_content(
256
+ content: list[dict[str, Any]],
257
+ fields_to_add: list[dict[str, Any]],
258
+ ) -> tuple[list[dict[str, Any]], list[str]]:
259
+ """Add new fields to schema content. Returns (modified_content, added_ids)."""
260
+ modified = copy.deepcopy(content)
261
+ added: list[str] = []
262
+
263
+ for spec in fields_to_add:
264
+ parent_section = spec.get("parent_section")
265
+ table_id = spec.get("table_id")
266
+ field_node = _build_field_node(spec)
267
+
268
+ for section in modified:
269
+ if section.get("category") != "section" or section.get("id") != parent_section:
270
+ continue
271
+
272
+ if table_id:
273
+ for child in section.get("children", []):
274
+ if child.get("category") == "multivalue" and child.get("id") == table_id:
275
+ tuple_node = child.get("children", {})
276
+ if isinstance(tuple_node, dict) and "children" in tuple_node:
277
+ tuple_node["children"].append(field_node)
278
+ added.append(spec["id"])
279
+ break
280
+ else:
281
+ if "children" not in section:
282
+ section["children"] = []
283
+ section["children"].append(field_node)
284
+ added.append(spec["id"])
285
+ break
286
+
287
+ return modified, added
288
+
289
+
290
def _apply_schema_changes(
    schema_id: int,
    current_content: list[dict[str, Any]],
    fields_to_keep: list[str] | None,
    fields_to_add: list[dict[str, Any]] | None,
) -> dict[str, Any]:
    """Filter and/or extend schema content, then push it via the ``update_schema`` MCP tool.

    Args:
        schema_id: Schema to update.
        current_content: Content previously loaded for that schema.
        fields_to_keep: IDs to retain; ``None`` disables filtering entirely.
            Top-level section IDs are always added to the keep set.
        fields_to_add: Field specifications to insert after filtering.

    Returns:
        Report dict with ``schema_id``, ``fields_removed``, ``fields_added``,
        ``fields_kept`` (sorted ids present in the final content) and
        ``update_result`` ("success" when the MCP call returned a truthy value,
        otherwise "failed").
    """
    new_content = current_content
    removed_ids: list[str] = []
    added_ids: list[str] = []

    if fields_to_keep is not None:
        retain = set(fields_to_keep)
        for top in current_content:
            if top.get("category") == "section" and top.get("id"):
                retain.add(top.get("id"))
        new_content, removed_ids = _filter_content(new_content, retain)

    if fields_to_add:
        new_content, added_ids = _add_fields_to_content(new_content, fields_to_add)

    payload = {"schema_id": schema_id, "schema_data": {"content": new_content}}
    response = call_mcp_tool("update_schema", payload)

    return {
        "schema_id": schema_id,
        "fields_removed": removed_ids,
        "fields_added": added_ids,
        "fields_kept": sorted(_collect_field_ids(new_content)),
        "update_result": "success" if response else "failed",
    }
323
+
324
+
325
# Module-level hand-off between tool calls: schema_id -> schema "content" list.
# Filled by the get_full_schema branch of _execute_opus_tool and consumed (then
# removed) by its apply_schema_changes branch.
_schema_content_cache: dict[int, list[dict[str, Any]]] = {}
326
+
327
+
328
+ def _execute_opus_tool(tool_name: str, tool_input: dict[str, Any]) -> str:
329
+ schema_id = tool_input.get("schema_id")
330
+
331
+ if tool_name == "get_schema_tree_structure":
332
+ mcp_result = call_mcp_tool("get_schema_tree_structure", tool_input)
333
+ return json.dumps(mcp_result, indent=2, default=str) if mcp_result else "No data returned"
334
+
335
+ if tool_name == "get_full_schema":
336
+ mcp_result = call_mcp_tool("get_schema", tool_input)
337
+ if mcp_result and schema_id:
338
+ content = mcp_result.get("content", []) if isinstance(mcp_result, dict) else []
339
+ _schema_content_cache[schema_id] = content
340
+ return json.dumps(mcp_result, indent=2, default=str) if mcp_result else "No data returned"
341
+
342
+ if tool_name == "apply_schema_changes":
343
+ if not schema_id or schema_id not in _schema_content_cache:
344
+ return json.dumps({"error": "Must call get_full_schema first to load content"})
345
+
346
+ current_content = _schema_content_cache[schema_id]
347
+ fields_to_keep = tool_input.get("fields_to_keep")
348
+ fields_to_add = tool_input.get("fields_to_add")
349
+
350
+ result = _apply_schema_changes(schema_id, current_content, fields_to_keep, fields_to_add)
351
+ del _schema_content_cache[schema_id]
352
+ return json.dumps(result, indent=2, default=str)
353
+
354
+ return f"Unknown tool: {tool_name}"
355
+
356
+
357
class SchemaPatchingSubAgent(SubAgent):
    """Sub-agent that patches a schema through the programmatic three-tool workflow."""

    def __init__(self) -> None:
        """Configure the sub-agent with the patching prompt, tools and limits."""
        super().__init__(
            SubAgentConfig(
                tool_name="patch_schema",
                system_prompt=_SCHEMA_PATCHING_SYSTEM_PROMPT,
                tools=_OPUS_TOOLS,
                max_iterations=5,
                max_tokens=4096,
            )
        )

    def execute_tool(self, tool_name: str, tool_input: dict[str, Any]) -> str:
        """Run a tool requested by the model by delegating to the module dispatcher."""
        return _execute_opus_tool(tool_name, tool_input)

    def process_response_block(self, block: Any, iteration: int, max_iterations: int) -> dict[str, Any] | None:
        """Return ``None``: schema patching performs no per-block processing."""
        return None
377
+
378
+
379
def _call_opus_for_patching(schema_id: str, changes: list[dict[str, Any]]) -> SubAgentResult:
    """Describe the requested changes in text and run the schema-patching sub-agent.

    Each change becomes one bullet line naming the action (default "add"), the
    field id, its type (default "string") and parent section, plus the label
    and table id when present.

    Returns:
        SubAgentResult produced by ``SchemaPatchingSubAgent.run``.
    """
    bullet_lines: list[str] = []
    for change in changes:
        line = (
            f"- {change.get('action', 'add')} field '{change.get('id')}' ({change.get('type', 'string')}) "
            f"in section '{change.get('parent_section')}'"
        )
        if change.get("label"):
            line += f" with label '{change.get('label')}'"
        if change.get("table_field") or change.get("table_id"):
            line += f" [TABLE: {change.get('table_id')}]"
        bullet_lines.append(line)
    changes_text = "\n".join(bullet_lines)

    user_content = f"""Update schema {schema_id} to have EXACTLY these fields:

{changes_text}

Workflow:
1. get_schema_tree_structure to see current field IDs
2. get_full_schema to load content
3. apply_schema_changes with fields_to_keep (IDs to retain) and/or fields_to_add
4. Return summary"""

    return SchemaPatchingSubAgent().run(user_content)
405
+
406
+
407
@beta_tool
def patch_schema_with_subagent(schema_id: str, changes: str) -> str:
    """Update a Rossum schema using an Opus sub-agent with programmatic bulk replacement.

    Delegates schema update to a sub-agent that:
    1. Fetches schema tree structure (lightweight view)
    2. Fetches full schema content
    3. Programmatically filters to keep only required fields
    4. Adds new fields as specified
    5. PUTs entire content in ONE API call

    Args:
        schema_id: The schema ID to update.
        changes: JSON array of field specifications. Each object should have:
            - action: "add" or "remove" (default: "add")
            - id: Field ID
            - parent_section: Section ID for the field
            - type: Field type (string, number, date, enum)
            - label: Field label (optional, defaults to id)
            - table_id: Multivalue ID if this is a table column

    Returns:
        JSON with update results including fields added, removed, and summary.
    """
    start_time = time.perf_counter()

    def _elapsed_ms() -> float:
        return round((time.perf_counter() - start_time) * 1000, 3)

    def _error(message: str) -> str:
        # All early exits share one payload shape: {"error", "elapsed_ms"}.
        return json.dumps({"error": message, "elapsed_ms": _elapsed_ms()})

    if not schema_id:
        return _error("No schema_id provided")

    try:
        changes_list = json.loads(changes)
    except (json.JSONDecodeError, TypeError) as e:
        # TypeError: `changes` was not str/bytes at all.
        return _error(f"Invalid changes JSON: {e}")

    if not changes_list:
        return _error("No changes provided")

    # Valid JSON is not necessarily the documented shape; anything but a list of
    # objects would fail later when each change is read with `.get(...)`.
    if not isinstance(changes_list, list) or not all(isinstance(c, dict) for c in changes_list):
        return _error("changes must be a JSON array of field specification objects")

    logger.info("patch_schema: Calling Opus for schema_id=%s, %d changes", schema_id, len(changes_list))
    result = _call_opus_for_patching(schema_id, changes_list)
    elapsed_ms = _elapsed_ms()

    logger.info(
        "patch_schema: completed in %.1fms, tokens in=%s out=%s, iterations=%s",
        elapsed_ms,
        result.input_tokens,
        result.output_tokens,
        result.iterations_used,
    )

    return json.dumps(
        {
            "schema_id": schema_id,
            "changes_requested": len(changes_list),
            "analysis": result.analysis,
            "elapsed_ms": elapsed_ms,
            "input_tokens": result.input_tokens,
            "output_tokens": result.output_tokens,
        },
        ensure_ascii=False,
        default=str,
    )
@@ -0,0 +1,167 @@
1
+ """URL context extraction for Rossum application URLs.
2
+
3
+ This module provides utilities to extract context (queue_id, document_id, hook_id, engine_id)
4
+ from Rossum application URLs, enabling the agent to understand the user's current
5
+ context when they paste a URL.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import json
11
+ import re
12
+ from dataclasses import dataclass, field
13
+ from urllib.parse import parse_qs, urlparse
14
+
15
+
16
@dataclass
class RossumUrlContext:
    """Extracted context from a Rossum application URL."""

    queue_id: int | None = None
    document_id: int | None = None
    hook_id: int | None = None
    engine_id: int | None = None
    # The URL the context was extracted from, unmodified.
    raw_url: str | None = None
    # Label of the first matching entry in PAGE_TYPE_PATTERNS, if any.
    page_type: str | None = None
    # Extra key/value context, e.g. "queue_ids" or "view_level" from the documents view.
    additional_context: dict[str, str] = field(default_factory=dict)

    def is_empty(self) -> bool:
        """Check if no context was extracted.

        A context counts as extracted when any of the four IDs is set or when
        ``additional_context`` holds entries (for example several queue IDs
        taken from a documents-view filter). ``page_type`` and ``raw_url``
        alone do not make the context non-empty.
        """
        has_id = any(
            getattr(self, name) is not None for name in ("queue_id", "document_id", "hook_id", "engine_id")
        )
        return not has_id and not self.additional_context

    def to_context_string(self) -> str:
        """Convert the context to a human-readable string for the agent.

        Returns:
            Comma-separated ``"Label: value"`` pairs in a fixed order (IDs,
            page type, additional context), or ``""`` when nothing is set.
        """
        parts: list[str] = []
        labelled_ids = (
            ("Queue ID", self.queue_id),
            ("Document ID", self.document_id),
            ("Hook ID", self.hook_id),
            ("Engine ID", self.engine_id),
        )
        # `is not None` mirrors is_empty(), so both methods agree on what "set" means.
        parts.extend(f"{label}: {value}" for label, value in labelled_ids if value is not None)
        if self.page_type:
            parts.append(f"Page type: {self.page_type}")
        parts.extend(f"{key}: {value}" for key, value in self.additional_context.items())
        return ", ".join(parts)
48
+
49
+
50
# URL patterns for different Rossum pages. All are applied with `.search()`
# against the full URL string in extract_url_context.
# Format: /queues/{queue_id}/...  (group 1 = queue id)
QUEUE_PATTERN = re.compile(r"/queues/(\d+)")
# Format: /document/{document_id}  (group 1 = document id)
DOCUMENT_PATTERN = re.compile(r"/document/(\d+)")
# Format: /hooks/{hook_id} or /extensions/{hook_id} or /extensions/my-extensions/{hook_id}
# Group 1 is the path prefix; the hook id is group 2.
HOOK_PATTERN = re.compile(r"/(hooks|extensions|extensions/my-extensions)/(\d+)")
# Format: /engines/{engine_id} or /automation/engines/{engine_id}
# Group 1 is the optional "automation/" prefix; the engine id is group 2.
ENGINE_PATTERN = re.compile(r"/(automation/)?engines/(\d+)")

# Documents list view pattern: "/documents" followed by a query string or end of URL.
DOCUMENTS_VIEW_PATTERN = re.compile(r"/documents(\?|$)")

# Page type patterns (order matters - more specific patterns first).
# extract_url_context takes the label of the first entry that matches.
PAGE_TYPE_PATTERNS = [
    # Both engine entries yield the same label; the first is a more specific
    # form of the second.
    (re.compile(r"/automation/engines/\d+/settings/basic"), "engine_settings"),
    (re.compile(r"/automation/engines/\d+/settings"), "engine_settings"),
    (re.compile(r"/settings/basic"), "queue_settings"),
    (re.compile(r"/settings/schema"), "schema_settings"),
    (re.compile(r"/settings/hooks"), "hooks_settings"),
    (re.compile(r"/settings/automation"), "automation_settings"),
    (re.compile(r"/settings/emails"), "email_settings"),
    # Generic fallback for any other settings page; must stay after the specific ones.
    (re.compile(r"/settings"), "settings"),
    (re.compile(r"/documents(\?|$)"), "documents_list"),
    (re.compile(r"/all$"), "all_documents"),
    (re.compile(r"/queues/\d+/review"), "review"),
    (re.compile(r"/upload"), "upload"),
]
78
+
79
+
80
+ def _extract_documents_view_context(url: str, context: RossumUrlContext) -> None:
81
+ """Extract context from the documents list view URL.
82
+
83
+ Parses the filtering query parameter to extract queue_id and other context.
84
+ Example URL: /documents?filtering={"items":[{"field":"queue","value":["3866808"],...}]}
85
+ """
86
+ try:
87
+ parsed = urlparse(url)
88
+ query_params = parse_qs(parsed.query)
89
+
90
+ if "filtering" in query_params:
91
+ filtering_json = query_params["filtering"][0]
92
+ filtering = json.loads(filtering_json)
93
+
94
+ for item in filtering.get("items", []):
95
+ if item.get("field") == "queue" and item.get("value"):
96
+ queue_values = item["value"]
97
+ if queue_values and len(queue_values) == 1:
98
+ context.queue_id = int(queue_values[0])
99
+ elif queue_values:
100
+ context.additional_context["queue_ids"] = ",".join(queue_values)
101
+
102
+ if "level" in query_params:
103
+ context.additional_context["view_level"] = query_params["level"][0]
104
+
105
+ except (json.JSONDecodeError, KeyError, ValueError, IndexError):
106
+ pass
107
+
108
+
109
def extract_url_context(url: str | None) -> RossumUrlContext:
    """Build a RossumUrlContext from a Rossum application URL.

    Args:
        url: The Rossum application URL (e.g.,
            "https://elis.rossum.ai/queues/3866808/settings/basic").
            ``None`` or an empty string yields an empty context.

    Returns:
        RossumUrlContext holding the IDs found in the URL, any documents-view
        filter context, and the page type of the first matching pattern.
    """
    if not url:
        return RossumUrlContext()

    context = RossumUrlContext(raw_url=url)

    # (pattern, attribute to fill, capture group that holds the numeric id)
    id_rules = (
        (QUEUE_PATTERN, "queue_id", 1),
        (DOCUMENT_PATTERN, "document_id", 1),
        (HOOK_PATTERN, "hook_id", 2),
        (ENGINE_PATTERN, "engine_id", 2),
    )
    for regex, attribute, group_index in id_rules:
        found = regex.search(url)
        if found:
            setattr(context, attribute, int(found.group(group_index)))

    if DOCUMENTS_VIEW_PATTERN.search(url):
        _extract_documents_view_context(url, context)

    # First match wins; stays None when no page-type pattern applies.
    context.page_type = next(
        (label for regex, label in PAGE_TYPE_PATTERNS if regex.search(url)),
        None,
    )

    return context
145
+
146
+
147
def format_context_for_prompt(context: RossumUrlContext) -> str:
    """Render the URL context as a block to place in front of a user message.

    Args:
        context: The extracted URL context.

    Returns:
        An empty string when ``context.is_empty()`` is true; otherwise a short
        block with a heading, the context string, a usage hint and a ``---``
        separator, surrounded by blank lines.
    """
    if context.is_empty():
        return ""

    return (
        "\n"
        "**Current Context from URL:**\n"
        f"{context.to_context_string()}\n"
        "\n"
        'When the user refers to "this queue", "this schema", "this annotation", etc., '
        "use the IDs from the context above.\n"
        "---\n"
        "\n"
    )