rossum-agent 1.0.0rc0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rossum_agent/__init__.py +9 -0
- rossum_agent/agent/__init__.py +32 -0
- rossum_agent/agent/core.py +932 -0
- rossum_agent/agent/memory.py +176 -0
- rossum_agent/agent/models.py +160 -0
- rossum_agent/agent/request_classifier.py +152 -0
- rossum_agent/agent/skills.py +132 -0
- rossum_agent/agent/types.py +5 -0
- rossum_agent/agent_logging.py +56 -0
- rossum_agent/api/__init__.py +1 -0
- rossum_agent/api/cli.py +51 -0
- rossum_agent/api/dependencies.py +190 -0
- rossum_agent/api/main.py +180 -0
- rossum_agent/api/models/__init__.py +1 -0
- rossum_agent/api/models/schemas.py +301 -0
- rossum_agent/api/routes/__init__.py +1 -0
- rossum_agent/api/routes/chats.py +95 -0
- rossum_agent/api/routes/files.py +113 -0
- rossum_agent/api/routes/health.py +44 -0
- rossum_agent/api/routes/messages.py +218 -0
- rossum_agent/api/services/__init__.py +1 -0
- rossum_agent/api/services/agent_service.py +451 -0
- rossum_agent/api/services/chat_service.py +197 -0
- rossum_agent/api/services/file_service.py +65 -0
- rossum_agent/assets/Primary_light_logo.png +0 -0
- rossum_agent/bedrock_client.py +64 -0
- rossum_agent/prompts/__init__.py +27 -0
- rossum_agent/prompts/base_prompt.py +80 -0
- rossum_agent/prompts/system_prompt.py +24 -0
- rossum_agent/py.typed +0 -0
- rossum_agent/redis_storage.py +482 -0
- rossum_agent/rossum_mcp_integration.py +123 -0
- rossum_agent/skills/hook-debugging.md +31 -0
- rossum_agent/skills/organization-setup.md +60 -0
- rossum_agent/skills/rossum-deployment.md +102 -0
- rossum_agent/skills/schema-patching.md +61 -0
- rossum_agent/skills/schema-pruning.md +23 -0
- rossum_agent/skills/ui-settings.md +45 -0
- rossum_agent/streamlit_app/__init__.py +1 -0
- rossum_agent/streamlit_app/app.py +646 -0
- rossum_agent/streamlit_app/beep_sound.py +36 -0
- rossum_agent/streamlit_app/cli.py +17 -0
- rossum_agent/streamlit_app/render_modules.py +123 -0
- rossum_agent/streamlit_app/response_formatting.py +305 -0
- rossum_agent/tools/__init__.py +214 -0
- rossum_agent/tools/core.py +173 -0
- rossum_agent/tools/deploy.py +404 -0
- rossum_agent/tools/dynamic_tools.py +365 -0
- rossum_agent/tools/file_tools.py +62 -0
- rossum_agent/tools/formula.py +187 -0
- rossum_agent/tools/skills.py +31 -0
- rossum_agent/tools/spawn_mcp.py +227 -0
- rossum_agent/tools/subagents/__init__.py +31 -0
- rossum_agent/tools/subagents/base.py +303 -0
- rossum_agent/tools/subagents/hook_debug.py +591 -0
- rossum_agent/tools/subagents/knowledge_base.py +305 -0
- rossum_agent/tools/subagents/mcp_helpers.py +47 -0
- rossum_agent/tools/subagents/schema_patching.py +471 -0
- rossum_agent/url_context.py +167 -0
- rossum_agent/user_detection.py +100 -0
- rossum_agent/utils.py +128 -0
- rossum_agent-1.0.0rc0.dist-info/METADATA +311 -0
- rossum_agent-1.0.0rc0.dist-info/RECORD +67 -0
- rossum_agent-1.0.0rc0.dist-info/WHEEL +5 -0
- rossum_agent-1.0.0rc0.dist-info/entry_points.txt +3 -0
- rossum_agent-1.0.0rc0.dist-info/licenses/LICENSE +21 -0
- rossum_agent-1.0.0rc0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,471 @@
|
|
|
1
|
+
"""Schema patching sub-agent.
|
|
2
|
+
|
|
3
|
+
Provides deterministic programmatic schema manipulation. The workflow:
|
|
4
|
+
1. Get schema tree structure (lightweight view)
|
|
5
|
+
2. Get full schema content
|
|
6
|
+
3. LLM instructs which fields to keep/add based on user requirements
|
|
7
|
+
4. Programmatic filtering/modification of schema content
|
|
8
|
+
5. Single PUT to update schema
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
import copy
|
|
14
|
+
import json
|
|
15
|
+
import logging
|
|
16
|
+
import time
|
|
17
|
+
from typing import TYPE_CHECKING
|
|
18
|
+
|
|
19
|
+
if TYPE_CHECKING:
|
|
20
|
+
from typing import Any
|
|
21
|
+
|
|
22
|
+
from anthropic import beta_tool
|
|
23
|
+
|
|
24
|
+
from rossum_agent.tools.subagents.base import (
|
|
25
|
+
SubAgent,
|
|
26
|
+
SubAgentConfig,
|
|
27
|
+
SubAgentResult,
|
|
28
|
+
)
|
|
29
|
+
from rossum_agent.tools.subagents.mcp_helpers import call_mcp_tool
|
|
30
|
+
|
|
31
|
+
logger = logging.getLogger(__name__)
|
|
32
|
+
|
|
33
|
+
# System prompt steering the sub-agent through the deterministic three-step
# workflow (tree view -> full content -> single apply_schema_changes call).
# Fix: step 2 text was truncated ("schema con" -> "schema content").
_SCHEMA_PATCHING_SYSTEM_PROMPT = """Goal: Update schema to match EXACTLY the requested fields—programmatically.

## Workflow

1. get_schema_tree_structure → see current field IDs
2. get_full_schema → get complete schema content
3. Analyze current vs requested fields
4. Call apply_schema_changes with:
   - fields_to_keep: list of field IDs to retain
   - fields_to_add: list of new field specifications
5. Return summary of changes

## Field Specification Format (for fields_to_add)

| Property | Required | Notes |
|----------|----------|-------|
| id | Yes | Unique identifier |
| label | Yes | Display name |
| parent_section | Yes | Section ID to add field to |
| type | Yes | string, number, date, enum |
| table_id | If table | Multivalue ID for table columns |

Optional: format, options (for enum), rir_field_names, hidden, can_export, ui_configuration

## Constraints

- Field `id` must be valid identifier (lowercase, underscores, no spaces)
- Do NOT set `rir_field_names` unless user explicitly provides engine field names
- If user mentions extraction/AI capture, check existing schema for rir_field_names patterns first
- `ui_configuration.type` must be one of: captured, data, manual, formula, reasoning
- `ui_configuration.edit` must be one of: enabled, enabled_without_warning, disabled

## Type Mappings

| User Request | Schema Config |
|--------------|---------------|
| String | type: "string" |
| Float/Number | type: "number" |
| Integer | type: "number", format: "#" |
| Date | type: "date" |
| Enum | type: "enum", options: [...] |

Not supported: multiline fields. Use regular string type instead.

Return: Summary of fields kept, added, removed."""
|
|
78
|
+
|
|
79
|
+
# Anthropic tool definition: lightweight read of the schema tree (field IDs,
# labels, categories, types only). The system prompt tells the sub-agent to
# call this first, before fetching the full schema content.
_GET_SCHEMA_TREE_STRUCTURE_TOOL: dict[str, Any] = {
    "name": "get_schema_tree_structure",
    "description": "Get lightweight tree view with field IDs, labels, categories, types. Call first.",
    "input_schema": {
        "type": "object",
        "properties": {"schema_id": {"type": "integer", "description": "Schema ID"}},
        "required": ["schema_id"],
    },
}
|
|
88
|
+
|
|
89
|
+
# Anthropic tool definition: fetch the complete schema content. Executing it
# also populates _schema_content_cache so apply_schema_changes can edit the
# content later without re-fetching.
_GET_FULL_SCHEMA_TOOL: dict[str, Any] = {
    "name": "get_full_schema",
    "description": "Get complete schema content for modification.",
    "input_schema": {
        "type": "object",
        "properties": {"schema_id": {"type": "integer", "description": "Schema ID"}},
        "required": ["schema_id"],
    },
}
|
|
98
|
+
|
|
99
|
+
# Anthropic tool definition: single-shot programmatic edit. Filters the cached
# schema content down to ``fields_to_keep``, appends ``fields_to_add``, then
# PUTs the entire content in one API call (see _apply_schema_changes).
_APPLY_SCHEMA_CHANGES_TOOL: dict[str, Any] = {
    "name": "apply_schema_changes",
    "description": "Programmatically filter schema and add new fields, then PUT in one call.",
    "input_schema": {
        "type": "object",
        "properties": {
            "schema_id": {"type": "integer", "description": "Schema ID"},
            "fields_to_keep": {
                "type": "array",
                "items": {"type": "string"},
                "description": "Field IDs to retain. Sections always kept. Omit to keep all.",
            },
            "fields_to_add": {
                "type": "array",
                # Each item mirrors the spec accepted by _build_field_node.
                "items": {
                    "type": "object",
                    "properties": {
                        "id": {"type": "string"},
                        "label": {"type": "string"},
                        "parent_section": {"type": "string"},
                        "type": {"type": "string"},
                        "table_id": {"type": "string"},
                        "format": {"type": "string"},
                        "options": {"type": "array"},
                        "rir_field_names": {"type": "array"},
                        "hidden": {"type": "boolean"},
                        "can_export": {"type": "boolean"},
                        "ui_configuration": {
                            "type": "object",
                            "properties": {
                                "type": {
                                    "type": "string",
                                    "enum": ["captured", "data", "manual", "formula", "reasoning"],
                                    "description": "Field value source type",
                                },
                                "edit": {
                                    "type": "string",
                                    "enum": ["enabled", "enabled_without_warning", "disabled"],
                                    "description": "Edit behavior in UI",
                                },
                            },
                        },
                    },
                    "required": ["id", "label", "parent_section", "type"],
                },
                "description": "New fields to add to schema.",
            },
        },
        "required": ["schema_id"],
    },
}
|
|
150
|
+
|
|
151
|
+
# Tools exposed to the Opus sub-agent, listed in the order the prompted
# workflow expects them to be used.
_OPUS_TOOLS: list[dict[str, Any]] = [
    _GET_SCHEMA_TREE_STRUCTURE_TOOL,
    _GET_FULL_SCHEMA_TOOL,
    _APPLY_SCHEMA_CHANGES_TOOL,
]
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
def _collect_field_ids(content: list[dict[str, Any]]) -> set[str]:
    """Recursively gather every field ID present in schema content.

    Handles both list-valued ``children`` (sections, tuples) and the
    dict-valued ``children`` used by multivalue nodes, whose single tuple
    child in turn holds a list of column nodes.
    """
    found: set[str] = set()
    for entry in content:
        entry_id = entry.get("id")
        if entry_id:
            found.add(entry_id)
        kids = entry.get("children")
        if isinstance(kids, list) and kids:
            found |= _collect_field_ids(kids)
        elif isinstance(kids, dict) and kids:
            tuple_id = kids.get("id")
            if tuple_id:
                found.add(tuple_id)
            inner = kids.get("children")
            if isinstance(inner, list) and inner:
                found |= _collect_field_ids(inner)
    return found
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
def _filter_content(
    content: list[dict[str, Any]],
    fields_to_keep: set[str],
) -> tuple[list[dict[str, Any]], list[str]]:
    """Filter schema content to keep only specified fields. Sections always preserved.

    Args:
        content: Schema content nodes (sections at the top level; the
            function also recurses into section and tuple children).
        fields_to_keep: IDs of datapoints/multivalues to retain.

    Returns:
        Tuple of (filtered deep copy of ``content``, flat list of removed
        field IDs). The input is never mutated.
    """
    filtered: list[dict[str, Any]] = []
    removed: list[str] = []

    for node in content:
        node_id = node.get("id", "")
        category = node.get("category", "")

        if category == "section":
            # Sections are always kept; only their children are filtered.
            new_section = copy.deepcopy(node)
            if "children" in new_section and isinstance(new_section["children"], list):
                new_children, section_removed = _filter_content(new_section["children"], fields_to_keep)
                new_section["children"] = new_children
                removed.extend(section_removed)
            filtered.append(new_section)

        elif category == "multivalue":
            # A multivalue wraps a single tuple node (dict) whose "children"
            # list holds the table columns; filter those columns first.
            new_mv = copy.deepcopy(node)
            mv_children_removed: list[str] = []

            if "children" in new_mv and isinstance(new_mv["children"], dict):
                tuple_node = new_mv["children"]
                if "children" in tuple_node and isinstance(tuple_node["children"], list):
                    tuple_children, mv_children_removed = _filter_content(tuple_node["children"], fields_to_keep)
                    tuple_node["children"] = tuple_children

            has_remaining_children = bool(new_mv.get("children", {}).get("children", []))

            # Keep the multivalue if it was requested explicitly or still has
            # at least one surviving column; otherwise drop it and report the
            # whole subtree (tuple and columns) as removed.
            if node_id in fields_to_keep or has_remaining_children:
                filtered.append(new_mv)
                removed.extend(mv_children_removed)
            else:
                removed.append(node_id)
                removed.extend(_collect_field_ids([node]) - {node_id})

        else:
            # Plain datapoint: keep only when explicitly requested; nodes
            # without an id are dropped silently.
            if node_id in fields_to_keep:
                filtered.append(copy.deepcopy(node))
            elif node_id:
                removed.append(node_id)

    return filtered, removed
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
def _build_field_node(spec: dict[str, Any]) -> dict[str, Any]:
    """Translate a field specification into a schema datapoint node.

    Required spec keys: ``id``; ``label`` defaults to the id and ``type``
    to "string". Optional keys (format, options, rir_field_names, hidden,
    can_export, ui_configuration) are copied through only when provided.
    """
    kind = spec.get("type", "string")
    built: dict[str, Any] = {
        "id": spec["id"],
        "label": spec.get("label", spec["id"]),
        "category": "datapoint",
        "type": kind,
    }

    # options are only meaningful for enum fields.
    if kind == "enum" and spec.get("options"):
        built["options"] = spec["options"]

    for key in ("format", "rir_field_names"):
        if spec.get(key):
            built[key] = spec[key]

    # Booleans: explicit False is meaningful, so test against None.
    for key in ("hidden", "can_export"):
        if spec.get(key) is not None:
            built[key] = spec[key]

    if spec.get("ui_configuration"):
        built["ui_configuration"] = spec["ui_configuration"]

    return built
|
|
253
|
+
|
|
254
|
+
|
|
255
|
+
def _add_fields_to_content(
    content: list[dict[str, Any]],
    fields_to_add: list[dict[str, Any]],
) -> tuple[list[dict[str, Any]], list[str]]:
    """Add new fields to schema content.

    Args:
        content: Current schema content; not mutated (a deep copy is edited).
        fields_to_add: Field specifications (see ``_build_field_node``).
            ``parent_section`` selects the target section; ``table_id``
            additionally selects a multivalue whose tuple receives the field.

    Returns:
        Tuple of (modified_content, added_ids). Specs whose parent section
        or table cannot be located are skipped with a warning instead of
        being dropped silently (previous behavior).
    """
    modified = copy.deepcopy(content)
    added: list[str] = []

    for spec in fields_to_add:
        parent_section = spec.get("parent_section")
        table_id = spec.get("table_id")
        field_node = _build_field_node(spec)
        placed = False

        for section in modified:
            if section.get("category") != "section" or section.get("id") != parent_section:
                continue

            if table_id:
                # Table column: append inside the matching multivalue's tuple node.
                for child in section.get("children", []):
                    if child.get("category") == "multivalue" and child.get("id") == table_id:
                        tuple_node = child.get("children", {})
                        if isinstance(tuple_node, dict) and "children" in tuple_node:
                            tuple_node["children"].append(field_node)
                            added.append(spec["id"])
                            placed = True
                        break
            else:
                section.setdefault("children", []).append(field_node)
                added.append(spec["id"])
                placed = True

            # Stop after the first matching section. The original only broke
            # out in the non-table branch, which could double-insert a table
            # column if two sections shared an ID.
            break

        if not placed:
            logger.warning(
                "Field %r skipped: parent_section=%r table_id=%r not found in schema",
                spec.get("id"),
                parent_section,
                table_id,
            )

    return modified, added
|
|
288
|
+
|
|
289
|
+
|
|
290
|
+
def _apply_schema_changes(
    schema_id: int,
    current_content: list[dict[str, Any]],
    fields_to_keep: list[str] | None,
    fields_to_add: list[dict[str, Any]] | None,
) -> dict[str, Any]:
    """Apply keep/add changes to schema content and PUT the result via MCP.

    Returns a summary dict with the schema ID, the removed/added/kept field
    IDs, and ``update_result`` ("success"/"failed") from the PUT call.
    """
    summary: dict[str, Any] = {
        "schema_id": schema_id,
        "fields_removed": [],
        "fields_added": [],
        "fields_kept": [],
    }

    working = current_content

    if fields_to_keep is not None:
        # Top-level sections are always retained, even when not listed.
        wanted = set(fields_to_keep)
        for top in current_content:
            top_id = top.get("id")
            if top.get("category") == "section" and top_id:
                wanted.add(top_id)

        working, dropped = _filter_content(working, wanted)
        summary["fields_removed"] = dropped

    if fields_to_add:
        working, new_ids = _add_fields_to_content(working, fields_to_add)
        summary["fields_added"] = new_ids

    payload = {"schema_id": schema_id, "schema_data": {"content": working}}
    put_response = call_mcp_tool("update_schema", payload)
    summary["fields_kept"] = sorted(_collect_field_ids(working))
    summary["update_result"] = "success" if put_response else "failed"

    return summary
|
|
323
|
+
|
|
324
|
+
|
|
325
|
+
# Per-process cache of full schema content keyed by schema ID: populated by
# the get_full_schema tool and consumed (then evicted) by apply_schema_changes.
_schema_content_cache: dict[int, list[dict[str, Any]]] = {}
|
|
326
|
+
|
|
327
|
+
|
|
328
|
+
def _execute_opus_tool(tool_name: str, tool_input: dict[str, Any]) -> str:
    """Dispatch one sub-agent tool call and return its result as text/JSON."""
    schema_id = tool_input.get("schema_id")

    if tool_name == "get_schema_tree_structure":
        tree = call_mcp_tool("get_schema_tree_structure", tool_input)
        if not tree:
            return "No data returned"
        return json.dumps(tree, indent=2, default=str)

    if tool_name == "get_full_schema":
        schema = call_mcp_tool("get_schema", tool_input)
        if schema and schema_id:
            # Cache the raw content so apply_schema_changes can edit it later.
            raw = schema.get("content", []) if isinstance(schema, dict) else []
            _schema_content_cache[schema_id] = raw
        if not schema:
            return "No data returned"
        return json.dumps(schema, indent=2, default=str)

    if tool_name == "apply_schema_changes":
        if not schema_id or schema_id not in _schema_content_cache:
            return json.dumps({"error": "Must call get_full_schema first to load content"})

        cached = _schema_content_cache[schema_id]
        outcome = _apply_schema_changes(
            schema_id,
            cached,
            tool_input.get("fields_to_keep"),
            tool_input.get("fields_to_add"),
        )
        # Single-use: drop the cached content once changes have been applied.
        del _schema_content_cache[schema_id]
        return json.dumps(outcome, indent=2, default=str)

    return f"Unknown tool: {tool_name}"
|
|
355
|
+
|
|
356
|
+
|
|
357
|
+
class SchemaPatchingSubAgent(SubAgent):
    """Sub-agent for schema patching with programmatic bulk replacement."""

    def __init__(self) -> None:
        # Fixed configuration: the prompted three-step tool workflow
        # (tree -> full schema -> apply) fits comfortably in 5 iterations.
        config = SubAgentConfig(
            tool_name="patch_schema",
            system_prompt=_SCHEMA_PATCHING_SYSTEM_PROMPT,
            tools=_OPUS_TOOLS,
            max_iterations=5,
            max_tokens=4096,
        )
        super().__init__(config)

    def execute_tool(self, tool_name: str, tool_input: dict[str, Any]) -> str:
        """Execute a tool call from the LLM."""
        # Delegates to the module-level dispatcher shared with the tool cache.
        return _execute_opus_tool(tool_name, tool_input)

    def process_response_block(self, block: Any, iteration: int, max_iterations: int) -> dict[str, Any] | None:
        """No special block processing needed for schema patching."""
        return None
|
|
377
|
+
|
|
378
|
+
|
|
379
|
+
def _call_opus_for_patching(schema_id: str, changes: list[dict[str, Any]]) -> SubAgentResult:
    """Run the schema-patching sub-agent over the given change list.

    Returns:
        SubAgentResult with analysis text and token counts.
    """
    # Render each change spec as one human-readable bullet line.
    bullet_lines: list[str] = []
    for c in changes:
        line = (
            f"- {c.get('action', 'add')} field '{c.get('id')}' ({c.get('type', 'string')}) "
            f"in section '{c.get('parent_section')}'"
        )
        if c.get("label"):
            line += f" with label '{c.get('label')}'"
        if c.get("table_field") or c.get("table_id"):
            line += f" [TABLE: {c.get('table_id')}]"
        bullet_lines.append(line)
    changes_text = "\n".join(bullet_lines)

    user_content = f"""Update schema {schema_id} to have EXACTLY these fields:

{changes_text}

Workflow:
1. get_schema_tree_structure to see current field IDs
2. get_full_schema to load content
3. apply_schema_changes with fields_to_keep (IDs to retain) and/or fields_to_add
4. Return summary"""

    return SchemaPatchingSubAgent().run(user_content)
|
|
405
|
+
|
|
406
|
+
|
|
407
|
+
@beta_tool
def patch_schema_with_subagent(schema_id: str, changes: str) -> str:
    """Update a Rossum schema using an Opus sub-agent with programmatic bulk replacement.

    Delegates schema update to a sub-agent that:
    1. Fetches schema tree structure (lightweight view)
    2. Fetches full schema content
    3. Programmatically filters to keep only required fields
    4. Adds new fields as specified
    5. PUTs entire content in ONE API call

    Args:
        schema_id: The schema ID to update.
        changes: JSON array of field specifications. Each object should have:
            - action: "add" or "remove" (default: "add")
            - id: Field ID
            - parent_section: Section ID for the field
            - type: Field type (string, number, date, enum)
            - label: Field label (optional, defaults to id)
            - table_id: Multivalue ID if this is a table column

    Returns:
        JSON with update results including fields added, removed, and summary.
    """
    start_time = time.perf_counter()

    def _error(message: str) -> str:
        # Every early exit reports the same shape: the error plus elapsed time.
        return json.dumps(
            {"error": message, "elapsed_ms": round((time.perf_counter() - start_time) * 1000, 3)}
        )

    if not schema_id:
        return _error("No schema_id provided")

    try:
        changes_list = json.loads(changes)
    except json.JSONDecodeError as e:
        return _error(f"Invalid changes JSON: {e}")

    if not changes_list:
        return _error("No changes provided")

    # Lazy %-style args avoid formatting cost when INFO logging is disabled.
    logger.info("patch_schema: Calling Opus for schema_id=%s, %d changes", schema_id, len(changes_list))
    result = _call_opus_for_patching(schema_id, changes_list)
    elapsed_ms = round((time.perf_counter() - start_time) * 1000, 3)

    logger.info(
        "patch_schema: completed in %.1fms, tokens in=%s out=%s, iterations=%s",
        elapsed_ms,
        result.input_tokens,
        result.output_tokens,
        result.iterations_used,
    )

    return json.dumps(
        {
            "schema_id": schema_id,
            "changes_requested": len(changes_list),
            "analysis": result.analysis,
            "elapsed_ms": elapsed_ms,
            "input_tokens": result.input_tokens,
            "output_tokens": result.output_tokens,
        },
        ensure_ascii=False,
        default=str,
    )
|
|
@@ -0,0 +1,167 @@
|
|
|
1
|
+
"""URL context extraction for Rossum application URLs.
|
|
2
|
+
|
|
3
|
+
This module provides utilities to extract context (queue_id, document_id, hook_id, engine_id)
|
|
4
|
+
from Rossum application URLs, enabling the agent to understand the user's current
|
|
5
|
+
context when they paste a URL.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import json
|
|
11
|
+
import re
|
|
12
|
+
from dataclasses import dataclass, field
|
|
13
|
+
from urllib.parse import parse_qs, urlparse
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@dataclass
class RossumUrlContext:
    """Context (entity IDs and page type) extracted from a Rossum application URL."""

    queue_id: int | None = None
    document_id: int | None = None
    hook_id: int | None = None
    engine_id: int | None = None
    raw_url: str | None = None
    page_type: str | None = None
    additional_context: dict[str, str] = field(default_factory=dict)

    def is_empty(self) -> bool:
        """Return True when none of the four entity IDs were extracted."""
        return (
            self.queue_id is None
            and self.document_id is None
            and self.hook_id is None
            and self.engine_id is None
        )

    def to_context_string(self) -> str:
        """Render the extracted context as a human-readable summary string."""
        labelled = (
            ("Queue ID", self.queue_id),
            ("Document ID", self.document_id),
            ("Hook ID", self.hook_id),
            ("Engine ID", self.engine_id),
            ("Page type", self.page_type),
        )
        # Truthiness filter: falsy values (None, 0, "") are omitted.
        parts = [f"{label}: {value}" for label, value in labelled if value]
        parts.extend(f"{key}: {value}" for key, value in self.additional_context.items())
        return ", ".join(parts) if parts else ""
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
# URL patterns for different Rossum pages.
# Format: /queues/{queue_id}/...
QUEUE_PATTERN = re.compile(r"/queues/(\d+)")
# Format: /document/{document_id}
DOCUMENT_PATTERN = re.compile(r"/document/(\d+)")
# Format: /hooks/{hook_id} or /extensions/{hook_id} or /extensions/my-extensions/{hook_id}
# NOTE: regex backtracking lets the longer "extensions/my-extensions"
# alternative match even though the shorter "extensions" precedes it — the
# short alternative fails on the trailing /(\d+) and the engine retries.
HOOK_PATTERN = re.compile(r"/(hooks|extensions|extensions/my-extensions)/(\d+)")
# Format: /engines/{engine_id} or /automation/engines/{engine_id}
# (group 1 is the optional "automation/" prefix; the ID is group 2)
ENGINE_PATTERN = re.compile(r"/(automation/)?engines/(\d+)")

# Documents list view pattern (path segment followed by a query string or end).
DOCUMENTS_VIEW_PATTERN = re.compile(r"/documents(\?|$)")

# Page type patterns (order matters - more specific patterns first;
# extract_url_context takes the first match).
PAGE_TYPE_PATTERNS = [
    (re.compile(r"/automation/engines/\d+/settings/basic"), "engine_settings"),
    (re.compile(r"/automation/engines/\d+/settings"), "engine_settings"),
    (re.compile(r"/settings/basic"), "queue_settings"),
    (re.compile(r"/settings/schema"), "schema_settings"),
    (re.compile(r"/settings/hooks"), "hooks_settings"),
    (re.compile(r"/settings/automation"), "automation_settings"),
    (re.compile(r"/settings/emails"), "email_settings"),
    (re.compile(r"/settings"), "settings"),
    (re.compile(r"/documents(\?|$)"), "documents_list"),
    (re.compile(r"/all$"), "all_documents"),
    (re.compile(r"/queues/\d+/review"), "review"),
    (re.compile(r"/upload"), "upload"),
]
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def _extract_documents_view_context(url: str, context: RossumUrlContext) -> None:
    """Populate *context* in place from a documents list-view URL.

    Reads the ``filtering`` query parameter (a JSON blob) to find the queue
    filter — a single queue becomes ``queue_id``; multiple queues go into
    ``additional_context["queue_ids"]`` — and the ``level`` parameter for
    the view level. Malformed input is ignored: this is best-effort
    enrichment only.

    Example URL: /documents?filtering={"items":[{"field":"queue","value":["3866808"],...}]}
    """
    try:
        params = parse_qs(urlparse(url).query)

        filtering_values = params.get("filtering")
        if filtering_values:
            filtering = json.loads(filtering_values[0])
            for item in filtering.get("items", []):
                if item.get("field") != "queue" or not item.get("value"):
                    continue
                queue_values = item["value"]
                if len(queue_values) == 1:
                    context.queue_id = int(queue_values[0])
                else:
                    context.additional_context["queue_ids"] = ",".join(queue_values)

        level_values = params.get("level")
        if level_values:
            context.additional_context["view_level"] = level_values[0]

    except (json.JSONDecodeError, KeyError, ValueError, IndexError):
        pass
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def extract_url_context(url: str | None) -> RossumUrlContext:
    """Extract queue/document/hook/engine IDs and page type from a Rossum URL.

    Args:
        url: The Rossum application URL (e.g.,
            "https://elis.rossum.ai/queues/3866808/settings/basic")

    Returns:
        RossumUrlContext with extracted IDs and page type.
    """
    if not url:
        return RossumUrlContext()

    context = RossumUrlContext(raw_url=url)

    queue_match = QUEUE_PATTERN.search(url)
    if queue_match:
        context.queue_id = int(queue_match.group(1))

    document_match = DOCUMENT_PATTERN.search(url)
    if document_match:
        context.document_id = int(document_match.group(1))

    # Hook and engine IDs live in group 2 (group 1 is the path prefix).
    hook_match = HOOK_PATTERN.search(url)
    if hook_match:
        context.hook_id = int(hook_match.group(2))

    engine_match = ENGINE_PATTERN.search(url)
    if engine_match:
        context.engine_id = int(engine_match.group(2))

    if DOCUMENTS_VIEW_PATTERN.search(url):
        _extract_documents_view_context(url, context)

    # First matching page-type pattern wins; the list is ordered most
    # specific first. No match leaves page_type as None.
    context.page_type = next(
        (label for pattern, label in PAGE_TYPE_PATTERNS if pattern.search(url)),
        None,
    )

    return context
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
def format_context_for_prompt(context: RossumUrlContext) -> str:
    """Format the URL context for inclusion in the agent prompt.

    Args:
        context: The extracted URL context.

    Returns:
        A formatted string to prepend to user messages, or "" when the
        context holds no extracted IDs.
    """
    if context.is_empty():
        return ""

    return f"""
**Current Context from URL:**
{context.to_context_string()}

When the user refers to "this queue", "this schema", "this annotation", etc., use the IDs from the context above.
---

"""
|