rossum-mcp 1.0.1__py3-none-any.whl → 1.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rossum_mcp/__init__.py +1 -1
- rossum_mcp/server.py +18 -5
- rossum_mcp/tools/base.py +21 -2
- rossum_mcp/tools/catalog.py +3 -0
- rossum_mcp/tools/rules.py +180 -2
- rossum_mcp/tools/schemas/__init__.py +182 -0
- rossum_mcp/tools/schemas/models.py +151 -0
- rossum_mcp/tools/schemas/operations.py +183 -0
- rossum_mcp/tools/schemas/patching.py +202 -0
- rossum_mcp/tools/schemas/pruning.py +133 -0
- rossum_mcp/tools/schemas/validation.py +128 -0
- {rossum_mcp-1.0.1.dist-info → rossum_mcp-1.1.1.dist-info}/METADATA +26 -6
- rossum_mcp-1.1.1.dist-info/RECORD +30 -0
- rossum_mcp/tools/schemas.py +0 -800
- rossum_mcp-1.0.1.dist-info/RECORD +0 -25
- {rossum_mcp-1.0.1.dist-info → rossum_mcp-1.1.1.dist-info}/WHEEL +0 -0
- {rossum_mcp-1.0.1.dist-info → rossum_mcp-1.1.1.dist-info}/entry_points.txt +0 -0
- {rossum_mcp-1.0.1.dist-info → rossum_mcp-1.1.1.dist-info}/licenses/LICENSE +0 -0
- {rossum_mcp-1.0.1.dist-info → rossum_mcp-1.1.1.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,183 @@
|
|
|
1
|
+
"""Schema CRUD operations for Rossum MCP Server."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
from dataclasses import asdict, is_dataclass
|
|
7
|
+
from typing import TYPE_CHECKING, Any
|
|
8
|
+
|
|
9
|
+
from rossum_api import APIClientError
|
|
10
|
+
from rossum_api.domain_logic.resources import Resource
|
|
11
|
+
from rossum_api.models.schema import Schema
|
|
12
|
+
|
|
13
|
+
from rossum_mcp.tools.base import TRUNCATED_MARKER, delete_resource, is_read_write_mode
|
|
14
|
+
from rossum_mcp.tools.schemas.models import SchemaNode, SchemaNodeUpdate # noqa: TC001 - needed at runtime for FastMCP
|
|
15
|
+
from rossum_mcp.tools.schemas.patching import PatchOperation, apply_schema_patch
|
|
16
|
+
from rossum_mcp.tools.schemas.pruning import (
|
|
17
|
+
_collect_all_field_ids,
|
|
18
|
+
_collect_ancestor_ids,
|
|
19
|
+
_extract_schema_tree,
|
|
20
|
+
_remove_fields_from_content,
|
|
21
|
+
)
|
|
22
|
+
from rossum_mcp.tools.schemas.validation import sanitize_schema_content
|
|
23
|
+
|
|
24
|
+
if TYPE_CHECKING:
|
|
25
|
+
from rossum_api import AsyncRossumAPIClient
|
|
26
|
+
|
|
27
|
+
# Module-level logger named after this module; used for the debug traces below.
logger = logging.getLogger(__name__)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def _truncate_schema_for_list(schema: Schema) -> Schema:
    """Return a copy of ``schema`` whose ``content`` is ``TRUNCATED_MARKER``.

    Used for list responses so the full content is not repeated per schema.
    The input object is left untouched; ``dataclasses.replace`` builds a copy.
    """
    import dataclasses  # noqa: PLC0415 - kept function-local, as in the original

    trimmed: Schema = dataclasses.replace(schema, content=TRUNCATED_MARKER)
    return trimmed
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
async def get_schema(client: AsyncRossumAPIClient, schema_id: int) -> Schema | dict:
    """Fetch a single schema by its ID.

    Returns:
        The schema returned by ``client.retrieve_schema``, or an
        ``{"error": ...}`` dict when the API responds with status 404.

    Raises:
        APIClientError: for any API failure other than 404 (re-raised as is).
    """
    try:
        return await client.retrieve_schema(schema_id)
    except APIClientError as exc:
        if exc.status_code != 404:
            raise
        return {"error": f"Schema {schema_id} not found"}
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
async def list_schemas(
    client: AsyncRossumAPIClient, name: str | None = None, queue_id: int | None = None
) -> list[Schema]:
    """List schemas, optionally filtered by name and/or queue.

    Only filters that were actually supplied (not ``None``) are forwarded to
    the client. Every returned schema has its content truncated.
    """
    logger.debug(f"Listing schemas: name={name}, queue_id={queue_id}")

    requested = (("name", name), ("queue", queue_id))
    filters: dict[str, int | str] = {key: value for key, value in requested if value is not None}

    truncated: list[Schema] = []
    async for schema in client.list_schemas(**filters):  # type: ignore[arg-type]
        truncated.append(_truncate_schema_for_list(schema))
    return truncated
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
async def update_schema(client: AsyncRossumAPIClient, schema_id: int, schema_data: dict) -> Schema | dict:
    """Update a schema with ``schema_data`` and return the re-fetched schema.

    Returns an ``{"error": ...}`` dict without touching the API when the
    server is not in read-write mode.
    """
    if is_read_write_mode():
        logger.debug(f"Updating schema: schema_id={schema_id}")
        await client._http_client.update(Resource.Schema, schema_id, schema_data)
        # Re-read so the caller gets the schema as stored after the update.
        refreshed: Schema = await client.retrieve_schema(schema_id)
        return refreshed

    return {"error": "update_schema is not available in read-only mode"}
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
async def create_schema(client: AsyncRossumAPIClient, name: str, content: list[dict]) -> Schema | dict:
    """Create a schema named ``name`` from ``content``.

    The content is passed through ``sanitize_schema_content`` before being
    sent. Returns an ``{"error": ...}`` dict when the server is not in
    read-write mode.
    """
    if is_read_write_mode():
        logger.debug(f"Creating schema: name={name}")
        payload = {"name": name, "content": sanitize_schema_content(content)}
        created: Schema = await client.create_new_schema(payload)
        return created

    return {"error": "create_schema is not available in read-only mode"}
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
async def patch_schema(
    client: AsyncRossumAPIClient,
    schema_id: int,
    operation: PatchOperation,
    node_id: str,
    node_data: SchemaNode | SchemaNodeUpdate | None = None,
    parent_id: str | None = None,
    position: int | None = None,
) -> Schema | dict:
    """Apply a single add/update/remove operation to one node of a schema.

    The current content is fetched, patched in memory with
    ``apply_schema_patch``, sanitized, written back, and the schema is then
    re-fetched and returned.

    Returns:
        The updated schema, or an ``{"error": ...}`` dict when the server is
        read-only, the operation name is invalid, the stored content is not a
        list, or the patch raises ``ValueError``.
    """
    if not is_read_write_mode():
        return {"error": "patch_schema is not available in read-only mode"}

    allowed_operations = ("add", "update", "remove")
    if operation not in allowed_operations:
        return {"error": f"Invalid operation '{operation}'. Must be 'add', 'update', or 'remove'."}

    logger.debug(f"Patching schema: schema_id={schema_id}, operation={operation}, node_id={node_id}")

    # Normalize node_data to a plain dict: accept dicts, objects exposing
    # to_dict(), and otherwise fall back to dataclasses.asdict().
    payload: dict | None
    if node_data is None:
        payload = None
    elif isinstance(node_data, dict):
        payload = node_data
    elif hasattr(node_data, "to_dict"):
        payload = node_data.to_dict()
    else:
        payload = asdict(node_data)

    raw_schema: dict = await client._http_client.request_json("GET", f"schemas/{schema_id}")
    existing_content = raw_schema.get("content", [])
    if not isinstance(existing_content, list):
        return {"error": "Unexpected schema content format"}

    try:
        patched = apply_schema_patch(
            content=existing_content,
            operation=operation,
            node_id=node_id,
            node_data=payload,
            parent_id=parent_id,
            position=position,
        )
    except ValueError as exc:
        return {"error": str(exc)}

    cleaned = sanitize_schema_content(patched)
    await client._http_client.update(Resource.Schema, schema_id, {"content": cleaned})
    refreshed: Schema = await client.retrieve_schema(schema_id)
    return refreshed
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
async def get_schema_tree_structure(client: AsyncRossumAPIClient, schema_id: int) -> list[dict] | dict:
    """Return a lightweight tree view of a schema's content.

    If ``get_schema`` yields a dict (its error form), that dict is returned
    unchanged. Otherwise each top-level content item is converted to a plain
    dict (``asdict`` for dataclasses, ``dict()`` otherwise) and handed to
    ``_extract_schema_tree``.
    """
    schema = await get_schema(client, schema_id)
    if isinstance(schema, dict):
        return schema

    content_dicts: list[dict[str, Any]] = []
    for section in schema.content:
        if is_dataclass(section):
            content_dicts.append(asdict(section))  # type: ignore[arg-type]
        else:
            content_dicts.append(dict(section))  # type: ignore[arg-type]
    return _extract_schema_tree(content_dicts)
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
async def prune_schema_fields(
    client: AsyncRossumAPIClient,
    schema_id: int,
    fields_to_keep: list[str] | None = None,
    fields_to_remove: list[str] | None = None,
) -> dict:
    """Remove many fields from a schema in one call.

    Exactly one of ``fields_to_keep`` / ``fields_to_remove`` must be given.

    * ``fields_to_keep``: everything except the listed IDs, their ancestors
      and the top-level sections is removed.
    * ``fields_to_remove``: the listed IDs are removed; top-level section IDs
      and IDs that do not exist in the schema are ignored.

    Returns:
        ``{"removed_fields": [...], "remaining_fields": [...]}`` (both sorted),
        or an ``{"error": ...}`` dict for read-only mode, invalid arguments or
        unexpected content format. When there is nothing to remove, the schema
        is not written at all.
    """
    if not is_read_write_mode():
        return {"error": "prune_schema_fields is not available in read-only mode"}

    if fields_to_keep and fields_to_remove:
        return {"error": "Specify fields_to_keep OR fields_to_remove, not both"}
    if not fields_to_keep and not fields_to_remove:
        return {"error": "Must specify fields_to_keep or fields_to_remove"}

    current_schema: dict = await client._http_client.request_json("GET", f"schemas/{schema_id}")
    content = current_schema.get("content", [])
    if not isinstance(content, list):
        return {"error": "Unexpected schema content format"}
    all_ids = _collect_all_field_ids(content)

    section_ids = {s.get("id") for s in content if s.get("category") == "section"}

    if fields_to_keep:
        fields_to_keep_set = set(fields_to_keep) | section_ids
        # Containers of a kept field must survive too, or the field would go with them.
        fields_to_keep_set |= _collect_ancestor_ids(content, fields_to_keep_set)
        remove_set = all_ids - fields_to_keep_set
    else:
        # Restrict to IDs that exist: unknown IDs cannot be removed, and letting
        # them through would force a pointless schema write below.
        remove_set = (set(fields_to_remove) - section_ids) & all_ids  # type: ignore[arg-type]

    if not remove_set:
        return {"removed_fields": [], "remaining_fields": sorted(all_ids)}

    pruned_content, removed = _remove_fields_from_content(content, remove_set)
    await client._http_client.update(Resource.Schema, schema_id, {"content": pruned_content})

    remaining_ids = _collect_all_field_ids(pruned_content)
    return {"removed_fields": sorted(removed), "remaining_fields": sorted(remaining_ids)}
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
async def delete_schema(client: AsyncRossumAPIClient, schema_id: int) -> dict:
    """Delete a schema by delegating to the shared ``delete_resource`` helper."""
    outcome: dict = await delete_resource("schema", schema_id, client.delete_schema)
    return outcome
|
|
@@ -0,0 +1,202 @@
|
|
|
1
|
+
"""Schema patching utilities for Rossum MCP Server."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import copy
|
|
6
|
+
from typing import Literal
|
|
7
|
+
|
|
8
|
+
# The three operation names understood by apply_schema_patch.
PatchOperation = Literal["add", "update", "remove"]
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def _find_node_in_children(
|
|
12
|
+
children: list[dict], node_id: str, parent_node: dict | None = None
|
|
13
|
+
) -> tuple[dict | None, int | None, list[dict] | None, dict | None]:
|
|
14
|
+
"""Recursively find a node by ID in schema children.
|
|
15
|
+
|
|
16
|
+
Returns (node, index, parent_children_list, parent_node) or (None, None, None, None) if not found.
|
|
17
|
+
The parent_node is needed for multivalue's dict children where we need to modify the parent directly.
|
|
18
|
+
"""
|
|
19
|
+
for i, child in enumerate(children):
|
|
20
|
+
if child.get("id") == node_id:
|
|
21
|
+
return child, i, children, parent_node
|
|
22
|
+
|
|
23
|
+
nested_children = child.get("children")
|
|
24
|
+
if nested_children:
|
|
25
|
+
if isinstance(nested_children, list):
|
|
26
|
+
result = _find_node_in_children(nested_children, node_id, child)
|
|
27
|
+
if result[0] is not None:
|
|
28
|
+
return result
|
|
29
|
+
elif isinstance(nested_children, dict):
|
|
30
|
+
if nested_children.get("id") == node_id:
|
|
31
|
+
return nested_children, 0, None, child
|
|
32
|
+
if "children" in nested_children:
|
|
33
|
+
result = _find_node_in_children(nested_children["children"], node_id, nested_children)
|
|
34
|
+
if result[0] is not None:
|
|
35
|
+
return result
|
|
36
|
+
|
|
37
|
+
return None, None, None, None
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def _is_multivalue_node(node: dict) -> bool:
|
|
41
|
+
"""Check if a node is a multivalue (has dict children or category is multivalue)."""
|
|
42
|
+
return node.get("category") == "multivalue" or ("children" in node and isinstance(node["children"], dict))
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def _find_parent_children_list(content: list[dict], parent_id: str) -> tuple[list[dict] | None, bool]:
    """Find the list that new children of ``parent_id`` should be inserted into.

    Returns:
        ``(children_list, is_multivalue)``:

        * ``(None, True)`` when the parent is a multivalue (cannot take extra children);
        * ``(None, False)`` when no node with ``parent_id`` exists;
        * ``(list, False)`` otherwise. The list is the parent's own ``children``
          list, so mutating it mutates ``content``. A missing or null
          ``children`` entry is replaced by a fresh empty list.
    """
    # Same lookup as the update/remove operations, so top-level and nested
    # parents are treated alike.
    node, _, _, _ = _find_node_anywhere(content, parent_id)
    if node is None:
        return None, False
    if _is_multivalue_node(node):
        return None, True

    children = node.get("children")
    if not isinstance(children, list):
        # Covers both an absent key and "children": None, which previously
        # surfaced as a misleading "parent not found".
        children = []
        node["children"] = children
    return children, False
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def _apply_add_operation(
    content: list[dict], node_id: str, node_data: dict | None, parent_id: str | None, position: int | None
) -> list[dict]:
    """Insert a copy of ``node_data`` (with its id forced to ``node_id``) under ``parent_id``.

    ``content`` is modified in place and returned. The node goes to
    ``position`` when that is a valid insertion index for the parent's
    children, and to the end otherwise.

    Raises:
        ValueError: if ``node_data`` or ``parent_id`` is missing, the parent is
            a multivalue, or the parent cannot be found.
    """
    if node_data is None:
        raise ValueError("node_data is required for 'add' operation")
    if parent_id is None:
        raise ValueError("parent_id is required for 'add' operation")

    # Work on a copy so the caller's dict is never mutated.
    new_node = copy.deepcopy(node_data)
    new_node["id"] = node_id

    siblings, parent_is_multivalue = _find_parent_children_list(content, parent_id)
    if parent_is_multivalue:
        raise ValueError(
            f"Cannot add children to multivalue '{parent_id}'. "
            "Multivalue nodes have a single child (tuple or datapoint). "
            "Use 'update' to replace the multivalue's children, or add to the tuple inside it."
        )
    if siblings is None:
        raise ValueError(f"Parent node '{parent_id}' not found in schema")

    end = len(siblings)
    in_range = position is not None and 0 <= position <= end
    # Inserting at len(list) is the same as appending.
    siblings.insert(position if in_range else end, new_node)
    return content
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def _get_section_children_as_list(section: dict) -> list[dict]:
|
|
116
|
+
"""Get section children as a list, handling both list and dict (multivalue) cases."""
|
|
117
|
+
children = section.get("children")
|
|
118
|
+
if children is None:
|
|
119
|
+
return []
|
|
120
|
+
if isinstance(children, list):
|
|
121
|
+
return children
|
|
122
|
+
if isinstance(children, dict):
|
|
123
|
+
return [children]
|
|
124
|
+
return []
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def _find_node_anywhere(
    content: list[dict], node_id: str
) -> tuple[dict | None, int | None, list[dict] | None, dict | None]:
    """Find a node by ID anywhere in the schema content.

    Returns:
        ``(node, index, parent_children_list, parent_node)``.

        * A top-level match returns ``(node, None, None, None)``.
        * A node that is the single dict child of a top-level node returns
          ``(node, 0, None, parent)``, the same shape ``_find_node_in_children``
          uses for dict children. There is no real list holding the node, so
          no list is reported.
        * No match returns four ``None`` values.
    """
    for section in content:
        if section.get("id") == node_id:
            return section, None, None, None

        children = section.get("children")
        if isinstance(children, list):
            candidates = children
        elif isinstance(children, dict):
            # Temporary wrapper so the list-based search can be reused.
            candidates = [children]
        else:
            continue

        node, index, parent_list, parent_node = _find_node_in_children(candidates, node_id, section)
        if node is None:
            continue
        if candidates is not children and parent_list is candidates:
            # Never hand out the throwaway wrapper: popping from it would
            # leave the schema untouched while looking like a successful removal.
            parent_list = None
        return node, index, parent_list, parent_node

    return None, None, None, None
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def _apply_update_operation(content: list[dict], node_id: str, node_data: dict | None) -> list[dict]:
    """Merge ``node_data`` into the node identified by ``node_id``.

    The node is updated in place (keys in ``node_data`` overwrite existing
    ones) and ``content`` is returned.

    Raises:
        ValueError: if ``node_data`` is ``None`` or the node does not exist.
    """
    if node_data is None:
        raise ValueError("node_data is required for 'update' operation")

    target = _find_node_anywhere(content, node_id)[0]
    if target is None:
        raise ValueError(f"Node '{node_id}' not found in schema")

    target.update(node_data)
    return content
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
def _apply_remove_operation(content: list[dict], node_id: str) -> list[dict]:
    """Remove the node identified by ``node_id`` from ``content`` (in place).

    Raises:
        ValueError: if the node is a section, does not exist, is a top-level
            non-section node, or is the single dict child of its parent.
    """
    # Reject sections up front, before any lookup.
    if any(top.get("id") == node_id and top.get("category") == "section" for top in content):
        raise ValueError("Cannot remove a section - sections must exist")

    target, index, siblings, owner = _find_node_anywhere(content, node_id)
    if target is None:
        raise ValueError(f"Node '{node_id}' not found in schema")

    if index is None and siblings is None:
        # Top-level match: nothing to detach it from.
        if target.get("category") == "section":
            raise ValueError("Cannot remove a section - sections must exist")
        raise ValueError(f"Cannot determine how to remove node '{node_id}'")

    if siblings is not None and index is not None:
        del siblings[index]
        return content

    if owner is not None:
        # The node is a single dict child; it cannot be removed on its own.
        if owner.get("category") == "multivalue":
            raise ValueError(f"Cannot remove '{node_id}' from multivalue - remove the multivalue instead")
        raise ValueError(f"Cannot remove '{node_id}' - unexpected parent structure")

    return content
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
def apply_schema_patch(
    content: list[dict],
    operation: PatchOperation,
    node_id: str,
    node_data: dict | None = None,
    parent_id: str | None = None,
    position: int | None = None,
) -> list[dict]:
    """Apply one patch operation to a deep copy of ``content`` and return the copy.

    The input list is never modified. An operation name other than
    ``"add"``, ``"update"`` or ``"remove"`` returns the unmodified copy.

    Raises:
        ValueError: propagated from the individual operation helpers.
    """
    working_copy = copy.deepcopy(content)

    if operation == "add":
        patched = _apply_add_operation(working_copy, node_id, node_data, parent_id, position)
    elif operation == "update":
        patched = _apply_update_operation(working_copy, node_id, node_data)
    elif operation == "remove":
        patched = _apply_remove_operation(working_copy, node_id)
    else:
        patched = working_copy
    return patched
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
"""Schema pruning utilities for Rossum MCP Server."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import copy
|
|
6
|
+
|
|
7
|
+
from rossum_mcp.tools.schemas.models import SchemaTreeNode
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def _build_tree_node(node: dict) -> SchemaTreeNode:
    """Convert a schema node (and its descendants) into a ``SchemaTreeNode``.

    Only id, label, category and, for datapoints, type are carried over. A
    single dict child becomes a one-element children list; a node without
    list/dict children gets ``children=None``.
    """
    category = node.get("category", "")

    raw_children = node.get("children")
    subtree: list[SchemaTreeNode] | None
    if isinstance(raw_children, list):
        subtree = [_build_tree_node(item) for item in raw_children]
    elif isinstance(raw_children, dict):
        subtree = [_build_tree_node(raw_children)]
    else:
        subtree = None

    return SchemaTreeNode(
        id=node.get("id", ""),
        label=node.get("label", ""),
        category=category,
        type=node.get("type") if category == "datapoint" else None,
        children=subtree,
    )
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def _extract_schema_tree(content: list[dict]) -> list[dict]:
    """Build the lightweight tree (as plain dicts) for every top-level node."""
    tree: list[dict] = []
    for section in content:
        tree.append(_build_tree_node(section).to_dict())
    return tree
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def _collect_all_field_ids(content: list[dict]) -> set[str]:
|
|
35
|
+
"""Collect all field IDs from schema content recursively."""
|
|
36
|
+
ids: set[str] = set()
|
|
37
|
+
|
|
38
|
+
def _traverse(node: dict) -> None:
|
|
39
|
+
node_id = node.get("id")
|
|
40
|
+
if node_id:
|
|
41
|
+
ids.add(node_id)
|
|
42
|
+
children = node.get("children")
|
|
43
|
+
if children is not None:
|
|
44
|
+
if isinstance(children, list):
|
|
45
|
+
for child in children:
|
|
46
|
+
_traverse(child)
|
|
47
|
+
elif isinstance(children, dict):
|
|
48
|
+
_traverse(children)
|
|
49
|
+
|
|
50
|
+
for section in content:
|
|
51
|
+
_traverse(section)
|
|
52
|
+
|
|
53
|
+
return ids
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def _collect_ancestor_ids(content: list[dict], target_ids: set[str]) -> set[str]:
|
|
57
|
+
"""Collect all ancestor IDs for the given target field IDs.
|
|
58
|
+
|
|
59
|
+
Returns set of IDs for all parent containers (multivalue, tuple, section) of target fields.
|
|
60
|
+
"""
|
|
61
|
+
ancestors: set[str] = set()
|
|
62
|
+
|
|
63
|
+
def _find_ancestors(node: dict, path: list[str]) -> None:
|
|
64
|
+
node_id = node.get("id", "")
|
|
65
|
+
current_path = [*path, node_id] if node_id else path
|
|
66
|
+
|
|
67
|
+
if node_id in target_ids:
|
|
68
|
+
ancestors.update(current_path[:-1])
|
|
69
|
+
|
|
70
|
+
children = node.get("children")
|
|
71
|
+
if children is not None:
|
|
72
|
+
if isinstance(children, list):
|
|
73
|
+
for child in children:
|
|
74
|
+
_find_ancestors(child, current_path)
|
|
75
|
+
elif isinstance(children, dict):
|
|
76
|
+
_find_ancestors(children, current_path)
|
|
77
|
+
|
|
78
|
+
for section in content:
|
|
79
|
+
_find_ancestors(section, [])
|
|
80
|
+
|
|
81
|
+
return ancestors
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def _remove_fields_from_content(content: list[dict], fields_to_remove: set[str]) -> tuple[list[dict], list[str]]:
|
|
85
|
+
"""Remove multiple fields from schema content.
|
|
86
|
+
|
|
87
|
+
Returns (modified_content, list_of_removed_field_ids).
|
|
88
|
+
Sections cannot be removed.
|
|
89
|
+
"""
|
|
90
|
+
content = copy.deepcopy(content)
|
|
91
|
+
removed: list[str] = []
|
|
92
|
+
|
|
93
|
+
def _filter_children(children: list[dict]) -> list[dict]:
|
|
94
|
+
result = []
|
|
95
|
+
for child in children:
|
|
96
|
+
child_id = child.get("id", "")
|
|
97
|
+
category = child.get("category", "")
|
|
98
|
+
|
|
99
|
+
if child_id in fields_to_remove and category != "section":
|
|
100
|
+
removed.append(child_id)
|
|
101
|
+
continue
|
|
102
|
+
|
|
103
|
+
nested = child.get("children")
|
|
104
|
+
if nested is not None:
|
|
105
|
+
if isinstance(nested, list):
|
|
106
|
+
child["children"] = _filter_children(nested)
|
|
107
|
+
elif isinstance(nested, dict):
|
|
108
|
+
nested_id = nested.get("id", "")
|
|
109
|
+
if nested_id in fields_to_remove:
|
|
110
|
+
removed.append(nested_id)
|
|
111
|
+
removed.append(child_id)
|
|
112
|
+
continue
|
|
113
|
+
nested_children = nested.get("children")
|
|
114
|
+
if isinstance(nested_children, list):
|
|
115
|
+
filtered_nested = _filter_children(nested_children)
|
|
116
|
+
if not filtered_nested:
|
|
117
|
+
removed.append(nested_id)
|
|
118
|
+
removed.append(child_id)
|
|
119
|
+
continue
|
|
120
|
+
nested["children"] = filtered_nested
|
|
121
|
+
result.append(child)
|
|
122
|
+
return result
|
|
123
|
+
|
|
124
|
+
for section in content:
|
|
125
|
+
section_children = section.get("children")
|
|
126
|
+
if isinstance(section_children, list):
|
|
127
|
+
section["children"] = _filter_children(section_children)
|
|
128
|
+
|
|
129
|
+
removed_sections = [s.get("id", "") for s in content if not s.get("children")]
|
|
130
|
+
removed.extend(removed_sections)
|
|
131
|
+
content = [s for s in content if s.get("children")]
|
|
132
|
+
|
|
133
|
+
return content, removed
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
"""Schema validation utilities for Rossum MCP Server."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
# Longest node id accepted by _validate_id, in characters.
MAX_ID_LENGTH = 50
|
|
6
|
+
# Values accepted for a datapoint's "type" (checked by _validate_datapoint).
VALID_DATAPOINT_TYPES = {"string", "number", "date", "enum", "button"}
|
|
7
|
+
# Values allowed for ui_configuration["type"]; None is included, so an explicit
# null is kept. Anything else is deleted by _sanitize_ui_configuration.
VALID_UI_CONFIGURATION_TYPES = {"captured", "data", "manual", "formula", "reasoning", None}
|
|
8
|
+
# Values allowed for ui_configuration["edit"]; anything else is deleted by
# _sanitize_ui_configuration.
VALID_UI_CONFIGURATION_EDIT = {"enabled", "enabled_without_warning", "disabled"}
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class SchemaValidationError(ValueError):
    """Signals that a schema node failed validation.

    Subclasses ``ValueError``, so handlers catching ``ValueError`` see it too.
    """
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def _sanitize_ui_configuration(node: dict) -> None:
    """Strip invalid ``ui_configuration`` values from ``node`` in place.

    ``type`` and ``edit`` are deleted when their value is not in the matching
    allow-list. If that leaves ``ui_configuration`` empty, the key itself is
    removed. A node whose ``ui_configuration`` is missing or not a dict is left
    untouched.
    """
    ui_config = node.get("ui_configuration")
    if not isinstance(ui_config, dict):
        return

    allow_lists = (
        ("type", VALID_UI_CONFIGURATION_TYPES),
        ("edit", VALID_UI_CONFIGURATION_EDIT),
    )
    for key, allowed in allow_lists:
        if key not in ui_config:
            continue
        try:
            is_valid = ui_config[key] in allowed
        except TypeError:
            # Unhashable values (list/dict) cannot be set members, so they are
            # invalid by definition. Strip them instead of crashing.
            is_valid = False
        if not is_valid:
            del ui_config[key]

    if not ui_config:
        del node["ui_configuration"]
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def sanitize_schema_content(content: list[dict]) -> list[dict]:
    """Sanitize every node of ``content`` in place and return ``content``.

    Each node, including descendants reached through list or single-dict
    ``children``, is passed to ``_sanitize_ui_configuration``, which removes
    invalid ``ui_configuration`` values (e.g. a ``type`` of 'area' or
    'textarea') that would cause API errors.
    """
    # Explicit stack; pushing in reverse keeps the visiting order parent-first,
    # left-to-right.
    pending = list(reversed(content))
    while pending:
        node = pending.pop()
        _sanitize_ui_configuration(node)

        below = node.get("children")
        if isinstance(below, list):
            pending.extend(reversed(below))
        elif isinstance(below, dict):
            pending.append(below)

    return content
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def _validate_id(node_id: str, context: str = "") -> None:
    """Check that ``node_id`` is non-empty and at most ``MAX_ID_LENGTH`` characters.

    ``context`` is appended verbatim to the error message.

    Raises:
        SchemaValidationError: if the ID is empty or too long.
    """
    if node_id:
        if len(node_id) > MAX_ID_LENGTH:
            raise SchemaValidationError(f"Node id '{node_id}' exceeds {MAX_ID_LENGTH} characters{context}")
        return
    raise SchemaValidationError(f"Node id is required{context}")
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def _validate_datapoint(node: dict, context: str = "") -> None:
    """Validate that a datapoint node has a label and a supported type.

    ``context`` is appended verbatim to every error message.

    Raises:
        SchemaValidationError: if 'label' or 'type' is missing, or 'type' is
            not one of ``VALID_DATAPOINT_TYPES``.
    """
    if "label" not in node:
        raise SchemaValidationError(f"Datapoint missing required 'label'{context}")
    if "type" not in node:
        raise SchemaValidationError(f"Datapoint missing required 'type'{context}")

    datapoint_type = node["type"]
    try:
        is_valid = datapoint_type in VALID_DATAPOINT_TYPES
    except TypeError:
        # An unhashable value (list/dict) is reported as a validation failure,
        # not as a TypeError leaking out of the set lookup.
        is_valid = False

    if not is_valid:
        # Sorted so the message is identical from run to run (set order is not).
        allowed = ", ".join(sorted(VALID_DATAPOINT_TYPES))
        raise SchemaValidationError(f"Invalid datapoint type '{datapoint_type}'. Must be one of: {allowed}{context}")
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def _validate_tuple(node: dict, node_id: str, context: str) -> None:
    """Validate a tuple node and each of its children.

    Raises:
        SchemaValidationError: if 'label' or 'id' is missing, 'children' is not
            a list, a child fails ``_validate_node``, or a child has no 'id'.
    """
    if "label" not in node:
        raise SchemaValidationError(f"Tuple missing required 'label'{context}")
    if "id" not in node:
        raise SchemaValidationError(f"Tuple missing required 'id'{context}")

    members = node.get("children", [])
    if not isinstance(members, list):
        raise SchemaValidationError(f"Tuple children must be a list{context}")

    for position, member in enumerate(members):
        # Fall back to the position when the child has no id to name it by.
        member_name = member.get("id", f"index {position}")
        _validate_node(member, f" in tuple '{node_id}' child '{member_name}'")
        if "id" not in member:
            raise SchemaValidationError(f"Datapoint inside tuple must have 'id'{context} child index {position}")
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def _validate_multivalue(node: dict, node_id: str, context: str) -> None:
    """Validate a multivalue node and its single child.

    Raises:
        SchemaValidationError: if 'label' is missing, 'children' is missing or
            ``None``, 'children' is a list, or the dict child fails
            ``_validate_node``.
    """
    if "label" not in node:
        raise SchemaValidationError(f"Multivalue missing required 'label'{context}")

    inner = node.get("children")
    if inner is None:
        raise SchemaValidationError(f"Multivalue missing required 'children'{context}")
    if isinstance(inner, list):
        raise SchemaValidationError(f"Multivalue 'children' must be a single object (dict), not a list{context}")
    if not isinstance(inner, dict):
        # Other value types are not inspected further.
        return

    _validate_node(inner, f" in multivalue '{node_id}' children")
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def _validate_section(node: dict, node_id: str, context: str) -> None:
    """Validate a section node and each of its children.

    Raises:
        SchemaValidationError: if 'label' or 'id' is missing, 'children' is not
            a list, or a child fails ``_validate_node``.
    """
    if "label" not in node:
        raise SchemaValidationError(f"Section missing required 'label'{context}")
    if "id" not in node:
        raise SchemaValidationError(f"Section missing required 'id'{context}")

    members = node.get("children", [])
    if not isinstance(members, list):
        raise SchemaValidationError(f"Section children must be a list{context}")

    for member in members:
        member_name = member.get("id", "unknown")
        _validate_node(member, f" in section '{node_id}' child '{member_name}'")
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def _validate_node(node: dict, context: str = "") -> None:
    """Validate a schema node, dispatching on its category.

    A non-empty ID is checked with ``_validate_id``. Datapoint, tuple,
    multivalue and section nodes are then handed to their dedicated validator;
    any other category is accepted without further checks.
    """
    kind = node.get("category")
    identifier = node.get("id", "")

    if identifier:
        _validate_id(identifier, context)

    if kind == "datapoint":
        _validate_datapoint(node, context)
        return

    # The container validators share one signature, so a table walk suffices.
    container_validators = (
        ("tuple", _validate_tuple),
        ("multivalue", _validate_multivalue),
        ("section", _validate_section),
    )
    for name, validator in container_validators:
        if kind == name:
            validator(node, identifier, context)
            return
|