rdf-construct 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rdf_construct/__init__.py +12 -0
- rdf_construct/__main__.py +0 -0
- rdf_construct/cli.py +3429 -0
- rdf_construct/core/__init__.py +33 -0
- rdf_construct/core/config.py +116 -0
- rdf_construct/core/ordering.py +219 -0
- rdf_construct/core/predicate_order.py +212 -0
- rdf_construct/core/profile.py +157 -0
- rdf_construct/core/selector.py +64 -0
- rdf_construct/core/serialiser.py +232 -0
- rdf_construct/core/utils.py +89 -0
- rdf_construct/cq/__init__.py +77 -0
- rdf_construct/cq/expectations.py +365 -0
- rdf_construct/cq/formatters/__init__.py +45 -0
- rdf_construct/cq/formatters/json.py +104 -0
- rdf_construct/cq/formatters/junit.py +104 -0
- rdf_construct/cq/formatters/text.py +146 -0
- rdf_construct/cq/loader.py +300 -0
- rdf_construct/cq/runner.py +321 -0
- rdf_construct/diff/__init__.py +59 -0
- rdf_construct/diff/change_types.py +214 -0
- rdf_construct/diff/comparator.py +338 -0
- rdf_construct/diff/filters.py +133 -0
- rdf_construct/diff/formatters/__init__.py +71 -0
- rdf_construct/diff/formatters/json.py +192 -0
- rdf_construct/diff/formatters/markdown.py +210 -0
- rdf_construct/diff/formatters/text.py +195 -0
- rdf_construct/docs/__init__.py +60 -0
- rdf_construct/docs/config.py +238 -0
- rdf_construct/docs/extractors.py +603 -0
- rdf_construct/docs/generator.py +360 -0
- rdf_construct/docs/renderers/__init__.py +7 -0
- rdf_construct/docs/renderers/html.py +803 -0
- rdf_construct/docs/renderers/json.py +390 -0
- rdf_construct/docs/renderers/markdown.py +628 -0
- rdf_construct/docs/search.py +278 -0
- rdf_construct/docs/templates/html/base.html.jinja +44 -0
- rdf_construct/docs/templates/html/class.html.jinja +152 -0
- rdf_construct/docs/templates/html/hierarchy.html.jinja +28 -0
- rdf_construct/docs/templates/html/index.html.jinja +110 -0
- rdf_construct/docs/templates/html/instance.html.jinja +90 -0
- rdf_construct/docs/templates/html/namespaces.html.jinja +37 -0
- rdf_construct/docs/templates/html/property.html.jinja +124 -0
- rdf_construct/docs/templates/html/single_page.html.jinja +169 -0
- rdf_construct/lint/__init__.py +75 -0
- rdf_construct/lint/config.py +214 -0
- rdf_construct/lint/engine.py +396 -0
- rdf_construct/lint/formatters.py +327 -0
- rdf_construct/lint/rules.py +692 -0
- rdf_construct/localise/__init__.py +114 -0
- rdf_construct/localise/config.py +508 -0
- rdf_construct/localise/extractor.py +427 -0
- rdf_construct/localise/formatters/__init__.py +36 -0
- rdf_construct/localise/formatters/markdown.py +229 -0
- rdf_construct/localise/formatters/text.py +224 -0
- rdf_construct/localise/merger.py +346 -0
- rdf_construct/localise/reporter.py +356 -0
- rdf_construct/main.py +6 -0
- rdf_construct/merge/__init__.py +165 -0
- rdf_construct/merge/config.py +354 -0
- rdf_construct/merge/conflicts.py +281 -0
- rdf_construct/merge/formatters.py +426 -0
- rdf_construct/merge/merger.py +425 -0
- rdf_construct/merge/migrator.py +339 -0
- rdf_construct/merge/rules.py +377 -0
- rdf_construct/merge/splitter.py +1102 -0
- rdf_construct/puml2rdf/__init__.py +103 -0
- rdf_construct/puml2rdf/config.py +230 -0
- rdf_construct/puml2rdf/converter.py +420 -0
- rdf_construct/puml2rdf/merger.py +200 -0
- rdf_construct/puml2rdf/model.py +202 -0
- rdf_construct/puml2rdf/parser.py +565 -0
- rdf_construct/puml2rdf/validators.py +451 -0
- rdf_construct/refactor/__init__.py +72 -0
- rdf_construct/refactor/config.py +362 -0
- rdf_construct/refactor/deprecator.py +328 -0
- rdf_construct/refactor/formatters/__init__.py +8 -0
- rdf_construct/refactor/formatters/text.py +311 -0
- rdf_construct/refactor/renamer.py +294 -0
- rdf_construct/shacl/__init__.py +56 -0
- rdf_construct/shacl/config.py +166 -0
- rdf_construct/shacl/converters.py +520 -0
- rdf_construct/shacl/generator.py +364 -0
- rdf_construct/shacl/namespaces.py +93 -0
- rdf_construct/stats/__init__.py +29 -0
- rdf_construct/stats/collector.py +178 -0
- rdf_construct/stats/comparator.py +298 -0
- rdf_construct/stats/formatters/__init__.py +83 -0
- rdf_construct/stats/formatters/json.py +38 -0
- rdf_construct/stats/formatters/markdown.py +153 -0
- rdf_construct/stats/formatters/text.py +186 -0
- rdf_construct/stats/metrics/__init__.py +26 -0
- rdf_construct/stats/metrics/basic.py +147 -0
- rdf_construct/stats/metrics/complexity.py +137 -0
- rdf_construct/stats/metrics/connectivity.py +130 -0
- rdf_construct/stats/metrics/documentation.py +128 -0
- rdf_construct/stats/metrics/hierarchy.py +207 -0
- rdf_construct/stats/metrics/properties.py +88 -0
- rdf_construct/uml/__init__.py +22 -0
- rdf_construct/uml/context.py +194 -0
- rdf_construct/uml/mapper.py +371 -0
- rdf_construct/uml/odm_renderer.py +789 -0
- rdf_construct/uml/renderer.py +684 -0
- rdf_construct/uml/uml_layout.py +393 -0
- rdf_construct/uml/uml_style.py +613 -0
- rdf_construct-0.3.0.dist-info/METADATA +496 -0
- rdf_construct-0.3.0.dist-info/RECORD +110 -0
- rdf_construct-0.3.0.dist-info/WHEEL +4 -0
- rdf_construct-0.3.0.dist-info/entry_points.txt +3 -0
- rdf_construct-0.3.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
"""Core RDF ordering and serialization functionality."""
|
|
2
|
+
|
|
3
|
+
from .ordering import sort_subjects, topo_sort_subset, sort_with_roots
|
|
4
|
+
from .profile import OrderingConfig, OrderingProfile, load_yaml
|
|
5
|
+
from .selector import select_subjects
|
|
6
|
+
from .serialiser import serialise_turtle, build_section_graph
|
|
7
|
+
from .utils import (
|
|
8
|
+
expand_curie,
|
|
9
|
+
extract_prefix_map,
|
|
10
|
+
qname_sort_key,
|
|
11
|
+
rebind_prefixes,
|
|
12
|
+
)
|
|
13
|
+
|
|
14
|
+
__all__ = [
|
|
15
|
+
# Ordering
|
|
16
|
+
"sort_subjects",
|
|
17
|
+
"topo_sort_subset",
|
|
18
|
+
"sort_with_roots",
|
|
19
|
+
# Profile
|
|
20
|
+
"OrderingConfig",
|
|
21
|
+
"OrderingProfile",
|
|
22
|
+
"load_yaml",
|
|
23
|
+
# Selector
|
|
24
|
+
"select_subjects",
|
|
25
|
+
# Serialiser
|
|
26
|
+
"serialise_turtle",
|
|
27
|
+
"build_section_graph",
|
|
28
|
+
# Utils
|
|
29
|
+
"expand_curie",
|
|
30
|
+
"extract_prefix_map",
|
|
31
|
+
"qname_sort_key",
|
|
32
|
+
"rebind_prefixes",
|
|
33
|
+
]
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Configuration and YAML handling for rdf-construct.
|
|
3
|
+
|
|
4
|
+
This module handles loading ordering profiles from YAML files and managing
|
|
5
|
+
RDF namespace prefixes and CURIE expansion.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from dataclasses import dataclass, field
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import Dict, List, Optional
|
|
11
|
+
|
|
12
|
+
import yaml
|
|
13
|
+
from rdflib import Graph, Namespace, URIRef
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@dataclass
class SectionConfig:
    """Settings for one section of an ordering profile.

    ``name`` and ``select`` are required. ``sort`` falls back to
    ``"qname_alpha"``; every remaining field is optional and defaults to
    ``None`` (meaning "not configured").
    """

    # --- required -------------------------------------------------------
    name: str
    select: str

    # --- sorting --------------------------------------------------------
    sort: str = "qname_alpha"
    roots: Optional[List[str]] = None
    cluster: Optional[str] = None
    within_level: Optional[str] = None

    # --- grouping -------------------------------------------------------
    group_by: Optional[str] = None
    group_order: Optional[str] = None
    explicit_group_sequence: Optional[List[str]] = None
    within_group_tie: Optional[str] = None

    # --- anchors --------------------------------------------------------
    anchors: Optional[List[str]] = None
    after_anchors: Optional[str] = None
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
@dataclass
class ProfileConfig:
    """A named ordering profile: free-text description plus its sections."""

    # Profile identifier.
    name: str
    # Optional human-readable description; empty when omitted.
    description: str = ""
    # default_factory gives each instance its own, initially empty, list.
    sections: List[SectionConfig] = field(default_factory=list)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
@dataclass
class OrderingSpec:
    """In-memory form of a complete ordering specification.

    All four fields default to fresh, empty containers.
    """

    # Top-level ``defaults`` mapping.
    defaults: Dict = field(default_factory=dict)
    # Top-level ``selectors`` mapping (string keys to string values).
    selectors: Dict[str, str] = field(default_factory=dict)
    # Top-level ``prefix_order`` list.
    prefix_order: List[str] = field(default_factory=list)
    # Profiles keyed by profile name.
    profiles: Dict[str, ProfileConfig] = field(default_factory=dict)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def load_yaml(path: Path) -> dict:
    """Read a UTF-8 YAML file and return its parsed content.

    Args:
        path: Location of the YAML file.

    Returns:
        Whatever ``yaml.safe_load`` produces for the document (a dictionary
        for the mapping-style files this package reads).

    Raises:
        FileNotFoundError: If the file doesn't exist
        yaml.YAMLError: If the file is not valid YAML
    """
    text = path.read_text(encoding="utf-8")
    # safe_load builds plain Python objects only; it never runs arbitrary tags.
    parsed = yaml.safe_load(text)
    return parsed
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def load_ordering_spec(path: Path) -> OrderingSpec:
    """Load an ordering specification from a YAML file.

    Missing or null parts of the document are treated as empty, so an empty
    file, ``profiles:`` with no value, a profile with no body, or
    ``sections:`` with no value all produce an empty (but valid) result
    instead of raising ``AttributeError``.

    Args:
        path: Path to the YAML ordering specification

    Returns:
        OrderingSpec built from the document's ``defaults``, ``selectors``,
        ``prefix_order`` and ``profiles`` keys.

    Raises:
        FileNotFoundError: If the file doesn't exist
        yaml.YAMLError: If the file is not valid YAML
    """
    # Section settings that are copied through unchanged (None when absent).
    optional_keys = (
        "roots",
        "cluster",
        "within_level",
        "group_by",
        "group_order",
        "explicit_group_sequence",
        "within_group_tie",
        "anchors",
        "after_anchors",
    )

    # An empty YAML document parses to None; treat it as an empty mapping.
    data = load_yaml(path) or {}

    profiles = {}
    for prof_name, prof_data in (data.get("profiles") or {}).items():
        # A profile written as a bare key ("name:") has a null body.
        prof_data = prof_data or {}

        sections = []
        for sec in prof_data.get("sections") or []:
            # Each section is a single-entry mapping {section_name: settings};
            # anything else is skipped.
            if not isinstance(sec, dict) or not sec:
                continue
            sec_name, sec_cfg = next(iter(sec.items()))
            sec_cfg = sec_cfg or {}

            sections.append(
                SectionConfig(
                    name=sec_name,
                    # The selector defaults to the section's own name.
                    select=sec_cfg.get("select", sec_name),
                    sort=sec_cfg.get("sort", "qname_alpha"),
                    **{key: sec_cfg.get(key) for key in optional_keys},
                )
            )

        profiles[prof_name] = ProfileConfig(
            name=prof_name,
            description=prof_data.get("description") or "",
            sections=sections,
        )

    return OrderingSpec(
        defaults=data.get("defaults") or {},
        selectors=data.get("selectors") or {},
        prefix_order=data.get("prefix_order") or [],
        profiles=profiles,
    )
|
|
@@ -0,0 +1,219 @@
|
|
|
1
|
+
"""Ordering and sorting logic for RDF subjects."""
|
|
2
|
+
|
|
3
|
+
from typing import Optional
|
|
4
|
+
|
|
5
|
+
from rdflib import Graph, URIRef, RDF, RDFS
|
|
6
|
+
from rdflib.namespace import OWL
|
|
7
|
+
|
|
8
|
+
from .utils import expand_curie, qname_sort_key
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def build_adjacency(
    graph: Graph, nodes: set, edge_predicate: URIRef
) -> tuple[dict[URIRef, set[URIRef]], dict[URIRef, int]]:
    """Derive the parent->children graph needed for a topological sort.

    Only edges whose both ends are in ``nodes`` are recorded.

    Args:
        graph: RDF graph containing the relationships
        nodes: Set of nodes to build adjacency for
        edge_predicate: Predicate linking a child (subject) to its parent
            (object), typically rdfs:subClassOf or rdfs:subPropertyOf

    Returns:
        Tuple ``(adjacency, indegree)``:
            - adjacency maps each node to the set of its children
            - indegree maps each node to the number of in-scope parents
    """
    children_of: dict[URIRef, set[URIRef]] = {}
    incoming: dict[URIRef, int] = {}
    for node in nodes:
        children_of[node] = set()
        incoming[node] = 0

    for child in nodes:
        in_scope_parents = [
            parent
            for parent in graph.objects(child, edge_predicate)
            if parent in nodes
        ]
        for parent in in_scope_parents:
            # Edge direction is parent -> child so parents are emitted first.
            children_of[parent].add(child)
            incoming[child] += 1

    return children_of, incoming
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def topo_sort_subset(graph: Graph, nodes: set, edge_predicate: URIRef) -> list:
    """Topologically sort a subset of nodes using Kahn's algorithm.

    Parents appear before children. Whenever several nodes are ready at
    once, the one with the smallest ``qname_sort_key`` is emitted first, so
    the output is deterministic. Nodes that never become ready (they sit on,
    or below, a cycle) are appended at the end in sort-key order.

    Args:
        graph: RDF graph containing the relationships
        nodes: Set of nodes to sort
        edge_predicate: Predicate defining parent-child relationship

    Returns:
        List of URIRefs in topological order
    """
    import heapq  # local import keeps the module's import block untouched

    if not nodes:
        return []

    adj, indeg = build_adjacency(graph, nodes, edge_predicate)

    def sort_key(term):
        return qname_sort_key(graph, term)

    # Ready queue: a heap of (sort key, insertion index, node). The insertion
    # index keeps equal keys in arrival order (what a stable re-sort of the
    # list would give) and guarantees nodes themselves are never compared.
    ready: list = []
    pushed = 0
    for node, degree in indeg.items():
        if degree == 0:
            heapq.heappush(ready, (sort_key(node), pushed, node))
            pushed += 1

    out: list = []
    emitted: set = set()

    while ready:
        _, _, current = heapq.heappop(ready)
        out.append(current)
        emitted.add(current)

        # Releasing a parent may make its children ready.
        for child in adj[current]:
            indeg[child] -= 1
            if indeg[child] == 0:
                heapq.heappush(ready, (sort_key(child), pushed, child))
                pushed += 1

    # Anything left still has an unmet parent, i.e. it is part of (or
    # downstream of) a cycle. Emit it alphabetically so nothing is lost.
    if len(out) < len(nodes):
        leftovers = sorted((n for n in nodes if n not in emitted), key=sort_key)
        out.extend(leftovers)

    return out
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def descendants_of(
    graph: Graph, root: URIRef, nodes: set, edge_predicate: URIRef
) -> set:
    """Collect ``root`` and everything below it, restricted to ``nodes``.

    A node counts as "below" another when it points at it via
    ``edge_predicate`` (e.g. subClassOf/subPropertyOf), directly or through
    a chain of such links that stays inside ``nodes``.

    Args:
        graph: RDF graph containing the relationships
        root: Root node to start traversal from
        nodes: Set of nodes to consider (search space)
        edge_predicate: Predicate defining parent-child relationship

    Returns:
        Set containing root and its descendants; empty when root is not
        in ``nodes``.
    """
    # A root outside the search space has no branch to report.
    if root not in nodes:
        return set()

    # Invert the child -> parent triples into a parent -> children index.
    kids: dict[URIRef, set[URIRef]] = {member: set() for member in nodes}
    for child in nodes:
        for parent in graph.objects(child, edge_predicate):
            if parent in nodes:
                kids[parent].add(child)

    # Depth-first walk; `found` doubles as the visited set so cycles stop.
    found = {root}
    pending = [root]
    while pending:
        current = pending.pop()
        fresh = kids[current] - found
        found |= fresh
        pending.extend(fresh)

    return found
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def sort_with_roots(
    graph: Graph, subjects: set, mode: str, roots_cfg: Optional[list[str]]
) -> list:
    """Order subjects branch by branch, following an explicit list of roots.

    For a topological mode with roots configured, each root's branch is
    emitted contiguously (topologically sorted inside the branch) in the
    order the roots are listed; subjects not claimed by any root follow,
    also topologically sorted. Without roots the whole set is sorted
    topologically. Any other mode yields a plain alphabetical sort.

    Args:
        graph: RDF graph containing the relationships
        subjects: Set of subjects to sort
        mode: Sorting mode (should be 'topological' or 'topological_then_alpha')
        roots_cfg: List of root CURIEs/IRIs defining branch order

    Returns:
        List of URIRefs in the specified order
    """
    mode = (mode or "qname_alpha").lower()

    # Non-topological modes ignore hierarchy altogether.
    if mode not in ("topological", "topological_then_alpha"):
        return sorted(subjects, key=lambda t: qname_sort_key(graph, t))

    # Choose the hierarchy predicate from what the subjects are typed as.
    # NOTE(review): only owl:ObjectProperty / owl:DatatypeProperty select
    # rdfs:subPropertyOf; every other subject set uses rdfs:subClassOf.
    # Confirm that is intended for annotation properties.
    edge = RDFS.subClassOf
    for s in subjects:
        if (s, RDF.type, OWL.ObjectProperty) in graph or (
            s,
            RDF.type,
            OWL.DatatypeProperty,
        ) in graph:
            edge = RDFS.subPropertyOf
            break

    if not roots_cfg:
        return topo_sort_subset(graph, subjects, edge)

    # Resolve configured roots, silently dropping any that cannot be expanded.
    expanded = (expand_curie(graph, r) for r in roots_cfg)
    root_iris: list[URIRef] = [iri for iri in expanded if iri is not None]

    unplaced: set = set(subjects)
    result: list = []

    # One contiguous branch per root, in configuration order. A subject is
    # claimed by the first root that reaches it.
    for root in root_iris:
        branch = descendants_of(graph, root, unplaced, edge)
        if not branch:
            continue
        for node in topo_sort_subset(graph, branch, edge):
            if node in unplaced:
                result.append(node)
                unplaced.remove(node)

    # Subjects outside every configured branch go last.
    result.extend(topo_sort_subset(graph, unplaced, edge))
    return result
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
def sort_subjects(
    graph: Graph, subjects: set, sort_mode: str, roots_cfg: Optional[list[str]] = None
) -> list:
    """Sort subjects according to the requested mode.

    Supported modes (case-insensitive):
        - 'alpha' or 'qname_alpha': Alphabetical by QName
        - 'topological' or 'topological_then_alpha': Topological with optional roots

    A missing or unrecognised mode falls back to the alphabetical sort.

    Args:
        graph: RDF graph containing the relationships
        subjects: Set of subjects to sort
        sort_mode: Sorting mode identifier
        roots_cfg: Optional list of root CURIEs for topological sorting

    Returns:
        List of URIRefs in the specified order
    """
    mode = (sort_mode or "qname_alpha").lower()

    if mode in ("topological", "topological_then_alpha"):
        return sort_with_roots(graph, subjects, mode, roots_cfg)

    # 'alpha', 'qname_alpha' and every unknown mode share the same ordering.
    def alphabetical_key(term):
        return qname_sort_key(graph, term)

    return sorted(subjects, key=alphabetical_key)
|
|
@@ -0,0 +1,212 @@
|
|
|
1
|
+
"""Predicate ordering configuration and logic for RDF serialisation.
|
|
2
|
+
|
|
3
|
+
Controls the order in which predicates (properties) appear when serialising
|
|
4
|
+
RDF subjects. Supports different orderings for different subject types
|
|
5
|
+
(classes, properties, individuals).
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from dataclasses import dataclass, field
|
|
9
|
+
from typing import Any
|
|
10
|
+
|
|
11
|
+
from rdflib import Graph, RDF, RDFS, URIRef
|
|
12
|
+
from rdflib.namespace import OWL
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@dataclass
|
|
16
|
+
class PredicateOrderSpec:
|
|
17
|
+
"""Ordering specification for predicates of a particular subject type.
|
|
18
|
+
|
|
19
|
+
Attributes:
|
|
20
|
+
first: Predicates to appear first, in order (after rdf:type)
|
|
21
|
+
last: Predicates to appear last, in order
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
first: list[str] = field(default_factory=list)
|
|
25
|
+
last: list[str] = field(default_factory=list)
|
|
26
|
+
|
|
27
|
+
@classmethod
|
|
28
|
+
def from_dict(cls, data: dict[str, Any] | None) -> "PredicateOrderSpec":
|
|
29
|
+
"""Create from dictionary configuration.
|
|
30
|
+
|
|
31
|
+
Args:
|
|
32
|
+
data: Dictionary with 'first' and/or 'last' keys
|
|
33
|
+
|
|
34
|
+
Returns:
|
|
35
|
+
PredicateOrderSpec instance
|
|
36
|
+
"""
|
|
37
|
+
if not data:
|
|
38
|
+
return cls()
|
|
39
|
+
return cls(
|
|
40
|
+
first=data.get("first", []) or [],
|
|
41
|
+
last=data.get("last", []) or [],
|
|
42
|
+
)
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
@dataclass
class PredicateOrderConfig:
    """Per-subject-type predicate ordering.

    Holds one ``PredicateOrderSpec`` for each kind of RDF subject plus a
    fallback used when the specific one is empty or the kind is unknown.

    Attributes:
        classes: Ordering for owl:Class and rdfs:Class subjects
        properties: Ordering for property subjects (ObjectProperty, etc.)
        individuals: Ordering for individual/instance subjects
        default: Fallback ordering for unmatched subject types
    """

    classes: PredicateOrderSpec = field(default_factory=PredicateOrderSpec)
    properties: PredicateOrderSpec = field(default_factory=PredicateOrderSpec)
    individuals: PredicateOrderSpec = field(default_factory=PredicateOrderSpec)
    default: PredicateOrderSpec = field(default_factory=PredicateOrderSpec)

    @classmethod
    def from_dict(cls, data: dict[str, Any] | None) -> "PredicateOrderConfig":
        """Build a config from its dictionary form.

        Args:
            data: Dictionary with subject type keys (classes, properties, etc.)

        Returns:
            PredicateOrderConfig instance; all-empty when ``data`` is falsy
        """
        if not data:
            return cls()
        section_names = ("classes", "properties", "individuals", "default")
        parsed = {
            section: PredicateOrderSpec.from_dict(data.get(section))
            for section in section_names
        }
        return cls(**parsed)

    def get_spec_for_type(self, subject_type: str) -> PredicateOrderSpec:
        """Pick the ordering spec that applies to a subject type.

        Args:
            subject_type: One of 'class', 'property', 'individual', or other

        Returns:
            The type-specific spec when it lists any 'first' or 'last'
            predicates, otherwise the default spec
        """
        by_type = {
            "class": self.classes,
            "property": self.properties,
            "individual": self.individuals,
        }
        chosen = by_type.get(subject_type)
        # An empty type-specific spec defers to the default one.
        if chosen is not None and (chosen.first or chosen.last):
            return chosen
        return self.default
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def classify_subject(graph: Graph, subject: URIRef) -> str:
    """Categorise an RDF subject by its declared rdf:type values.

    The class check runs first, so a subject typed as both a class and a
    property is reported as a class.

    Args:
        graph: RDF graph containing the subject
        subject: The subject URI to classify

    Returns:
        'class' for owl:Class / rdfs:Class, 'property' for
        owl:ObjectProperty / owl:DatatypeProperty / owl:AnnotationProperty /
        rdf:Property, and 'individual' for everything else
    """
    declared = set(graph.objects(subject, RDF.type))

    class_markers = (OWL.Class, RDFS.Class)
    if not declared.isdisjoint(class_markers):
        return "class"

    property_markers = (
        OWL.ObjectProperty,
        OWL.DatatypeProperty,
        OWL.AnnotationProperty,
        RDF.Property,
    )
    if not declared.isdisjoint(property_markers):
        return "property"

    # Anything without a recognised class/property type is an individual.
    return "individual"
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def expand_curie(graph: Graph, curie: str) -> URIRef | None:
    """Turn a ``prefix:local`` CURIE into a full URI.

    The prefix is everything before the first colon and is looked up among
    the graph's namespace bindings.

    Args:
        graph: RDF graph with namespace bindings
        curie: CURIE string like 'rdfs:label'

    Returns:
        Expanded URIRef, or None if the string has no colon or the prefix
        is not bound
    """
    prefix, colon, local = curie.partition(":")
    if not colon:
        return None

    bindings = graph.namespace_manager.namespaces()
    matching = (namespace for bound, namespace in bindings if bound == prefix)
    # The first binding for the prefix wins.
    for namespace in matching:
        return URIRef(str(namespace) + local)
    return None
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
def order_predicates(
    graph: Graph,
    predicates: list[URIRef],
    spec: PredicateOrderSpec,
    format_fn: callable,
) -> list[URIRef]:
    """Order predicates according to a specification.

    Ordering logic:
        1. rdf:type always first (handled by caller)
        2. 'first' predicates in specified order
        3. Remaining predicates sorted alphabetically by QName
        4. 'last' predicates in specified order

    A configured predicate is placed at most once: repeated entries are
    ignored, and one listed under both 'first' and 'last' stays in 'first'.
    CURIEs that cannot be expanded, or that expand to a predicate not in
    ``predicates``, are skipped.

    Args:
        graph: RDF graph with namespace bindings
        predicates: List of predicate URIs to order
        spec: Predicate ordering specification
        format_fn: Function to format URIRef as string (for sorting)

    Returns:
        Ordered list of predicates
    """
    # Set membership instead of scanning the list for every configured CURIE.
    present = set(predicates)

    def pick(curies, already_placed):
        """Expand CURIEs, keeping each present, not-yet-placed URI once."""
        picked = []
        taken = set(already_placed)
        for curie in curies:
            uri = expand_curie(graph, curie)
            if uri is None or uri not in present or uri in taken:
                continue
            taken.add(uri)
            picked.append(uri)
        return picked

    first_found = pick(spec.first, ())
    # Anything already emitted in the 'first' group must not repeat in 'last'.
    last_found = pick(spec.last, first_found)

    # rdf:type is excluded from the middle group; per the contract above the
    # caller emits it separately.
    placed = set(first_found) | set(last_found) | {RDF.type}
    middle = sorted((p for p in predicates if p not in placed), key=format_fn)

    return first_found + middle + last_found
|