rdf-construct 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rdf_construct/__init__.py +12 -0
- rdf_construct/__main__.py +0 -0
- rdf_construct/cli.py +3429 -0
- rdf_construct/core/__init__.py +33 -0
- rdf_construct/core/config.py +116 -0
- rdf_construct/core/ordering.py +219 -0
- rdf_construct/core/predicate_order.py +212 -0
- rdf_construct/core/profile.py +157 -0
- rdf_construct/core/selector.py +64 -0
- rdf_construct/core/serialiser.py +232 -0
- rdf_construct/core/utils.py +89 -0
- rdf_construct/cq/__init__.py +77 -0
- rdf_construct/cq/expectations.py +365 -0
- rdf_construct/cq/formatters/__init__.py +45 -0
- rdf_construct/cq/formatters/json.py +104 -0
- rdf_construct/cq/formatters/junit.py +104 -0
- rdf_construct/cq/formatters/text.py +146 -0
- rdf_construct/cq/loader.py +300 -0
- rdf_construct/cq/runner.py +321 -0
- rdf_construct/diff/__init__.py +59 -0
- rdf_construct/diff/change_types.py +214 -0
- rdf_construct/diff/comparator.py +338 -0
- rdf_construct/diff/filters.py +133 -0
- rdf_construct/diff/formatters/__init__.py +71 -0
- rdf_construct/diff/formatters/json.py +192 -0
- rdf_construct/diff/formatters/markdown.py +210 -0
- rdf_construct/diff/formatters/text.py +195 -0
- rdf_construct/docs/__init__.py +60 -0
- rdf_construct/docs/config.py +238 -0
- rdf_construct/docs/extractors.py +603 -0
- rdf_construct/docs/generator.py +360 -0
- rdf_construct/docs/renderers/__init__.py +7 -0
- rdf_construct/docs/renderers/html.py +803 -0
- rdf_construct/docs/renderers/json.py +390 -0
- rdf_construct/docs/renderers/markdown.py +628 -0
- rdf_construct/docs/search.py +278 -0
- rdf_construct/docs/templates/html/base.html.jinja +44 -0
- rdf_construct/docs/templates/html/class.html.jinja +152 -0
- rdf_construct/docs/templates/html/hierarchy.html.jinja +28 -0
- rdf_construct/docs/templates/html/index.html.jinja +110 -0
- rdf_construct/docs/templates/html/instance.html.jinja +90 -0
- rdf_construct/docs/templates/html/namespaces.html.jinja +37 -0
- rdf_construct/docs/templates/html/property.html.jinja +124 -0
- rdf_construct/docs/templates/html/single_page.html.jinja +169 -0
- rdf_construct/lint/__init__.py +75 -0
- rdf_construct/lint/config.py +214 -0
- rdf_construct/lint/engine.py +396 -0
- rdf_construct/lint/formatters.py +327 -0
- rdf_construct/lint/rules.py +692 -0
- rdf_construct/localise/__init__.py +114 -0
- rdf_construct/localise/config.py +508 -0
- rdf_construct/localise/extractor.py +427 -0
- rdf_construct/localise/formatters/__init__.py +36 -0
- rdf_construct/localise/formatters/markdown.py +229 -0
- rdf_construct/localise/formatters/text.py +224 -0
- rdf_construct/localise/merger.py +346 -0
- rdf_construct/localise/reporter.py +356 -0
- rdf_construct/main.py +6 -0
- rdf_construct/merge/__init__.py +165 -0
- rdf_construct/merge/config.py +354 -0
- rdf_construct/merge/conflicts.py +281 -0
- rdf_construct/merge/formatters.py +426 -0
- rdf_construct/merge/merger.py +425 -0
- rdf_construct/merge/migrator.py +339 -0
- rdf_construct/merge/rules.py +377 -0
- rdf_construct/merge/splitter.py +1102 -0
- rdf_construct/puml2rdf/__init__.py +103 -0
- rdf_construct/puml2rdf/config.py +230 -0
- rdf_construct/puml2rdf/converter.py +420 -0
- rdf_construct/puml2rdf/merger.py +200 -0
- rdf_construct/puml2rdf/model.py +202 -0
- rdf_construct/puml2rdf/parser.py +565 -0
- rdf_construct/puml2rdf/validators.py +451 -0
- rdf_construct/refactor/__init__.py +72 -0
- rdf_construct/refactor/config.py +362 -0
- rdf_construct/refactor/deprecator.py +328 -0
- rdf_construct/refactor/formatters/__init__.py +8 -0
- rdf_construct/refactor/formatters/text.py +311 -0
- rdf_construct/refactor/renamer.py +294 -0
- rdf_construct/shacl/__init__.py +56 -0
- rdf_construct/shacl/config.py +166 -0
- rdf_construct/shacl/converters.py +520 -0
- rdf_construct/shacl/generator.py +364 -0
- rdf_construct/shacl/namespaces.py +93 -0
- rdf_construct/stats/__init__.py +29 -0
- rdf_construct/stats/collector.py +178 -0
- rdf_construct/stats/comparator.py +298 -0
- rdf_construct/stats/formatters/__init__.py +83 -0
- rdf_construct/stats/formatters/json.py +38 -0
- rdf_construct/stats/formatters/markdown.py +153 -0
- rdf_construct/stats/formatters/text.py +186 -0
- rdf_construct/stats/metrics/__init__.py +26 -0
- rdf_construct/stats/metrics/basic.py +147 -0
- rdf_construct/stats/metrics/complexity.py +137 -0
- rdf_construct/stats/metrics/connectivity.py +130 -0
- rdf_construct/stats/metrics/documentation.py +128 -0
- rdf_construct/stats/metrics/hierarchy.py +207 -0
- rdf_construct/stats/metrics/properties.py +88 -0
- rdf_construct/uml/__init__.py +22 -0
- rdf_construct/uml/context.py +194 -0
- rdf_construct/uml/mapper.py +371 -0
- rdf_construct/uml/odm_renderer.py +789 -0
- rdf_construct/uml/renderer.py +684 -0
- rdf_construct/uml/uml_layout.py +393 -0
- rdf_construct/uml/uml_style.py +613 -0
- rdf_construct-0.3.0.dist-info/METADATA +496 -0
- rdf_construct-0.3.0.dist-info/RECORD +110 -0
- rdf_construct-0.3.0.dist-info/WHEEL +4 -0
- rdf_construct-0.3.0.dist-info/entry_points.txt +3 -0
- rdf_construct-0.3.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,364 @@
|
|
|
1
|
+
"""SHACL shape generator from OWL ontologies.
|
|
2
|
+
|
|
3
|
+
Generates SHACL NodeShapes from OWL class definitions, converting
|
|
4
|
+
domain/range, cardinality restrictions, and other OWL patterns
|
|
5
|
+
to equivalent SHACL constraints.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from collections import defaultdict
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import Any
|
|
11
|
+
|
|
12
|
+
from rdflib import BNode, Graph, Literal, Namespace, RDF, RDFS, URIRef
|
|
13
|
+
from rdflib.namespace import OWL
|
|
14
|
+
|
|
15
|
+
from .config import ShaclConfig, Severity, StrictnessLevel
|
|
16
|
+
from .converters import PropertyConstraint, get_converters_for_level
|
|
17
|
+
from .namespaces import SH, SHACL_PREFIXES
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class ShapeGenerator:
    """Generates SHACL shapes from OWL ontology definitions.

    Orchestrates the conversion process, applying converters and
    building the output shapes graph.

    Attributes:
        config: Generation configuration.
        source_graph: The OWL ontology to convert.
        shapes_graph: The output SHACL shapes graph.
    """

    def __init__(self, source_graph: Graph, config: ShaclConfig | None = None):
        """Initialise generator.

        Args:
            source_graph: The OWL ontology graph.
            config: Optional configuration (defaults provided).
        """
        self.config = config or ShaclConfig()
        self.source_graph = source_graph
        self.shapes_graph = Graph()

        # Bind the SHACL prefixes first so they are present in the output.
        for prefix, ns in SHACL_PREFIXES.items():
            self.shapes_graph.bind(prefix, ns)

        # Copy source prefixes, skipping the unnamed prefix and a fixed set
        # of standard prefixes.
        for prefix, ns in source_graph.namespaces():
            if prefix and prefix not in ("xml", "xsd", "rdf", "rdfs", "owl"):
                self.shapes_graph.bind(prefix, ns)

        # Determine shape namespace
        self._shape_ns = self._determine_shape_namespace()
        self.shapes_graph.bind("shape", Namespace(self._shape_ns))

    @staticmethod
    def _with_shapes_suffix(base: str) -> str:
        """Return ``base`` with one trailing ``#``/``/`` removed and ``-shapes#`` appended."""
        if base.endswith(("#", "/")):
            base = base[:-1]
        return base + "-shapes#"

    def _determine_shape_namespace(self) -> str:
        """Determine the namespace for generated shapes.

        Uses the IRI of the first ``owl:Ontology`` subject found; otherwise
        the first bound namespace that does not start with one of a few
        well-known vocabulary roots; otherwise a fixed example namespace.

        Returns:
            Namespace string ending in ``-shapes#`` (or the fixed default).
        """
        # Look for owl:Ontology
        for ont in self.source_graph.subjects(RDF.type, OWL.Ontology):
            if isinstance(ont, URIRef):
                return self._with_shapes_suffix(str(ont))

        # Fallback: use first non-standard namespace
        standard_roots = (
            "http://www.w3.org/",
            "http://purl.org/dc/",
            "http://xmlns.com/",
        )
        for _prefix, ns in self.source_graph.namespaces():
            ns_str = str(ns)
            if not ns_str.startswith(standard_roots):
                return self._with_shapes_suffix(ns_str)

        return "http://example.org/shapes#"

    def generate(self) -> Graph:
        """Generate SHACL shapes from the source ontology.

        Returns:
            Graph containing the generated SHACL shapes.
        """
        # Get all classes from ontology
        classes = self._get_target_classes()

        # Get converters for current strictness level
        converters = get_converters_for_level(self.config.level)

        # Generate shape for each class the configuration accepts
        for cls in classes:
            if not self.config.should_generate_for(cls, self.source_graph):
                continue

            self._create_node_shape(cls, converters)

        return self.shapes_graph

    def _get_target_classes(self) -> list[URIRef]:
        """Get all target classes from the ontology.

        Finds both owl:Class and rdfs:Class entities; blank-node classes
        are skipped.

        Returns:
            List of class URIs, ordered by local name and then by full IRI.
        """
        classes: set[URIRef] = set()

        for class_type in (OWL.Class, RDFS.Class):
            for cls in self.source_graph.subjects(RDF.type, class_type):
                if isinstance(cls, URIRef):
                    classes.add(cls)

        # Sort by local name for consistent output. The full IRI is the
        # tie-breaker: without it, classes sharing a local name would keep
        # the iteration order of the set, which is not stable between runs.
        return sorted(classes, key=lambda c: (self._local_name(c), str(c)))

    def _local_name(self, uri: URIRef) -> str:
        """Extract local name from URI.

        Returns the text after the last ``#`` if there is one, else the
        text after the last ``/``, else the whole string.
        """
        s = str(uri)
        if "#" in s:
            return s.rsplit("#", 1)[1]
        if "/" in s:
            return s.rsplit("/", 1)[1]
        return s

    def _create_node_shape(self, cls: URIRef, converters: list) -> URIRef:
        """Create a NodeShape for a class.

        Args:
            cls: The class to create a shape for.
            converters: List of converters to apply.

        Returns:
            URI of the created shape.
        """
        # NOTE(review): the shape IRI is derived from the local name only, so
        # two classes with the same local name in different namespaces end up
        # on the same shape node.
        shape_uri = URIRef(f"{self._shape_ns}{self._local_name(cls)}Shape")

        # Basic shape definition
        self.shapes_graph.add((shape_uri, RDF.type, SH.NodeShape))
        self.shapes_graph.add((shape_uri, SH.targetClass, cls))

        # Add name from rdfs:label if available
        if self.config.include_labels:
            label = self.source_graph.value(cls, RDFS.label)
            if label:
                self.shapes_graph.add((shape_uri, SH.name, Literal(str(label))))

        # Add description from rdfs:comment
        if self.config.include_descriptions:
            comment = self.source_graph.value(cls, RDFS.comment)
            if comment:
                self.shapes_graph.add((shape_uri, SH.description, Literal(str(comment))))

        # Collect all property constraints, one entry per property path
        prop_constraints: dict[URIRef, PropertyConstraint] = {}

        # Apply each converter
        for converter in converters:
            constraints = converter.convert_for_class(
                cls, self.source_graph, self.config
            )

            for constraint in constraints:
                existing = prop_constraints.get(constraint.path)
                if existing is not None:
                    # Merge with existing constraint
                    prop_constraints[constraint.path] = existing.merge(constraint)
                else:
                    prop_constraints[constraint.path] = constraint

        # Inherit constraints from superclasses if configured. Constraints
        # found on the class itself take precedence over inherited ones.
        if self.config.inherit_constraints:
            inherited = self._get_inherited_constraints(cls, converters)
            for path, constraint in inherited.items():
                if path not in prop_constraints:
                    prop_constraints[path] = constraint

        # Add property shapes, sorted by path for consistent output
        for order, path in enumerate(sorted(prop_constraints, key=str), start=1):
            constraint = prop_constraints[path]
            constraint.order = order

            prop_shape = constraint.to_rdf(self.shapes_graph)
            self.shapes_graph.add((shape_uri, SH.property, prop_shape))

        # Handle closed shapes (only at the STRICT level)
        if self.config.closed and self.config.level == StrictnessLevel.STRICT:
            self.shapes_graph.add((shape_uri, SH.closed, Literal(True)))

            # Add ignored properties
            ignored = self._get_ignored_properties()
            if ignored:
                ignored_list = self._create_rdf_list(ignored)
                self.shapes_graph.add((shape_uri, SH.ignoredProperties, ignored_list))

        return shape_uri

    def _get_inherited_constraints(
        self, cls: URIRef, converters: list
    ) -> dict[URIRef, PropertyConstraint]:
        """Get property constraints from superclasses.

        Walks ``rdfs:subClassOf`` transitively, visiting each named
        superclass once, and merges constraints that share a path.

        Args:
            cls: The class to get inherited constraints for.
            converters: Converters to apply.

        Returns:
            Dictionary mapping property URIs to constraints.
        """
        inherited: dict[URIRef, PropertyConstraint] = {}

        # Walk up the class hierarchy. Seeding ``visited`` with the class
        # itself stops a reflexive or cyclic subClassOf chain from converting
        # the class again; the caller discards those paths anyway because its
        # own constraints take precedence.
        visited: set[URIRef] = {cls}
        to_visit = list(self.source_graph.objects(cls, RDFS.subClassOf))

        while to_visit:
            superclass = to_visit.pop()
            # Blank-node superclasses are skipped here.
            if not isinstance(superclass, URIRef) or superclass in visited:
                continue

            visited.add(superclass)

            # Apply converters to superclass
            for converter in converters:
                constraints = converter.convert_for_class(
                    superclass, self.source_graph, self.config
                )

                for constraint in constraints:
                    if constraint.path not in inherited:
                        inherited[constraint.path] = constraint
                    else:
                        inherited[constraint.path] = inherited[constraint.path].merge(
                            constraint
                        )

            # Add parent's parents
            to_visit.extend(self.source_graph.objects(superclass, RDFS.subClassOf))

        return inherited

    def _get_ignored_properties(self) -> list[URIRef]:
        """Get list of properties to ignore in closed shapes.

        Returns:
            ``rdf:type`` followed by every configured ignored property that
            could be expanded to a URI; entries that cannot be expanded are
            dropped.
        """
        ignored = [RDF.type]  # Always ignore rdf:type

        # Add user-configured ignored properties
        for prop_str in self.config.ignored_properties:
            # Expand CURIE if possible
            expanded = self._expand_curie(prop_str)
            if expanded:
                ignored.append(expanded)

        return ignored

    def _expand_curie(self, curie: str) -> URIRef | None:
        """Expand a CURIE to full URI.

        Args:
            curie: A ``prefix:local`` CURIE or an absolute http(s) IRI.

        Returns:
            The expanded URI, or None when the prefix is not bound in the
            source graph and the value does not start with "http".
        """
        # Absolute http(s) IRIs are used verbatim.
        if curie.startswith(("http://", "https://")):
            return URIRef(curie)

        # Try prefix expansion before the loose "http" fallback below, so a
        # bound prefix that merely starts with "http" (e.g. "httpd:") is
        # expanded instead of being mistaken for an absolute IRI.
        if ":" in curie:
            prefix, local = curie.split(":", 1)
            for bound_prefix, ns in self.source_graph.namespaces():
                if bound_prefix == prefix:
                    return URIRef(str(ns) + local)

        # Any other value beginning with "http" is still accepted as a URI,
        # as it was before.
        if curie.startswith("http"):
            return URIRef(curie)

        return None

    def _create_rdf_list(self, items: list[URIRef]) -> BNode | URIRef:
        """Create an RDF list from items.

        Args:
            items: Members of the list, in order.

        Returns:
            The blank node heading the list, or ``rdf:nil`` for an empty list.
        """
        if not items:
            return RDF.nil

        head = BNode()
        current = head
        last_index = len(items) - 1

        for i, item in enumerate(items):
            self.shapes_graph.add((current, RDF.first, item))

            if i < last_index:
                next_node = BNode()
                self.shapes_graph.add((current, RDF.rest, next_node))
                current = next_node
            else:
                # Terminate the list after the final member.
                self.shapes_graph.add((current, RDF.rest, RDF.nil))

        return head
|
|
306
|
+
|
|
307
|
+
|
|
308
|
+
def generate_shapes(
    source: str | Path | Graph,
    config: ShaclConfig | None = None,
    output_format: str = "turtle",
) -> tuple[Graph, str]:
    """Generate SHACL shapes from an OWL ontology.

    Main entry point for shape generation.

    Args:
        source: Path to ontology file or pre-loaded Graph.
        config: Optional generation configuration.
        output_format: Output serialisation format.

    Returns:
        Tuple of (shapes graph, serialised string).
    """
    # Load source if path
    if isinstance(source, (str, Path)):
        # Imported locally so this function does not depend on a change to
        # the module's import block.
        from rdflib.util import guess_format

        source_path = str(source)
        source_graph = Graph()
        # Pick the parser from the file extension so non-Turtle ontologies
        # load too; unknown extensions keep the previous Turtle default.
        source_graph.parse(source_path, format=guess_format(source_path) or "turtle")
    else:
        source_graph = source

    # Generate shapes
    generator = ShapeGenerator(source_graph, config)
    shapes_graph = generator.generate()

    # Serialise
    output = shapes_graph.serialize(format=output_format)

    return shapes_graph, output
|
|
340
|
+
|
|
341
|
+
|
|
342
|
+
def generate_shapes_to_file(
    source: Path,
    output: Path,
    config: ShaclConfig | None = None,
    output_format: str = "turtle",
) -> Graph:
    """Generate SHACL shapes and write to file.

    Missing parent directories of ``output`` are created.

    Args:
        source: Path to ontology file.
        output: Path to write shapes to.
        config: Optional generation configuration.
        output_format: Output serialisation format.

    Returns:
        The generated shapes graph.
    """
    shapes_graph, serialised = generate_shapes(source, config, output_format)

    output.parent.mkdir(parents=True, exist_ok=True)
    # Write UTF-8 explicitly: the platform default encoding may be unable to
    # represent non-ASCII labels and would give a file that differs by host.
    output.write_text(serialised, encoding="utf-8")

    return shapes_graph
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
"""SHACL namespace definitions and utilities."""
|
|
2
|
+
|
|
3
|
+
from rdflib import Namespace
|
|
4
|
+
from rdflib.namespace import DefinedNamespace
|
|
5
|
+
|
|
6
|
+
# SHACL namespace
|
|
7
|
+
SH = Namespace("http://www.w3.org/ns/shacl#")
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class SHACL(DefinedNamespace):
    """SHACL namespace with commonly used terms.

    Provides typed access to SHACL vocabulary terms for shape generation.
    Terms whose names are Python reserved words (``class``, ``in``) cannot be
    written as attributes; they are declared in ``_extras`` and are reached
    by subscription, e.g. ``SHACL["class"]``.
    """

    # Core shape types
    NodeShape: str
    PropertyShape: str
    Shape: str

    # Targeting
    targetClass: str
    targetNode: str
    targetSubjectsOf: str
    targetObjectsOf: str

    # Property constraints
    property: str
    path: str
    name: str
    description: str
    order: str
    group: str

    # Cardinality
    minCount: str
    maxCount: str

    # Value type constraints
    datatype: str
    nodeKind: str

    # Node kinds
    BlankNode: str
    IRI: str
    Literal: str
    BlankNodeOrIRI: str
    BlankNodeOrLiteral: str
    IRIOrLiteral: str

    # Value constraints
    node: str
    hasValue: str

    # Note: 'class' is a Python reserved word, so use SH["class"] or
    # SHACL["class"]. Attribute spellings such as SH.class_ are NOT
    # equivalent: a plain Namespace appends the attribute name verbatim,
    # giving ...#class_.

    # Value range
    minExclusive: str
    minInclusive: str
    maxExclusive: str
    maxInclusive: str
    minLength: str
    maxLength: str
    pattern: str

    # Logical constraints
    closed: str
    ignoredProperties: str

    # List constraints
    # Kept for backward compatibility only.
    # NOTE(review): this appears to resolve to ...#in_ rather than sh:in;
    # confirm against rdflib and prefer SHACL["in"].
    in_: str

    # Severity
    severity: str
    Violation: str
    Warning: str
    Info: str

    # Property paths
    alternativePath: str
    inversePath: str
    oneOrMorePath: str
    zeroOrMorePath: str
    zeroOrOnePath: str

    # Terms that are not valid Python identifiers (reserved words)
    _extras = ["class", "in"]

    # Namespace
    _NS = Namespace("http://www.w3.org/ns/shacl#")
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
# Standard SHACL prefix bindings for serialisation
|
|
91
|
+
SHACL_PREFIXES = {
|
|
92
|
+
"sh": SH,
|
|
93
|
+
}
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
"""Statistics and metrics module for RDF ontologies.
|
|
2
|
+
|
|
3
|
+
Computes comprehensive metrics about an ontology's structure, complexity,
|
|
4
|
+
and documentation coverage.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from rdf_construct.stats.collector import (
|
|
8
|
+
OntologyStats,
|
|
9
|
+
collect_stats,
|
|
10
|
+
)
|
|
11
|
+
from rdf_construct.stats.comparator import (
|
|
12
|
+
ComparisonResult,
|
|
13
|
+
MetricChange,
|
|
14
|
+
compare_stats,
|
|
15
|
+
)
|
|
16
|
+
from rdf_construct.stats.formatters import format_stats, format_comparison
|
|
17
|
+
|
|
18
|
+
__all__ = [
|
|
19
|
+
# Main collection
|
|
20
|
+
"OntologyStats",
|
|
21
|
+
"collect_stats",
|
|
22
|
+
# Comparison
|
|
23
|
+
"ComparisonResult",
|
|
24
|
+
"MetricChange",
|
|
25
|
+
"compare_stats",
|
|
26
|
+
# Formatting
|
|
27
|
+
"format_stats",
|
|
28
|
+
"format_comparison",
|
|
29
|
+
]
|
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
"""Main statistics collector for RDF ontologies.
|
|
2
|
+
|
|
3
|
+
Orchestrates metric collection from multiple specialised collectors and
|
|
4
|
+
aggregates results into a single OntologyStats object.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from dataclasses import dataclass, field
|
|
8
|
+
from datetime import datetime
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import Any
|
|
11
|
+
|
|
12
|
+
from rdflib import Graph
|
|
13
|
+
|
|
14
|
+
from rdf_construct.stats.metrics.basic import BasicStats, collect_basic_stats
|
|
15
|
+
from rdf_construct.stats.metrics.hierarchy import HierarchyStats, collect_hierarchy_stats
|
|
16
|
+
from rdf_construct.stats.metrics.properties import PropertyStats, collect_property_stats
|
|
17
|
+
from rdf_construct.stats.metrics.documentation import DocumentationStats, collect_documentation_stats
|
|
18
|
+
from rdf_construct.stats.metrics.complexity import ComplexityStats, collect_complexity_stats
|
|
19
|
+
from rdf_construct.stats.metrics.connectivity import ConnectivityStats, collect_connectivity_stats
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@dataclass
class OntologyStats:
    """Complete statistics for an ontology.

    One record bundling the per-category metric objects together with the
    source identifier and the collection time.

    Attributes:
        source: Path to the source ontology file
        timestamp: When the stats were collected
        basic: Basic count metrics (triples, classes, properties)
        hierarchy: Hierarchy metrics (depth, branching, orphans)
        properties: Property metrics (domain/range coverage)
        documentation: Documentation coverage (labels, comments)
        complexity: Complexity indicators (multiple inheritance, axioms)
        connectivity: Connectivity metrics (most connected, isolated)
    """

    source: str
    timestamp: datetime
    basic: BasicStats
    hierarchy: HierarchyStats
    properties: PropertyStats
    documentation: DocumentationStats
    complexity: ComplexityStats
    connectivity: ConnectivityStats

    def to_dict(self) -> dict[str, Any]:
        """Convert stats to dictionary for JSON serialisation.

        The timestamp is rendered with ``isoformat()``; each category becomes
        a nested dictionary keyed by metric name.

        Returns:
            Dictionary representation of all statistics.
        """

        def section(stats: Any, *names: str) -> dict[str, Any]:
            # Copy the named attributes, in the given order, into a dict.
            return {name: getattr(stats, name) for name in names}

        payload: dict[str, Any] = {
            "source": self.source,
            "timestamp": self.timestamp.isoformat(),
        }
        payload["basic"] = section(
            self.basic,
            "triples",
            "classes",
            "object_properties",
            "datatype_properties",
            "annotation_properties",
            "individuals",
        )
        payload["hierarchy"] = section(
            self.hierarchy,
            "root_classes",
            "leaf_classes",
            "max_depth",
            "avg_depth",
            "avg_branching",
            "orphan_classes",
            "orphan_rate",
        )
        payload["properties"] = section(
            self.properties,
            "with_domain",
            "with_range",
            "domain_coverage",
            "range_coverage",
            "inverse_pairs",
            "functional",
            "symmetric",
        )
        payload["documentation"] = section(
            self.documentation,
            "classes_labelled",
            "classes_labelled_pct",
            "classes_documented",
            "classes_documented_pct",
            "properties_labelled",
            "properties_labelled_pct",
        )
        payload["complexity"] = section(
            self.complexity,
            "avg_properties_per_class",
            "avg_superclasses_per_class",
            "multiple_inheritance_count",
            "owl_restriction_count",
            "owl_equivalent_count",
        )
        payload["connectivity"] = section(
            self.connectivity,
            "most_connected_class",
            "most_connected_count",
            "isolated_classes",
        )
        return payload
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
# Names of the metric categories accepted by the include/exclude filters.
METRIC_CATEGORIES = frozenset(
    (
        "basic",
        "hierarchy",
        "properties",
        "documentation",
        "complexity",
        "connectivity",
    )
)
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def collect_stats(
    graph: Graph,
    source: str | Path = "<graph>",
    include: set[str] | None = None,
    exclude: set[str] | None = None,
) -> OntologyStats:
    """Collect comprehensive statistics for an ontology.

    Categories that are filtered out are represented by a default-constructed
    stats object rather than being omitted.

    Args:
        graph: The RDF graph to analyse.
        source: Source file path or identifier for reporting.
        include: Set of category names to include (default: all).
        exclude: Set of category names to exclude (default: none).

    Returns:
        OntologyStats containing all collected metrics.

    Raises:
        ValueError: If include/exclude contain unknown category names.
    """
    known = METRIC_CATEGORIES

    # Validate category names: include is checked before exclude.
    for requested in (include, exclude):
        if not requested:
            continue
        unknown = requested - known
        if unknown:
            raise ValueError(f"Unknown metric categories: {', '.join(sorted(unknown))}")

    # Determine which categories to collect. An empty or missing include
    # selects every category.
    selected = include if include else set(known)
    if exclude:
        selected = selected - exclude

    def gather(name, collector, empty):
        # Run the collector only for a selected category; otherwise fall
        # back to the category's default-constructed stats object.
        return collector(graph) if name in selected else empty()

    # Collect each category in a fixed order, before the timestamp is taken.
    basic = gather("basic", collect_basic_stats, BasicStats)
    hierarchy = gather("hierarchy", collect_hierarchy_stats, HierarchyStats)
    properties = gather("properties", collect_property_stats, PropertyStats)
    documentation = gather("documentation", collect_documentation_stats, DocumentationStats)
    complexity = gather("complexity", collect_complexity_stats, ComplexityStats)
    connectivity = gather("connectivity", collect_connectivity_stats, ConnectivityStats)

    return OntologyStats(
        source=str(source),
        timestamp=datetime.now(),
        basic=basic,
        hierarchy=hierarchy,
        properties=properties,
        documentation=documentation,
        complexity=complexity,
        connectivity=connectivity,
    )
|