rdf-construct 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110) hide show
  1. rdf_construct/__init__.py +12 -0
  2. rdf_construct/__main__.py +0 -0
  3. rdf_construct/cli.py +3429 -0
  4. rdf_construct/core/__init__.py +33 -0
  5. rdf_construct/core/config.py +116 -0
  6. rdf_construct/core/ordering.py +219 -0
  7. rdf_construct/core/predicate_order.py +212 -0
  8. rdf_construct/core/profile.py +157 -0
  9. rdf_construct/core/selector.py +64 -0
  10. rdf_construct/core/serialiser.py +232 -0
  11. rdf_construct/core/utils.py +89 -0
  12. rdf_construct/cq/__init__.py +77 -0
  13. rdf_construct/cq/expectations.py +365 -0
  14. rdf_construct/cq/formatters/__init__.py +45 -0
  15. rdf_construct/cq/formatters/json.py +104 -0
  16. rdf_construct/cq/formatters/junit.py +104 -0
  17. rdf_construct/cq/formatters/text.py +146 -0
  18. rdf_construct/cq/loader.py +300 -0
  19. rdf_construct/cq/runner.py +321 -0
  20. rdf_construct/diff/__init__.py +59 -0
  21. rdf_construct/diff/change_types.py +214 -0
  22. rdf_construct/diff/comparator.py +338 -0
  23. rdf_construct/diff/filters.py +133 -0
  24. rdf_construct/diff/formatters/__init__.py +71 -0
  25. rdf_construct/diff/formatters/json.py +192 -0
  26. rdf_construct/diff/formatters/markdown.py +210 -0
  27. rdf_construct/diff/formatters/text.py +195 -0
  28. rdf_construct/docs/__init__.py +60 -0
  29. rdf_construct/docs/config.py +238 -0
  30. rdf_construct/docs/extractors.py +603 -0
  31. rdf_construct/docs/generator.py +360 -0
  32. rdf_construct/docs/renderers/__init__.py +7 -0
  33. rdf_construct/docs/renderers/html.py +803 -0
  34. rdf_construct/docs/renderers/json.py +390 -0
  35. rdf_construct/docs/renderers/markdown.py +628 -0
  36. rdf_construct/docs/search.py +278 -0
  37. rdf_construct/docs/templates/html/base.html.jinja +44 -0
  38. rdf_construct/docs/templates/html/class.html.jinja +152 -0
  39. rdf_construct/docs/templates/html/hierarchy.html.jinja +28 -0
  40. rdf_construct/docs/templates/html/index.html.jinja +110 -0
  41. rdf_construct/docs/templates/html/instance.html.jinja +90 -0
  42. rdf_construct/docs/templates/html/namespaces.html.jinja +37 -0
  43. rdf_construct/docs/templates/html/property.html.jinja +124 -0
  44. rdf_construct/docs/templates/html/single_page.html.jinja +169 -0
  45. rdf_construct/lint/__init__.py +75 -0
  46. rdf_construct/lint/config.py +214 -0
  47. rdf_construct/lint/engine.py +396 -0
  48. rdf_construct/lint/formatters.py +327 -0
  49. rdf_construct/lint/rules.py +692 -0
  50. rdf_construct/localise/__init__.py +114 -0
  51. rdf_construct/localise/config.py +508 -0
  52. rdf_construct/localise/extractor.py +427 -0
  53. rdf_construct/localise/formatters/__init__.py +36 -0
  54. rdf_construct/localise/formatters/markdown.py +229 -0
  55. rdf_construct/localise/formatters/text.py +224 -0
  56. rdf_construct/localise/merger.py +346 -0
  57. rdf_construct/localise/reporter.py +356 -0
  58. rdf_construct/main.py +6 -0
  59. rdf_construct/merge/__init__.py +165 -0
  60. rdf_construct/merge/config.py +354 -0
  61. rdf_construct/merge/conflicts.py +281 -0
  62. rdf_construct/merge/formatters.py +426 -0
  63. rdf_construct/merge/merger.py +425 -0
  64. rdf_construct/merge/migrator.py +339 -0
  65. rdf_construct/merge/rules.py +377 -0
  66. rdf_construct/merge/splitter.py +1102 -0
  67. rdf_construct/puml2rdf/__init__.py +103 -0
  68. rdf_construct/puml2rdf/config.py +230 -0
  69. rdf_construct/puml2rdf/converter.py +420 -0
  70. rdf_construct/puml2rdf/merger.py +200 -0
  71. rdf_construct/puml2rdf/model.py +202 -0
  72. rdf_construct/puml2rdf/parser.py +565 -0
  73. rdf_construct/puml2rdf/validators.py +451 -0
  74. rdf_construct/refactor/__init__.py +72 -0
  75. rdf_construct/refactor/config.py +362 -0
  76. rdf_construct/refactor/deprecator.py +328 -0
  77. rdf_construct/refactor/formatters/__init__.py +8 -0
  78. rdf_construct/refactor/formatters/text.py +311 -0
  79. rdf_construct/refactor/renamer.py +294 -0
  80. rdf_construct/shacl/__init__.py +56 -0
  81. rdf_construct/shacl/config.py +166 -0
  82. rdf_construct/shacl/converters.py +520 -0
  83. rdf_construct/shacl/generator.py +364 -0
  84. rdf_construct/shacl/namespaces.py +93 -0
  85. rdf_construct/stats/__init__.py +29 -0
  86. rdf_construct/stats/collector.py +178 -0
  87. rdf_construct/stats/comparator.py +298 -0
  88. rdf_construct/stats/formatters/__init__.py +83 -0
  89. rdf_construct/stats/formatters/json.py +38 -0
  90. rdf_construct/stats/formatters/markdown.py +153 -0
  91. rdf_construct/stats/formatters/text.py +186 -0
  92. rdf_construct/stats/metrics/__init__.py +26 -0
  93. rdf_construct/stats/metrics/basic.py +147 -0
  94. rdf_construct/stats/metrics/complexity.py +137 -0
  95. rdf_construct/stats/metrics/connectivity.py +130 -0
  96. rdf_construct/stats/metrics/documentation.py +128 -0
  97. rdf_construct/stats/metrics/hierarchy.py +207 -0
  98. rdf_construct/stats/metrics/properties.py +88 -0
  99. rdf_construct/uml/__init__.py +22 -0
  100. rdf_construct/uml/context.py +194 -0
  101. rdf_construct/uml/mapper.py +371 -0
  102. rdf_construct/uml/odm_renderer.py +789 -0
  103. rdf_construct/uml/renderer.py +684 -0
  104. rdf_construct/uml/uml_layout.py +393 -0
  105. rdf_construct/uml/uml_style.py +613 -0
  106. rdf_construct-0.3.0.dist-info/METADATA +496 -0
  107. rdf_construct-0.3.0.dist-info/RECORD +110 -0
  108. rdf_construct-0.3.0.dist-info/WHEEL +4 -0
  109. rdf_construct-0.3.0.dist-info/entry_points.txt +3 -0
  110. rdf_construct-0.3.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,364 @@
1
+ """SHACL shape generator from OWL ontologies.
2
+
3
+ Generates SHACL NodeShapes from OWL class definitions, converting
4
+ domain/range, cardinality restrictions, and other OWL patterns
5
+ to equivalent SHACL constraints.
6
+ """
7
+
8
+ from collections import defaultdict
9
+ from pathlib import Path
10
+ from typing import Any
11
+
12
+ from rdflib import BNode, Graph, Literal, Namespace, RDF, RDFS, URIRef
13
+ from rdflib.namespace import OWL
14
+
15
+ from .config import ShaclConfig, Severity, StrictnessLevel
16
+ from .converters import PropertyConstraint, get_converters_for_level
17
+ from .namespaces import SH, SHACL_PREFIXES
18
+
19
+
20
class ShapeGenerator:
    """Generates SHACL shapes from OWL ontology definitions.

    Orchestrates the conversion process, applying converters and
    building the output shapes graph.

    Attributes:
        config: Generation configuration.
        source_graph: The OWL ontology to convert.
        shapes_graph: The output SHACL shapes graph.
    """

    # Namespace roots treated as "standard" vocabularies; they are never
    # chosen as the basis for the generated shapes namespace.
    _STANDARD_NAMESPACE_ROOTS = (
        "http://www.w3.org/",
        "http://purl.org/dc/",
        "http://xmlns.com/",
    )

    def __init__(self, source_graph: Graph, config: ShaclConfig | None = None):
        """Initialise generator.

        Args:
            source_graph: The OWL ontology graph.
            config: Optional configuration (defaults provided).
        """
        self.config = config or ShaclConfig()
        self.source_graph = source_graph
        self.shapes_graph = Graph()

        # Bind the SHACL prefixes first so they are always available.
        for prefix, ns in SHACL_PREFIXES.items():
            self.shapes_graph.bind(prefix, ns)

        # Copy the source prefixes, skipping the unnamed prefix and the
        # core vocabularies listed below.
        for prefix, ns in source_graph.namespaces():
            if prefix and prefix not in ("xml", "xsd", "rdf", "rdfs", "owl"):
                self.shapes_graph.bind(prefix, ns)

        # Determine shape namespace
        self._shape_ns = self._determine_shape_namespace()
        self.shapes_graph.bind("shape", Namespace(self._shape_ns))

    @staticmethod
    def _shapes_namespace_for(base: str) -> str:
        """Derive a shapes namespace from an ontology IRI or namespace.

        A single trailing ``#`` or ``/`` is dropped before ``-shapes#`` is
        appended, e.g. ``http://ex.org/onto#`` -> ``http://ex.org/onto-shapes#``.
        """
        if base.endswith(("#", "/")):
            base = base[:-1]
        return base + "-shapes#"

    def _determine_shape_namespace(self) -> str:
        """Determine the namespace for generated shapes.

        Uses the IRI of an ``owl:Ontology`` declaration if one is present.
        Otherwise falls back to a bound non-standard namespace, preferring
        one that is actually used by a subject in the source graph, and
        finally to ``http://example.org/shapes#``.

        Returns:
            The namespace string (ending in ``#``) for shape IRIs.
        """
        # Look for owl:Ontology
        for ont in self.source_graph.subjects(RDF.type, OWL.Ontology):
            if isinstance(ont, URIRef):
                return self._shapes_namespace_for(str(ont))

        # Fallback: candidate namespaces are the bound non-standard ones.
        candidates = [
            str(ns)
            for _prefix, ns in self.source_graph.namespaces()
            if not str(ns).startswith(self._STANDARD_NAMESPACE_ROOTS)
        ]
        if not candidates:
            return "http://example.org/shapes#"

        # A graph can carry prefix bindings for vocabularies it never uses
        # (presumably including rdflib's own default bindings - confirm
        # against the rdflib version in use). Prefer a namespace that a
        # subject of the ontology actually lives in.
        subject_iris = {
            str(subj)
            for subj in self.source_graph.subjects()
            if isinstance(subj, URIRef)
        }
        for ns_str in candidates:
            if any(iri.startswith(ns_str) for iri in subject_iris):
                return self._shapes_namespace_for(ns_str)

        # No candidate is used by any subject: keep the historical
        # behaviour of taking the first non-standard binding.
        return self._shapes_namespace_for(candidates[0])

    def generate(self) -> Graph:
        """Generate SHACL shapes from the source ontology.

        Returns:
            Graph containing the generated SHACL shapes.
        """
        # Get all classes from ontology
        classes = self._get_target_classes()

        # Get converters for current strictness level
        converters = get_converters_for_level(self.config.level)

        # Generate a shape for each class the configuration accepts.
        for cls in classes:
            if self.config.should_generate_for(cls, self.source_graph):
                self._create_node_shape(cls, converters)

        return self.shapes_graph

    def _get_target_classes(self) -> list[URIRef]:
        """Get all target classes from the ontology.

        Finds both owl:Class and rdfs:Class entities; blank-node classes
        are skipped.

        Returns:
            List of class URIs, ordered by local name and then by full IRI.
        """
        classes: set[URIRef] = set()
        for class_type in (OWL.Class, RDFS.Class):
            classes.update(
                cls
                for cls in self.source_graph.subjects(RDF.type, class_type)
                if isinstance(cls, URIRef)
            )

        # Sort by local name for consistent output. The full IRI breaks
        # ties so that classes sharing a local name do not come out in
        # set-iteration order.
        return sorted(classes, key=lambda c: (self._local_name(c), str(c)))

    def _local_name(self, uri: URIRef) -> str:
        """Extract local name from URI.

        The local name is the text after the last ``#`` if the URI has one,
        otherwise the text after the last ``/``, otherwise the whole URI.
        """
        s = str(uri)
        if "#" in s:
            return s.rsplit("#", 1)[1]
        if "/" in s:
            return s.rsplit("/", 1)[1]
        return s

    def _create_node_shape(self, cls: URIRef, converters: list) -> URIRef:
        """Create a NodeShape for a class.

        Args:
            cls: The class to create a shape for.
            converters: List of converters to apply.

        Returns:
            URI of the created shape.
        """
        # NOTE: the shape IRI is built from the local name only, so two
        # classes with the same local name in different namespaces map to
        # the same shape IRI.
        shape_uri = URIRef(f"{self._shape_ns}{self._local_name(cls)}Shape")

        # Basic shape definition
        self.shapes_graph.add((shape_uri, RDF.type, SH.NodeShape))
        self.shapes_graph.add((shape_uri, SH.targetClass, cls))

        # Add name from rdfs:label if available
        if self.config.include_labels:
            label = self.source_graph.value(cls, RDFS.label)
            if label:
                self.shapes_graph.add((shape_uri, SH.name, Literal(str(label))))

        # Add description from rdfs:comment
        if self.config.include_descriptions:
            comment = self.source_graph.value(cls, RDFS.comment)
            if comment:
                self.shapes_graph.add(
                    (shape_uri, SH.description, Literal(str(comment)))
                )

        # Collect all property constraints, one merged entry per path.
        prop_constraints: dict[URIRef, PropertyConstraint] = {}
        for converter in converters:
            constraints = converter.convert_for_class(
                cls, self.source_graph, self.config
            )
            for constraint in constraints:
                existing = prop_constraints.get(constraint.path)
                if existing is not None:
                    # Merge with existing constraint
                    prop_constraints[constraint.path] = existing.merge(constraint)
                else:
                    prop_constraints[constraint.path] = constraint

        # Inherit constraints from superclasses if configured. A constraint
        # declared directly on the class always wins over an inherited one.
        if self.config.inherit_constraints:
            inherited = self._get_inherited_constraints(cls, converters)
            for path, constraint in inherited.items():
                if path not in prop_constraints:
                    prop_constraints[path] = constraint

        # Add property shapes, sorted by path for consistent output;
        # sh:order values are assigned 1..n in that same order.
        for order, path in enumerate(sorted(prop_constraints, key=str), start=1):
            constraint = prop_constraints[path]
            constraint.order = order

            prop_shape = constraint.to_rdf(self.shapes_graph)
            self.shapes_graph.add((shape_uri, SH.property, prop_shape))

        # Closed shapes are only emitted at the STRICT level.
        if self.config.closed and self.config.level == StrictnessLevel.STRICT:
            self.shapes_graph.add((shape_uri, SH.closed, Literal(True)))

            # Add ignored properties
            ignored = self._get_ignored_properties()
            if ignored:
                ignored_list = self._create_rdf_list(ignored)
                self.shapes_graph.add(
                    (shape_uri, SH.ignoredProperties, ignored_list)
                )

        return shape_uri

    def _get_inherited_constraints(
        self, cls: URIRef, converters: list
    ) -> dict[URIRef, PropertyConstraint]:
        """Get property constraints from superclasses.

        Walks ``rdfs:subClassOf`` transitively from ``cls``; the class
        itself is not converted here. Blank-node superclasses are skipped.

        Args:
            cls: The class to get inherited constraints for.
            converters: Converters to apply.

        Returns:
            Dictionary mapping property URIs to constraints.
        """
        inherited: dict[URIRef, PropertyConstraint] = {}

        # Walk up the class hierarchy; `visited` guards against cycles and
        # against reaching the same ancestor by more than one route.
        visited: set[URIRef] = set()
        to_visit = list(self.source_graph.objects(cls, RDFS.subClassOf))

        while to_visit:
            superclass = to_visit.pop()
            if not isinstance(superclass, URIRef) or superclass in visited:
                continue

            visited.add(superclass)

            # Apply converters to superclass
            for converter in converters:
                constraints = converter.convert_for_class(
                    superclass, self.source_graph, self.config
                )

                for constraint in constraints:
                    if constraint.path not in inherited:
                        inherited[constraint.path] = constraint
                    else:
                        inherited[constraint.path] = inherited[
                            constraint.path
                        ].merge(constraint)

            # Add parent's parents
            to_visit.extend(self.source_graph.objects(superclass, RDFS.subClassOf))

        return inherited

    def _get_ignored_properties(self) -> list[URIRef]:
        """Get list of properties to ignore in closed shapes.

        Returns:
            ``rdf:type`` followed by every configured ignored property that
            could be expanded to an IRI, without duplicates. Entries that
            cannot be expanded are left out.
        """
        ignored: list[URIRef] = [RDF.type]  # Always ignore rdf:type

        # Add user-configured ignored properties
        for prop_str in self.config.ignored_properties:
            # Expand CURIE if possible
            expanded = self._expand_curie(prop_str)
            if expanded and expanded not in ignored:
                ignored.append(expanded)

        return ignored

    def _expand_curie(self, curie: str) -> URIRef | None:
        """Expand a CURIE to full URI.

        Args:
            curie: Either a ``prefix:local`` CURIE or an absolute IRI.

        Returns:
            The expanded IRI, or None when the value is neither an absolute
            IRI nor a CURIE whose prefix is bound in the source graph.
        """
        # Anything with a scheme separator is already an absolute IRI.
        if "://" in curie:
            return URIRef(curie)

        # Try the bound prefixes before any string heuristics, so a prefix
        # that merely starts with "http" (e.g. "httpx:") still expands.
        prefix, sep, local = curie.partition(":")
        if sep:
            for bound_prefix, ns in self.source_graph.namespaces():
                if bound_prefix == prefix:
                    return URIRef(str(ns) + local)

        # Legacy behaviour: unmatched values starting with "http" are
        # passed through as IRIs.
        if curie.startswith("http"):
            return URIRef(curie)

        return None

    def _create_rdf_list(self, items: list[URIRef]) -> BNode | URIRef:
        """Create an RDF list from items.

        The list cells are added to ``shapes_graph``.

        Args:
            items: The list members, in order.

        Returns:
            The blank node heading the list, or ``rdf:nil`` for an empty list.
        """
        if not items:
            return RDF.nil

        head = BNode()
        current = head
        last_index = len(items) - 1

        for i, item in enumerate(items):
            self.shapes_graph.add((current, RDF.first, item))

            if i < last_index:
                next_node = BNode()
                self.shapes_graph.add((current, RDF.rest, next_node))
                current = next_node
            else:
                # Terminate the list on the final cell.
                self.shapes_graph.add((current, RDF.rest, RDF.nil))

        return head
306
+
307
+
308
def generate_shapes(
    source: Path | str | Graph,
    config: ShaclConfig | None = None,
    output_format: str = "turtle",
) -> tuple[Graph, str]:
    """Generate SHACL shapes from an OWL ontology.

    Main entry point for shape generation.

    Args:
        source: Path to ontology file (``Path`` or ``str``) or a pre-loaded
            Graph. Files are always parsed as Turtle.
        config: Optional generation configuration.
        output_format: Output serialisation format.

    Returns:
        Tuple of (shapes graph, serialised string).
    """
    # Load source if it is a path. Plain strings are accepted as well;
    # previously they fell through to the Graph branch and failed later
    # with an unrelated attribute error.
    if isinstance(source, (str, Path)):
        source_graph = Graph()
        source_graph.parse(str(source), format="turtle")
    else:
        source_graph = source

    # Generate shapes
    generator = ShapeGenerator(source_graph, config)
    shapes_graph = generator.generate()

    # Serialise
    output = shapes_graph.serialize(format=output_format)

    return shapes_graph, output
340
+
341
+
342
def generate_shapes_to_file(
    source: Path,
    output: Path,
    config: ShaclConfig | None = None,
    output_format: str = "turtle",
) -> Graph:
    """Generate SHACL shapes and write to file.

    Missing parent directories of ``output`` are created.

    Args:
        source: Path to ontology file.
        output: Path to write shapes to.
        config: Optional generation configuration.
        output_format: Output serialisation format.

    Returns:
        The generated shapes graph.
    """
    shapes_graph, serialised = generate_shapes(source, config, output_format)

    output.parent.mkdir(parents=True, exist_ok=True)
    # Pin the encoding: without it write_text uses the locale's default,
    # which can mangle or reject non-ASCII labels and comments.
    output.write_text(serialised, encoding="utf-8")

    return shapes_graph
@@ -0,0 +1,93 @@
1
+ """SHACL namespace definitions and utilities."""
2
+
3
+ from rdflib import Namespace
4
+ from rdflib.namespace import DefinedNamespace
5
+
6
# SHACL namespace. Terms are looked up by attribute (SH.NodeShape) or, for
# names that are Python keywords, by item (SH["class"]).
SH = Namespace("http://www.w3.org/ns/shacl#")
8
+
9
+
10
class SHACL(DefinedNamespace):
    """SHACL namespace with commonly used terms.

    Provides typed access to SHACL vocabulary terms for shape generation.
    Terms whose names are Python keywords (``class``, ``in``) cannot be
    written as attributes; they are declared in ``_extras`` and are reached
    with item access, e.g. ``SHACL["class"]``.
    """

    # Core shape types
    NodeShape: str
    PropertyShape: str
    Shape: str

    # Targeting
    targetClass: str
    targetNode: str
    targetSubjectsOf: str
    targetObjectsOf: str

    # Property constraints
    property: str
    path: str
    name: str
    description: str
    order: str
    group: str

    # Cardinality
    minCount: str
    maxCount: str

    # Value type constraints
    datatype: str
    nodeKind: str

    # Node kinds
    BlankNode: str
    IRI: str
    Literal: str
    BlankNodeOrIRI: str
    BlankNodeOrLiteral: str
    IRIOrLiteral: str

    # Value constraints
    node: str
    hasValue: str

    # Note: 'class' is a Python reserved word, so sh:class is only reachable
    # with item access: SH["class"] / SHACL["class"]. An attribute such as
    # SH.class_ appends the name verbatim and would yield "...#class_".

    # Value range
    minExclusive: str
    minInclusive: str
    maxExclusive: str
    maxInclusive: str
    minLength: str
    maxLength: str
    pattern: str

    # Logical constraints
    closed: str
    ignoredProperties: str

    # List constraints
    # Kept for backward compatibility only.
    # NOTE(review): this presumably resolves to "...#in_" rather than sh:in;
    # use SHACL["in"] for the real term - confirm against rdflib.
    in_: str

    # Severity
    severity: str
    Violation: str
    Warning: str
    Info: str

    # Property paths
    alternativePath: str
    inversePath: str
    oneOrMorePath: str
    zeroOrMorePath: str
    zeroOrOnePath: str

    # Terms that are not valid Python identifiers; declared here so that
    # item access to them is treated as a known term.
    _extras = ["class", "in"]

    # Namespace
    _NS = Namespace("http://www.w3.org/ns/shacl#")
88
+
89
+
90
# Standard SHACL prefix bindings for serialisation.
# Maps each prefix label to the namespace it should be bound to.
SHACL_PREFIXES = {
    "sh": SH,
}
@@ -0,0 +1,29 @@
1
+ """Statistics and metrics module for RDF ontologies.
2
+
3
+ Computes comprehensive metrics about an ontology's structure, complexity,
4
+ and documentation coverage.
5
+ """
6
+
7
+ from rdf_construct.stats.collector import (
8
+ OntologyStats,
9
+ collect_stats,
10
+ )
11
+ from rdf_construct.stats.comparator import (
12
+ ComparisonResult,
13
+ MetricChange,
14
+ compare_stats,
15
+ )
16
+ from rdf_construct.stats.formatters import format_stats, format_comparison
17
+
18
# Public API of the stats package: everything re-exported from the
# collector, comparator and formatters submodules.
__all__ = [
    # Main collection
    "OntologyStats",
    "collect_stats",
    # Comparison
    "ComparisonResult",
    "MetricChange",
    "compare_stats",
    # Formatting
    "format_stats",
    "format_comparison",
]
@@ -0,0 +1,178 @@
1
+ """Main statistics collector for RDF ontologies.
2
+
3
+ Orchestrates metric collection from multiple specialised collectors and
4
+ aggregates results into a single OntologyStats object.
5
+ """
6
+
7
+ from dataclasses import dataclass, field
8
+ from datetime import datetime
9
+ from pathlib import Path
10
+ from typing import Any
11
+
12
+ from rdflib import Graph
13
+
14
+ from rdf_construct.stats.metrics.basic import BasicStats, collect_basic_stats
15
+ from rdf_construct.stats.metrics.hierarchy import HierarchyStats, collect_hierarchy_stats
16
+ from rdf_construct.stats.metrics.properties import PropertyStats, collect_property_stats
17
+ from rdf_construct.stats.metrics.documentation import DocumentationStats, collect_documentation_stats
18
+ from rdf_construct.stats.metrics.complexity import ComplexityStats, collect_complexity_stats
19
+ from rdf_construct.stats.metrics.connectivity import ConnectivityStats, collect_connectivity_stats
20
+
21
+
22
@dataclass
class OntologyStats:
    """Complete statistics for an ontology.

    Bundles the per-category metric objects together with the source
    identifier and the collection time.

    Attributes:
        source: Path to the source ontology file
        timestamp: When the stats were collected
        basic: Basic count metrics (triples, classes, properties)
        hierarchy: Hierarchy metrics (depth, branching, orphans)
        properties: Property metrics (domain/range coverage)
        documentation: Documentation coverage (labels, comments)
        complexity: Complexity indicators (multiple inheritance, axioms)
        connectivity: Connectivity metrics (most connected, isolated)
    """

    source: str
    timestamp: datetime
    basic: BasicStats
    hierarchy: HierarchyStats
    properties: PropertyStats
    documentation: DocumentationStats
    complexity: ComplexityStats
    connectivity: ConnectivityStats

    def to_dict(self) -> dict[str, Any]:
        """Convert stats to dictionary for JSON serialisation.

        The timestamp is rendered with ``isoformat()``; each category
        becomes a nested dictionary keyed by metric name.

        Returns:
            Dictionary representation of all statistics.
        """

        def section(stats: object, *fields: str) -> dict[str, Any]:
            # Read the named attributes off one category object, in order.
            return {name: getattr(stats, name) for name in fields}

        return {
            "source": self.source,
            "timestamp": self.timestamp.isoformat(),
            "basic": section(
                self.basic,
                "triples",
                "classes",
                "object_properties",
                "datatype_properties",
                "annotation_properties",
                "individuals",
            ),
            "hierarchy": section(
                self.hierarchy,
                "root_classes",
                "leaf_classes",
                "max_depth",
                "avg_depth",
                "avg_branching",
                "orphan_classes",
                "orphan_rate",
            ),
            "properties": section(
                self.properties,
                "with_domain",
                "with_range",
                "domain_coverage",
                "range_coverage",
                "inverse_pairs",
                "functional",
                "symmetric",
            ),
            "documentation": section(
                self.documentation,
                "classes_labelled",
                "classes_labelled_pct",
                "classes_documented",
                "classes_documented_pct",
                "properties_labelled",
                "properties_labelled_pct",
            ),
            "complexity": section(
                self.complexity,
                "avg_properties_per_class",
                "avg_superclasses_per_class",
                "multiple_inheritance_count",
                "owl_restriction_count",
                "owl_equivalent_count",
            ),
            "connectivity": section(
                self.connectivity,
                "most_connected_class",
                "most_connected_count",
                "isolated_classes",
            ),
        }
104
+
105
+
106
# Category names for filtering.
# These are the only values accepted in the include/exclude arguments of
# collect_stats; anything else makes it raise ValueError.
METRIC_CATEGORIES = frozenset({
    "basic",
    "hierarchy",
    "properties",
    "documentation",
    "complexity",
    "connectivity",
})
115
+
116
+
117
def collect_stats(
    graph: Graph,
    source: str | Path = "<graph>",
    include: set[str] | None = None,
    exclude: set[str] | None = None,
) -> OntologyStats:
    """Collect comprehensive statistics for an ontology.

    Categories that are filtered out are still present in the result, as
    default-constructed stats objects.

    Args:
        graph: The RDF graph to analyse.
        source: Source file path or identifier for reporting.
        include: Set of category names to include (default: all).
        exclude: Set of category names to exclude (default: none).

    Returns:
        OntologyStats containing all collected metrics.

    Raises:
        ValueError: If include/exclude contain unknown category names.
    """
    # Reject unknown names up front; include is checked before exclude.
    for requested in (include, exclude):
        if not requested:
            continue
        unrecognised = requested - METRIC_CATEGORIES
        if unrecognised:
            raise ValueError(
                f"Unknown metric categories: {', '.join(sorted(unrecognised))}"
            )

    # Work out the active categories: an empty or missing include means all.
    selected = include if include else set(METRIC_CATEGORIES)
    if exclude:
        selected = selected - exclude

    def gather(category, collector, empty):
        # Run the collector for an active category, else build the default.
        if category in selected:
            return collector(graph)
        return empty()

    basic = gather("basic", collect_basic_stats, BasicStats)
    hierarchy = gather("hierarchy", collect_hierarchy_stats, HierarchyStats)
    properties = gather("properties", collect_property_stats, PropertyStats)
    documentation = gather(
        "documentation", collect_documentation_stats, DocumentationStats
    )
    complexity = gather("complexity", collect_complexity_stats, ComplexityStats)
    connectivity = gather(
        "connectivity", collect_connectivity_stats, ConnectivityStats
    )

    # The timestamp is taken after collection has finished.
    collected_at = datetime.now()

    return OntologyStats(
        source=str(source),
        timestamp=collected_at,
        basic=basic,
        hierarchy=hierarchy,
        properties=properties,
        documentation=documentation,
        complexity=complexity,
        connectivity=connectivity,
    )