rdf-construct 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rdf_construct/__init__.py +12 -0
- rdf_construct/__main__.py +0 -0
- rdf_construct/cli.py +1762 -0
- rdf_construct/core/__init__.py +33 -0
- rdf_construct/core/config.py +116 -0
- rdf_construct/core/ordering.py +219 -0
- rdf_construct/core/predicate_order.py +212 -0
- rdf_construct/core/profile.py +157 -0
- rdf_construct/core/selector.py +64 -0
- rdf_construct/core/serialiser.py +232 -0
- rdf_construct/core/utils.py +89 -0
- rdf_construct/cq/__init__.py +77 -0
- rdf_construct/cq/expectations.py +365 -0
- rdf_construct/cq/formatters/__init__.py +45 -0
- rdf_construct/cq/formatters/json.py +104 -0
- rdf_construct/cq/formatters/junit.py +104 -0
- rdf_construct/cq/formatters/text.py +146 -0
- rdf_construct/cq/loader.py +300 -0
- rdf_construct/cq/runner.py +321 -0
- rdf_construct/diff/__init__.py +59 -0
- rdf_construct/diff/change_types.py +214 -0
- rdf_construct/diff/comparator.py +338 -0
- rdf_construct/diff/filters.py +133 -0
- rdf_construct/diff/formatters/__init__.py +71 -0
- rdf_construct/diff/formatters/json.py +192 -0
- rdf_construct/diff/formatters/markdown.py +210 -0
- rdf_construct/diff/formatters/text.py +195 -0
- rdf_construct/docs/__init__.py +60 -0
- rdf_construct/docs/config.py +238 -0
- rdf_construct/docs/extractors.py +603 -0
- rdf_construct/docs/generator.py +360 -0
- rdf_construct/docs/renderers/__init__.py +7 -0
- rdf_construct/docs/renderers/html.py +803 -0
- rdf_construct/docs/renderers/json.py +390 -0
- rdf_construct/docs/renderers/markdown.py +628 -0
- rdf_construct/docs/search.py +278 -0
- rdf_construct/docs/templates/html/base.html.jinja +44 -0
- rdf_construct/docs/templates/html/class.html.jinja +152 -0
- rdf_construct/docs/templates/html/hierarchy.html.jinja +28 -0
- rdf_construct/docs/templates/html/index.html.jinja +110 -0
- rdf_construct/docs/templates/html/instance.html.jinja +90 -0
- rdf_construct/docs/templates/html/namespaces.html.jinja +37 -0
- rdf_construct/docs/templates/html/property.html.jinja +124 -0
- rdf_construct/docs/templates/html/single_page.html.jinja +169 -0
- rdf_construct/lint/__init__.py +75 -0
- rdf_construct/lint/config.py +214 -0
- rdf_construct/lint/engine.py +396 -0
- rdf_construct/lint/formatters.py +327 -0
- rdf_construct/lint/rules.py +692 -0
- rdf_construct/main.py +6 -0
- rdf_construct/puml2rdf/__init__.py +103 -0
- rdf_construct/puml2rdf/config.py +230 -0
- rdf_construct/puml2rdf/converter.py +420 -0
- rdf_construct/puml2rdf/merger.py +200 -0
- rdf_construct/puml2rdf/model.py +202 -0
- rdf_construct/puml2rdf/parser.py +565 -0
- rdf_construct/puml2rdf/validators.py +451 -0
- rdf_construct/shacl/__init__.py +56 -0
- rdf_construct/shacl/config.py +166 -0
- rdf_construct/shacl/converters.py +520 -0
- rdf_construct/shacl/generator.py +364 -0
- rdf_construct/shacl/namespaces.py +93 -0
- rdf_construct/stats/__init__.py +29 -0
- rdf_construct/stats/collector.py +178 -0
- rdf_construct/stats/comparator.py +298 -0
- rdf_construct/stats/formatters/__init__.py +83 -0
- rdf_construct/stats/formatters/json.py +38 -0
- rdf_construct/stats/formatters/markdown.py +153 -0
- rdf_construct/stats/formatters/text.py +186 -0
- rdf_construct/stats/metrics/__init__.py +26 -0
- rdf_construct/stats/metrics/basic.py +147 -0
- rdf_construct/stats/metrics/complexity.py +137 -0
- rdf_construct/stats/metrics/connectivity.py +130 -0
- rdf_construct/stats/metrics/documentation.py +128 -0
- rdf_construct/stats/metrics/hierarchy.py +207 -0
- rdf_construct/stats/metrics/properties.py +88 -0
- rdf_construct/uml/__init__.py +22 -0
- rdf_construct/uml/context.py +194 -0
- rdf_construct/uml/mapper.py +371 -0
- rdf_construct/uml/odm_renderer.py +789 -0
- rdf_construct/uml/renderer.py +684 -0
- rdf_construct/uml/uml_layout.py +393 -0
- rdf_construct/uml/uml_style.py +613 -0
- rdf_construct-0.2.0.dist-info/METADATA +431 -0
- rdf_construct-0.2.0.dist-info/RECORD +88 -0
- rdf_construct-0.2.0.dist-info/WHEEL +4 -0
- rdf_construct-0.2.0.dist-info/entry_points.txt +3 -0
- rdf_construct-0.2.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
"""Text output formatter for ontology statistics."""
|
|
2
|
+
|
|
3
|
+
from typing import Optional
|
|
4
|
+
|
|
5
|
+
from rdflib import Graph
|
|
6
|
+
|
|
7
|
+
from rdf_construct.stats.collector import OntologyStats
|
|
8
|
+
from rdf_construct.stats.comparator import ComparisonResult
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def _format_pct(value: float) -> str:
|
|
12
|
+
"""Format a percentage value for display."""
|
|
13
|
+
return f"{value * 100:.1f}%"
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def _shorten_uri(uri: str, graph: Optional[Graph] = None) -> str:
|
|
17
|
+
"""Shorten a URI to CURIE if possible."""
|
|
18
|
+
if graph:
|
|
19
|
+
try:
|
|
20
|
+
qname = graph.namespace_manager.qname(uri)
|
|
21
|
+
return qname
|
|
22
|
+
except Exception:
|
|
23
|
+
pass
|
|
24
|
+
# Fallback: extract local name
|
|
25
|
+
if "#" in uri:
|
|
26
|
+
return uri.split("#")[-1]
|
|
27
|
+
if "/" in uri:
|
|
28
|
+
return uri.rsplit("/", 1)[-1]
|
|
29
|
+
return uri
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def format_text_stats(stats: OntologyStats, graph: Optional[Graph] = None) -> str:
    """Render ontology statistics as a plain-text report.

    The report has a header followed by one titled section per metric
    group: basic counts, hierarchy, properties, documentation, complexity
    and connectivity.

    Args:
        stats: The statistics to format.
        graph: Optional graph handed to ``_shorten_uri`` when abbreviating
            the most connected class URI.

    Returns:
        The report as a single newline-joined string.
    """
    report = [
        f"Ontology Statistics: {stats.source}",
        "=" * 50,
        "",
    ]

    basic = stats.basic
    report += [
        "BASIC COUNTS",
        f" Triples: {basic.triples:,}",
        f" Classes: {basic.classes:,}",
        f" Object Properties: {basic.object_properties:,}",
        f" Datatype Properties: {basic.datatype_properties:,}",
        f" Annotation Properties: {basic.annotation_properties:,}",
        f" Individuals: {basic.individuals:,}",
        "",
    ]

    hierarchy = stats.hierarchy
    report += [
        "HIERARCHY",
        f" Root Classes: {hierarchy.root_classes:,}",
        f" Leaf Classes: {hierarchy.leaf_classes:,}",
        f" Max Depth: {hierarchy.max_depth}",
        f" Avg Depth: {hierarchy.avg_depth:.1f}",
        f" Avg Branching: {hierarchy.avg_branching:.1f}",
        f" Orphan Classes: {hierarchy.orphan_classes} ({_format_pct(hierarchy.orphan_rate)})",
        "",
    ]

    properties = stats.properties
    report += [
        "PROPERTIES",
        f" With Domain: {properties.with_domain} ({_format_pct(properties.domain_coverage)})",
        f" With Range: {properties.with_range} ({_format_pct(properties.range_coverage)})",
        f" Inverse Pairs: {properties.inverse_pairs}",
        f" Functional: {properties.functional}",
        f" Symmetric: {properties.symmetric}",
        "",
    ]

    documentation = stats.documentation
    labelled_classes_pct = _format_pct(documentation.classes_labelled_pct)
    documented_classes_pct = _format_pct(documentation.classes_documented_pct)
    labelled_props_pct = _format_pct(documentation.properties_labelled_pct)
    report += [
        "DOCUMENTATION",
        f" Classes Labelled: {documentation.classes_labelled} ({labelled_classes_pct})",
        f" Classes Documented: {documentation.classes_documented} ({documented_classes_pct})",
        f" Properties Labelled: {documentation.properties_labelled} ({labelled_props_pct})",
        "",
    ]

    complexity = stats.complexity
    report += [
        "COMPLEXITY",
        f" Avg Props/Class: {complexity.avg_properties_per_class:.1f}",
        f" Avg Superclasses: {complexity.avg_superclasses_per_class:.1f}",
        f" Multiple Inheritance: {complexity.multiple_inheritance_count}",
        f" OWL Restrictions: {complexity.owl_restriction_count}",
        f" Equivalent Classes: {complexity.owl_equivalent_count}",
        "",
    ]

    connectivity = stats.connectivity
    report.append("CONNECTIVITY")
    hub = connectivity.most_connected_class
    if not hub:
        # No class is referenced by any property domain/range.
        report.append(" Most Connected: (none)")
    else:
        hub_label = _shorten_uri(hub, graph)
        report.append(
            f" Most Connected: {hub_label} ({connectivity.most_connected_count} refs)"
        )
    report.append(f" Isolated Classes: {connectivity.isolated_classes}")

    return "\n".join(report)
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def format_text_comparison(
    comparison: ComparisonResult,
    graph: Optional[Graph] = None,
) -> str:
    """Format comparison results as an aligned text table.

    Args:
        comparison: The comparison result to format. Its ``old_source``,
            ``new_source``, ``changes`` and ``summary`` attributes are read.
        graph: Optional graph for CURIE formatting. Currently unused by
            this function.

    Returns:
        Formatted text string: a header, then either a "No changes
        detected." notice or a table with one row per change followed by
        the summary line. An upper-cased category heading is emitted each
        time the category differs from the previous row's.
    """
    lines = [
        f"Comparing: {comparison.old_source} → {comparison.new_source}",
        "=" * 60,
        "",
    ]

    if not comparison.changes:
        lines.append("No changes detected.")
        return "\n".join(lines)

    # Table header
    lines.append(f"{'Metric':<30} {'Old':>10} {'New':>10} {'Change':>15}")
    lines.append("-" * 65)

    current_category = None
    for change in comparison.changes:
        if change.category != current_category:
            current_category = change.category
            lines.append(f"\n{current_category.upper()}")

        old_str = _format_value(change.old_value)
        new_str = _format_value(change.new_value)

        # Signed delta, with the signed percentage appended when available.
        if change.delta is None:
            delta_str = "-"
        else:
            delta_str = f"{change.delta:+g}"
            if change.pct_change is not None:
                delta_str += f" ({change.pct_change:+.1f}%)"

        # `improved` is tri-state: True and False get a marker, None gets none.
        if change.improved is True:
            delta_str += " ✓"
        elif change.improved is False:
            delta_str += " ⚠"

        metric_name = change.metric.replace("_", " ").title()
        # Two-space indent + 28-wide name fills the 30-wide "Metric" column,
        # so the value columns line up under their headings.
        lines.append(f"  {metric_name:<28} {old_str:>10} {new_str:>10} {delta_str:>15}")

    lines.append("")
    lines.append(f"Summary: {comparison.summary}")

    return "\n".join(lines)
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
def _format_value(value: float | int | str | None) -> str:
|
|
176
|
+
"""Format a metric value for display."""
|
|
177
|
+
if value is None:
|
|
178
|
+
return "-"
|
|
179
|
+
if isinstance(value, float):
|
|
180
|
+
if value < 1:
|
|
181
|
+
# Probably a percentage/rate
|
|
182
|
+
return f"{value * 100:.1f}%"
|
|
183
|
+
return f"{value:.2f}"
|
|
184
|
+
if isinstance(value, int):
|
|
185
|
+
return f"{value:,}"
|
|
186
|
+
return str(value)
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
"""Metric collectors for RDF ontology statistics."""
|
|
2
|
+
|
|
3
|
+
from rdf_construct.stats.metrics.basic import BasicStats, collect_basic_stats
|
|
4
|
+
from rdf_construct.stats.metrics.hierarchy import HierarchyStats, collect_hierarchy_stats
|
|
5
|
+
from rdf_construct.stats.metrics.properties import PropertyStats, collect_property_stats
|
|
6
|
+
from rdf_construct.stats.metrics.documentation import (
|
|
7
|
+
DocumentationStats,
|
|
8
|
+
collect_documentation_stats,
|
|
9
|
+
)
|
|
10
|
+
from rdf_construct.stats.metrics.complexity import ComplexityStats, collect_complexity_stats
|
|
11
|
+
from rdf_construct.stats.metrics.connectivity import ConnectivityStats, collect_connectivity_stats
|
|
12
|
+
|
|
13
|
+
__all__ = [
|
|
14
|
+
"BasicStats",
|
|
15
|
+
"collect_basic_stats",
|
|
16
|
+
"HierarchyStats",
|
|
17
|
+
"collect_hierarchy_stats",
|
|
18
|
+
"PropertyStats",
|
|
19
|
+
"collect_property_stats",
|
|
20
|
+
"DocumentationStats",
|
|
21
|
+
"collect_documentation_stats",
|
|
22
|
+
"ComplexityStats",
|
|
23
|
+
"collect_complexity_stats",
|
|
24
|
+
"ConnectivityStats",
|
|
25
|
+
"collect_connectivity_stats",
|
|
26
|
+
]
|
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
"""Basic count metrics for RDF ontologies.
|
|
2
|
+
|
|
3
|
+
Provides fundamental counts: triples, classes, properties, individuals.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from dataclasses import dataclass, field
|
|
7
|
+
|
|
8
|
+
from rdflib import Graph, RDF, RDFS
|
|
9
|
+
from rdflib.namespace import OWL
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@dataclass
class BasicStats:
    """Container for the basic entity counts of an ontology.

    Attributes:
        triples: Total number of triples in the graph.
        classes: Number of subjects typed owl:Class or rdfs:Class.
        object_properties: Number of subjects typed owl:ObjectProperty.
        datatype_properties: Number of subjects typed owl:DatatypeProperty.
        annotation_properties: Number of subjects typed owl:AnnotationProperty.
        individuals: Number of individuals (typed subjects that are neither
            classes nor properties).
    """

    triples: int = 0
    classes: int = 0
    object_properties: int = 0
    datatype_properties: int = 0
    annotation_properties: int = 0
    individuals: int = 0

    @property
    def total_properties(self) -> int:
        """Sum of the object, datatype and annotation property counts."""
        return sum(
            (
                self.object_properties,
                self.datatype_properties,
                self.annotation_properties,
            )
        )
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def get_all_classes(graph: Graph) -> set:
    """Collect every subject declared as a class in the graph.

    Args:
        graph: RDF graph to query.

    Returns:
        Set of subjects that have ``rdf:type owl:Class`` or
        ``rdf:type rdfs:Class``.
    """
    owl_classes = graph.subjects(RDF.type, OWL.Class)
    rdfs_classes = graph.subjects(RDF.type, RDFS.Class)
    return set(owl_classes).union(rdfs_classes)
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def get_all_properties(graph: Graph) -> set:
    """Collect every subject declared as a property in the graph.

    Args:
        graph: RDF graph to query.

    Returns:
        Set of subjects typed as owl:ObjectProperty, owl:DatatypeProperty,
        owl:AnnotationProperty or rdf:Property.
    """
    property_types = (
        OWL.ObjectProperty,
        OWL.DatatypeProperty,
        OWL.AnnotationProperty,
        RDF.Property,
    )
    return {
        subject
        for property_type in property_types
        for subject in graph.subjects(RDF.type, property_type)
    }
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def get_individuals(graph: Graph) -> set:
    """Get all named individuals from the graph.

    Individuals are IRI-named subjects that carry an ``rdf:type`` but are
    not classes, properties or ontology declarations.

    Args:
        graph: RDF graph to query.

    Returns:
        Set of individual URIRefs. Blank-node subjects (for example
        anonymous restrictions) are never included.
    """
    # Imported locally so this function does not depend on the module-level
    # import list, which does not bring URIRef into scope.
    from rdflib import URIRef

    classes = get_all_classes(graph)
    properties = get_all_properties(graph)
    ontologies = set(graph.subjects(RDF.type, OWL.Ontology))

    # Keep only IRI-named subjects. An attribute check for ``n3`` cannot do
    # this: every rdflib term, blank nodes included, provides that method.
    typed_named = {
        subject
        for subject in graph.subjects(RDF.type, None)
        if isinstance(subject, URIRef)
    }

    # The vocabulary terms themselves are not individuals even if a graph
    # happens to assert a type for them.
    vocabulary_terms = {
        OWL.Class,
        OWL.ObjectProperty,
        OWL.DatatypeProperty,
        OWL.AnnotationProperty,
        OWL.Restriction,
        OWL.Ontology,
        RDFS.Class,
        RDF.Property,
    }

    return typed_named - classes - properties - ontologies - vocabulary_terms
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def collect_basic_stats(graph: Graph) -> BasicStats:
    """Gather the basic entity counts for an RDF graph.

    Args:
        graph: RDF graph to analyse.

    Returns:
        BasicStats holding the triple count, the class count, the three
        OWL property-type counts and the individual count.
    """

    def count_typed(rdf_class) -> int:
        # Number of distinct subjects declared with the given rdf:type.
        return len(set(graph.subjects(RDF.type, rdf_class)))

    return BasicStats(
        triples=len(graph),
        classes=len(get_all_classes(graph)),
        object_properties=count_typed(OWL.ObjectProperty),
        datatype_properties=count_typed(OWL.DatatypeProperty),
        annotation_properties=count_typed(OWL.AnnotationProperty),
        individuals=len(get_individuals(graph)),
    )
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
"""Complexity metrics for RDF ontologies.
|
|
2
|
+
|
|
3
|
+
Analyses structural complexity: multiple inheritance, axioms, restrictions.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from collections import defaultdict
|
|
7
|
+
from dataclasses import dataclass
|
|
8
|
+
|
|
9
|
+
from rdflib import Graph, RDF, RDFS
|
|
10
|
+
from rdflib.namespace import OWL
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@dataclass
class ComplexityStats:
    """Container for the structural complexity figures of an ontology.

    Attributes:
        avg_properties_per_class: Mean number of property domain/range
            references per class.
        avg_superclasses_per_class: Mean number of direct superclasses per
            class.
        multiple_inheritance_count: Number of classes that have two or more
            direct superclasses.
        owl_restriction_count: Number of owl:Restriction nodes.
        owl_equivalent_count: Number of owl:equivalentClass statements.
    """

    avg_properties_per_class: float = 0.0
    avg_superclasses_per_class: float = 0.0
    multiple_inheritance_count: int = 0
    owl_restriction_count: int = 0
    owl_equivalent_count: int = 0
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _get_all_classes(graph: Graph) -> set:
    """Return the set of subjects typed as owl:Class or rdfs:Class."""
    return {
        subject
        for class_type in (OWL.Class, RDFS.Class)
        for subject in graph.subjects(RDF.type, class_type)
    }
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def _count_properties_per_class(graph: Graph, classes: set) -> dict:
    """Tally rdfs:domain and rdfs:range references to each class.

    Every domain or range statement whose object is one of ``classes``
    adds one to that class's tally.

    Args:
        graph: RDF graph to query.
        classes: Set of class URIRefs.

    Returns:
        Dictionary mapping class -> reference count. Classes that are never
        referenced have no entry.
    """
    tally: dict = defaultdict(int)

    # Domain statements are walked first, then range statements.
    for predicate in (RDFS.domain, RDFS.range):
        for _, _, target in graph.triples((None, predicate, None)):
            if target in classes:
                tally[target] += 1

    return dict(tally)
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def _count_superclasses(graph: Graph, classes: set) -> dict:
|
|
65
|
+
"""Count direct superclasses for each class.
|
|
66
|
+
|
|
67
|
+
Args:
|
|
68
|
+
graph: RDF graph to query.
|
|
69
|
+
classes: Set of class URIRefs.
|
|
70
|
+
|
|
71
|
+
Returns:
|
|
72
|
+
Dictionary mapping class -> superclass count.
|
|
73
|
+
"""
|
|
74
|
+
counts: dict = {}
|
|
75
|
+
|
|
76
|
+
for cls in classes:
|
|
77
|
+
superclasses = set(graph.objects(cls, RDFS.subClassOf))
|
|
78
|
+
# Only count named classes, not restrictions or other constructs
|
|
79
|
+
named_supers = superclasses & classes
|
|
80
|
+
counts[cls] = len(named_supers)
|
|
81
|
+
|
|
82
|
+
return counts
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def _count_multiple_inheritance(superclass_counts: dict) -> int:
|
|
86
|
+
"""Count classes with multiple inheritance (2+ superclasses).
|
|
87
|
+
|
|
88
|
+
Args:
|
|
89
|
+
superclass_counts: Dictionary mapping class -> superclass count.
|
|
90
|
+
|
|
91
|
+
Returns:
|
|
92
|
+
Number of classes with multiple inheritance.
|
|
93
|
+
"""
|
|
94
|
+
return sum(1 for count in superclass_counts.values() if count >= 2)
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def collect_complexity_stats(graph: Graph) -> ComplexityStats:
    """Collect complexity statistics from an RDF graph.

    Args:
        graph: RDF graph to analyse.

    Returns:
        ComplexityStats with all complexity metrics populated. The
        restriction and equivalent-class counts are reported even when the
        graph declares no classes; the per-class averages and the
        multiple-inheritance count are left at zero in that case.
    """
    classes = _get_all_classes(graph)
    total_classes = len(classes)

    # Graph-wide counts: these do not depend on any class being declared.
    restrictions = len(set(graph.subjects(RDF.type, OWL.Restriction)))
    # Count lazily instead of building a throwaway list of triples.
    equivalents = sum(1 for _ in graph.triples((None, OWL.equivalentClass, None)))

    if total_classes == 0:
        # Per-class averages are undefined without classes; keep the defaults.
        return ComplexityStats(
            owl_restriction_count=restrictions,
            owl_equivalent_count=equivalents,
        )

    # Properties per class
    prop_counts = _count_properties_per_class(graph, classes)
    avg_props = sum(prop_counts.values()) / total_classes

    # Superclasses per class
    super_counts = _count_superclasses(graph, classes)
    avg_supers = sum(super_counts.values()) / total_classes

    return ComplexityStats(
        avg_properties_per_class=round(avg_props, 2),
        avg_superclasses_per_class=round(avg_supers, 2),
        multiple_inheritance_count=_count_multiple_inheritance(super_counts),
        owl_restriction_count=restrictions,
        owl_equivalent_count=equivalents,
    )
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
"""Connectivity metrics for RDF ontologies.
|
|
2
|
+
|
|
3
|
+
Analyses how classes are connected through properties.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from collections import defaultdict
|
|
7
|
+
from dataclasses import dataclass
|
|
8
|
+
from typing import Optional
|
|
9
|
+
|
|
10
|
+
from rdflib import Graph, RDF, RDFS, URIRef
|
|
11
|
+
from rdflib.namespace import OWL
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@dataclass
class ConnectivityStats:
    """Container for the connectivity figures of an ontology.

    Attributes:
        most_connected_class: URI of the class with the most property
            domain/range references, or None when there is none.
        most_connected_count: Number of domain/range references to that
            class.
        isolated_classes: Number of classes counted as isolated.
    """

    most_connected_class: Optional[str] = None
    most_connected_count: int = 0
    isolated_classes: int = 0
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def _get_all_classes(graph: Graph) -> set:
    """Return every subject typed as owl:Class or rdfs:Class."""
    declared = set()
    for class_type in (OWL.Class, RDFS.Class):
        declared.update(graph.subjects(RDF.type, class_type))
    return declared
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _count_property_references(graph: Graph, classes: set) -> dict:
    """Tally how often each class appears as a property domain or range.

    Each rdfs:domain or rdfs:range statement whose object is one of
    ``classes`` adds one reference to that class.

    Args:
        graph: RDF graph to query.
        classes: Set of class URIRefs.

    Returns:
        Dictionary mapping class -> reference count. Classes without any
        reference have no entry.
    """
    references: dict = {}

    # Domain statements are walked first, then range statements.
    for predicate in (RDFS.domain, RDFS.range):
        for _subject, _predicate, target in graph.triples((None, predicate, None)):
            if target in classes:
                references[target] = references.get(target, 0) + 1

    return references
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def _find_most_connected(ref_counts: dict) -> tuple[Optional[str], int]:
|
|
64
|
+
"""Find the class with the most property references.
|
|
65
|
+
|
|
66
|
+
Args:
|
|
67
|
+
ref_counts: Dictionary mapping class -> reference count.
|
|
68
|
+
|
|
69
|
+
Returns:
|
|
70
|
+
Tuple of (class URI string, reference count).
|
|
71
|
+
"""
|
|
72
|
+
if not ref_counts:
|
|
73
|
+
return None, 0
|
|
74
|
+
|
|
75
|
+
most_connected = max(ref_counts.items(), key=lambda x: x[1])
|
|
76
|
+
uri = str(most_connected[0]) if most_connected[0] else None
|
|
77
|
+
return uri, most_connected[1]
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def _count_isolated(classes: set, ref_counts: dict, graph: Graph) -> int:
|
|
81
|
+
"""Count classes not connected to any property.
|
|
82
|
+
|
|
83
|
+
A class is isolated if:
|
|
84
|
+
- No property has it as domain or range
|
|
85
|
+
- AND it has no subclasses or superclasses (not part of hierarchy)
|
|
86
|
+
|
|
87
|
+
Args:
|
|
88
|
+
classes: Set of class URIRefs.
|
|
89
|
+
ref_counts: Dictionary mapping class -> reference count.
|
|
90
|
+
graph: RDF graph to query for hierarchy.
|
|
91
|
+
|
|
92
|
+
Returns:
|
|
93
|
+
Number of isolated classes.
|
|
94
|
+
"""
|
|
95
|
+
isolated = 0
|
|
96
|
+
for cls in classes:
|
|
97
|
+
# Not referenced by properties
|
|
98
|
+
if ref_counts.get(cls, 0) == 0:
|
|
99
|
+
# Also check if it's disconnected from hierarchy
|
|
100
|
+
has_super = any(graph.objects(cls, RDFS.subClassOf))
|
|
101
|
+
has_sub = any(graph.subjects(RDFS.subClassOf, cls))
|
|
102
|
+
if not has_super and not has_sub:
|
|
103
|
+
isolated += 1
|
|
104
|
+
return isolated
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def collect_connectivity_stats(graph: Graph) -> ConnectivityStats:
    """Gather the connectivity figures for an RDF graph.

    Args:
        graph: RDF graph to analyse.

    Returns:
        ConnectivityStats with the most connected class, its reference
        count and the isolated-class count. A default ConnectivityStats is
        returned when the graph declares no classes.
    """
    classes = _get_all_classes(graph)
    if len(classes) == 0:
        return ConnectivityStats()

    ref_counts = _count_property_references(graph, classes)
    hub_uri, hub_refs = _find_most_connected(ref_counts)

    return ConnectivityStats(
        most_connected_class=hub_uri,
        most_connected_count=hub_refs,
        isolated_classes=_count_isolated(classes, ref_counts, graph),
    )
|