rdf-construct 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88) hide show
  1. rdf_construct/__init__.py +12 -0
  2. rdf_construct/__main__.py +0 -0
  3. rdf_construct/cli.py +1762 -0
  4. rdf_construct/core/__init__.py +33 -0
  5. rdf_construct/core/config.py +116 -0
  6. rdf_construct/core/ordering.py +219 -0
  7. rdf_construct/core/predicate_order.py +212 -0
  8. rdf_construct/core/profile.py +157 -0
  9. rdf_construct/core/selector.py +64 -0
  10. rdf_construct/core/serialiser.py +232 -0
  11. rdf_construct/core/utils.py +89 -0
  12. rdf_construct/cq/__init__.py +77 -0
  13. rdf_construct/cq/expectations.py +365 -0
  14. rdf_construct/cq/formatters/__init__.py +45 -0
  15. rdf_construct/cq/formatters/json.py +104 -0
  16. rdf_construct/cq/formatters/junit.py +104 -0
  17. rdf_construct/cq/formatters/text.py +146 -0
  18. rdf_construct/cq/loader.py +300 -0
  19. rdf_construct/cq/runner.py +321 -0
  20. rdf_construct/diff/__init__.py +59 -0
  21. rdf_construct/diff/change_types.py +214 -0
  22. rdf_construct/diff/comparator.py +338 -0
  23. rdf_construct/diff/filters.py +133 -0
  24. rdf_construct/diff/formatters/__init__.py +71 -0
  25. rdf_construct/diff/formatters/json.py +192 -0
  26. rdf_construct/diff/formatters/markdown.py +210 -0
  27. rdf_construct/diff/formatters/text.py +195 -0
  28. rdf_construct/docs/__init__.py +60 -0
  29. rdf_construct/docs/config.py +238 -0
  30. rdf_construct/docs/extractors.py +603 -0
  31. rdf_construct/docs/generator.py +360 -0
  32. rdf_construct/docs/renderers/__init__.py +7 -0
  33. rdf_construct/docs/renderers/html.py +803 -0
  34. rdf_construct/docs/renderers/json.py +390 -0
  35. rdf_construct/docs/renderers/markdown.py +628 -0
  36. rdf_construct/docs/search.py +278 -0
  37. rdf_construct/docs/templates/html/base.html.jinja +44 -0
  38. rdf_construct/docs/templates/html/class.html.jinja +152 -0
  39. rdf_construct/docs/templates/html/hierarchy.html.jinja +28 -0
  40. rdf_construct/docs/templates/html/index.html.jinja +110 -0
  41. rdf_construct/docs/templates/html/instance.html.jinja +90 -0
  42. rdf_construct/docs/templates/html/namespaces.html.jinja +37 -0
  43. rdf_construct/docs/templates/html/property.html.jinja +124 -0
  44. rdf_construct/docs/templates/html/single_page.html.jinja +169 -0
  45. rdf_construct/lint/__init__.py +75 -0
  46. rdf_construct/lint/config.py +214 -0
  47. rdf_construct/lint/engine.py +396 -0
  48. rdf_construct/lint/formatters.py +327 -0
  49. rdf_construct/lint/rules.py +692 -0
  50. rdf_construct/main.py +6 -0
  51. rdf_construct/puml2rdf/__init__.py +103 -0
  52. rdf_construct/puml2rdf/config.py +230 -0
  53. rdf_construct/puml2rdf/converter.py +420 -0
  54. rdf_construct/puml2rdf/merger.py +200 -0
  55. rdf_construct/puml2rdf/model.py +202 -0
  56. rdf_construct/puml2rdf/parser.py +565 -0
  57. rdf_construct/puml2rdf/validators.py +451 -0
  58. rdf_construct/shacl/__init__.py +56 -0
  59. rdf_construct/shacl/config.py +166 -0
  60. rdf_construct/shacl/converters.py +520 -0
  61. rdf_construct/shacl/generator.py +364 -0
  62. rdf_construct/shacl/namespaces.py +93 -0
  63. rdf_construct/stats/__init__.py +29 -0
  64. rdf_construct/stats/collector.py +178 -0
  65. rdf_construct/stats/comparator.py +298 -0
  66. rdf_construct/stats/formatters/__init__.py +83 -0
  67. rdf_construct/stats/formatters/json.py +38 -0
  68. rdf_construct/stats/formatters/markdown.py +153 -0
  69. rdf_construct/stats/formatters/text.py +186 -0
  70. rdf_construct/stats/metrics/__init__.py +26 -0
  71. rdf_construct/stats/metrics/basic.py +147 -0
  72. rdf_construct/stats/metrics/complexity.py +137 -0
  73. rdf_construct/stats/metrics/connectivity.py +130 -0
  74. rdf_construct/stats/metrics/documentation.py +128 -0
  75. rdf_construct/stats/metrics/hierarchy.py +207 -0
  76. rdf_construct/stats/metrics/properties.py +88 -0
  77. rdf_construct/uml/__init__.py +22 -0
  78. rdf_construct/uml/context.py +194 -0
  79. rdf_construct/uml/mapper.py +371 -0
  80. rdf_construct/uml/odm_renderer.py +789 -0
  81. rdf_construct/uml/renderer.py +684 -0
  82. rdf_construct/uml/uml_layout.py +393 -0
  83. rdf_construct/uml/uml_style.py +613 -0
  84. rdf_construct-0.2.0.dist-info/METADATA +431 -0
  85. rdf_construct-0.2.0.dist-info/RECORD +88 -0
  86. rdf_construct-0.2.0.dist-info/WHEEL +4 -0
  87. rdf_construct-0.2.0.dist-info/entry_points.txt +3 -0
  88. rdf_construct-0.2.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,186 @@
1
+ """Text output formatter for ontology statistics."""
2
+
3
+ from typing import Optional
4
+
5
+ from rdflib import Graph
6
+
7
+ from rdf_construct.stats.collector import OntologyStats
8
+ from rdf_construct.stats.comparator import ComparisonResult
9
+
10
+
11
+ def _format_pct(value: float) -> str:
12
+ """Format a percentage value for display."""
13
+ return f"{value * 100:.1f}%"
14
+
15
+
16
+ def _shorten_uri(uri: str, graph: Optional[Graph] = None) -> str:
17
+ """Shorten a URI to CURIE if possible."""
18
+ if graph:
19
+ try:
20
+ qname = graph.namespace_manager.qname(uri)
21
+ return qname
22
+ except Exception:
23
+ pass
24
+ # Fallback: extract local name
25
+ if "#" in uri:
26
+ return uri.split("#")[-1]
27
+ if "/" in uri:
28
+ return uri.rsplit("/", 1)[-1]
29
+ return uri
30
+
31
+
32
def format_text_stats(stats: OntologyStats, graph: Optional[Graph] = None) -> str:
    """Build a multi-section plain-text report from ontology statistics.

    The report has a header followed by the sections BASIC COUNTS,
    HIERARCHY, PROPERTIES, DOCUMENTATION, COMPLEXITY and CONNECTIVITY.

    Args:
        stats: The statistics to report on.
        graph: Optional graph forwarded to ``_shorten_uri`` when naming the
            most connected class.

    Returns:
        The report lines joined with newlines.
    """
    basic = stats.basic
    hierarchy = stats.hierarchy
    properties = stats.properties
    documentation = stats.documentation
    complexity = stats.complexity
    connectivity = stats.connectivity

    report = [
        f"Ontology Statistics: {stats.source}",
        "=" * 50,
        "",
    ]

    report += [
        "BASIC COUNTS",
        f" Triples: {basic.triples:,}",
        f" Classes: {basic.classes:,}",
        f" Object Properties: {basic.object_properties:,}",
        f" Datatype Properties: {basic.datatype_properties:,}",
        f" Annotation Properties: {basic.annotation_properties:,}",
        f" Individuals: {basic.individuals:,}",
        "",
    ]

    report += [
        "HIERARCHY",
        f" Root Classes: {hierarchy.root_classes:,}",
        f" Leaf Classes: {hierarchy.leaf_classes:,}",
        f" Max Depth: {hierarchy.max_depth}",
        f" Avg Depth: {hierarchy.avg_depth:.1f}",
        f" Avg Branching: {hierarchy.avg_branching:.1f}",
        f" Orphan Classes: {hierarchy.orphan_classes} ({_format_pct(hierarchy.orphan_rate)})",
        "",
    ]

    domain_share = _format_pct(properties.domain_coverage)
    range_share = _format_pct(properties.range_coverage)
    report += [
        "PROPERTIES",
        f" With Domain: {properties.with_domain} ({domain_share})",
        f" With Range: {properties.with_range} ({range_share})",
        f" Inverse Pairs: {properties.inverse_pairs}",
        f" Functional: {properties.functional}",
        f" Symmetric: {properties.symmetric}",
        "",
    ]

    classes_labelled_share = _format_pct(documentation.classes_labelled_pct)
    classes_documented_share = _format_pct(documentation.classes_documented_pct)
    properties_labelled_share = _format_pct(documentation.properties_labelled_pct)
    report += [
        "DOCUMENTATION",
        f" Classes Labelled: {documentation.classes_labelled} ({classes_labelled_share})",
        f" Classes Documented: {documentation.classes_documented} ({classes_documented_share})",
        f" Properties Labelled: {documentation.properties_labelled} ({properties_labelled_share})",
        "",
    ]

    report += [
        "COMPLEXITY",
        f" Avg Props/Class: {complexity.avg_properties_per_class:.1f}",
        f" Avg Superclasses: {complexity.avg_superclasses_per_class:.1f}",
        f" Multiple Inheritance: {complexity.multiple_inheritance_count}",
        f" OWL Restrictions: {complexity.owl_restriction_count}",
        f" Equivalent Classes: {complexity.owl_equivalent_count}",
        "",
    ]

    # The most connected class is shown as a shortened name when one exists.
    if connectivity.most_connected_class:
        short_name = _shorten_uri(connectivity.most_connected_class, graph)
        most_connected_line = (
            f" Most Connected: {short_name} ({connectivity.most_connected_count} refs)"
        )
    else:
        most_connected_line = " Most Connected: (none)"

    report += [
        "CONNECTIVITY",
        most_connected_line,
        f" Isolated Classes: {connectivity.isolated_classes}",
    ]

    return "\n".join(report)
110
+
111
+
112
def format_text_comparison(
    comparison: ComparisonResult,
    graph: Optional[Graph] = None,
) -> str:
    """Build a plain-text table describing the changes between two runs.

    Changes are listed in the order given by ``comparison.changes``; a
    category heading is emitted each time the category differs from that of
    the preceding change.

    Args:
        comparison: The comparison result to report on.
        graph: Optional graph; accepted but not used by this function.

    Returns:
        The report lines joined with newlines. When there are no changes the
        report consists of the header and a "No changes detected." line.
    """
    report = [
        f"Comparing: {comparison.old_source} → {comparison.new_source}",
        "=" * 60,
        "",
    ]

    if not comparison.changes:
        report.append("No changes detected.")
        return "\n".join(report)

    # Column headings and the rule beneath them.
    report.append(f"{'Metric':<30} {'Old':>10} {'New':>10} {'Change':>15}")
    report.append("-" * 65)

    previous_category = None
    for change in comparison.changes:
        # Start a new group whenever the category switches.
        if change.category != previous_category:
            previous_category = change.category
            report.append(f"\n{previous_category.upper()}")

        old_text = _format_value(change.old_value)
        new_text = _format_value(change.new_value)

        # Signed delta, with the percentage change appended when available.
        if change.delta is None:
            delta_text = "-"
        elif change.pct_change is None:
            delta_text = f"{change.delta:+g}"
        else:
            delta_text = f"{change.delta:+g} ({change.pct_change:+.1f}%)"

        # Marker is added only when `improved` is exactly True or False.
        if change.improved is True:
            delta_text += " ✓"
        elif change.improved is False:
            delta_text += " ⚠"

        label = change.metric.replace("_", " ").title()
        report.append(f" {label:<28} {old_text:>10} {new_text:>10} {delta_text:>15}")

    report.append("")
    report.append(f"Summary: {comparison.summary}")

    return "\n".join(report)
173
+
174
+
175
+ def _format_value(value: float | int | str | None) -> str:
176
+ """Format a metric value for display."""
177
+ if value is None:
178
+ return "-"
179
+ if isinstance(value, float):
180
+ if value < 1:
181
+ # Probably a percentage/rate
182
+ return f"{value * 100:.1f}%"
183
+ return f"{value:.2f}"
184
+ if isinstance(value, int):
185
+ return f"{value:,}"
186
+ return str(value)
@@ -0,0 +1,26 @@
1
+ """Metric collectors for RDF ontology statistics."""
2
+
3
+ from rdf_construct.stats.metrics.basic import BasicStats, collect_basic_stats
4
+ from rdf_construct.stats.metrics.hierarchy import HierarchyStats, collect_hierarchy_stats
5
+ from rdf_construct.stats.metrics.properties import PropertyStats, collect_property_stats
6
+ from rdf_construct.stats.metrics.documentation import (
7
+ DocumentationStats,
8
+ collect_documentation_stats,
9
+ )
10
+ from rdf_construct.stats.metrics.complexity import ComplexityStats, collect_complexity_stats
11
+ from rdf_construct.stats.metrics.connectivity import ConnectivityStats, collect_connectivity_stats
12
+
13
+ __all__ = [
14
+ "BasicStats",
15
+ "collect_basic_stats",
16
+ "HierarchyStats",
17
+ "collect_hierarchy_stats",
18
+ "PropertyStats",
19
+ "collect_property_stats",
20
+ "DocumentationStats",
21
+ "collect_documentation_stats",
22
+ "ComplexityStats",
23
+ "collect_complexity_stats",
24
+ "ConnectivityStats",
25
+ "collect_connectivity_stats",
26
+ ]
@@ -0,0 +1,147 @@
1
+ """Basic count metrics for RDF ontologies.
2
+
3
+ Provides fundamental counts: triples, classes, properties, individuals.
4
+ """
5
+
6
+ from dataclasses import dataclass, field
7
+
8
+ from rdflib import Graph, RDF, RDFS
9
+ from rdflib.namespace import OWL
10
+
11
+
12
@dataclass
class BasicStats:
    """Container for the fundamental counts of an ontology.

    Every field defaults to zero.

    Attributes:
        triples: Total number of triples in the graph.
        classes: Count of owl:Class plus rdfs:Class entities.
        object_properties: Count of owl:ObjectProperty entities.
        datatype_properties: Count of owl:DatatypeProperty entities.
        annotation_properties: Count of owl:AnnotationProperty entities.
        individuals: Count of named individuals (non-class, non-property).
    """

    triples: int = 0
    classes: int = 0
    object_properties: int = 0
    datatype_properties: int = 0
    annotation_properties: int = 0
    individuals: int = 0

    @property
    def total_properties(self) -> int:
        """Combined count of object, datatype and annotation properties."""
        per_kind = (
            self.object_properties,
            self.datatype_properties,
            self.annotation_properties,
        )
        return sum(per_kind)
36
+
37
+
38
def get_all_classes(graph: Graph) -> set:
    """Collect every subject typed as owl:Class or rdfs:Class.

    Args:
        graph: RDF graph to query.

    Returns:
        Set containing the subjects of both ``rdf:type owl:Class`` and
        ``rdf:type rdfs:Class`` triples.
    """
    owl_classes = graph.subjects(RDF.type, OWL.Class)
    rdfs_classes = graph.subjects(RDF.type, RDFS.Class)
    return {*owl_classes, *rdfs_classes}
50
+
51
+
52
def get_all_properties(graph: Graph) -> set:
    """Collect every subject typed as one of the recognised property kinds.

    The kinds looked up are owl:ObjectProperty, owl:DatatypeProperty,
    owl:AnnotationProperty and rdf:Property.

    Args:
        graph: RDF graph to query.

    Returns:
        Set of subjects typed with any of those property kinds.
    """
    property_kinds = (
        OWL.ObjectProperty,
        OWL.DatatypeProperty,
        OWL.AnnotationProperty,
        RDF.Property,
    )
    return {
        subject
        for kind in property_kinds
        for subject in graph.subjects(RDF.type, kind)
    }
70
+
71
+
72
def get_individuals(graph: Graph) -> set:
    """Get all named individuals from the graph.

    Individuals are URI-named subjects that carry an ``rdf:type`` but are
    not classes, properties or ontology declarations.

    Args:
        graph: RDF graph to query.

    Returns:
        Set of individual URIRefs.
    """
    # Local import: this module's top-level imports do not include URIRef.
    from rdflib import URIRef

    classes = get_all_classes(graph)
    properties = get_all_properties(graph)
    ontologies = set(graph.subjects(RDF.type, OWL.Ontology))

    # Keep only URI-named typed subjects. The previous `hasattr(s, "n3")`
    # test was true for every rdflib term, so blank nodes (for example
    # anonymous owl:Restriction nodes) slipped through and were counted.
    typed_named = {
        subject
        for subject in graph.subjects(RDF.type, None)
        if isinstance(subject, URIRef)
    }

    # Exclude classes, properties, and ontology declarations.
    individuals = typed_named - classes - properties - ontologies

    # Also exclude the vocabulary terms themselves, in case the graph
    # contains type statements about them.
    owl_constructs = {
        OWL.Class,
        OWL.ObjectProperty,
        OWL.DatatypeProperty,
        OWL.AnnotationProperty,
        OWL.Restriction,
        OWL.Ontology,
        RDFS.Class,
        RDF.Property,
    }
    return individuals - owl_constructs
111
+
112
+
113
def collect_basic_stats(graph: Graph) -> BasicStats:
    """Gather the basic counts for an RDF graph.

    Args:
        graph: RDF graph to analyse.

    Returns:
        BasicStats holding the triple count, the class count, one count per
        OWL property kind, and the individual count.
    """

    def count_typed(rdf_class) -> int:
        # Number of distinct subjects declared with the given rdf:type.
        return len(set(graph.subjects(RDF.type, rdf_class)))

    return BasicStats(
        triples=len(graph),
        classes=len(get_all_classes(graph)),
        object_properties=count_typed(OWL.ObjectProperty),
        datatype_properties=count_typed(OWL.DatatypeProperty),
        annotation_properties=count_typed(OWL.AnnotationProperty),
        individuals=len(get_individuals(graph)),
    )
@@ -0,0 +1,137 @@
1
+ """Complexity metrics for RDF ontologies.
2
+
3
+ Analyses structural complexity: multiple inheritance, axioms, restrictions.
4
+ """
5
+
6
+ from collections import defaultdict
7
+ from dataclasses import dataclass
8
+
9
+ from rdflib import Graph, RDF, RDFS
10
+ from rdflib.namespace import OWL
11
+
12
+
13
@dataclass
class ComplexityStats:
    """Container for structural complexity measurements of an ontology.

    Every field defaults to zero.

    Attributes:
        avg_properties_per_class: Mean number of property references
            (domain/range) per class.
        avg_superclasses_per_class: Mean number of superclasses per class.
        multiple_inheritance_count: How many classes have two or more
            direct superclasses.
        owl_restriction_count: How many owl:Restriction nodes exist.
        owl_equivalent_count: How many owl:equivalentClass statements exist.
    """

    # Averages
    avg_properties_per_class: float = 0.0
    avg_superclasses_per_class: float = 0.0
    # Counts
    multiple_inheritance_count: int = 0
    owl_restriction_count: int = 0
    owl_equivalent_count: int = 0
30
+
31
+
32
def _get_all_classes(graph: Graph) -> set:
    """Return the subjects typed as owl:Class or rdfs:Class."""
    owl_classes = set(graph.subjects(RDF.type, OWL.Class))
    return owl_classes.union(graph.subjects(RDF.type, RDFS.Class))
37
+
38
+
39
def _count_properties_per_class(graph: Graph, classes: set) -> dict:
    """Tally rdfs:domain and rdfs:range statements pointing at each class.

    Args:
        graph: RDF graph to query.
        classes: Set of class URIRefs; objects outside this set are ignored.

    Returns:
        Plain dictionary mapping class -> number of domain/range statements
        whose object is that class. Classes never referenced are absent.
    """
    tally: dict = defaultdict(int)

    # Domain statements are walked first, then range statements.
    for predicate in (RDFS.domain, RDFS.range):
        for _subject, _predicate, target in graph.triples((None, predicate, None)):
            if target in classes:
                tally[target] += 1

    return dict(tally)
62
+
63
+
64
def _count_superclasses(graph: Graph, classes: set) -> dict:
    """Work out how many direct superclasses each class has.

    Only rdfs:subClassOf objects that are themselves members of ``classes``
    are counted, which leaves out restrictions and other constructs.

    Args:
        graph: RDF graph to query.
        classes: Set of class URIRefs.

    Returns:
        Dictionary mapping every class in ``classes`` -> superclass count.
    """
    return {
        cls: len(classes.intersection(graph.objects(cls, RDFS.subClassOf)))
        for cls in classes
    }
83
+
84
+
85
+ def _count_multiple_inheritance(superclass_counts: dict) -> int:
86
+ """Count classes with multiple inheritance (2+ superclasses).
87
+
88
+ Args:
89
+ superclass_counts: Dictionary mapping class -> superclass count.
90
+
91
+ Returns:
92
+ Number of classes with multiple inheritance.
93
+ """
94
+ return sum(1 for count in superclass_counts.values() if count >= 2)
95
+
96
+
97
def collect_complexity_stats(graph: Graph) -> ComplexityStats:
    """Gather the complexity measurements for an RDF graph.

    Args:
        graph: RDF graph to analyse.

    Returns:
        ComplexityStats with averages rounded to two decimals. A graph with
        no classes yields a default-valued ComplexityStats.
    """
    classes = _get_all_classes(graph)
    if not classes:
        return ComplexityStats()

    class_total = len(classes)

    # Both averages are taken over every class, referenced or not.
    property_refs = _count_properties_per_class(graph, classes)
    mean_property_refs = sum(property_refs.values()) / class_total

    superclass_counts = _count_superclasses(graph, classes)
    mean_superclasses = sum(superclass_counts.values()) / class_total

    multi_parent_total = _count_multiple_inheritance(superclass_counts)

    # Restrictions are counted as distinct subjects; equivalences as triples.
    restriction_total = len(set(graph.subjects(RDF.type, OWL.Restriction)))
    equivalence_total = sum(
        1 for _ in graph.triples((None, OWL.equivalentClass, None))
    )

    return ComplexityStats(
        avg_properties_per_class=round(mean_property_refs, 2),
        avg_superclasses_per_class=round(mean_superclasses, 2),
        multiple_inheritance_count=multi_parent_total,
        owl_restriction_count=restriction_total,
        owl_equivalent_count=equivalence_total,
    )
@@ -0,0 +1,130 @@
1
+ """Connectivity metrics for RDF ontologies.
2
+
3
+ Analyses how classes are connected through properties.
4
+ """
5
+
6
+ from collections import defaultdict
7
+ from dataclasses import dataclass
8
+ from typing import Optional
9
+
10
+ from rdflib import Graph, RDF, RDFS, URIRef
11
+ from rdflib.namespace import OWL
12
+
13
+
14
@dataclass
class ConnectivityStats:
    """Container for connectivity measurements of an ontology.

    Attributes:
        most_connected_class: URI of the class that most properties
            reference, or ``None`` (the default) when there is none.
        most_connected_count: How many property references that class has.
        isolated_classes: How many classes no property domain/range
            refers to.
    """

    # Most referenced class and its reference count
    most_connected_class: Optional[str] = None
    most_connected_count: int = 0
    # Number of classes without any connection
    isolated_classes: int = 0
27
+
28
+
29
def _get_all_classes(graph: Graph) -> set:
    """Return the subjects typed as owl:Class or rdfs:Class."""
    return {
        subject
        for class_type in (OWL.Class, RDFS.Class)
        for subject in graph.subjects(RDF.type, class_type)
    }
34
+
35
+
36
def _count_property_references(graph: Graph, classes: set) -> dict:
    """Tally how often each class appears as a property domain or range.

    Args:
        graph: RDF graph to query.
        classes: Set of class URIRefs; objects outside this set are ignored.

    Returns:
        Plain dictionary mapping class -> reference count. Classes that are
        never referenced are absent.
    """
    tally: dict = defaultdict(int)

    domain_triples = graph.triples((None, RDFS.domain, None))
    range_triples = graph.triples((None, RDFS.range, None))

    # Domain statements are consumed before range statements.
    for triples in (domain_triples, range_triples):
        for triple in triples:
            referenced = triple[2]
            if referenced in classes:
                tally[referenced] += 1

    return dict(tally)
61
+
62
+
63
+ def _find_most_connected(ref_counts: dict) -> tuple[Optional[str], int]:
64
+ """Find the class with the most property references.
65
+
66
+ Args:
67
+ ref_counts: Dictionary mapping class -> reference count.
68
+
69
+ Returns:
70
+ Tuple of (class URI string, reference count).
71
+ """
72
+ if not ref_counts:
73
+ return None, 0
74
+
75
+ most_connected = max(ref_counts.items(), key=lambda x: x[1])
76
+ uri = str(most_connected[0]) if most_connected[0] else None
77
+ return uri, most_connected[1]
78
+
79
+
80
def _count_isolated(classes: set, ref_counts: dict, graph: Graph) -> int:
    """Count classes that are disconnected from properties and hierarchy.

    A class is isolated if:
    - No property has it as domain or range (its reference count is 0)
    - AND it has no subclasses or superclasses (not part of hierarchy)

    Args:
        classes: Set of class URIRefs.
        ref_counts: Dictionary mapping class -> reference count.
        graph: RDF graph to query for hierarchy.

    Returns:
        Number of isolated classes.
    """
    isolated = 0
    for cls in classes:
        # Referenced by at least one property: not isolated.
        if ref_counts.get(cls, 0) != 0:
            continue

        # Test for the existence of a matching triple. The previous
        # any(...) over the yielded terms tested their truthiness instead,
        # which misreports terms that evaluate as falsy.
        has_super = (cls, RDFS.subClassOf, None) in graph
        has_sub = (None, RDFS.subClassOf, cls) in graph
        if not has_super and not has_sub:
            isolated += 1
    return isolated
105
+
106
+
107
def collect_connectivity_stats(graph: Graph) -> ConnectivityStats:
    """Gather the connectivity measurements for an RDF graph.

    Args:
        graph: RDF graph to analyse.

    Returns:
        ConnectivityStats naming the most referenced class, its reference
        count, and the number of isolated classes. A graph with no classes
        yields a default-valued ConnectivityStats.
    """
    classes = _get_all_classes(graph)
    if classes:
        reference_counts = _count_property_references(graph, classes)
        top_uri, top_count = _find_most_connected(reference_counts)
        isolated_total = _count_isolated(classes, reference_counts, graph)
        return ConnectivityStats(
            most_connected_class=top_uri,
            most_connected_count=top_count,
            isolated_classes=isolated_total,
        )

    return ConnectivityStats()