rdf-construct 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rdf_construct/__init__.py +12 -0
- rdf_construct/__main__.py +0 -0
- rdf_construct/cli.py +3429 -0
- rdf_construct/core/__init__.py +33 -0
- rdf_construct/core/config.py +116 -0
- rdf_construct/core/ordering.py +219 -0
- rdf_construct/core/predicate_order.py +212 -0
- rdf_construct/core/profile.py +157 -0
- rdf_construct/core/selector.py +64 -0
- rdf_construct/core/serialiser.py +232 -0
- rdf_construct/core/utils.py +89 -0
- rdf_construct/cq/__init__.py +77 -0
- rdf_construct/cq/expectations.py +365 -0
- rdf_construct/cq/formatters/__init__.py +45 -0
- rdf_construct/cq/formatters/json.py +104 -0
- rdf_construct/cq/formatters/junit.py +104 -0
- rdf_construct/cq/formatters/text.py +146 -0
- rdf_construct/cq/loader.py +300 -0
- rdf_construct/cq/runner.py +321 -0
- rdf_construct/diff/__init__.py +59 -0
- rdf_construct/diff/change_types.py +214 -0
- rdf_construct/diff/comparator.py +338 -0
- rdf_construct/diff/filters.py +133 -0
- rdf_construct/diff/formatters/__init__.py +71 -0
- rdf_construct/diff/formatters/json.py +192 -0
- rdf_construct/diff/formatters/markdown.py +210 -0
- rdf_construct/diff/formatters/text.py +195 -0
- rdf_construct/docs/__init__.py +60 -0
- rdf_construct/docs/config.py +238 -0
- rdf_construct/docs/extractors.py +603 -0
- rdf_construct/docs/generator.py +360 -0
- rdf_construct/docs/renderers/__init__.py +7 -0
- rdf_construct/docs/renderers/html.py +803 -0
- rdf_construct/docs/renderers/json.py +390 -0
- rdf_construct/docs/renderers/markdown.py +628 -0
- rdf_construct/docs/search.py +278 -0
- rdf_construct/docs/templates/html/base.html.jinja +44 -0
- rdf_construct/docs/templates/html/class.html.jinja +152 -0
- rdf_construct/docs/templates/html/hierarchy.html.jinja +28 -0
- rdf_construct/docs/templates/html/index.html.jinja +110 -0
- rdf_construct/docs/templates/html/instance.html.jinja +90 -0
- rdf_construct/docs/templates/html/namespaces.html.jinja +37 -0
- rdf_construct/docs/templates/html/property.html.jinja +124 -0
- rdf_construct/docs/templates/html/single_page.html.jinja +169 -0
- rdf_construct/lint/__init__.py +75 -0
- rdf_construct/lint/config.py +214 -0
- rdf_construct/lint/engine.py +396 -0
- rdf_construct/lint/formatters.py +327 -0
- rdf_construct/lint/rules.py +692 -0
- rdf_construct/localise/__init__.py +114 -0
- rdf_construct/localise/config.py +508 -0
- rdf_construct/localise/extractor.py +427 -0
- rdf_construct/localise/formatters/__init__.py +36 -0
- rdf_construct/localise/formatters/markdown.py +229 -0
- rdf_construct/localise/formatters/text.py +224 -0
- rdf_construct/localise/merger.py +346 -0
- rdf_construct/localise/reporter.py +356 -0
- rdf_construct/main.py +6 -0
- rdf_construct/merge/__init__.py +165 -0
- rdf_construct/merge/config.py +354 -0
- rdf_construct/merge/conflicts.py +281 -0
- rdf_construct/merge/formatters.py +426 -0
- rdf_construct/merge/merger.py +425 -0
- rdf_construct/merge/migrator.py +339 -0
- rdf_construct/merge/rules.py +377 -0
- rdf_construct/merge/splitter.py +1102 -0
- rdf_construct/puml2rdf/__init__.py +103 -0
- rdf_construct/puml2rdf/config.py +230 -0
- rdf_construct/puml2rdf/converter.py +420 -0
- rdf_construct/puml2rdf/merger.py +200 -0
- rdf_construct/puml2rdf/model.py +202 -0
- rdf_construct/puml2rdf/parser.py +565 -0
- rdf_construct/puml2rdf/validators.py +451 -0
- rdf_construct/refactor/__init__.py +72 -0
- rdf_construct/refactor/config.py +362 -0
- rdf_construct/refactor/deprecator.py +328 -0
- rdf_construct/refactor/formatters/__init__.py +8 -0
- rdf_construct/refactor/formatters/text.py +311 -0
- rdf_construct/refactor/renamer.py +294 -0
- rdf_construct/shacl/__init__.py +56 -0
- rdf_construct/shacl/config.py +166 -0
- rdf_construct/shacl/converters.py +520 -0
- rdf_construct/shacl/generator.py +364 -0
- rdf_construct/shacl/namespaces.py +93 -0
- rdf_construct/stats/__init__.py +29 -0
- rdf_construct/stats/collector.py +178 -0
- rdf_construct/stats/comparator.py +298 -0
- rdf_construct/stats/formatters/__init__.py +83 -0
- rdf_construct/stats/formatters/json.py +38 -0
- rdf_construct/stats/formatters/markdown.py +153 -0
- rdf_construct/stats/formatters/text.py +186 -0
- rdf_construct/stats/metrics/__init__.py +26 -0
- rdf_construct/stats/metrics/basic.py +147 -0
- rdf_construct/stats/metrics/complexity.py +137 -0
- rdf_construct/stats/metrics/connectivity.py +130 -0
- rdf_construct/stats/metrics/documentation.py +128 -0
- rdf_construct/stats/metrics/hierarchy.py +207 -0
- rdf_construct/stats/metrics/properties.py +88 -0
- rdf_construct/uml/__init__.py +22 -0
- rdf_construct/uml/context.py +194 -0
- rdf_construct/uml/mapper.py +371 -0
- rdf_construct/uml/odm_renderer.py +789 -0
- rdf_construct/uml/renderer.py +684 -0
- rdf_construct/uml/uml_layout.py +393 -0
- rdf_construct/uml/uml_style.py +613 -0
- rdf_construct-0.3.0.dist-info/METADATA +496 -0
- rdf_construct-0.3.0.dist-info/RECORD +110 -0
- rdf_construct-0.3.0.dist-info/WHEEL +4 -0
- rdf_construct-0.3.0.dist-info/entry_points.txt +3 -0
- rdf_construct-0.3.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,298 @@
|
|
|
1
|
+
"""Ontology statistics comparison.
|
|
2
|
+
|
|
3
|
+
Compares statistics between two ontology versions and generates change summaries.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from dataclasses import dataclass, field
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
from rdf_construct.stats.collector import OntologyStats
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@dataclass
|
|
13
|
+
class MetricChange:
|
|
14
|
+
"""A change in a single metric between two versions.
|
|
15
|
+
|
|
16
|
+
Attributes:
|
|
17
|
+
category: The metric category (e.g., "basic", "hierarchy").
|
|
18
|
+
metric: The metric name (e.g., "classes", "max_depth").
|
|
19
|
+
old_value: Value in the old/baseline version.
|
|
20
|
+
new_value: Value in the new version.
|
|
21
|
+
delta: Numeric difference (new - old).
|
|
22
|
+
pct_change: Percentage change (may be None for non-numeric).
|
|
23
|
+
improved: Whether the change is an improvement (context-dependent).
|
|
24
|
+
"""
|
|
25
|
+
|
|
26
|
+
category: str
|
|
27
|
+
metric: str
|
|
28
|
+
old_value: Any
|
|
29
|
+
new_value: Any
|
|
30
|
+
delta: float | int | None = None
|
|
31
|
+
pct_change: float | None = None
|
|
32
|
+
improved: bool | None = None
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
@dataclass
class ComparisonResult:
    """Outcome of comparing the statistics of two ontology versions.

    Attributes:
        old_source: Path/identifier of the baseline version.
        new_source: Path/identifier of the new version.
        changes: The individual metric changes that were recorded.
        summary: Human-readable description of the comparison.
    """

    old_source: str
    new_source: str
    changes: list[MetricChange] = field(default_factory=list)
    summary: str = ""

    def to_dict(self) -> dict[str, Any]:
        """Return a plain-dict view of the comparison for JSON serialisation.

        Returns:
            A dictionary with the keys "old_source", "new_source", "changes"
            (one dictionary per recorded change) and "summary".
        """
        # Attributes copied from every change, in output key order.
        exported = (
            "category",
            "metric",
            "old_value",
            "new_value",
            "delta",
            "pct_change",
            "improved",
        )
        serialised_changes = [
            {name: getattr(change, name) for name in exported}
            for change in self.changes
        ]
        return {
            "old_source": self.old_source,
            "new_source": self.new_source,
            "changes": serialised_changes,
            "summary": self.summary,
        }
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def _pct_change(old: float | int, new: float | int) -> float | None:
|
|
73
|
+
"""Calculate percentage change between two values.
|
|
74
|
+
|
|
75
|
+
Args:
|
|
76
|
+
old: Original value.
|
|
77
|
+
new: New value.
|
|
78
|
+
|
|
79
|
+
Returns:
|
|
80
|
+
Percentage change, or None if old is zero.
|
|
81
|
+
"""
|
|
82
|
+
if old == 0:
|
|
83
|
+
return None
|
|
84
|
+
return round(((new - old) / old) * 100, 1)
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def _is_improvement(category: str, metric: str, delta: float | int) -> bool | None:
|
|
88
|
+
"""Determine if a metric change is an improvement.
|
|
89
|
+
|
|
90
|
+
Some metrics are better when higher (e.g., documentation coverage),
|
|
91
|
+
others are better when lower (e.g., orphan rate).
|
|
92
|
+
|
|
93
|
+
Args:
|
|
94
|
+
category: The metric category.
|
|
95
|
+
metric: The metric name.
|
|
96
|
+
delta: The change in value.
|
|
97
|
+
|
|
98
|
+
Returns:
|
|
99
|
+
True if improved, False if degraded, None if neutral.
|
|
100
|
+
"""
|
|
101
|
+
# Metrics where higher is better
|
|
102
|
+
higher_is_better = {
|
|
103
|
+
("basic", "classes"),
|
|
104
|
+
("basic", "object_properties"),
|
|
105
|
+
("basic", "datatype_properties"),
|
|
106
|
+
("documentation", "classes_labelled"),
|
|
107
|
+
("documentation", "classes_labelled_pct"),
|
|
108
|
+
("documentation", "classes_documented"),
|
|
109
|
+
("documentation", "classes_documented_pct"),
|
|
110
|
+
("documentation", "properties_labelled"),
|
|
111
|
+
("documentation", "properties_labelled_pct"),
|
|
112
|
+
("properties", "with_domain"),
|
|
113
|
+
("properties", "with_range"),
|
|
114
|
+
("properties", "domain_coverage"),
|
|
115
|
+
("properties", "range_coverage"),
|
|
116
|
+
}
|
|
117
|
+
|
|
118
|
+
# Metrics where lower is better
|
|
119
|
+
lower_is_better = {
|
|
120
|
+
("hierarchy", "orphan_classes"),
|
|
121
|
+
("hierarchy", "orphan_rate"),
|
|
122
|
+
("connectivity", "isolated_classes"),
|
|
123
|
+
}
|
|
124
|
+
|
|
125
|
+
key = (category, metric)
|
|
126
|
+
if key in higher_is_better:
|
|
127
|
+
return delta > 0
|
|
128
|
+
if key in lower_is_better:
|
|
129
|
+
return delta < 0
|
|
130
|
+
|
|
131
|
+
# Neutral - neither better nor worse
|
|
132
|
+
return None
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def _extract_numeric_metrics(stats: OntologyStats) -> dict[tuple[str, str], float | int]:
    """Flatten the metrics of an OntologyStats object into one dictionary.

    Each category name below is read as an attribute of ``stats`` and each
    metric name as an attribute of that category object.

    Args:
        stats: The statistics object.

    Returns:
        Dictionary mapping (category, metric) -> value, in the order listed.
    """
    layout = (
        (
            "basic",
            (
                "triples",
                "classes",
                "object_properties",
                "datatype_properties",
                "annotation_properties",
                "individuals",
            ),
        ),
        (
            "hierarchy",
            (
                "root_classes",
                "leaf_classes",
                "max_depth",
                "avg_depth",
                "avg_branching",
                "orphan_classes",
                "orphan_rate",
            ),
        ),
        (
            "properties",
            (
                "with_domain",
                "with_range",
                "domain_coverage",
                "range_coverage",
                "inverse_pairs",
                "functional",
                "symmetric",
            ),
        ),
        (
            "documentation",
            (
                "classes_labelled",
                "classes_labelled_pct",
                "classes_documented",
                "classes_documented_pct",
                "properties_labelled",
                "properties_labelled_pct",
            ),
        ),
        (
            "complexity",
            (
                "avg_properties_per_class",
                "avg_superclasses_per_class",
                "multiple_inheritance_count",
                "owl_restriction_count",
                "owl_equivalent_count",
            ),
        ),
        (
            "connectivity",
            (
                "most_connected_count",
                "isolated_classes",
            ),
        ),
    )

    metrics: dict[tuple[str, str], float | int] = {}
    for category, names in layout:
        section = getattr(stats, category)
        for name in names:
            metrics[(category, name)] = getattr(section, name)
    return metrics
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
def _generate_summary(changes: list[MetricChange]) -> str:
    """Build a one-sentence, human-readable summary of the given changes.

    Class-count, documentation-coverage and orphan-class changes are called
    out explicitly. When none of those applies, the summary falls back to
    comparing how many changes improved against how many degraded.

    Args:
        changes: List of metric changes.

    Returns:
        Summary string.
    """
    if not changes:
        return "No significant changes detected."

    def first_with_metric(metric_name: str):
        # First change carrying this metric name, or None when absent.
        for candidate in changes:
            if candidate.metric == metric_name:
                return candidate
        return None

    classes = first_with_metric("classes")
    documented = first_with_metric("classes_documented_pct")
    orphans = first_with_metric("orphan_classes")

    fragments = []

    # Growth or shrinkage of the class count.
    if classes and classes.delta:
        if classes.delta > 0:
            fragments.append(f"Ontology grew (+{classes.delta} classes)")
        else:
            fragments.append(f"Ontology shrank ({classes.delta} classes)")

    # Only an increase in documentation coverage is mentioned.
    if documented and documented.delta and documented.delta > 0:
        fragments.append("improved documentation coverage")

    # Orphan classes moving in either direction.
    if orphans and orphans.delta:
        if orphans.delta < 0:
            fragments.append("fewer orphan classes")
        elif orphans.delta > 0:
            fragments.append("more orphan classes (review needed)")

    if fragments:
        return ", ".join(fragments).capitalize() + "."

    # Nothing notable: weigh improvements against degradations instead.
    improvements = len([c for c in changes if c.improved is True])
    degradations = len([c for c in changes if c.improved is False])
    if improvements > degradations:
        return "Overall improvement in ontology quality."
    if degradations > improvements:
        return "Some quality metrics have degraded."
    return "Minor changes with mixed impact."
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
def compare_stats(
    old_stats: OntologyStats,
    new_stats: OntologyStats,
) -> ComparisonResult:
    """Compare the statistics of two ontology versions.

    Args:
        old_stats: Statistics from the baseline/old version.
        new_stats: Statistics from the new version.

    Returns:
        ComparisonResult holding one MetricChange per metric whose value
        differs, sorted by category then metric, plus a generated summary.
    """
    baseline = _extract_numeric_metrics(old_stats)
    current = _extract_numeric_metrics(new_stats)

    recorded: list[MetricChange] = []
    for (category, metric), before in baseline.items():
        # A metric missing from the new side is treated as zero.
        after = current.get((category, metric), 0)
        if before == after:
            # Unchanged metrics are left out of the result.
            continue

        difference = after - before
        percentage = _pct_change(before, after)
        verdict = _is_improvement(category, metric, difference)
        recorded.append(
            MetricChange(
                category=category,
                metric=metric,
                old_value=before,
                new_value=after,
                delta=difference,
                pct_change=percentage,
                improved=verdict,
            )
        )

    # Order by category, then by metric name within each category.
    ordered = sorted(recorded, key=lambda change: (change.category, change.metric))

    return ComparisonResult(
        old_source=old_stats.source,
        new_source=new_stats.source,
        changes=ordered,
        summary=_generate_summary(ordered),
    )
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
"""Output formatters for ontology statistics."""
|
|
2
|
+
|
|
3
|
+
from typing import Optional
|
|
4
|
+
|
|
5
|
+
from rdflib import Graph
|
|
6
|
+
|
|
7
|
+
from rdf_construct.stats.collector import OntologyStats
|
|
8
|
+
from rdf_construct.stats.comparator import ComparisonResult
|
|
9
|
+
from rdf_construct.stats.formatters.text import format_text_stats, format_text_comparison
|
|
10
|
+
from rdf_construct.stats.formatters.json import format_json_stats, format_json_comparison
|
|
11
|
+
from rdf_construct.stats.formatters.markdown import format_markdown_stats, format_markdown_comparison
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def format_stats(
    stats: OntologyStats,
    format_name: str = "text",
    graph: Optional[Graph] = None,
) -> str:
    """Render ontology statistics with the formatter selected by name.

    Args:
        stats: The statistics to format.
        format_name: Output format ("text", "json", "markdown", "md");
            matched case-insensitively.
        graph: Optional graph, passed through unchanged to the formatter.

    Returns:
        Formatted string representation.

    Raises:
        ValueError: If format_name is not recognised.
    """
    markdown = format_markdown_stats
    dispatch = {
        "text": format_text_stats,
        "json": format_json_stats,
        "markdown": markdown,
        "md": markdown,  # short alias for "markdown"
    }

    requested = format_name.lower()
    if requested not in dispatch:
        valid = ", ".join(sorted(dispatch))
        raise ValueError(f"Unknown format '{format_name}'. Valid formats: {valid}")

    render = dispatch[requested]
    return render(stats, graph)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def format_comparison(
    comparison: ComparisonResult,
    format_name: str = "text",
    graph: Optional[Graph] = None,
) -> str:
    """Render a comparison result with the formatter selected by name.

    Args:
        comparison: The comparison result to format.
        format_name: Output format ("text", "json", "markdown", "md");
            matched case-insensitively.
        graph: Optional graph, passed through unchanged to the formatter.

    Returns:
        Formatted string representation.

    Raises:
        ValueError: If format_name is not recognised.
    """
    markdown = format_markdown_comparison
    dispatch = {
        "text": format_text_comparison,
        "json": format_json_comparison,
        "markdown": markdown,
        "md": markdown,  # short alias for "markdown"
    }

    requested = format_name.lower()
    if requested not in dispatch:
        valid = ", ".join(sorted(dispatch))
        raise ValueError(f"Unknown format '{format_name}'. Valid formats: {valid}")

    render = dispatch[requested]
    return render(comparison, graph)
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
# Public API of the formatters package.
__all__ = ["format_stats", "format_comparison"]
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
"""JSON output formatter for ontology statistics."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
from typing import Optional
|
|
5
|
+
|
|
6
|
+
from rdflib import Graph
|
|
7
|
+
|
|
8
|
+
from rdf_construct.stats.collector import OntologyStats
|
|
9
|
+
from rdf_construct.stats.comparator import ComparisonResult
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def format_json_stats(stats: OntologyStats, graph: Optional[Graph] = None) -> str:
    """Serialise ontology statistics to an indented JSON document.

    Args:
        stats: The statistics to format; its ``to_dict()`` output is dumped.
        graph: Optional graph (not used for JSON format).

    Returns:
        JSON string representation, indented by two spaces.
    """
    payload = stats.to_dict()
    return json.dumps(payload, indent=2)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def format_json_comparison(
    comparison: ComparisonResult,
    graph: Optional[Graph] = None,
) -> str:
    """Serialise a comparison result to an indented JSON document.

    Args:
        comparison: The comparison result to format; its ``to_dict()``
            output is dumped.
        graph: Optional graph (not used for JSON format).

    Returns:
        JSON string representation, indented by two spaces.
    """
    payload = comparison.to_dict()
    return json.dumps(payload, indent=2)
|
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
"""Markdown output formatter for ontology statistics."""
|
|
2
|
+
|
|
3
|
+
from typing import Optional
|
|
4
|
+
|
|
5
|
+
from rdflib import Graph
|
|
6
|
+
|
|
7
|
+
from rdf_construct.stats.collector import OntologyStats
|
|
8
|
+
from rdf_construct.stats.comparator import ComparisonResult
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def _format_pct(value: float) -> str:
|
|
12
|
+
"""Format a percentage value for display."""
|
|
13
|
+
return f"{value * 100:.1f}%"
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def format_markdown_stats(stats: OntologyStats, graph: Optional[Graph] = None) -> str:
    """Render ontology statistics as a set of Markdown tables.

    The output has a headline summary table followed by "Structure",
    "Properties" and "Documentation" sections, each a two-column table.
    Designed for embedding in README files or documentation.

    Args:
        stats: The statistics to format.
        graph: Optional graph; accepted for signature parity with the other
            formatters and not read by this function.

    Returns:
        Markdown string representation.
    """
    # Short aliases for the statistic groups read below.
    basic = stats.basic
    hierarchy = stats.hierarchy
    props = stats.properties
    docs = stats.documentation

    # Every table opens with the same header and separator rows.
    table_head = ("| Metric | Value |", "|--------|-------|")

    lines = [
        "## Ontology Statistics",
        "",
        f"**Source:** `{stats.source}`",
        "",
        # Summary table
        *table_head,
        f"| Classes | {basic.classes} |",
        f"| Object Properties | {basic.object_properties} |",
        f"| Datatype Properties | {basic.datatype_properties} |",
        f"| Max Hierarchy Depth | {hierarchy.max_depth} |",
        f"| Documentation Coverage | {_format_pct(docs.classes_documented_pct)} |",
        "",
        # Detailed sections
        "### Structure",
        "",
        *table_head,
        f"| Total Triples | {basic.triples:,} |",
        f"| Root Classes | {hierarchy.root_classes} |",
        f"| Leaf Classes | {hierarchy.leaf_classes} |",
        f"| Avg Branching Factor | {hierarchy.avg_branching:.1f} |",
        f"| Orphan Classes | {hierarchy.orphan_classes} ({_format_pct(hierarchy.orphan_rate)}) |",
        "",
        "### Properties",
        "",
        *table_head,
        f"| With Domain | {props.with_domain} ({_format_pct(props.domain_coverage)}) |",
        f"| With Range | {props.with_range} ({_format_pct(props.range_coverage)}) |",
        f"| Functional | {props.functional} |",
        f"| Symmetric | {props.symmetric} |",
        "",
        "### Documentation",
        "",
        *table_head,
        f"| Classes Labelled | {docs.classes_labelled} ({_format_pct(docs.classes_labelled_pct)}) |",
        f"| Classes Documented | {docs.classes_documented} ({_format_pct(docs.classes_documented_pct)}) |",
        f"| Properties Labelled | {docs.properties_labelled} ({_format_pct(docs.properties_labelled_pct)}) |",
        "",
        "*Generated by rdf-construct stats*",
    ]

    return "\n".join(lines)
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def format_markdown_comparison(
    comparison: ComparisonResult,
    graph: Optional[Graph] = None,
) -> str:
    """Format comparison results as Markdown.

    The output names both sources, quotes the comparison summary and lists
    every change in a five-column table. Improvements are marked with a
    check mark and degradations with a warning sign.

    Args:
        comparison: The comparison result to format.
        graph: Optional graph; accepted for signature parity with the other
            formatters and not read by this function.

    Returns:
        Markdown string representation.
    """
    lines = [
        "## Ontology Comparison",
        "",
        f"**Old:** `{comparison.old_source}`",
        f"**New:** `{comparison.new_source}`",
        "",
    ]

    if not comparison.changes:
        lines.append("No changes detected.")
        return "\n".join(lines)

    # Summary
    lines.append(f"> {comparison.summary}")
    lines.append("")

    # Changes table
    lines.append("| Category | Metric | Old | New | Change |")
    lines.append("|----------|--------|-----|-----|--------|")

    for change in comparison.changes:
        category = change.category.title()
        metric = change.metric.replace("_", " ").title()
        old_val = _format_value(change.old_value)
        new_val = _format_value(change.new_value)

        if change.delta is None:
            delta_str = "-"
        else:
            if isinstance(change.delta, int):
                # Integer deltas are printed exactly. The "g" presentation
                # type would turn values of a million or more into lossy
                # scientific notation such as "+1e+06".
                delta_str = f"{change.delta:+d}"
            else:
                delta_str = f"{change.delta:+g}"
            if change.pct_change is not None:
                delta_str += f" ({change.pct_change:+.1f}%)"

        # Add indicator
        if change.improved is True:
            delta_str += " ✓"
        elif change.improved is False:
            delta_str += " ⚠"

        lines.append(f"| {category} | {metric} | {old_val} | {new_val} | {delta_str} |")

    lines.append("")
    lines.append("*Generated by rdf-construct stats --compare*")

    return "\n".join(lines)
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def _format_value(value: float | int | str | None) -> str:
|
|
143
|
+
"""Format a metric value for display."""
|
|
144
|
+
if value is None:
|
|
145
|
+
return "-"
|
|
146
|
+
if isinstance(value, float):
|
|
147
|
+
if value < 1:
|
|
148
|
+
# Probably a percentage/rate
|
|
149
|
+
return f"{value * 100:.1f}%"
|
|
150
|
+
return f"{value:.2f}"
|
|
151
|
+
if isinstance(value, int):
|
|
152
|
+
return f"{value:,}"
|
|
153
|
+
return str(value)
|