rdf-construct 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rdf_construct/__init__.py +12 -0
- rdf_construct/__main__.py +0 -0
- rdf_construct/cli.py +3429 -0
- rdf_construct/core/__init__.py +33 -0
- rdf_construct/core/config.py +116 -0
- rdf_construct/core/ordering.py +219 -0
- rdf_construct/core/predicate_order.py +212 -0
- rdf_construct/core/profile.py +157 -0
- rdf_construct/core/selector.py +64 -0
- rdf_construct/core/serialiser.py +232 -0
- rdf_construct/core/utils.py +89 -0
- rdf_construct/cq/__init__.py +77 -0
- rdf_construct/cq/expectations.py +365 -0
- rdf_construct/cq/formatters/__init__.py +45 -0
- rdf_construct/cq/formatters/json.py +104 -0
- rdf_construct/cq/formatters/junit.py +104 -0
- rdf_construct/cq/formatters/text.py +146 -0
- rdf_construct/cq/loader.py +300 -0
- rdf_construct/cq/runner.py +321 -0
- rdf_construct/diff/__init__.py +59 -0
- rdf_construct/diff/change_types.py +214 -0
- rdf_construct/diff/comparator.py +338 -0
- rdf_construct/diff/filters.py +133 -0
- rdf_construct/diff/formatters/__init__.py +71 -0
- rdf_construct/diff/formatters/json.py +192 -0
- rdf_construct/diff/formatters/markdown.py +210 -0
- rdf_construct/diff/formatters/text.py +195 -0
- rdf_construct/docs/__init__.py +60 -0
- rdf_construct/docs/config.py +238 -0
- rdf_construct/docs/extractors.py +603 -0
- rdf_construct/docs/generator.py +360 -0
- rdf_construct/docs/renderers/__init__.py +7 -0
- rdf_construct/docs/renderers/html.py +803 -0
- rdf_construct/docs/renderers/json.py +390 -0
- rdf_construct/docs/renderers/markdown.py +628 -0
- rdf_construct/docs/search.py +278 -0
- rdf_construct/docs/templates/html/base.html.jinja +44 -0
- rdf_construct/docs/templates/html/class.html.jinja +152 -0
- rdf_construct/docs/templates/html/hierarchy.html.jinja +28 -0
- rdf_construct/docs/templates/html/index.html.jinja +110 -0
- rdf_construct/docs/templates/html/instance.html.jinja +90 -0
- rdf_construct/docs/templates/html/namespaces.html.jinja +37 -0
- rdf_construct/docs/templates/html/property.html.jinja +124 -0
- rdf_construct/docs/templates/html/single_page.html.jinja +169 -0
- rdf_construct/lint/__init__.py +75 -0
- rdf_construct/lint/config.py +214 -0
- rdf_construct/lint/engine.py +396 -0
- rdf_construct/lint/formatters.py +327 -0
- rdf_construct/lint/rules.py +692 -0
- rdf_construct/localise/__init__.py +114 -0
- rdf_construct/localise/config.py +508 -0
- rdf_construct/localise/extractor.py +427 -0
- rdf_construct/localise/formatters/__init__.py +36 -0
- rdf_construct/localise/formatters/markdown.py +229 -0
- rdf_construct/localise/formatters/text.py +224 -0
- rdf_construct/localise/merger.py +346 -0
- rdf_construct/localise/reporter.py +356 -0
- rdf_construct/main.py +6 -0
- rdf_construct/merge/__init__.py +165 -0
- rdf_construct/merge/config.py +354 -0
- rdf_construct/merge/conflicts.py +281 -0
- rdf_construct/merge/formatters.py +426 -0
- rdf_construct/merge/merger.py +425 -0
- rdf_construct/merge/migrator.py +339 -0
- rdf_construct/merge/rules.py +377 -0
- rdf_construct/merge/splitter.py +1102 -0
- rdf_construct/puml2rdf/__init__.py +103 -0
- rdf_construct/puml2rdf/config.py +230 -0
- rdf_construct/puml2rdf/converter.py +420 -0
- rdf_construct/puml2rdf/merger.py +200 -0
- rdf_construct/puml2rdf/model.py +202 -0
- rdf_construct/puml2rdf/parser.py +565 -0
- rdf_construct/puml2rdf/validators.py +451 -0
- rdf_construct/refactor/__init__.py +72 -0
- rdf_construct/refactor/config.py +362 -0
- rdf_construct/refactor/deprecator.py +328 -0
- rdf_construct/refactor/formatters/__init__.py +8 -0
- rdf_construct/refactor/formatters/text.py +311 -0
- rdf_construct/refactor/renamer.py +294 -0
- rdf_construct/shacl/__init__.py +56 -0
- rdf_construct/shacl/config.py +166 -0
- rdf_construct/shacl/converters.py +520 -0
- rdf_construct/shacl/generator.py +364 -0
- rdf_construct/shacl/namespaces.py +93 -0
- rdf_construct/stats/__init__.py +29 -0
- rdf_construct/stats/collector.py +178 -0
- rdf_construct/stats/comparator.py +298 -0
- rdf_construct/stats/formatters/__init__.py +83 -0
- rdf_construct/stats/formatters/json.py +38 -0
- rdf_construct/stats/formatters/markdown.py +153 -0
- rdf_construct/stats/formatters/text.py +186 -0
- rdf_construct/stats/metrics/__init__.py +26 -0
- rdf_construct/stats/metrics/basic.py +147 -0
- rdf_construct/stats/metrics/complexity.py +137 -0
- rdf_construct/stats/metrics/connectivity.py +130 -0
- rdf_construct/stats/metrics/documentation.py +128 -0
- rdf_construct/stats/metrics/hierarchy.py +207 -0
- rdf_construct/stats/metrics/properties.py +88 -0
- rdf_construct/uml/__init__.py +22 -0
- rdf_construct/uml/context.py +194 -0
- rdf_construct/uml/mapper.py +371 -0
- rdf_construct/uml/odm_renderer.py +789 -0
- rdf_construct/uml/renderer.py +684 -0
- rdf_construct/uml/uml_layout.py +393 -0
- rdf_construct/uml/uml_style.py +613 -0
- rdf_construct-0.3.0.dist-info/METADATA +496 -0
- rdf_construct-0.3.0.dist-info/RECORD +110 -0
- rdf_construct-0.3.0.dist-info/WHEEL +4 -0
- rdf_construct-0.3.0.dist-info/entry_points.txt +3 -0
- rdf_construct-0.3.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,321 @@
|
|
|
1
|
+
"""Test execution engine for competency question tests.
|
|
2
|
+
|
|
3
|
+
Runs SPARQL queries against RDF graphs and checks results against expectations.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import time
|
|
7
|
+
from dataclasses import dataclass, field
|
|
8
|
+
from enum import Enum
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
|
|
11
|
+
from rdflib import Graph
|
|
12
|
+
|
|
13
|
+
from rdf_construct.cq.loader import CQTest, CQTestSuite, build_query_with_prefixes
|
|
14
|
+
from rdf_construct.cq.expectations import CheckResult
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class CQStatus(Enum):
    """Possible outcomes recorded for a competency question test."""

    # The expectation check reported success.
    PASS = "pass"
    # The expectation check reported failure.
    FAIL = "fail"
    # An exception was raised while running the test.
    ERROR = "error"
    # The test was flagged to be skipped and was not executed.
    SKIP = "skip"
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
@dataclass
class CQTestResult:
    """Outcome of executing one competency question test.

    Attributes:
        test: The test this result belongs to
        status: Pass/fail/error/skip status
        duration_ms: Execution time in milliseconds
        result_count: Number of rows returned, or None when not applicable
        check_result: Detailed result produced by the expectation check
        error: Error text, set when the test raised an exception
    """
    test: CQTest
    status: CQStatus
    duration_ms: float = 0.0
    result_count: int | None = None
    check_result: CheckResult | None = None
    error: str | None = None

    @property
    def passed(self) -> bool:
        """Whether the status is PASS."""
        return self.status == CQStatus.PASS

    @property
    def message(self) -> str:
        """Human-readable description of the outcome.

        Precedence: the error text, then the expectation's message, then
        the skip reason (or "Skipped") for skipped tests; otherwise an
        empty string.
        """
        if self.error:
            return self.error
        if self.check_result:
            return self.check_result.message
        was_skipped = self.status == CQStatus.SKIP
        return (self.test.skip_reason or "Skipped") if was_skipped else ""
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
@dataclass
class CQTestResults:
    """Aggregate outcome of executing a whole test suite.

    Attributes:
        suite: The test suite that was executed
        results: One result per executed test
        total_duration_ms: Total execution time in milliseconds
        ontology_file: Path of the ontology file that was tested, if known
    """
    suite: CQTestSuite
    results: list[CQTestResult] = field(default_factory=list)
    total_duration_ms: float = 0.0
    ontology_file: Path | None = None

    @property
    def total_count(self) -> int:
        """How many results were recorded."""
        return len(self.results)

    @property
    def passed_count(self) -> int:
        """How many results have status PASS."""
        return len([r for r in self.results if r.status == CQStatus.PASS])

    @property
    def failed_count(self) -> int:
        """How many results have status FAIL."""
        return len([r for r in self.results if r.status == CQStatus.FAIL])

    @property
    def error_count(self) -> int:
        """How many results have status ERROR."""
        return len([r for r in self.results if r.status == CQStatus.ERROR])

    @property
    def skipped_count(self) -> int:
        """How many results have status SKIP."""
        return len([r for r in self.results if r.status == CQStatus.SKIP])

    @property
    def all_passed(self) -> bool:
        """True when no result is FAIL or ERROR (skips are ignored)."""
        return not (self.failed_count != 0 or self.error_count != 0)

    @property
    def has_failures(self) -> bool:
        """True when at least one result is FAIL."""
        return self.failed_count != 0

    @property
    def has_errors(self) -> bool:
        """True when at least one result is ERROR."""
        return self.error_count != 0
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
class _MaterialisedRows:
    """Re-iterable holder for query rows that were read into a list.

    Offers iteration, truthiness and ``len()`` over the stored rows so the
    same rows can be counted and then passed to an expectation check.
    """

    def __init__(self, rows):
        self.rows = rows

    def __iter__(self):
        return iter(self.rows)

    def __bool__(self):
        return len(self.rows) > 0

    def __len__(self):
        return len(self.rows)


class CQTestRunner:
    """Runner for executing competency question tests.

    Handles:
    - Combining the ontology graph with the suite's sample data
    - Injecting prefixes into queries
    - Executing SPARQL queries
    - Checking results against expectations
    - Timing and error handling

    Args:
        fail_fast: Stop on first failure or error if True
        verbose: Stored on the instance; not read by the runner itself
    """

    def __init__(self, fail_fast: bool = False, verbose: bool = False):
        self.fail_fast = fail_fast
        self.verbose = verbose

    def run(self, ontology: Graph, suite: CQTestSuite,
            ontology_file: Path | None = None) -> CQTestResults:
        """Run all tests in a suite against an ontology.

        When the suite has a non-empty data graph the tests run against
        ``ontology + suite.data_graph``; otherwise they run against
        ``ontology`` itself, in which case the suite prefixes are bound on
        the caller's graph.

        Args:
            ontology: RDF graph containing the ontology
            suite: Test suite to execute
            ontology_file: Optional path, stored on the results for reporting

        Returns:
            CQTestResults with one entry per executed test
        """
        started = time.perf_counter()

        # Combine ontology with test data if present
        graph = ontology + suite.data_graph if suite.data_graph else ontology

        # Bind prefixes for query execution
        for prefix, uri in suite.prefixes.items():
            graph.bind(prefix, uri)

        results: list[CQTestResult] = []
        for test in suite.questions:
            outcome = self._run_test(graph, test, suite.prefixes)
            results.append(outcome)

            # In fail-fast mode the first FAIL or ERROR ends the run; later
            # tests are not executed and do not appear in the results.
            if self.fail_fast and outcome.status in (CQStatus.FAIL, CQStatus.ERROR):
                break

        return CQTestResults(
            suite=suite,
            results=results,
            total_duration_ms=(time.perf_counter() - started) * 1000,
            ontology_file=ontology_file,
        )

    @staticmethod
    def _prepare_check_input(result):
        """Normalise a query result for expectation checking.

        Args:
            result: Object returned by ``graph.query``

        Returns:
            ``(check_input, result_count)``. For a result whose ``type``
            attribute is ``'ASK'`` the result is passed through unchanged
            and the count is None. Any other result is read into a list
            (so it can be counted and still iterated by the expectation)
            and wrapped, with the count set to the number of rows.
        """
        if getattr(result, 'type', None) == 'ASK':
            return result, None

        rows = list(result)
        return _MaterialisedRows(rows), len(rows)

    def _run_test(self, graph: Graph, test: CQTest,
                  prefixes: dict[str, str]) -> CQTestResult:
        """Run a single test.

        Args:
            graph: Combined ontology + data graph
            test: Test to run
            prefixes: Prefix definitions for query injection

        Returns:
            CQTestResult with SKIP for skipped tests, PASS/FAIL according
            to the expectation check, or ERROR if any exception was raised
            while building, executing or checking the query.
        """
        # Skipped tests are reported without touching the graph
        if test.skip:
            return CQTestResult(test=test, status=CQStatus.SKIP)

        started = time.perf_counter()

        try:
            full_query = build_query_with_prefixes(test.query, prefixes)
            result = graph.query(full_query)
            check_input, result_count = self._prepare_check_input(result)
            check_result = test.expectation.check(check_input)
        except Exception as e:
            # Any failure of a single test is captured as an ERROR result
            # so the remaining tests in the suite can still be run.
            return CQTestResult(
                test=test,
                status=CQStatus.ERROR,
                duration_ms=(time.perf_counter() - started) * 1000,
                error=f"{type(e).__name__}: {e}",
            )

        return CQTestResult(
            test=test,
            status=CQStatus.PASS if check_result.passed else CQStatus.FAIL,
            duration_ms=(time.perf_counter() - started) * 1000,
            result_count=result_count,
            check_result=check_result,
        )
|
|
257
|
+
|
|
258
|
+
|
|
259
|
+
def run_tests(ontology_path: Path, test_suite_path: Path,
              additional_data: list[Path] | None = None,
              include_tags: set[str] | None = None,
              exclude_tags: set[str] | None = None,
              fail_fast: bool = False,
              verbose: bool = False) -> CQTestResults:
    """Load an ontology and a test suite from disk and run the suite.

    Args:
        ontology_path: Path to ontology file
        test_suite_path: Path to test suite YAML
        additional_data: Extra data files parsed into the same graph as
            the ontology
        include_tags: Only run tests with these tags
        exclude_tags: Exclude tests with these tags
        fail_fast: Stop on first failure
        verbose: Verbose output

    Returns:
        CQTestResults

    Raises:
        FileNotFoundError: If files don't exist
        ValueError: If parsing fails
    """
    from .loader import load_test_suite

    # Parse the ontology, then any extra data files, into one graph.
    # Each file's RDF format is inferred from its extension.
    ontology = Graph()
    for source_path in [ontology_path, *(additional_data or [])]:
        ontology.parse(str(source_path), format=_format_from_path(source_path))

    suite = load_test_suite(test_suite_path)

    # Tag filtering is applied only when at least one tag set is non-empty
    tags_requested = bool(include_tags) or bool(exclude_tags)
    if tags_requested:
        suite = suite.filter_by_tags(include_tags, exclude_tags)

    return CQTestRunner(fail_fast=fail_fast, verbose=verbose).run(
        ontology, suite, ontology_file=ontology_path
    )
|
|
304
|
+
|
|
305
|
+
|
|
306
|
+
def _format_from_path(path: Path) -> str:
    """Map a file's extension to an RDF format name.

    The extension is compared case-insensitively. Unrecognised or missing
    extensions fall back to "turtle".
    """
    extension = path.suffix.lower()

    if extension in (".rdf", ".xml", ".owl"):
        return "xml"
    if extension in (".nt", ".ntriples"):
        return "nt"
    if extension == ".n3":
        return "n3"
    if extension in (".jsonld", ".json"):
        return "json-ld"

    # ".ttl", ".turtle" and every other extension
    return "turtle"
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
"""Semantic diff for RDF ontologies.
|
|
2
|
+
|
|
3
|
+
This module provides tools for comparing RDF graphs and identifying
|
|
4
|
+
semantic differences, filtering out cosmetic changes like statement
|
|
5
|
+
order, prefix bindings, and whitespace.
|
|
6
|
+
|
|
7
|
+
Usage:
|
|
8
|
+
from rdf_construct.diff import compare_files, format_diff
|
|
9
|
+
|
|
10
|
+
diff = compare_files(Path("old.ttl"), Path("new.ttl"))
|
|
11
|
+
print(format_diff(diff, format_name="text"))
|
|
12
|
+
|
|
13
|
+
CLI:
|
|
14
|
+
rdf-construct diff old.ttl new.ttl
|
|
15
|
+
rdf-construct diff old.ttl new.ttl --format markdown -o changelog.md
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
from rdf_construct.diff.change_types import (
|
|
19
|
+
ChangeType,
|
|
20
|
+
EntityChange,
|
|
21
|
+
EntityType,
|
|
22
|
+
GraphDiff,
|
|
23
|
+
PredicateCategory,
|
|
24
|
+
TripleChange,
|
|
25
|
+
)
|
|
26
|
+
from rdf_construct.diff.comparator import compare_graphs, compare_files
|
|
27
|
+
from rdf_construct.diff.filters import filter_diff, parse_filter_string
|
|
28
|
+
from rdf_construct.diff.formatters import (
|
|
29
|
+
format_diff,
|
|
30
|
+
format_text,
|
|
31
|
+
format_markdown,
|
|
32
|
+
format_json,
|
|
33
|
+
get_formatter,
|
|
34
|
+
FORMATTERS,
|
|
35
|
+
)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
# Public API of the package, grouped by area.
__all__ = [
    # Data classes and enums describing changes
    "ChangeType", "EntityChange", "EntityType",
    "GraphDiff", "PredicateCategory", "TripleChange",
    # Graph and file comparison
    "compare_graphs", "compare_files",
    # Filtering of diff results
    "filter_diff", "parse_filter_string",
    # Output formatting
    "format_diff", "format_text", "format_markdown",
    "format_json", "get_formatter", "FORMATTERS",
]
|
|
@@ -0,0 +1,214 @@
|
|
|
1
|
+
"""Data classes for representing semantic changes between RDF graphs.
|
|
2
|
+
|
|
3
|
+
This module defines the type hierarchy for diff results:
|
|
4
|
+
- EntityChange: Changes to a single entity (added/removed/modified)
|
|
5
|
+
- TripleChange: Individual triple-level changes
|
|
6
|
+
- GraphDiff: Complete diff result containing all changes
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from dataclasses import dataclass, field
|
|
10
|
+
from enum import Enum, auto
|
|
11
|
+
from typing import Any
|
|
12
|
+
|
|
13
|
+
from rdflib import URIRef, BNode, Literal
|
|
14
|
+
from rdflib.term import Node
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class ChangeType(Enum):
    """Kind of change: added, removed or modified."""

    # Values are assigned automatically in declaration order.
    ADDED = auto()
    REMOVED = auto()
    MODIFIED = auto()
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class EntityType(Enum):
    """Kinds of RDF entity distinguished in diff results."""

    CLASS = "class"
    # Property kinds
    OBJECT_PROPERTY = "object_property"
    DATATYPE_PROPERTY = "datatype_property"
    ANNOTATION_PROPERTY = "annotation_property"
    # Non-property kinds
    INDIVIDUAL = "individual"
    ONTOLOGY = "ontology"
    OTHER = "other"
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class PredicateCategory(Enum):
    """Groupings of predicates used to make diff output easier to read."""

    TYPE = "type"
    HIERARCHY = "hierarchy"
    DOMAIN_RANGE = "domain_range"
    LABEL = "label"
    DOCUMENTATION = "documentation"
    OWL_AXIOM = "owl_axiom"
    # Default for predicates that match no other category
    OTHER = "other"
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
@dataclass
class TripleChange:
    """One triple that was added to or removed from an entity.

    Attributes:
        predicate: Predicate of the changed triple.
        object: Object of the changed triple.
        is_addition: True for an added triple, False for a removed one.
        category: Semantic category of the predicate.
    """

    predicate: URIRef
    object: Node
    is_addition: bool
    category: PredicateCategory = PredicateCategory.OTHER

    def __post_init__(self):
        """Derive the category from the predicate when it is still OTHER."""
        if self.category != PredicateCategory.OTHER:
            # A specific category was supplied; keep it as given.
            return
        self.category = categorise_predicate(self.predicate)
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
@dataclass
class EntityChange:
    """Everything that changed for one entity.

    Attributes:
        uri: Identifier of the changed entity (URI or blank node).
        entity_type: Kind of entity (class, property, individual, etc.).
        change_type: Whether the entity was added, removed, or modified.
        label: Human-readable label, if one is available.
        added_triples: Triples added to this entity.
        removed_triples: Triples removed from this entity.
        superclasses: Superclasses (for classes) or None.
    """

    uri: URIRef | BNode
    entity_type: EntityType
    change_type: ChangeType
    label: str | None = None
    added_triples: list[TripleChange] = field(default_factory=list)
    removed_triples: list[TripleChange] = field(default_factory=list)
    superclasses: list[URIRef] | None = None

    @property
    def is_blank_node(self) -> bool:
        """Whether the entity's identifier is a blank node."""
        identifier = self.uri
        return isinstance(identifier, BNode)

    @property
    def all_changes(self) -> list[TripleChange]:
        """Added triples followed by removed triples, as one new list."""
        additions, removals = self.added_triples, self.removed_triples
        return additions + removals
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
@dataclass
class GraphDiff:
    """Full outcome of comparing two RDF graphs.

    Attributes:
        old_path: Path/name of the old graph.
        new_path: Path/name of the new graph.
        added: Entities that exist only in the new graph.
        removed: Entities that exist only in the old graph.
        modified: Entities that exist in both but have changes.
        blank_node_warning: True if blank nodes were encountered.
    """

    old_path: str
    new_path: str
    added: list[EntityChange] = field(default_factory=list)
    removed: list[EntityChange] = field(default_factory=list)
    modified: list[EntityChange] = field(default_factory=list)
    blank_node_warning: bool = False

    @property
    def is_identical(self) -> bool:
        """True when nothing was added, removed or modified."""
        return not (self.added or self.removed or self.modified)

    @property
    def summary(self) -> dict[str, int]:
        """Number of added, removed and modified entities."""
        counts: dict[str, int] = {}
        counts["added"] = len(self.added)
        counts["removed"] = len(self.removed)
        counts["modified"] = len(self.modified)
        return counts

    def entities_by_type(
        self, change_type: ChangeType
    ) -> dict[EntityType, list[EntityChange]]:
        """Group the entities of one change type by their entity type.

        Args:
            change_type: ADDED, REMOVED, or MODIFIED. Any value other than
                ADDED or REMOVED selects the modified entities.

        Returns:
            Dictionary mapping EntityType to list of EntityChange, with
            keys in order of first appearance.
        """
        selected = (
            self.added
            if change_type == ChangeType.ADDED
            else self.removed
            if change_type == ChangeType.REMOVED
            else self.modified
        )

        grouped: dict[EntityType, list[EntityChange]] = {}
        for change in selected:
            grouped.setdefault(change.entity_type, []).append(change)
        return grouped
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
def categorise_predicate(predicate: URIRef) -> PredicateCategory:
    """Categorise a predicate for human-readable output.

    The category is chosen with substring heuristics on the predicate's
    string form. The checks run in a fixed order and the first match wins:
    type, hierarchy, domain/range, label, documentation, OWL axiom, other.

    Args:
        predicate: The predicate URI to categorise

    Returns:
        The semantic category of the predicate
    """
    pred_str = str(predicate)
    pred_lower = pred_str.lower()

    # Type predicates
    if pred_str.endswith("type") or "rdf-syntax-ns#type" in pred_str:
        return PredicateCategory.TYPE

    # Hierarchy predicates (matched case-sensitively on the local names)
    hierarchy_terms = (
        "subClassOf",
        "subPropertyOf",
        "equivalentClass",
        "equivalentProperty",
    )
    if any(term in pred_str for term in hierarchy_terms):
        return PredicateCategory.HIERARCHY

    # Domain/range
    if "domain" in pred_str or "range" in pred_str:
        return PredicateCategory.DOMAIN_RANGE

    # Labels: the case-insensitive test also covers "prefLabel"
    if "label" in pred_lower:
        return PredicateCategory.LABEL

    # Documentation
    documentation_terms = ("comment", "description", "definition", "note")
    if any(term in pred_lower for term in documentation_terms):
        return PredicateCategory.DOCUMENTATION

    # OWL axioms: keywords are lowercase, so they are matched against the
    # lowercased string. Matching the original-case string would miss
    # camelCase names such as "minCardinality" or "propertyDisjointWith".
    owl_axiom_terms = (
        "disjoint",
        "inverse",
        "functional",
        "cardinality",
        "restriction",
        "union",
        "intersection",
    )
    if "owl" in pred_lower and any(term in pred_lower for term in owl_axiom_terms):
        return PredicateCategory.OWL_AXIOM

    return PredicateCategory.OTHER
|