rdf-construct 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rdf_construct/__init__.py +12 -0
- rdf_construct/__main__.py +0 -0
- rdf_construct/cli.py +3429 -0
- rdf_construct/core/__init__.py +33 -0
- rdf_construct/core/config.py +116 -0
- rdf_construct/core/ordering.py +219 -0
- rdf_construct/core/predicate_order.py +212 -0
- rdf_construct/core/profile.py +157 -0
- rdf_construct/core/selector.py +64 -0
- rdf_construct/core/serialiser.py +232 -0
- rdf_construct/core/utils.py +89 -0
- rdf_construct/cq/__init__.py +77 -0
- rdf_construct/cq/expectations.py +365 -0
- rdf_construct/cq/formatters/__init__.py +45 -0
- rdf_construct/cq/formatters/json.py +104 -0
- rdf_construct/cq/formatters/junit.py +104 -0
- rdf_construct/cq/formatters/text.py +146 -0
- rdf_construct/cq/loader.py +300 -0
- rdf_construct/cq/runner.py +321 -0
- rdf_construct/diff/__init__.py +59 -0
- rdf_construct/diff/change_types.py +214 -0
- rdf_construct/diff/comparator.py +338 -0
- rdf_construct/diff/filters.py +133 -0
- rdf_construct/diff/formatters/__init__.py +71 -0
- rdf_construct/diff/formatters/json.py +192 -0
- rdf_construct/diff/formatters/markdown.py +210 -0
- rdf_construct/diff/formatters/text.py +195 -0
- rdf_construct/docs/__init__.py +60 -0
- rdf_construct/docs/config.py +238 -0
- rdf_construct/docs/extractors.py +603 -0
- rdf_construct/docs/generator.py +360 -0
- rdf_construct/docs/renderers/__init__.py +7 -0
- rdf_construct/docs/renderers/html.py +803 -0
- rdf_construct/docs/renderers/json.py +390 -0
- rdf_construct/docs/renderers/markdown.py +628 -0
- rdf_construct/docs/search.py +278 -0
- rdf_construct/docs/templates/html/base.html.jinja +44 -0
- rdf_construct/docs/templates/html/class.html.jinja +152 -0
- rdf_construct/docs/templates/html/hierarchy.html.jinja +28 -0
- rdf_construct/docs/templates/html/index.html.jinja +110 -0
- rdf_construct/docs/templates/html/instance.html.jinja +90 -0
- rdf_construct/docs/templates/html/namespaces.html.jinja +37 -0
- rdf_construct/docs/templates/html/property.html.jinja +124 -0
- rdf_construct/docs/templates/html/single_page.html.jinja +169 -0
- rdf_construct/lint/__init__.py +75 -0
- rdf_construct/lint/config.py +214 -0
- rdf_construct/lint/engine.py +396 -0
- rdf_construct/lint/formatters.py +327 -0
- rdf_construct/lint/rules.py +692 -0
- rdf_construct/localise/__init__.py +114 -0
- rdf_construct/localise/config.py +508 -0
- rdf_construct/localise/extractor.py +427 -0
- rdf_construct/localise/formatters/__init__.py +36 -0
- rdf_construct/localise/formatters/markdown.py +229 -0
- rdf_construct/localise/formatters/text.py +224 -0
- rdf_construct/localise/merger.py +346 -0
- rdf_construct/localise/reporter.py +356 -0
- rdf_construct/main.py +6 -0
- rdf_construct/merge/__init__.py +165 -0
- rdf_construct/merge/config.py +354 -0
- rdf_construct/merge/conflicts.py +281 -0
- rdf_construct/merge/formatters.py +426 -0
- rdf_construct/merge/merger.py +425 -0
- rdf_construct/merge/migrator.py +339 -0
- rdf_construct/merge/rules.py +377 -0
- rdf_construct/merge/splitter.py +1102 -0
- rdf_construct/puml2rdf/__init__.py +103 -0
- rdf_construct/puml2rdf/config.py +230 -0
- rdf_construct/puml2rdf/converter.py +420 -0
- rdf_construct/puml2rdf/merger.py +200 -0
- rdf_construct/puml2rdf/model.py +202 -0
- rdf_construct/puml2rdf/parser.py +565 -0
- rdf_construct/puml2rdf/validators.py +451 -0
- rdf_construct/refactor/__init__.py +72 -0
- rdf_construct/refactor/config.py +362 -0
- rdf_construct/refactor/deprecator.py +328 -0
- rdf_construct/refactor/formatters/__init__.py +8 -0
- rdf_construct/refactor/formatters/text.py +311 -0
- rdf_construct/refactor/renamer.py +294 -0
- rdf_construct/shacl/__init__.py +56 -0
- rdf_construct/shacl/config.py +166 -0
- rdf_construct/shacl/converters.py +520 -0
- rdf_construct/shacl/generator.py +364 -0
- rdf_construct/shacl/namespaces.py +93 -0
- rdf_construct/stats/__init__.py +29 -0
- rdf_construct/stats/collector.py +178 -0
- rdf_construct/stats/comparator.py +298 -0
- rdf_construct/stats/formatters/__init__.py +83 -0
- rdf_construct/stats/formatters/json.py +38 -0
- rdf_construct/stats/formatters/markdown.py +153 -0
- rdf_construct/stats/formatters/text.py +186 -0
- rdf_construct/stats/metrics/__init__.py +26 -0
- rdf_construct/stats/metrics/basic.py +147 -0
- rdf_construct/stats/metrics/complexity.py +137 -0
- rdf_construct/stats/metrics/connectivity.py +130 -0
- rdf_construct/stats/metrics/documentation.py +128 -0
- rdf_construct/stats/metrics/hierarchy.py +207 -0
- rdf_construct/stats/metrics/properties.py +88 -0
- rdf_construct/uml/__init__.py +22 -0
- rdf_construct/uml/context.py +194 -0
- rdf_construct/uml/mapper.py +371 -0
- rdf_construct/uml/odm_renderer.py +789 -0
- rdf_construct/uml/renderer.py +684 -0
- rdf_construct/uml/uml_layout.py +393 -0
- rdf_construct/uml/uml_style.py +613 -0
- rdf_construct-0.3.0.dist-info/METADATA +496 -0
- rdf_construct-0.3.0.dist-info/RECORD +110 -0
- rdf_construct-0.3.0.dist-info/WHEEL +4 -0
- rdf_construct-0.3.0.dist-info/entry_points.txt +3 -0
- rdf_construct-0.3.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,339 @@
|
|
|
1
|
+
"""Data graph migration for ontology changes.
|
|
2
|
+
|
|
3
|
+
This module handles migrating instance data when ontologies change:
|
|
4
|
+
- Simple URI substitution (renames, namespace changes)
|
|
5
|
+
- Complex CONSTRUCT-style transformations (property splits, type migrations)
|
|
6
|
+
|
|
7
|
+
The migrator is reusable by merge, split, and refactor commands.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from dataclasses import dataclass, field
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from typing import Any
|
|
13
|
+
|
|
14
|
+
from rdflib import Graph, URIRef, Literal, BNode
|
|
15
|
+
from rdflib.namespace import RDF
|
|
16
|
+
|
|
17
|
+
from rdf_construct.merge.config import MigrationRule, DataMigrationConfig
|
|
18
|
+
from rdf_construct.merge.rules import RuleEngine
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@dataclass
class MigrationStats:
    """Counters accumulated while migrating a data graph.

    Attributes:
        subjects_updated: Number of subject URIs substituted
        objects_updated: Number of object URIs substituted
        triples_added: Number of triples added by transformations
        triples_removed: Number of triples removed by transformations
        rules_applied: Count of each rule applied, keyed by rule description
    """

    subjects_updated: int = 0
    objects_updated: int = 0
    triples_added: int = 0
    triples_removed: int = 0
    rules_applied: dict[str, int] = field(default_factory=dict)

    @property
    def total_changes(self) -> int:
        """Total number of changes made across all counters."""
        return sum(
            (
                self.subjects_updated,
                self.objects_updated,
                self.triples_added,
                self.triples_removed,
            )
        )
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
@dataclass
class MigrationResult:
    """Result of a data migration operation.

    Attributes:
        migrated_graph: The migrated RDF graph; None when migration failed
            before a graph was produced (e.g. a data file could not be
            found or parsed)
        stats: Migration statistics (substitutions, additions, removals,
            and per-rule application counts)
        success: Whether migration completed without errors
        error: Error message if success is False, otherwise None
        source_triples: Triple count of the original (pre-migration) data
        result_triples: Triple count of the migrated graph
    """

    migrated_graph: Graph | None = None
    stats: MigrationStats = field(default_factory=MigrationStats)
    success: bool = True
    error: str | None = None
    source_triples: int = 0
    result_triples: int = 0
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
class DataMigrator:
    """Migrates instance data graphs when ontology structure changes.

    Supports two types of migration:

    1. **Simple URI substitution**: Replaces URIs throughout the graph,
       in subject, predicate, and object positions.
       Used for renames, namespace changes, and class/property moves.

    2. **Complex transformations**: SPARQL CONSTRUCT-style rules that can:
       - Split properties (fullName → givenName + familyName)
       - Merge properties
       - Change types (Company → Organisation)
       - Transform values (Fahrenheit → Celsius)

    Example usage:
        migrator = DataMigrator()
        uri_map = {
            URIRef("http://old.org/Class"): URIRef("http://new.org/Class")
        }
        result = migrator.migrate(data_graph, uri_map=uri_map)
    """

    def __init__(self):
        """Initialize the data migrator."""
        self.rule_engine = RuleEngine()

    def migrate(
        self,
        data: Graph,
        uri_map: dict[URIRef, URIRef] | None = None,
        rules: list[MigrationRule] | None = None,
    ) -> MigrationResult:
        """Migrate a data graph.

        Args:
            data: Source data graph to migrate
            uri_map: Simple URI substitution map (old -> new)
            rules: Complex migration rules to apply

        Returns:
            MigrationResult with migrated graph and statistics
        """
        result = MigrationResult()
        result.source_triples = len(data)

        # Create a new graph for the migrated data
        migrated = Graph()

        # Copy namespace bindings
        for prefix, ns in data.namespace_manager.namespaces():
            migrated.bind(prefix, ns)

        # Phase 1: Apply simple URI substitutions
        if uri_map:
            for s, p, o in data:
                new_s = self._substitute_uri(s, uri_map, result.stats, is_subject=True)
                # BUGFIX: the predicate position must be substituted too.
                # Property renames/moves put the property URI in uri_map
                # (build_uri_map_from_namespaces also collects predicate
                # URIs), and instance data uses that URI as the predicate.
                # Predicate substitutions are counted with objects_updated.
                new_p = self._substitute_uri(p, uri_map, result.stats, is_subject=False)
                new_o = self._substitute_uri(o, uri_map, result.stats, is_subject=False)
                migrated.add((new_s, new_p, new_o))
        else:
            # No substitutions, just copy
            for triple in data:
                migrated.add(triple)

        # Phase 2: Apply complex transformation rules
        if rules:
            for rule in rules:
                if rule.type == "rename":
                    # Handle rename rules that weren't in uri_map
                    if rule.from_uri and rule.to_uri:
                        single_map = {
                            URIRef(rule.from_uri): URIRef(rule.to_uri)
                        }
                        migrated = self._apply_uri_substitution(
                            migrated, single_map, result.stats
                        )
                        key = rule.description or "rename"
                        result.stats.rules_applied[key] = (
                            result.stats.rules_applied.get(key, 0) + 1
                        )

                elif rule.type == "transform":
                    changes = self.rule_engine.apply_rule(migrated, rule)
                    result.stats.triples_added += changes.get("added", 0)
                    result.stats.triples_removed += changes.get("removed", 0)
                    key = rule.description or "transform"
                    result.stats.rules_applied[key] = (
                        result.stats.rules_applied.get(key, 0)
                        + changes.get("instances", 0)
                    )

        result.migrated_graph = migrated
        result.result_triples = len(migrated)
        result.success = True

        return result

    def _substitute_uri(
        self,
        term: Any,
        uri_map: dict[URIRef, URIRef],
        stats: MigrationStats,
        is_subject: bool,
    ) -> Any:
        """Substitute a URI if it's in the mapping.

        Non-URIRef terms (literals, blank nodes) pass through unchanged.

        Args:
            term: RDF term to potentially substitute
            uri_map: URI substitution map
            stats: Statistics to update
            is_subject: True for subject position; False for predicate or
                object position (both counted under objects_updated)

        Returns:
            Substituted term or original
        """
        if isinstance(term, URIRef) and term in uri_map:
            if is_subject:
                stats.subjects_updated += 1
            else:
                stats.objects_updated += 1
            return uri_map[term]
        return term

    def _apply_uri_substitution(
        self,
        graph: Graph,
        uri_map: dict[URIRef, URIRef],
        stats: MigrationStats,
    ) -> Graph:
        """Apply URI substitution to an entire graph.

        Args:
            graph: Graph to transform
            uri_map: URI substitution map
            stats: Statistics to update

        Returns:
            New graph with substitutions applied
        """
        new_graph = Graph()

        # Copy namespace bindings, updating if needed
        for prefix, ns in graph.namespace_manager.namespaces():
            new_graph.bind(prefix, ns)

        for s, p, o in graph:
            new_s = self._substitute_uri(s, uri_map, stats, is_subject=True)
            # Predicates participate in substitution (property renames);
            # see the note in migrate().
            new_p = self._substitute_uri(p, uri_map, stats, is_subject=False)
            new_o = self._substitute_uri(o, uri_map, stats, is_subject=False)
            new_graph.add((new_s, new_p, new_o))

        return new_graph

    def build_uri_map_from_namespaces(
        self,
        graph: Graph,
        namespace_remaps: dict[str, str],
    ) -> dict[URIRef, URIRef]:
        """Build a URI map from namespace remappings.

        Scans the graph for all URIs (subjects, predicates, and objects)
        and creates substitution entries for those that fall within
        remapped namespaces.

        Args:
            graph: Graph to scan for URIs
            namespace_remaps: Old namespace -> new namespace mapping

        Returns:
            URI substitution map
        """
        uri_map: dict[URIRef, URIRef] = {}

        # Collect all URIs from the graph, including predicates
        all_uris: set[URIRef] = set()
        for s, p, o in graph:
            if isinstance(s, URIRef):
                all_uris.add(s)
            if isinstance(p, URIRef):
                all_uris.add(p)
            if isinstance(o, URIRef):
                all_uris.add(o)

        # Build substitution map; first matching namespace wins
        for uri in all_uris:
            uri_str = str(uri)
            for old_ns, new_ns in namespace_remaps.items():
                if uri_str.startswith(old_ns):
                    # Replace only the leading namespace portion
                    new_uri_str = uri_str.replace(old_ns, new_ns, 1)
                    uri_map[uri] = URIRef(new_uri_str)
                    break

        return uri_map
|
|
259
|
+
|
|
260
|
+
|
|
261
|
+
def migrate_data_files(
    data_paths: list[Path],
    uri_map: dict[URIRef, URIRef] | None = None,
    rules: list[MigrationRule] | None = None,
    output_path: Path | None = None,
) -> MigrationResult:
    """Convenience function to migrate multiple data files.

    Loads each file, migrates it, and merges everything into one combined
    graph with aggregated statistics. Stops and reports on the first file
    that is missing, unparseable, or fails to migrate.

    Args:
        data_paths: Paths to data files to migrate
        uri_map: URI substitution map
        rules: Migration rules to apply
        output_path: Path to write combined migrated output (Turtle)

    Returns:
        MigrationResult with combined statistics
    """
    migrator = DataMigrator()
    outcome = MigrationResult()
    merged = Graph()
    totals = MigrationStats()

    for path in data_paths:
        # Guard: fail fast on a missing input file
        if not path.exists():
            outcome.success = False
            outcome.error = f"Data file not found: {path}"
            return outcome

        source = Graph()
        try:
            source.parse(path.as_posix())
        except Exception as e:
            outcome.success = False
            outcome.error = f"Failed to parse {path}: {e}"
            return outcome

        # Migrate this file; propagate any per-file failure unchanged
        file_result = migrator.migrate(source, uri_map=uri_map, rules=rules)
        if not file_result.success:
            return file_result

        outcome.source_triples += file_result.source_triples

        # Fold the migrated triples and namespace bindings into the
        # combined graph (truthy check: skipped for empty/missing graphs)
        if file_result.migrated_graph:
            for triple in file_result.migrated_graph:
                merged.add(triple)

            for prefix, ns in file_result.migrated_graph.namespace_manager.namespaces():
                try:
                    # override=False keeps the first binding on prefix clashes
                    merged.bind(prefix, ns, override=False)
                except Exception:
                    pass  # best-effort: ignore unbindable prefixes

        # Aggregate statistics from this file
        fs = file_result.stats
        totals.subjects_updated += fs.subjects_updated
        totals.objects_updated += fs.objects_updated
        totals.triples_added += fs.triples_added
        totals.triples_removed += fs.triples_removed
        for rule_name, count in fs.rules_applied.items():
            totals.rules_applied[rule_name] = (
                totals.rules_applied.get(rule_name, 0) + count
            )

    outcome.migrated_graph = merged
    outcome.stats = totals
    outcome.result_triples = len(merged)
    outcome.success = True

    # Write the combined output if requested (truthy: skip empty graphs)
    if output_path and outcome.migrated_graph:
        output_path.parent.mkdir(parents=True, exist_ok=True)
        outcome.migrated_graph.serialize(
            destination=output_path.as_posix(), format="turtle"
        )

    return outcome
|
|
@@ -0,0 +1,377 @@
|
|
|
1
|
+
"""Migration rule parsing and execution.
|
|
2
|
+
|
|
3
|
+
This module handles complex transformation rules using a SPARQL-like
|
|
4
|
+
pattern matching approach:
|
|
5
|
+
|
|
6
|
+
- Match patterns: Find triples matching a pattern
|
|
7
|
+
- Construct patterns: Create new triples from matches
|
|
8
|
+
- Delete patterns: Remove matched triples
|
|
9
|
+
|
|
10
|
+
This enables structural transformations like:
|
|
11
|
+
- Property splits (fullName → givenName + familyName)
|
|
12
|
+
- Type migrations (Company → Organisation)
|
|
13
|
+
- Value transformations
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
import re
|
|
17
|
+
from dataclasses import dataclass, field
|
|
18
|
+
from typing import Any
|
|
19
|
+
|
|
20
|
+
from rdflib import Graph, URIRef, Literal, BNode, Namespace
|
|
21
|
+
from rdflib.namespace import RDF, RDFS, XSD
|
|
22
|
+
from rdflib.term import Node
|
|
23
|
+
|
|
24
|
+
from rdf_construct.merge.config import MigrationRule
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
@dataclass
class Binding:
    """A variable binding from pattern matching.

    NOTE(review): RuleEngine stores bindings as plain ``dict[str, Node]``
    in ``Match.bindings``; this class is not used by the matching code in
    this module — presumably kept for external API consumers. Confirm
    before removing.

    Attributes:
        variable: Variable name (without ?)
        value: Bound RDF value
    """

    variable: str
    value: Node
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
@dataclass
class Match:
    """A single match result from pattern matching.

    Attributes:
        bindings: Dictionary of variable name (without ?) -> bound value
        matched_triples: The (s, p, o) tuples that produced this match;
            used by RuleEngine when a rule requests deletion of matched
            triples
    """

    bindings: dict[str, Node] = field(default_factory=dict)
    matched_triples: list[tuple] = field(default_factory=list)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
class PatternParser:
    """Parses simple SPARQL-like triple patterns.

    Supports patterns like:
    - "?s ex:fullName ?name"
    - "?s a ex:Company"

    Variables start with ?
    URIs can be full or prefixed (requires namespace context)
    """

    # Pattern for variables like ?s, ?name
    VARIABLE_PATTERN = re.compile(r"\?(\w+)")

    def __init__(self, namespaces: dict[str, Namespace] | None = None):
        """Initialize the parser.

        Args:
            namespaces: Prefix -> Namespace mapping for expanding CURIEs
        """
        self.namespaces = namespaces or {}

    def parse_pattern(self, pattern: str) -> tuple[Any, Any, Any]:
        """Parse a triple pattern into (subject, predicate, object).

        Args:
            pattern: Triple pattern string like "?s ex:hasName ?name"

        Returns:
            Tuple of (subject, predicate, object) where variables are
            represented as strings starting with ?

        Raises:
            ValueError: If the pattern does not split into exactly 3 parts
        """
        # BUGFIX: split first, then expand the "a" keyword. The previous
        # textual replace of " a " ran before splitting and would corrupt
        # quoted literal objects that happen to contain " a ".
        parts = pattern.strip().split(None, 2)
        if len(parts) != 3:
            raise ValueError(f"Invalid pattern (expected 3 parts): {pattern}")

        subject = self._parse_term(parts[0])
        # Turtle/SPARQL shorthand: a bare "a" predicate means rdf:type
        predicate = RDF.type if parts[1] == "a" else self._parse_term(parts[1])
        obj = self._parse_term(parts[2])
        return (subject, predicate, obj)

    def _parse_term(self, term: str) -> Any:
        """Parse a single term from a pattern.

        Args:
            term: Term string (variable, URI, CURIE, or quoted literal)

        Returns:
            Parsed term: the original string (with leading ?) for
            variables, URIRef for URIs/CURIEs, Literal for quoted strings,
            or the bare string for anything else.
        """
        term = term.strip()

        # Variable: keep as a string marker so matchers can detect it
        if term.startswith("?"):
            return term

        # Full URI in angle brackets
        if term.startswith("<") and term.endswith(">"):
            return URIRef(term[1:-1])

        # BUGFIX: quoted literal checked before the CURIE branch, so
        # literals containing ":" (e.g. "time: 10:30") are not mistaken
        # for prefixed names.
        if len(term) >= 2 and term.startswith('"') and term.endswith('"'):
            return Literal(term[1:-1])

        # Absolute http(s) URI without angle brackets
        if term.startswith(("http://", "https://")):
            return URIRef(term)

        # Prefixed name (CURIE)
        if ":" in term:
            prefix, local = term.split(":", 1)
            if prefix in self.namespaces:
                return URIRef(str(self.namespaces[prefix]) + local)
            # If prefix not found, return as-is (may be handled later)
            return URIRef(term)

        # Default: treat as local name (would need base URI)
        return term
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
class RuleEngine:
    """Executes transformation rules on RDF graphs.

    Supports:
    - Pattern matching with variable bindings
    - Triple construction from bindings
    - Simple value transformations (STRBEFORE, STRAFTER)
    """

    def __init__(self):
        """Initialize the rule engine."""
        self.parser = PatternParser()

    def set_namespaces(self, graph: Graph) -> None:
        """Update parser namespaces from a graph.

        Args:
            graph: Graph to extract namespaces from
        """
        self.parser.namespaces = {
            prefix: Namespace(str(ns))
            for prefix, ns in graph.namespace_manager.namespaces()
        }

    def apply_rule(self, graph: Graph, rule: MigrationRule) -> dict[str, int]:
        """Apply a transformation rule to a graph.

        Modifies the graph in place.

        Args:
            graph: Graph to transform
            rule: Transformation rule to apply

        Returns:
            Statistics: {"added": n, "removed": n, "instances": n}
        """
        self.set_namespaces(graph)

        # Only "transform" rules with a match pattern are handled here
        if rule.type != "transform" or not rule.match:
            return {"added": 0, "removed": 0, "instances": 0}

        stats = {"added": 0, "removed": 0, "instances": 0}

        # Find all matches up front so the additions/removals below
        # cannot affect the match set
        matches = self._find_matches(graph, rule.match)

        for match in matches:
            stats["instances"] += 1

            # Construct new triples from the bindings
            if rule.construct:
                for construct_spec in rule.construct:
                    new_triples = self._construct_triples(
                        match, construct_spec, graph
                    )
                    for triple in new_triples:
                        graph.add(triple)
                        stats["added"] += 1

            # Delete matched triples if configured
            if rule.delete_matched:
                for triple in match.matched_triples:
                    graph.remove(triple)
                    stats["removed"] += 1

        return stats

    def _find_matches(self, graph: Graph, pattern_str: str) -> list[Match]:
        """Find all matches for a pattern in the graph.

        Args:
            graph: Graph to search
            pattern_str: Pattern string to match

        Returns:
            List of Match objects with bindings
        """
        try:
            pattern = self.parser.parse_pattern(pattern_str)
        except ValueError:
            return []

        # Fixed positions are pushed down into the triples() query;
        # string positions (variables / unresolved names) become wildcards
        query = tuple(None if isinstance(t, str) else t for t in pattern)

        matches: list[Match] = []
        for s, p, o in graph.triples(query):
            bindings: dict[str, Node] = {}
            # BUGFIX: bind each position with a consistency check, so a
            # variable that appears more than once (e.g. "?x ex:knows ?x")
            # only matches when both occurrences bind to the same value.
            # The previous code silently overwrote the earlier binding.
            if all(
                self._bind(term, value, bindings)
                for term, value in zip(pattern, (s, p, o))
            ):
                matches.append(
                    Match(bindings=bindings, matched_triples=[(s, p, o)])
                )

        return matches

    @staticmethod
    def _bind(term: Any, value: Node, bindings: dict[str, Node]) -> bool:
        """Bind one pattern position to a triple value.

        Returns True when the position accepts the value: a ?-variable
        binds (or re-confirms an identical existing binding); a constant
        must equal the value. Plain strings that are not ?-variables
        (the parser's bare-name fallback) never match — previously they
        were treated as variables with their first character stripped.
        """
        if isinstance(term, str):
            if term.startswith("?"):
                name = term[1:]
                if name in bindings:
                    return bindings[name] == value
                bindings[name] = value
                return True
            return False
        return term == value

    def _construct_triples(
        self,
        match: Match,
        construct_spec: dict[str, str],
        graph: Graph,
    ) -> list[tuple]:
        """Construct new triples from a match and specification.

        Args:
            match: Match with variable bindings
            construct_spec: Construction specification with pattern and optional bind
            graph: Graph for namespace resolution

        Returns:
            List of new triples to add (at most one), or [] when the
            pattern is invalid or a variable is unbound
        """
        pattern_str = construct_spec.get("pattern", "")
        bind_expr = construct_spec.get("bind")

        try:
            pattern = self.parser.parse_pattern(pattern_str)
        except ValueError:
            return []

        # Substitute variables in pattern
        result_s = self._substitute_variable(pattern[0], match.bindings)
        result_p = self._substitute_variable(pattern[1], match.bindings)

        if bind_expr:
            # The object comes from an evaluated bind expression
            result_o = self._evaluate_bind(bind_expr, match.bindings)
        else:
            result_o = self._substitute_variable(pattern[2], match.bindings)

        if result_s is None or result_p is None or result_o is None:
            return []

        return [(result_s, result_p, result_o)]

    def _substitute_variable(
        self, term: Any, bindings: dict[str, Node]
    ) -> Node | None:
        """Substitute a variable with its bound value.

        Args:
            term: Term to substitute (may be a variable string)
            bindings: Variable bindings

        Returns:
            Substituted value, the term itself for non-variables, or
            None if the variable is not bound
        """
        if isinstance(term, str) and term.startswith("?"):
            return bindings.get(term[1:])
        return term

    def _evaluate_bind(
        self, expression: str, bindings: dict[str, Node]
    ) -> Node | None:
        """Evaluate a simple bind expression.

        Supports:
        - STRBEFORE(?var, 'delimiter') - substring before delimiter
        - STRAFTER(?var, 'delimiter') - substring after delimiter
        - Arithmetic of the exact form ((?var - n) * m/d) AS ?result

        Args:
            expression: Bind expression string
            bindings: Variable bindings

        Returns:
            Evaluated value or None
        """
        # STRBEFORE / STRAFTER share one pattern; dispatch on the name
        str_fn = re.match(
            r"(STRBEFORE|STRAFTER)\(\?(\w+),\s*['\"](.+)['\"]\)\s*AS\s*\?(\w+)",
            expression,
            re.IGNORECASE,
        )
        if str_fn:
            fn_name = str_fn.group(1).upper()
            var_name = str_fn.group(2)
            delimiter = str_fn.group(3)
            if var_name in bindings:
                value = str(bindings[var_name])
                idx = value.find(delimiter)
                if idx >= 0:
                    if fn_name == "STRBEFORE":
                        return Literal(value[:idx])
                    return Literal(value[idx + len(delimiter) :])
            return None

        # Simple arithmetic: ((?var - n) * m/d) AS ?result
        arith_match = re.match(
            r"\(\(\?(\w+)\s*-\s*(\d+)\)\s*\*\s*(\d+)/(\d+)\)\s*AS\s*\?(\w+)",
            expression,
        )
        if arith_match:
            var_name = arith_match.group(1)
            sub = float(arith_match.group(2))
            mult = float(arith_match.group(3))
            div = float(arith_match.group(4))
            if var_name in bindings:
                try:
                    value = float(str(bindings[var_name]))
                    result = ((value - sub) * mult) / div
                    return Literal(result, datatype=XSD.decimal)
                except (ValueError, ZeroDivisionError):
                    return None
            return None

        # Unrecognized expression shape
        return None
|