rdf-construct 0.2.1__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rdf_construct/__init__.py +1 -1
- rdf_construct/cli.py +1794 -0
- rdf_construct/describe/__init__.py +93 -0
- rdf_construct/describe/analyzer.py +176 -0
- rdf_construct/describe/documentation.py +146 -0
- rdf_construct/describe/formatters/__init__.py +47 -0
- rdf_construct/describe/formatters/json.py +65 -0
- rdf_construct/describe/formatters/markdown.py +275 -0
- rdf_construct/describe/formatters/text.py +315 -0
- rdf_construct/describe/hierarchy.py +232 -0
- rdf_construct/describe/imports.py +213 -0
- rdf_construct/describe/metadata.py +187 -0
- rdf_construct/describe/metrics.py +145 -0
- rdf_construct/describe/models.py +552 -0
- rdf_construct/describe/namespaces.py +180 -0
- rdf_construct/describe/profiles.py +415 -0
- rdf_construct/localise/__init__.py +114 -0
- rdf_construct/localise/config.py +508 -0
- rdf_construct/localise/extractor.py +427 -0
- rdf_construct/localise/formatters/__init__.py +36 -0
- rdf_construct/localise/formatters/markdown.py +229 -0
- rdf_construct/localise/formatters/text.py +224 -0
- rdf_construct/localise/merger.py +346 -0
- rdf_construct/localise/reporter.py +356 -0
- rdf_construct/merge/__init__.py +165 -0
- rdf_construct/merge/config.py +354 -0
- rdf_construct/merge/conflicts.py +281 -0
- rdf_construct/merge/formatters.py +426 -0
- rdf_construct/merge/merger.py +425 -0
- rdf_construct/merge/migrator.py +339 -0
- rdf_construct/merge/rules.py +377 -0
- rdf_construct/merge/splitter.py +1102 -0
- rdf_construct/refactor/__init__.py +72 -0
- rdf_construct/refactor/config.py +362 -0
- rdf_construct/refactor/deprecator.py +328 -0
- rdf_construct/refactor/formatters/__init__.py +8 -0
- rdf_construct/refactor/formatters/text.py +311 -0
- rdf_construct/refactor/renamer.py +294 -0
- {rdf_construct-0.2.1.dist-info → rdf_construct-0.4.0.dist-info}/METADATA +91 -6
- {rdf_construct-0.2.1.dist-info → rdf_construct-0.4.0.dist-info}/RECORD +43 -7
- {rdf_construct-0.2.1.dist-info → rdf_construct-0.4.0.dist-info}/WHEEL +0 -0
- {rdf_construct-0.2.1.dist-info → rdf_construct-0.4.0.dist-info}/entry_points.txt +0 -0
- {rdf_construct-0.2.1.dist-info → rdf_construct-0.4.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,425 @@
|
|
|
1
|
+
"""Core merge logic for combining RDF ontologies.
|
|
2
|
+
|
|
3
|
+
This module provides the main OntologyMerger class that:
|
|
4
|
+
- Loads multiple source ontology files
|
|
5
|
+
- Detects and resolves conflicts
|
|
6
|
+
- Handles namespace remapping
|
|
7
|
+
- Manages owl:imports statements
|
|
8
|
+
- Writes merged output with conflict markers
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from dataclasses import dataclass, field
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
from typing import TextIO
|
|
14
|
+
|
|
15
|
+
from rdflib import Graph, URIRef, Namespace
|
|
16
|
+
from rdflib.namespace import RDF, RDFS, OWL
|
|
17
|
+
|
|
18
|
+
from rdf_construct.merge.config import (
|
|
19
|
+
MergeConfig,
|
|
20
|
+
SourceConfig,
|
|
21
|
+
ConflictStrategy,
|
|
22
|
+
ImportsStrategy,
|
|
23
|
+
)
|
|
24
|
+
from rdf_construct.merge.conflicts import (
|
|
25
|
+
Conflict,
|
|
26
|
+
ConflictDetector,
|
|
27
|
+
SourceGraph,
|
|
28
|
+
generate_conflict_marker,
|
|
29
|
+
generate_conflict_end_marker,
|
|
30
|
+
)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
@dataclass
class MergeResult:
    """Outcome of one merge run.

    Every field has a default, so an empty ``MergeResult()`` describes a
    successful run with no graph and no conflicts; the merger fills the
    fields in as it progresses.

    Attributes:
        merged_graph: The combined RDF graph, or None if none was built
        conflicts: Every conflict that was detected
        resolved_conflicts: The subset of conflicts that were resolved
        unresolved_conflicts: The subset still needing manual attention
        source_stats: Triple count recorded per source
        total_triples: Number of triples in the merged graph
        success: False when the merge had to stop with an error
        error: Human-readable error message when success is False
    """

    merged_graph: Graph | None = None
    conflicts: list[Conflict] = field(default_factory=list)
    resolved_conflicts: list[Conflict] = field(default_factory=list)
    unresolved_conflicts: list[Conflict] = field(default_factory=list)
    source_stats: dict[str, int] = field(default_factory=dict)
    total_triples: int = 0
    success: bool = True
    error: str | None = None

    @property
    def has_conflicts(self) -> bool:
        """Return True when at least one conflict was detected."""
        return bool(self.conflicts)

    @property
    def has_unresolved(self) -> bool:
        """Return True when at least one conflict is still unresolved."""
        return bool(self.unresolved_conflicts)
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
class OntologyMerger:
    """Merges multiple RDF ontology files with conflict detection.

    The merger:
    1. Loads all source files with priority metadata
    2. Builds a unified namespace map
    3. Detects conflicts (same subject+predicate, different values)
    4. Resolves conflicts according to the configured strategy
    5. Marks unresolved conflicts in the output
    6. Handles owl:imports according to configuration
    """

    # File suffix (lower-cased) -> rdflib parser name. Suffixes that are not
    # listed fall back to Turtle in _load_source.
    _FORMAT_BY_SUFFIX = {
        ".ttl": "turtle",
        ".turtle": "turtle",
        ".rdf": "xml",
        ".xml": "xml",
        ".owl": "xml",
        ".n3": "n3",
        ".nt": "nt",
        ".ntriples": "nt",
        ".jsonld": "json-ld",
        ".json": "json-ld",
    }

    def __init__(self, config: MergeConfig):
        """Initialize the merger.

        Args:
            config: Merge configuration
        """
        self.config = config
        self.detector = ConflictDetector(
            ignore_predicates=config.conflicts.ignore_predicates
        )

    def merge(self) -> MergeResult:
        """Execute the merge operation.

        Loading stops at the first source that fails; in that case (and when
        there are no sources at all) the returned result has ``success`` set
        to False and ``error`` populated instead of an exception being raised.

        Returns:
            MergeResult with merged graph and conflict information
        """
        result = MergeResult()

        # Load all source graphs
        sources: list[SourceGraph] = []
        for src_config in self.config.sources:
            try:
                source = self._load_source(src_config)
                sources.append(source)
                result.source_stats[src_config.path.name] = source.triple_count
            except Exception as e:
                # Deliberately broad: any load failure is reported through
                # the result object rather than propagated to the caller.
                result.success = False
                result.error = f"Failed to load {src_config.path}: {e}"
                return result

        if not sources:
            result.success = False
            result.error = "No source files to merge"
            return result

        # Detect conflicts
        result.conflicts = self.detector.detect_conflicts(sources)

        # Resolve conflicts
        self._resolve_conflicts(result.conflicts, result)

        # Create merged graph
        result.merged_graph = self._create_merged_graph(sources, result)
        result.total_triples = len(result.merged_graph)

        return result

    def _load_source(self, src_config: SourceConfig) -> SourceGraph:
        """Load a single source file.

        The parser is chosen from the file suffix; unknown suffixes are
        parsed as Turtle.

        Args:
            src_config: Configuration for this source

        Returns:
            SourceGraph with loaded data

        Raises:
            FileNotFoundError: If source file doesn't exist
            ValueError: If source file can't be parsed
        """
        if not src_config.path.exists():
            raise FileNotFoundError(f"Source file not found: {src_config.path}")

        graph = Graph()

        # Determine format from extension
        ext = src_config.path.suffix.lower()
        rdf_format = self._FORMAT_BY_SUFFIX.get(ext, "turtle")

        try:
            graph.parse(src_config.path.as_posix(), format=rdf_format)
        except Exception as e:
            # Chain the parser error so the original traceback is preserved.
            raise ValueError(f"Failed to parse {src_config.path}: {e}") from e

        # Apply namespace remapping if configured
        if src_config.namespace_remap:
            graph = self._remap_namespaces(graph, src_config.namespace_remap)

        return SourceGraph(
            graph=graph,
            path=str(src_config.path),
            priority=src_config.priority,
        )

    def _remap_namespaces(
        self, graph: Graph, remappings: dict[str, str]
    ) -> Graph:
        """Remap namespaces in a graph.

        Args:
            graph: Source graph to remap
            remappings: Mapping of old namespace -> new namespace

        Returns:
            New graph with remapped URIs, or the input graph itself when
            there is nothing to remap
        """
        if not remappings:
            return graph

        new_graph = Graph()

        # Copy namespace bindings, pointing remapped prefixes at the new
        # namespace so serialized output keeps its prefixes.
        for prefix, ns in graph.namespace_manager.namespaces():
            ns_str = str(ns)
            if ns_str in remappings:
                new_graph.bind(prefix, Namespace(remappings[ns_str]))
            else:
                new_graph.bind(prefix, ns)

        # Remap triples. _remap_uri leaves anything that is not a URIRef
        # (blank nodes, literals) untouched, so it is safe for all positions.
        for s, p, o in graph:
            new_graph.add(
                (
                    self._remap_uri(s, remappings),
                    self._remap_uri(p, remappings),
                    self._remap_uri(o, remappings),
                )
            )

        return new_graph

    def _remap_uri(self, uri: URIRef, remappings: dict[str, str]) -> URIRef:
        """Remap a single URI according to namespace remappings.

        The first remapping (in dict order) whose old namespace is a prefix
        of the URI is applied.

        Args:
            uri: URI to remap; terms that are not URIRef instances are
                returned unchanged
            remappings: Namespace remapping rules

        Returns:
            Remapped URI or original if no mapping applies
        """
        if not isinstance(uri, URIRef):
            # Blank nodes and literals have no namespace to rewrite and must
            # never be converted into URIRefs.
            return uri

        uri_str = str(uri)
        for old_ns, new_ns in remappings.items():
            if uri_str.startswith(old_ns):
                return URIRef(uri_str.replace(old_ns, new_ns, 1))
        return uri

    def _resolve_conflicts(
        self, conflicts: list[Conflict], result: MergeResult
    ) -> None:
        """Resolve conflicts according to configured strategy.

        Each conflict is then filed under ``result.resolved_conflicts`` or
        ``result.unresolved_conflicts`` depending on its ``is_resolved`` flag.

        Args:
            conflicts: Detected conflicts to resolve
            result: MergeResult to update with resolution info
        """
        strategy = self.config.conflicts.strategy

        for conflict in conflicts:
            if strategy == ConflictStrategy.PRIORITY:
                conflict.resolve_by_priority()
            elif strategy == ConflictStrategy.FIRST:
                conflict.resolve_by_first()
            elif strategy == ConflictStrategy.LAST:
                conflict.resolve_by_last()
            # MARK_ALL leaves conflicts unresolved

            if conflict.is_resolved:
                result.resolved_conflicts.append(conflict)
            else:
                result.unresolved_conflicts.append(conflict)

    def _create_merged_graph(
        self, sources: list[SourceGraph], result: MergeResult
    ) -> Graph:
        """Create the merged graph from sources.

        Args:
            sources: Source graphs to merge
            result: MergeResult with conflict information (not read here)

        Returns:
            Merged RDF graph
        """
        # NOTE(review): `result` is never consulted, and every triple from
        # every source is copied below, so values that lost a resolved
        # conflict are still present in the merged graph. Confirm whether
        # resolved conflicts are meant to filter the output.
        merged = Graph()

        # Collect and merge namespace bindings; earlier sources win because
        # existing bindings are not overridden.
        for source in sources:
            for prefix, ns in source.graph.namespace_manager.namespaces():
                try:
                    merged.bind(prefix, ns, override=False)
                except Exception:
                    pass  # Best effort: skip conflicting bindings

        # Apply preferred prefixes from config
        for prefix, ns in self.config.namespaces.preferred_prefixes.items():
            merged.bind(prefix, Namespace(ns), override=True)

        # Add all triples from all sources
        for source in sources:
            for triple in source.graph:
                merged.add(triple)

        # Handle owl:imports
        merged = self._handle_imports(merged, sources)

        return merged

    def _handle_imports(
        self, merged: Graph, sources: list[SourceGraph]
    ) -> Graph:
        """Handle owl:imports statements according to strategy.

        Only REMOVE changes the graph; the other strategies currently leave
        it as it is.

        Args:
            merged: The merged graph (modified in place)
            sources: Original source graphs (currently unused)

        Returns:
            Graph with imports handled
        """
        strategy = self.config.imports

        if strategy == ImportsStrategy.REMOVE:
            # Remove all owl:imports statements. Materialise the matches
            # first so the graph is not mutated while being iterated.
            imports_to_remove = list(merged.triples((None, OWL.imports, None)))
            for triple in imports_to_remove:
                merged.remove(triple)

        elif strategy == ImportsStrategy.MERGE:
            # Deduplicate imports (already done by Graph.add())
            pass

        # PRESERVE and UPDATE are handled as-is for now
        # UPDATE would require knowing the output path to update references

        return merged

    def write_output(self, result: MergeResult, output_path: Path) -> None:
        """Write the merged graph to file with conflict markers.

        The graph is serialized as Turtle and written as UTF-8; missing
        parent directories are created.

        Args:
            result: MergeResult with merged graph and conflicts
            output_path: Path to write output file

        Raises:
            ValueError: If the result carries no merged graph
        """
        if result.merged_graph is None:
            raise ValueError("No merged graph to write")

        # For now, serialize normally and then inject conflict markers
        # A more sophisticated approach would use a custom serializer
        turtle_output = result.merged_graph.serialize(format="turtle")

        # If there are unresolved conflicts, we need to add markers
        if result.unresolved_conflicts:
            turtle_output = self._inject_conflict_markers(
                turtle_output, result.unresolved_conflicts, result.merged_graph
            )

        output_path.parent.mkdir(parents=True, exist_ok=True)
        # Turtle documents are UTF-8; never depend on the platform's locale
        # encoding, which would corrupt or reject non-ASCII literals.
        output_path.write_text(turtle_output, encoding="utf-8")

    def _inject_conflict_markers(
        self,
        turtle: str,
        conflicts: list[Conflict],
        graph: Graph,
    ) -> str:
        """Inject conflict markers into Turtle output.

        This is a simplified implementation that adds a conflict summary
        at the top of the file. A more sophisticated version would inline
        markers near the conflicting statements.

        Args:
            turtle: Original Turtle serialization
            conflicts: Unresolved conflicts to mark
            graph: Graph for namespace resolution

        Returns:
            Turtle string with conflict markers added
        """
        # Build conflict summary header
        header_lines = [
            "# ============================================================",
            "# MERGE CONFLICTS",
            f"# {len(conflicts)} unresolved conflict(s) require manual review",
            "# Search for '=== CONFLICT ===' to find each one",
            "# ============================================================",
            "",
        ]

        for conflict in conflicts:
            header_lines.append(generate_conflict_marker(conflict, graph))
            header_lines.append(generate_conflict_end_marker())
            header_lines.append("")

        header_lines.append("# ============================================================")
        # Trailing empty entry makes the joined header end with a newline
        # before the Turtle body starts.
        header_lines.append("")

        return "\n".join(header_lines) + turtle
|
|
379
|
+
|
|
380
|
+
|
|
381
|
+
def merge_files(
    sources: list[Path],
    output: Path,
    priorities: list[int] | None = None,
    conflict_strategy: str = "priority",
    dry_run: bool = False,
) -> MergeResult:
    """Convenience function to merge files with minimal configuration.

    Args:
        sources: List of source file paths
        output: Output file path
        priorities: Optional list of priorities (same order as sources);
            when omitted, sources get priorities 1, 2, 3, ... in order
        conflict_strategy: Strategy name: priority, first, last, mark_all
            (matched case-insensitively)
        dry_run: If True, don't write output

    Returns:
        MergeResult with merge information

    Raises:
        ValueError: If priorities is given but its length differs from
            the number of sources
        KeyError: Presumably when conflict_strategy does not name a
            ConflictStrategy member (the lookup is by subscript) -- confirm
            against ConflictStrategy's definition
    """
    if priorities is None:
        priorities = list(range(1, len(sources) + 1))
    elif len(priorities) != len(sources):
        # zip() would silently drop the surplus sources (or priorities),
        # producing a merge that quietly ignores some inputs.
        raise ValueError(
            f"Expected {len(sources)} priorities (one per source), "
            f"got {len(priorities)}"
        )

    from .config import SourceConfig, OutputConfig, ConflictConfig, ConflictStrategy

    source_configs = [
        SourceConfig(path=p, priority=pri)
        for p, pri in zip(sources, priorities)
    ]

    strategy = ConflictStrategy[conflict_strategy.upper()]

    config = MergeConfig(
        sources=source_configs,
        output=OutputConfig(path=output),
        conflicts=ConflictConfig(strategy=strategy),
        dry_run=dry_run,
    )

    merger = OntologyMerger(config)
    result = merger.merge()

    # Compare against None explicitly: a graph with zero triples is falsy
    # (graphs are sized containers), yet it is still a valid result that
    # must be written out.
    if not dry_run and result.success and result.merged_graph is not None:
        merger.write_output(result, output)

    return result
|