rdf-construct 0.2.1__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. rdf_construct/__init__.py +1 -1
  2. rdf_construct/cli.py +1794 -0
  3. rdf_construct/describe/__init__.py +93 -0
  4. rdf_construct/describe/analyzer.py +176 -0
  5. rdf_construct/describe/documentation.py +146 -0
  6. rdf_construct/describe/formatters/__init__.py +47 -0
  7. rdf_construct/describe/formatters/json.py +65 -0
  8. rdf_construct/describe/formatters/markdown.py +275 -0
  9. rdf_construct/describe/formatters/text.py +315 -0
  10. rdf_construct/describe/hierarchy.py +232 -0
  11. rdf_construct/describe/imports.py +213 -0
  12. rdf_construct/describe/metadata.py +187 -0
  13. rdf_construct/describe/metrics.py +145 -0
  14. rdf_construct/describe/models.py +552 -0
  15. rdf_construct/describe/namespaces.py +180 -0
  16. rdf_construct/describe/profiles.py +415 -0
  17. rdf_construct/localise/__init__.py +114 -0
  18. rdf_construct/localise/config.py +508 -0
  19. rdf_construct/localise/extractor.py +427 -0
  20. rdf_construct/localise/formatters/__init__.py +36 -0
  21. rdf_construct/localise/formatters/markdown.py +229 -0
  22. rdf_construct/localise/formatters/text.py +224 -0
  23. rdf_construct/localise/merger.py +346 -0
  24. rdf_construct/localise/reporter.py +356 -0
  25. rdf_construct/merge/__init__.py +165 -0
  26. rdf_construct/merge/config.py +354 -0
  27. rdf_construct/merge/conflicts.py +281 -0
  28. rdf_construct/merge/formatters.py +426 -0
  29. rdf_construct/merge/merger.py +425 -0
  30. rdf_construct/merge/migrator.py +339 -0
  31. rdf_construct/merge/rules.py +377 -0
  32. rdf_construct/merge/splitter.py +1102 -0
  33. rdf_construct/refactor/__init__.py +72 -0
  34. rdf_construct/refactor/config.py +362 -0
  35. rdf_construct/refactor/deprecator.py +328 -0
  36. rdf_construct/refactor/formatters/__init__.py +8 -0
  37. rdf_construct/refactor/formatters/text.py +311 -0
  38. rdf_construct/refactor/renamer.py +294 -0
  39. {rdf_construct-0.2.1.dist-info → rdf_construct-0.4.0.dist-info}/METADATA +91 -6
  40. {rdf_construct-0.2.1.dist-info → rdf_construct-0.4.0.dist-info}/RECORD +43 -7
  41. {rdf_construct-0.2.1.dist-info → rdf_construct-0.4.0.dist-info}/WHEEL +0 -0
  42. {rdf_construct-0.2.1.dist-info → rdf_construct-0.4.0.dist-info}/entry_points.txt +0 -0
  43. {rdf_construct-0.2.1.dist-info → rdf_construct-0.4.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,425 @@
1
+ """Core merge logic for combining RDF ontologies.
2
+
3
+ This module provides the main OntologyMerger class that:
4
+ - Loads multiple source ontology files
5
+ - Detects and resolves conflicts
6
+ - Handles namespace remapping
7
+ - Manages owl:imports statements
8
+ - Writes merged output with conflict markers
9
+ """
10
+
11
+ from dataclasses import dataclass, field
12
+ from pathlib import Path
13
+ from typing import TextIO
14
+
15
+ from rdflib import Graph, URIRef, Namespace
16
+ from rdflib.namespace import RDF, RDFS, OWL
17
+
18
+ from rdf_construct.merge.config import (
19
+ MergeConfig,
20
+ SourceConfig,
21
+ ConflictStrategy,
22
+ ImportsStrategy,
23
+ )
24
+ from rdf_construct.merge.conflicts import (
25
+ Conflict,
26
+ ConflictDetector,
27
+ SourceGraph,
28
+ generate_conflict_marker,
29
+ generate_conflict_end_marker,
30
+ )
31
+
32
+
33
@dataclass
class MergeResult:
    """Outcome of a single merge run.

    Every field has a default, so an empty instance describes a successful
    merge that has not produced anything yet.

    Attributes:
        merged_graph: The merged RDF graph, or None if none was produced
        conflicts: Every conflict that was detected
        resolved_conflicts: Conflicts that were resolved automatically
        unresolved_conflicts: Conflicts still needing manual attention
        source_stats: Integer statistic recorded per source file
        total_triples: Number of triples in the merged output
        success: False when the merge stopped because of an error
        error: Error message accompanying success=False, otherwise None
    """

    merged_graph: Graph | None = None
    conflicts: list[Conflict] = field(default_factory=list)
    resolved_conflicts: list[Conflict] = field(default_factory=list)
    unresolved_conflicts: list[Conflict] = field(default_factory=list)
    source_stats: dict[str, int] = field(default_factory=dict)
    total_triples: int = 0
    success: bool = True
    error: str | None = None

    @property
    def has_conflicts(self) -> bool:
        """Report whether at least one conflict was detected."""
        return bool(self.conflicts)

    @property
    def has_unresolved(self) -> bool:
        """Report whether any conflict is still unresolved."""
        return bool(self.unresolved_conflicts)
66
+
67
+
68
class OntologyMerger:
    """Merges multiple RDF ontology files with conflict detection.

    The merger:
    1. Loads all source files with priority metadata
    2. Builds a unified namespace map
    3. Detects conflicts (same subject+predicate, different values)
    4. Resolves conflicts according to the configured strategy
    5. Marks unresolved conflicts in the output
    6. Handles owl:imports according to configuration
    """

    # File suffix -> rdflib parser name. Suffixes not listed here are parsed
    # as Turtle (see _load_source).
    _FORMAT_BY_SUFFIX = {
        ".ttl": "turtle",
        ".turtle": "turtle",
        ".rdf": "xml",
        ".xml": "xml",
        ".owl": "xml",
        ".n3": "n3",
        ".nt": "nt",
        ".ntriples": "nt",
        ".jsonld": "json-ld",
        ".json": "json-ld",
    }

    def __init__(self, config: MergeConfig):
        """Initialize the merger.

        Args:
            config: Merge configuration
        """
        self.config = config
        self.detector = ConflictDetector(
            ignore_predicates=config.conflicts.ignore_predicates
        )

    def merge(self) -> MergeResult:
        """Execute the merge operation.

        Loading stops at the first source that fails; the failure is reported
        through ``success``/``error`` on the result instead of being raised.

        Returns:
            MergeResult with merged graph and conflict information
        """
        result = MergeResult()

        # Load all source graphs
        sources: list[SourceGraph] = []
        for src_config in self.config.sources:
            try:
                source = self._load_source(src_config)
            except Exception as e:
                # Boundary handler: any load failure becomes a failed result.
                result.success = False
                result.error = f"Failed to load {src_config.path}: {e}"
                return result
            sources.append(source)
            result.source_stats[src_config.path.name] = source.triple_count

        if not sources:
            result.success = False
            result.error = "No source files to merge"
            return result

        # Detect conflicts, then sort them into resolved / unresolved
        result.conflicts = self.detector.detect_conflicts(sources)
        self._resolve_conflicts(result.conflicts, result)

        # Create merged graph
        result.merged_graph = self._create_merged_graph(sources, result)
        result.total_triples = len(result.merged_graph)

        return result

    def _load_source(self, src_config: SourceConfig) -> SourceGraph:
        """Load a single source file.

        The parser is chosen from the file suffix (case-insensitive); unknown
        suffixes are parsed as Turtle.

        Args:
            src_config: Configuration for this source

        Returns:
            SourceGraph with loaded data

        Raises:
            FileNotFoundError: If source file doesn't exist
            ValueError: If source file can't be parsed (the parser error is
                chained as ``__cause__``)
        """
        if not src_config.path.exists():
            raise FileNotFoundError(f"Source file not found: {src_config.path}")

        rdf_format = self._FORMAT_BY_SUFFIX.get(
            src_config.path.suffix.lower(), "turtle"
        )

        graph = Graph()
        try:
            graph.parse(src_config.path.as_posix(), format=rdf_format)
        except Exception as e:
            # Chain the cause so the parser's traceback is not lost.
            raise ValueError(f"Failed to parse {src_config.path}: {e}") from e

        # Apply namespace remapping if configured
        if src_config.namespace_remap:
            graph = self._remap_namespaces(graph, src_config.namespace_remap)

        return SourceGraph(
            graph=graph,
            path=str(src_config.path),
            priority=src_config.priority,
        )

    def _remap_namespaces(
        self, graph: Graph, remappings: dict[str, str]
    ) -> Graph:
        """Remap namespaces in a graph.

        Args:
            graph: Source graph to remap
            remappings: Mapping of old namespace -> new namespace

        Returns:
            New graph with remapped URIs, or ``graph`` itself when
            ``remappings`` is empty
        """
        if not remappings:
            return graph

        new_graph = Graph()

        # Copy namespace bindings, pointing remapped prefixes at the new URI
        for prefix, ns in graph.namespace_manager.namespaces():
            ns_str = str(ns)
            if ns_str in remappings:
                new_graph.bind(prefix, Namespace(remappings[ns_str]))
            else:
                new_graph.bind(prefix, ns)

        def remap_term(term):
            # Only URI references carry a namespace; blank nodes and literals
            # must pass through untouched.
            if isinstance(term, URIRef):
                return self._remap_uri(term, remappings)
            return term

        # Remap triples
        for s, p, o in graph:
            new_graph.add((remap_term(s), remap_term(p), remap_term(o)))

        return new_graph

    def _remap_uri(self, uri: URIRef, remappings: dict[str, str]) -> URIRef:
        """Remap a single URI according to namespace remappings.

        When several old namespaces are prefixes of the URI, the longest
        (most specific) one is applied, so the outcome does not depend on the
        order of the mapping.

        Args:
            uri: URI to remap
            remappings: Namespace remapping rules

        Returns:
            Remapped URI or original if no mapping applies
        """
        uri_str = str(uri)
        matching = [old_ns for old_ns in remappings if uri_str.startswith(old_ns)]
        if not matching:
            return uri

        old_ns = max(matching, key=len)
        return URIRef(remappings[old_ns] + uri_str[len(old_ns):])

    def _resolve_conflicts(
        self, conflicts: list[Conflict], result: MergeResult
    ) -> None:
        """Resolve conflicts according to configured strategy.

        Each conflict is then appended to ``result.resolved_conflicts`` or
        ``result.unresolved_conflicts`` according to its ``is_resolved`` flag.

        Args:
            conflicts: Detected conflicts to resolve
            result: MergeResult to update with resolution info
        """
        strategy = self.config.conflicts.strategy

        for conflict in conflicts:
            if strategy == ConflictStrategy.PRIORITY:
                conflict.resolve_by_priority()
            elif strategy == ConflictStrategy.FIRST:
                conflict.resolve_by_first()
            elif strategy == ConflictStrategy.LAST:
                conflict.resolve_by_last()
            # MARK_ALL leaves conflicts unresolved

            if conflict.is_resolved:
                result.resolved_conflicts.append(conflict)
            else:
                result.unresolved_conflicts.append(conflict)

    def _create_merged_graph(
        self, sources: list[SourceGraph], result: MergeResult
    ) -> Graph:
        """Create the merged graph from sources.

        NOTE(review): ``result`` is not consulted here. Every triple from
        every source is added, so values that lost a resolved conflict still
        appear in the output - confirm whether that is intended.

        Args:
            sources: Source graphs to merge
            result: MergeResult with conflict information (currently unused)

        Returns:
            Merged RDF graph
        """
        import logging

        log = logging.getLogger(__name__)
        merged = Graph()

        # Collect namespace bindings; earlier sources keep their prefix
        for source in sources:
            for prefix, ns in source.graph.namespace_manager.namespaces():
                try:
                    merged.bind(prefix, ns, override=False)
                except Exception as exc:
                    # Best effort: a binding that cannot be applied must not
                    # abort the merge, but leave a trace for debugging.
                    log.debug(
                        "Skipping namespace binding %r -> %s: %s", prefix, ns, exc
                    )

        # Apply preferred prefixes from config (these win over source bindings)
        for prefix, ns in self.config.namespaces.preferred_prefixes.items():
            merged.bind(prefix, Namespace(ns), override=True)

        # Add all triples from all sources
        for source in sources:
            for triple in source.graph:
                merged.add(triple)

        # Handle owl:imports
        return self._handle_imports(merged, sources)

    def _handle_imports(
        self, merged: Graph, sources: list[SourceGraph]
    ) -> Graph:
        """Handle owl:imports statements according to strategy.

        Only the REMOVE strategy changes the graph. The other strategies
        leave it untouched; as the original comments note, MERGE is already
        deduplicated by ``Graph.add()`` and UPDATE would need the output path.

        Args:
            merged: The merged graph (modified in place)
            sources: Original source graphs (currently unused)

        Returns:
            Graph with imports handled
        """
        if self.config.imports == ImportsStrategy.REMOVE:
            # None acts as a wildcard: drop every owl:imports statement.
            merged.remove((None, OWL.imports, None))

        return merged

    def write_output(self, result: MergeResult, output_path: Path) -> None:
        """Write the merged graph to file with conflict markers.

        The graph is serialized as Turtle and written as UTF-8. Parent
        directories are created if needed.

        Args:
            result: MergeResult with merged graph and conflicts
            output_path: Path to write output file

        Raises:
            ValueError: If ``result`` carries no merged graph
        """
        if result.merged_graph is None:
            raise ValueError("No merged graph to write")

        # For now, serialize normally and then inject conflict markers
        # A more sophisticated approach would use a custom serializer
        turtle_output = result.merged_graph.serialize(format="turtle")

        # If there are unresolved conflicts, we need to add markers
        if result.unresolved_conflicts:
            turtle_output = self._inject_conflict_markers(
                turtle_output, result.unresolved_conflicts, result.merged_graph
            )

        output_path.parent.mkdir(parents=True, exist_ok=True)
        # Turtle documents are UTF-8; do not depend on the platform locale.
        output_path.write_text(turtle_output, encoding="utf-8")

    def _inject_conflict_markers(
        self,
        turtle: str,
        conflicts: list[Conflict],
        graph: Graph,
    ) -> str:
        """Inject conflict markers into Turtle output.

        This is a simplified implementation that adds a conflict summary
        at the top of the file. A more sophisticated version would inline
        markers near the conflicting statements.

        Args:
            turtle: Original Turtle serialization
            conflicts: Unresolved conflicts to mark
            graph: Graph for namespace resolution

        Returns:
            Turtle string with conflict markers added
        """
        rule = "# ============================================================"

        # Build conflict summary header
        header_lines = [
            rule,
            "# MERGE CONFLICTS",
            f"# {len(conflicts)} unresolved conflict(s) require manual review",
            "# Search for '=== CONFLICT ===' to find each one",
            rule,
            "",
        ]

        for conflict in conflicts:
            header_lines.append(generate_conflict_marker(conflict, graph))
            header_lines.append(generate_conflict_end_marker())
            header_lines.append("")

        # The trailing empty entry makes the header end with a newline
        header_lines.append(rule)
        header_lines.append("")

        return "\n".join(header_lines) + turtle
379
+
380
+
381
def merge_files(
    sources: list[Path],
    output: Path,
    priorities: list[int] | None = None,
    conflict_strategy: str = "priority",
    dry_run: bool = False,
) -> MergeResult:
    """Convenience function to merge files with minimal configuration.

    Args:
        sources: List of source file paths
        output: Output file path
        priorities: Optional list of priorities (same order and length as
            sources). Defaults to 1, 2, 3, ... in source order.
        conflict_strategy: Strategy name: priority, first, last, mark_all
            (case-insensitive; looked up by name on ConflictStrategy)
        dry_run: If True, don't write output

    Returns:
        MergeResult with merge information

    Raises:
        ValueError: If ``priorities`` is given but its length differs from
            ``sources``
    """
    if priorities is None:
        priorities = list(range(1, len(sources) + 1))
    elif len(priorities) != len(sources):
        # zip() would silently drop the unmatched sources from the merge.
        raise ValueError(
            f"Expected {len(sources)} priorities (one per source), "
            f"got {len(priorities)}"
        )

    from .config import SourceConfig, OutputConfig, ConflictConfig, ConflictStrategy

    source_configs = [
        SourceConfig(path=p, priority=pri)
        for p, pri in zip(sources, priorities)
    ]

    # NOTE(review): presumably an Enum, in which case an unknown strategy
    # name raises KeyError here - confirm against ConflictStrategy.
    strategy = ConflictStrategy[conflict_strategy.upper()]

    config = MergeConfig(
        sources=source_configs,
        output=OutputConfig(path=output),
        conflicts=ConflictConfig(strategy=strategy),
        dry_run=dry_run,
    )

    merger = OntologyMerger(config)
    result = merger.merge()

    # Compare against None explicitly: an rdflib Graph with zero triples is
    # falsy, and a successful but empty merge should still be written.
    if not dry_run and result.success and result.merged_graph is not None:
        merger.write_output(result, output)

    return result