rdf-construct 0.2.1__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. rdf_construct/__init__.py +1 -1
  2. rdf_construct/cli.py +1794 -0
  3. rdf_construct/describe/__init__.py +93 -0
  4. rdf_construct/describe/analyzer.py +176 -0
  5. rdf_construct/describe/documentation.py +146 -0
  6. rdf_construct/describe/formatters/__init__.py +47 -0
  7. rdf_construct/describe/formatters/json.py +65 -0
  8. rdf_construct/describe/formatters/markdown.py +275 -0
  9. rdf_construct/describe/formatters/text.py +315 -0
  10. rdf_construct/describe/hierarchy.py +232 -0
  11. rdf_construct/describe/imports.py +213 -0
  12. rdf_construct/describe/metadata.py +187 -0
  13. rdf_construct/describe/metrics.py +145 -0
  14. rdf_construct/describe/models.py +552 -0
  15. rdf_construct/describe/namespaces.py +180 -0
  16. rdf_construct/describe/profiles.py +415 -0
  17. rdf_construct/localise/__init__.py +114 -0
  18. rdf_construct/localise/config.py +508 -0
  19. rdf_construct/localise/extractor.py +427 -0
  20. rdf_construct/localise/formatters/__init__.py +36 -0
  21. rdf_construct/localise/formatters/markdown.py +229 -0
  22. rdf_construct/localise/formatters/text.py +224 -0
  23. rdf_construct/localise/merger.py +346 -0
  24. rdf_construct/localise/reporter.py +356 -0
  25. rdf_construct/merge/__init__.py +165 -0
  26. rdf_construct/merge/config.py +354 -0
  27. rdf_construct/merge/conflicts.py +281 -0
  28. rdf_construct/merge/formatters.py +426 -0
  29. rdf_construct/merge/merger.py +425 -0
  30. rdf_construct/merge/migrator.py +339 -0
  31. rdf_construct/merge/rules.py +377 -0
  32. rdf_construct/merge/splitter.py +1102 -0
  33. rdf_construct/refactor/__init__.py +72 -0
  34. rdf_construct/refactor/config.py +362 -0
  35. rdf_construct/refactor/deprecator.py +328 -0
  36. rdf_construct/refactor/formatters/__init__.py +8 -0
  37. rdf_construct/refactor/formatters/text.py +311 -0
  38. rdf_construct/refactor/renamer.py +294 -0
  39. {rdf_construct-0.2.1.dist-info → rdf_construct-0.4.0.dist-info}/METADATA +91 -6
  40. {rdf_construct-0.2.1.dist-info → rdf_construct-0.4.0.dist-info}/RECORD +43 -7
  41. {rdf_construct-0.2.1.dist-info → rdf_construct-0.4.0.dist-info}/WHEEL +0 -0
  42. {rdf_construct-0.2.1.dist-info → rdf_construct-0.4.0.dist-info}/entry_points.txt +0 -0
  43. {rdf_construct-0.2.1.dist-info → rdf_construct-0.4.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,426 @@
1
+ """Output formatters for merge operations.
2
+
3
+ Provides text and Markdown formatters for:
4
+ - Merge progress and results
5
+ - Conflict reports
6
+ - Data migration summaries
7
+ """
8
+
9
+ from abc import ABC, abstractmethod
10
+ from datetime import datetime
11
+ from pathlib import Path
12
+ from typing import TextIO
13
+
14
+ from rdflib import Graph
15
+
16
+ from rdf_construct.merge.conflicts import Conflict, ConflictType
17
+ from rdf_construct.merge.merger import MergeResult
18
+ from rdf_construct.merge.migrator import MigrationResult
19
+
20
+
21
class BaseFormatter(ABC):
    """Interface that every merge output formatter implements.

    A concrete formatter renders merge results, conflict lists and
    migration results, each as a single string.
    """

    @abstractmethod
    def format_merge_result(
        self, result: MergeResult, graph: Graph | None = None
    ) -> str:
        """Render the outcome of a merge.

        Args:
            result: The MergeResult to render.
            graph: Optional graph used for namespace resolution.

        Returns:
            The rendered text.
        """

    @abstractmethod
    def format_conflict_report(
        self, conflicts: list[Conflict], graph: Graph | None = None
    ) -> str:
        """Render a report describing the given conflicts.

        Args:
            conflicts: The conflicts to include in the report.
            graph: Optional graph used for namespace resolution.

        Returns:
            The rendered conflict report.
        """

    @abstractmethod
    def format_migration_result(self, result: MigrationResult) -> str:
        """Render the outcome of a data migration.

        Args:
            result: The MigrationResult to render.

        Returns:
            The rendered text.
        """
63
+
64
+
65
class TextFormatter(BaseFormatter):
    """Plain text formatter for console output."""

    # ANSI escape sequences keyed by the colour names ``_colour`` accepts.
    # Kept at class level so the table is not rebuilt on every call.
    _ANSI_CODES = {
        "green": "\033[92m",
        "yellow": "\033[93m",
        "red": "\033[91m",
        "cyan": "\033[96m",
        "bold": "\033[1m",
        "reset": "\033[0m",
    }

    def __init__(self, use_colour: bool = True):
        """Initialize the formatter.

        Args:
            use_colour: Whether to use ANSI colour codes
        """
        self.use_colour = use_colour

    def _colour(self, text: str, colour: str) -> str:
        """Apply ANSI colour to text.

        Args:
            text: Text to colour
            colour: Colour name (green, yellow, red, cyan, bold)

        Returns:
            Coloured text, or the text unchanged if colours are disabled
        """
        if not self.use_colour:
            return text

        codes = self._ANSI_CODES
        # An unknown colour name contributes no prefix; the reset suffix is
        # still appended, matching the long-standing behaviour.
        return f"{codes.get(colour, '')}{text}{codes['reset']}"

    def format_merge_result(self, result: MergeResult, graph: Graph | None = None) -> str:
        """Format merge result as plain text.

        Args:
            result: MergeResult to format
            graph: Accepted for interface compatibility; not used here

        Returns:
            A single failure line when ``result.success`` is false, otherwise
            per-source triple counts, the merged total and a conflict summary.
        """
        lines = []

        if not result.success:
            lines.append(self._colour(f"✗ Merge failed: {result.error}", "red"))
            return "\n".join(lines)

        lines.append(self._colour("Merge Results", "bold"))
        lines.append("")

        # Source statistics
        lines.append("Sources:")
        tick = self._colour("✓", "green")
        for source, count in result.source_stats.items():
            lines.append(f" {tick} {source}: {count} triples")

        lines.append("")
        lines.append(f"Total merged: {result.total_triples} triples")

        # Conflict summary
        lines.append("")
        if result.conflicts:
            lines.append(self._colour("Conflicts:", "yellow"))
            lines.append(f" Detected: {len(result.conflicts)}")
            lines.append(f" Auto-resolved: {len(result.resolved_conflicts)}")
            if result.unresolved_conflicts:
                unresolved_text = self._colour(
                    f"Unresolved: {len(result.unresolved_conflicts)}", "red"
                )
                lines.append(f" {unresolved_text}")
                lines.append(" → Search for '=== CONFLICT ===' in output")
        else:
            lines.append(self._colour("✓ No conflicts detected", "green"))

        return "\n".join(lines)

    def format_conflict_report(
        self, conflicts: list[Conflict], graph: Graph | None = None
    ) -> str:
        """Format conflicts as plain text.

        Args:
            conflicts: Conflicts to report; split by their ``is_resolved`` flag
            graph: Optional graph for namespace resolution of subjects and
                predicates

        Returns:
            Totals followed by the unresolved and auto-resolved sections
            (each section is omitted when empty).
        """
        lines = []

        resolved = [c for c in conflicts if c.is_resolved]
        unresolved = [c for c in conflicts if not c.is_resolved]

        lines.append(self._colour("Conflict Report", "bold"))
        lines.append(f"Total: {len(conflicts)}")
        lines.append(f"Resolved: {len(resolved)}")
        lines.append(f"Unresolved: {len(unresolved)}")
        lines.append("")

        if unresolved:
            lines.append(self._colour("Unresolved Conflicts:", "red"))
            for i, conflict in enumerate(unresolved, 1):
                subj = self._format_term(conflict.subject, graph)
                pred = self._format_term(conflict.predicate, graph)
                lines.append(f" {i}. {subj} {pred}")
                for cv in conflict.values:
                    lines.append(f" - {cv} (from {cv.source_path})")
            lines.append("")

        if resolved:
            lines.append(self._colour("Auto-Resolved:", "green"))
            for conflict in resolved:
                subj = self._format_term(conflict.subject, graph)
                pred = self._format_term(conflict.predicate, graph)
                lines.append(f" {subj} {pred}")
                if conflict.resolution:
                    lines.append(f" → Used: {conflict.resolution}")

        return "\n".join(lines)

    def format_migration_result(self, result: MigrationResult) -> str:
        """Format migration result as plain text.

        Args:
            result: MigrationResult to format

        Returns:
            A single failure line when ``result.success`` is false, otherwise
            triple counts, change statistics and per-rule instance counts.
        """
        lines = []

        if not result.success:
            lines.append(self._colour(f"✗ Migration failed: {result.error}", "red"))
            return "\n".join(lines)

        stats = result.stats

        lines.append(self._colour("Migration Results", "bold"))
        lines.append("")
        lines.append(f"Source triples: {result.source_triples}")
        lines.append(f"Result triples: {result.result_triples}")
        lines.append("")

        lines.append("Changes:")
        lines.append(f" Subjects updated: {stats.subjects_updated}")
        lines.append(f" Objects updated: {stats.objects_updated}")
        lines.append(f" Triples added: {stats.triples_added}")
        lines.append(f" Triples removed: {stats.triples_removed}")

        if stats.rules_applied:
            lines.append("")
            lines.append("Rules applied:")
            for rule, count in stats.rules_applied.items():
                lines.append(f" {rule}: {count} instances")

        return "\n".join(lines)

    def _format_term(self, term, graph: Graph | None) -> str:
        """Format an RDF term for display.

        Args:
            term: The RDF term to render
            graph: Optional graph whose namespace manager is used to shorten
                the term

        Returns:
            The namespace-normalised form when a graph is supplied and
            normalisation succeeds, otherwise ``str(term)``.
        """
        # Compare against None explicitly: rdflib's Graph implements __len__,
        # so a graph holding only namespace bindings (zero triples) is falsy
        # and a plain truthiness test would skip namespace resolution.
        if graph is not None:
            try:
                return graph.namespace_manager.normalizeUri(term)
            except Exception:
                # Best-effort display helper: any normalisation failure falls
                # back to the raw string form below rather than aborting the
                # whole report.
                pass
        return str(term)
213
+
214
+
215
class MarkdownFormatter(BaseFormatter):
    """Markdown formatter for merge, conflict and migration reports."""

    @staticmethod
    def _table_cell(value) -> str:
        """Render a value so it is safe inside a single Markdown table cell.

        A literal ``|`` would end the cell early and a line break would end
        the row, so pipes are backslash-escaped and line breaks are replaced
        with spaces. Values without those characters are returned exactly as
        an f-string would render them.
        """
        text = f"{value}"
        text = text.replace("\r\n", " ").replace("\n", " ").replace("\r", " ")
        return text.replace("|", "\\|")

    def format_merge_result(self, result: MergeResult, graph: Graph | None = None) -> str:
        """Format merge result as Markdown.

        Args:
            result: MergeResult to format
            graph: Accepted for interface compatibility; not used here

        Returns:
            A report with a generation timestamp, then either an error line
            (when ``result.success`` is false) or summary and sources tables.
        """
        lines = []

        lines.append("# Merge Report")
        lines.append("")
        lines.append(f"Generated: {datetime.now().isoformat()}")
        lines.append("")

        if not result.success:
            lines.append(f"**Error**: {result.error}")
            return "\n".join(lines)

        lines.append("## Summary")
        lines.append("")
        lines.append("| Metric | Value |")
        lines.append("|--------|-------|")
        lines.append(f"| Source files | {len(result.source_stats)} |")
        lines.append(f"| Total triples | {result.total_triples} |")
        lines.append(f"| Conflicts detected | {len(result.conflicts)} |")
        lines.append(f"| Auto-resolved | {len(result.resolved_conflicts)} |")
        lines.append(f"| **Unresolved** | **{len(result.unresolved_conflicts)}** |")
        lines.append("")

        if result.source_stats:
            lines.append("## Sources")
            lines.append("")
            lines.append("| File | Triples |")
            lines.append("|------|---------|")
            for source, count in result.source_stats.items():
                lines.append(f"| {self._table_cell(source)} | {count} |")
            lines.append("")

        return "\n".join(lines)

    def format_conflict_report(
        self, conflicts: list[Conflict], graph: Graph | None = None
    ) -> str:
        """Format conflicts as Markdown.

        Args:
            conflicts: Conflicts to report; split by their ``is_resolved`` flag
            graph: Optional graph for namespace resolution of subjects and
                predicates

        Returns:
            A report with a summary table, one section per unresolved
            conflict, one per auto-resolved conflict, and a fixed list of
            recommendations.
        """
        lines = []

        resolved = [c for c in conflicts if c.is_resolved]
        unresolved = [c for c in conflicts if not c.is_resolved]

        lines.append("# Merge Conflict Report")
        lines.append("")
        lines.append(f"Generated: {datetime.now().isoformat()}")
        lines.append("")

        lines.append("## Summary")
        lines.append("")
        lines.append("| Metric | Count |")
        lines.append("|--------|-------|")
        lines.append(f"| Total conflicts | {len(conflicts)} |")
        lines.append(f"| Auto-resolved | {len(resolved)} |")
        lines.append(f"| **Unresolved** | **{len(unresolved)}** |")
        lines.append("")

        if unresolved:
            lines.append("## Unresolved Conflicts")
            lines.append("")
            lines.append(
                "These require manual review. "
                "Search for `# === CONFLICT ===` in the output file."
            )
            lines.append("")

            for i, conflict in enumerate(unresolved, 1):
                subj = self._format_term(conflict.subject, graph)
                pred = self._format_term(conflict.predicate, graph)

                lines.append(f"### {i}. {subj} {pred}")
                lines.append("")
                lines.append("| Source | Priority | Value |")
                lines.append("|--------|----------|-------|")
                for cv in conflict.values:
                    # Source paths and values are free text, so escape them
                    # to keep each row to exactly three cells.
                    lines.append(
                        f"| {self._table_cell(cv.source_path)} "
                        f"| {cv.priority} "
                        f"| {self._table_cell(cv)} |"
                    )
                lines.append("")
                lines.append(f"**Reason**: {self._conflict_reason(conflict)}")
                lines.append("")

        if resolved:
            lines.append("## Auto-Resolved Conflicts")
            lines.append("")
            lines.append("These were resolved automatically based on priority.")
            lines.append("")

            for conflict in resolved:
                subj = self._format_term(conflict.subject, graph)
                pred = self._format_term(conflict.predicate, graph)

                lines.append(f"### {subj} {pred}")
                lines.append("")
                if conflict.resolution:
                    lines.append(
                        f"- **Kept** ({conflict.resolution.source_path}, "
                        f"priority {conflict.resolution.priority}): {conflict.resolution}"
                    )
                    for cv in conflict.values:
                        if cv != conflict.resolution:
                            lines.append(
                                f"- *Discarded* ({cv.source_path}, "
                                f"priority {cv.priority}): {cv}"
                            )
                lines.append("")

        lines.append("## Recommendations")
        lines.append("")
        lines.append("1. Review unresolved conflicts in output file")
        lines.append("2. Consider whether similar values should be merged or aliased")
        lines.append("3. Run `rdf-construct lint` on merged output to check for issues")
        lines.append("")

        return "\n".join(lines)

    def format_migration_result(self, result: MigrationResult) -> str:
        """Format migration result as Markdown.

        Args:
            result: MigrationResult to format

        Returns:
            A report with a generation timestamp, then either an error line
            (when ``result.success`` is false) or a summary table and an
            optional rules-applied table.
        """
        lines = []

        lines.append("# Data Migration Report")
        lines.append("")
        lines.append(f"Generated: {datetime.now().isoformat()}")
        lines.append("")

        if not result.success:
            lines.append(f"**Error**: {result.error}")
            return "\n".join(lines)

        stats = result.stats

        lines.append("## Summary")
        lines.append("")
        lines.append("| Metric | Value |")
        lines.append("|--------|-------|")
        lines.append(f"| Source triples | {result.source_triples} |")
        lines.append(f"| Result triples | {result.result_triples} |")
        lines.append(f"| Subjects updated | {stats.subjects_updated} |")
        lines.append(f"| Objects updated | {stats.objects_updated} |")
        lines.append(f"| Triples added | {stats.triples_added} |")
        lines.append(f"| Triples removed | {stats.triples_removed} |")
        lines.append("")

        if stats.rules_applied:
            lines.append("## Rules Applied")
            lines.append("")
            lines.append("| Rule | Instances |")
            lines.append("|------|-----------|")
            for rule, count in stats.rules_applied.items():
                lines.append(f"| {self._table_cell(rule)} | {count} |")
            lines.append("")

        return "\n".join(lines)

    def _format_term(self, term, graph: Graph | None) -> str:
        """Format an RDF term for display as inline Markdown code.

        Args:
            term: The RDF term to render
            graph: Optional graph whose namespace manager is used to shorten
                the term

        Returns:
            The term wrapped in backticks, namespace-normalised when a graph
            is supplied and normalisation succeeds.
        """
        # Compare against None explicitly: rdflib's Graph implements __len__,
        # so a graph holding only namespace bindings (zero triples) is falsy
        # and a plain truthiness test would skip namespace resolution.
        if graph is not None:
            try:
                return f"`{graph.namespace_manager.normalizeUri(term)}`"
            except Exception:
                # Best-effort display helper: fall back to the raw term below
                # rather than aborting the whole report.
                pass
        return f"`{term}`"

    def _conflict_reason(self, conflict: Conflict) -> str:
        """Get a human-readable reason for the conflict.

        Looks up ``conflict.conflict_type`` in a fixed table and falls back
        to a generic message for any type not listed.
        """
        type_reasons = {
            ConflictType.VALUE_DIFFERENCE: "Different values for the same predicate",
            ConflictType.TYPE_DIFFERENCE: "Different type declarations",
            ConflictType.HIERARCHY_DIFFERENCE: "Different hierarchy positions",
            ConflictType.SEMANTIC_CONTRADICTION: "Semantically incompatible assertions",
        }
        return type_reasons.get(
            conflict.conflict_type, "Values differ between sources"
        )
397
+
398
+
399
# Registry mapping each accepted format name to its formatter class.
# "md" is a second name for the Markdown formatter.
FORMATTERS = dict(
    text=TextFormatter,
    markdown=MarkdownFormatter,
    md=MarkdownFormatter,
)
405
+
406
+
407
def get_formatter(format_name: str, **kwargs) -> BaseFormatter:
    """Instantiate the formatter registered under a format name.

    The lookup is case-insensitive: the name is lower-cased before it is
    looked up in ``FORMATTERS``.

    Args:
        format_name: Format name (text, markdown, md)
        **kwargs: Passed through unchanged to the formatter's constructor

    Returns:
        A new formatter instance

    Raises:
        ValueError: If no formatter is registered under the given name
    """
    chosen = FORMATTERS.get(format_name.lower())
    if chosen:
        return chosen(**kwargs)

    # Unknown name: report it together with every registered name.
    available = ", ".join(FORMATTERS.keys())
    raise ValueError(f"Unknown format: {format_name}. " + f"Available: {available}")