rdf-construct 0.2.1__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. rdf_construct/__init__.py +1 -1
  2. rdf_construct/cli.py +1794 -0
  3. rdf_construct/describe/__init__.py +93 -0
  4. rdf_construct/describe/analyzer.py +176 -0
  5. rdf_construct/describe/documentation.py +146 -0
  6. rdf_construct/describe/formatters/__init__.py +47 -0
  7. rdf_construct/describe/formatters/json.py +65 -0
  8. rdf_construct/describe/formatters/markdown.py +275 -0
  9. rdf_construct/describe/formatters/text.py +315 -0
  10. rdf_construct/describe/hierarchy.py +232 -0
  11. rdf_construct/describe/imports.py +213 -0
  12. rdf_construct/describe/metadata.py +187 -0
  13. rdf_construct/describe/metrics.py +145 -0
  14. rdf_construct/describe/models.py +552 -0
  15. rdf_construct/describe/namespaces.py +180 -0
  16. rdf_construct/describe/profiles.py +415 -0
  17. rdf_construct/localise/__init__.py +114 -0
  18. rdf_construct/localise/config.py +508 -0
  19. rdf_construct/localise/extractor.py +427 -0
  20. rdf_construct/localise/formatters/__init__.py +36 -0
  21. rdf_construct/localise/formatters/markdown.py +229 -0
  22. rdf_construct/localise/formatters/text.py +224 -0
  23. rdf_construct/localise/merger.py +346 -0
  24. rdf_construct/localise/reporter.py +356 -0
  25. rdf_construct/merge/__init__.py +165 -0
  26. rdf_construct/merge/config.py +354 -0
  27. rdf_construct/merge/conflicts.py +281 -0
  28. rdf_construct/merge/formatters.py +426 -0
  29. rdf_construct/merge/merger.py +425 -0
  30. rdf_construct/merge/migrator.py +339 -0
  31. rdf_construct/merge/rules.py +377 -0
  32. rdf_construct/merge/splitter.py +1102 -0
  33. rdf_construct/refactor/__init__.py +72 -0
  34. rdf_construct/refactor/config.py +362 -0
  35. rdf_construct/refactor/deprecator.py +328 -0
  36. rdf_construct/refactor/formatters/__init__.py +8 -0
  37. rdf_construct/refactor/formatters/text.py +311 -0
  38. rdf_construct/refactor/renamer.py +294 -0
  39. {rdf_construct-0.2.1.dist-info → rdf_construct-0.4.0.dist-info}/METADATA +91 -6
  40. {rdf_construct-0.2.1.dist-info → rdf_construct-0.4.0.dist-info}/RECORD +43 -7
  41. {rdf_construct-0.2.1.dist-info → rdf_construct-0.4.0.dist-info}/WHEEL +0 -0
  42. {rdf_construct-0.2.1.dist-info → rdf_construct-0.4.0.dist-info}/entry_points.txt +0 -0
  43. {rdf_construct-0.2.1.dist-info → rdf_construct-0.4.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,224 @@
1
+ """Text formatter for console output.
2
+
3
+ Provides formatted text output for extraction results, merge results,
4
+ and coverage reports.
5
+ """
6
+
7
+ from rdf_construct.localise.extractor import ExtractionResult
8
+ from rdf_construct.localise.merger import MergeResult
9
+ from rdf_construct.localise.reporter import CoverageReport
10
+
11
+
12
class TextFormatter:
    """Formats localise results for console output.

    Every ``format_*`` method builds a list of lines and returns them joined
    with newlines; nothing is printed here. When ``use_colour`` is true,
    status lines are wrapped in ANSI colour escape sequences.
    """

    # ANSI SGR foreground colour codes used by the colour helpers.
    _GREEN = "32"
    _YELLOW = "33"
    _RED = "31"

    # Caps on how many warnings / missing entities are listed in full.
    _MAX_WARNINGS = 10
    _MAX_MISSING = 20

    # Well-known namespaces that ``_shorten_uri`` abbreviates to a prefix.
    _KNOWN_NAMESPACES = {
        "http://www.w3.org/2000/01/rdf-schema#": "rdfs:",
        "http://www.w3.org/2004/02/skos/core#": "skos:",
        "http://www.w3.org/2002/07/owl#": "owl:",
        "http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf:",
    }

    def __init__(self, use_colour: bool = True):
        """Initialise formatter.

        Args:
            use_colour: Whether to use ANSI colour codes.
        """
        self.use_colour = use_colour

    def format_extraction_result(self, result: ExtractionResult) -> str:
        """Format extraction result for display.

        Args:
            result: Extraction result.

        Returns:
            Formatted string: a summary of entity/string counts on success,
            or a single error line built from ``result.error`` on failure.
        """
        if not result.success:
            return self._error(f"✗ Extraction failed: {result.error}")

        lines: list[str] = [
            self._success("✓ Extraction complete"),
            "",
            f" Entities: {result.total_entities}",
            f" Strings: {result.total_strings}",
        ]
        if result.skipped_entities > 0:
            lines.append(f" Skipped: {result.skipped_entities}")

        if result.translation_file:
            tf = result.translation_file
            lines.append("")
            lines.append(f" Source language: {tf.metadata.source_language}")
            lines.append(f" Target language: {tf.metadata.target_language}")

        return "\n".join(lines)

    def format_merge_result(self, result: MergeResult) -> str:
        """Format merge result for display.

        Args:
            result: Merge result.

        Returns:
            Formatted string: merge counters plus at most the first ten
            warnings on success, or a single error line on failure.
        """
        if not result.success:
            return self._error(f"✗ Merge failed: {result.error}")

        stats = result.stats
        lines: list[str] = [
            self._success("✓ Merge complete"),
            "",
            f" Added: {stats.added}",
            f" Updated: {stats.updated}",
        ]

        # Zero-valued skip/error counters are left out to keep output short.
        if stats.skipped_status > 0:
            lines.append(f" Skipped (status): {stats.skipped_status}")
        if stats.skipped_existing > 0:
            lines.append(f" Skipped (exists): {stats.skipped_existing}")
        if stats.errors > 0:
            lines.append(self._warning(f" Errors: {stats.errors}"))

        if result.warnings:
            lines.append("")
            lines.append(self._warning("Warnings:"))
            for warning in result.warnings[: self._MAX_WARNINGS]:
                lines.append(f" - {warning}")
            hidden = len(result.warnings) - self._MAX_WARNINGS
            if hidden > 0:
                lines.append(f" ... and {hidden} more")

        return "\n".join(lines)

    def format_coverage_report(
        self,
        report: CoverageReport,
        verbose: bool = False,
    ) -> str:
        """Format coverage report for display.

        Renders a header, then a table with one row per language (one column
        per property, an overall percentage and a status), and optionally a
        list of missing entities per non-source language.

        Args:
            report: Coverage report.
            verbose: Include detailed missing entity list (first 20 per
                language).

        Returns:
            Formatted string.
        """
        lines: list[str] = [
            "Translation Coverage Report",
            "=" * 40,
            "",
            f"Source: {report.source_file}",
            f"Entities: {report.total_entities}",
            f"Properties: {', '.join(report.properties)}",
            "",
        ]

        # Resolve the displayed language names first so the column width
        # accounts for the " (base)" suffix on the source language.
        display_names = {
            lang: f"{lang} (base)" if coverage.is_source else lang
            for lang, coverage in report.languages.items()
        }

        # ``default=0`` keeps an empty report from raising ValueError.
        lang_width = max(8, max((len(name) for name in display_names.values()), default=0))
        prop_width = max(10, max((len(p) for p in report.properties), default=0))

        # Header row and underline
        header_parts = ["Language".ljust(lang_width)]
        header_parts.extend(prop.ljust(prop_width) for prop in report.properties)
        header_parts.append("Overall")
        header_parts.append("Status")
        header = " ".join(header_parts)

        lines.append(header)
        lines.append("-" * len(header))

        # Data rows
        for lang, coverage in report.languages.items():
            row_parts = [display_names[lang].ljust(lang_width)]

            # Per-property coverage; "-" when the language has no entry.
            for prop in report.properties:
                prop_cov = coverage.by_property.get(prop)
                pct = f"{prop_cov.coverage:.0f}%" if prop_cov else "-"
                row_parts.append(pct.ljust(prop_width))

            # Overall coverage, padded to the width of the "Overall" header.
            row_parts.append(f"{coverage.coverage:.0f}%".ljust(7))

            # Status goes last so colour escape codes cannot disturb alignment.
            if coverage.coverage == 100:
                status = self._success("✓ Complete")
            elif coverage.coverage >= 75:
                status = self._warning(f"⚠ {coverage.pending} pending")
            elif coverage.coverage > 0:
                status = f"✗ {coverage.pending} pending"
            else:
                status = "✗ Not started"
            row_parts.append(status)

            lines.append(" ".join(row_parts))

        # Missing entities section
        if verbose:
            for lang, coverage in report.languages.items():
                if not coverage.missing_entities or coverage.is_source:
                    continue
                lines.append("")
                lines.append(f"Missing {lang} translations:")
                for uri in coverage.missing_entities[: self._MAX_MISSING]:
                    lines.append(f" - {self._shorten_uri(uri)}")
                hidden = len(coverage.missing_entities) - self._MAX_MISSING
                if hidden > 0:
                    lines.append(f" ... and {hidden} more")

        return "\n".join(lines)

    def _paint(self, text: str, code: str) -> str:
        """Wrap text in the given ANSI colour code when colour is enabled."""
        if self.use_colour:
            return f"\033[{code}m{text}\033[0m"
        return text

    def _success(self, text: str) -> str:
        """Format as success (green)."""
        return self._paint(text, self._GREEN)

    def _warning(self, text: str) -> str:
        """Format as warning (yellow)."""
        return self._paint(text, self._YELLOW)

    def _error(self, text: str) -> str:
        """Format as error (red)."""
        return self._paint(text, self._RED)

    def _shorten_uri(self, uri: str) -> str:
        """Shorten a URI for display.

        Args:
            uri: Full URI.

        Returns:
            A prefixed name for the known rdfs/skos/owl/rdf namespaces,
            otherwise the text after the last ``#`` (or, failing that, the
            last ``/``). The full URI is returned when there is no such
            separator or when nothing follows it.
        """
        for namespace, prefix in self._KNOWN_NAMESPACES.items():
            if uri.startswith(namespace):
                return prefix + uri[len(namespace) :]

        # If no known prefix, just show local name
        for separator in ("#", "/"):
            if separator in uri:
                local_name = uri.rsplit(separator, 1)[-1]
                # A trailing separator would leave an empty name; show the
                # whole URI instead of a blank list item.
                return local_name or uri

        return uri
@@ -0,0 +1,346 @@
1
+ """Merge translations back into RDF ontologies.
2
+
3
+ Takes completed translation files and adds translated literals to the
4
+ ontology, creating new language-tagged triples.
5
+ """
6
+
7
+ from dataclasses import dataclass, field
8
+ from pathlib import Path
9
+
10
+ from rdflib import Graph, Literal, URIRef
11
+
12
+ from rdf_construct.localise.config import (
13
+ ExistingStrategy,
14
+ MergeConfig,
15
+ TranslationFile,
16
+ TranslationStatus,
17
+ )
18
+
19
+
20
@dataclass
class MergeStats:
    """Counters describing the outcome of a merge operation.

    Attributes:
        added: Number of translations added.
        updated: Number of translations updated.
        skipped_status: Translations skipped due to status.
        skipped_existing: Translations skipped (already exist, preserve mode).
        errors: Number of errors encountered.
    """

    added: int = 0
    updated: int = 0
    skipped_status: int = 0
    skipped_existing: int = 0
    errors: int = 0

    @property
    def total_processed(self) -> int:
        """Sum of the added, updated and both skipped counters.

        ``errors`` is not part of this total.
        """
        handled = (
            self.added,
            self.updated,
            self.skipped_status,
            self.skipped_existing,
        )
        return sum(handled)
42
+
43
+
44
@dataclass
class MergeResult:
    """Outcome of a translation merge operation.

    Only ``success`` is required; every other field has a default, so a
    failure can be reported with just ``success=False`` and ``error``.

    Attributes:
        success: Whether merge succeeded.
        merged_graph: Graph with merged translations (None by default).
        stats: Merge statistics; a fresh ``MergeStats`` per instance.
        error: Error message if failed.
        warnings: List of warning messages; a fresh list per instance.
    """

    success: bool
    merged_graph: Graph | None = None
    stats: MergeStats = field(default_factory=MergeStats)
    error: str | None = None
    warnings: list[str] = field(default_factory=list)
61
+
62
+
63
class TranslationMerger:
    """Merges translation files back into RDF ontologies.

    The merger takes completed translation YAML files and adds the
    translations as new language-tagged literals to the ontology. The input
    graph is never modified: every merge works on a copy.
    """

    # CURIE prefixes that ``_expand_property`` knows how to expand.
    _CURIE_PREFIXES = {
        "rdfs:": "http://www.w3.org/2000/01/rdf-schema#",
        "skos:": "http://www.w3.org/2004/02/skos/core#",
        "owl:": "http://www.w3.org/2002/07/owl#",
        "rdf:": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
        "dc:": "http://purl.org/dc/elements/1.1/",
        "dcterms:": "http://purl.org/dc/terms/",
    }

    def __init__(self, config: MergeConfig | None = None):
        """Initialise the merger.

        Args:
            config: Merge configuration. Uses defaults if not provided.
        """
        self.config = config or MergeConfig()

    def merge(
        self,
        graph: Graph,
        translation_file: TranslationFile,
    ) -> MergeResult:
        """Merge translations into an RDF graph.

        Entities that do not appear as a subject in the graph are reported
        as warnings and counted as errors. Entries below the configured
        minimum status, or with an empty translation, are counted as
        ``skipped_status``. Translations that already exist for the target
        language are kept or replaced according to ``config.existing``.

        Args:
            graph: RDF graph to merge into (left untouched).
            translation_file: Completed translation file.

        Returns:
            MergeResult with merged graph. Any exception raised while
            merging is converted into a failed result carrying its message.
        """
        try:
            merged = self._copy_graph(graph)

            stats = MergeStats()
            warnings: list[str] = []
            target_lang = translation_file.metadata.target_language

            # Triples added during this call. They must not count as
            # "existing" translations, otherwise a second entry for the same
            # entity/property (e.g. several skos:altLabel values) would be
            # dropped in preserve mode or would replace the first one in
            # overwrite mode.
            added_here: set[tuple] = set()

            for entity in translation_file.entities:
                entity_uri = URIRef(entity.uri)

                # Check entity exists in graph
                if not self._entity_exists(merged, entity_uri):
                    warnings.append(f"Entity not found in graph: {entity.uri}")
                    stats.errors += 1
                    continue

                for entry in entity.labels:
                    # Check status threshold
                    if not self._meets_status(entry.status):
                        stats.skipped_status += 1
                        continue

                    # Skip empty translations; a missing (None) value is
                    # treated like an empty one rather than aborting the
                    # whole merge.
                    if not (entry.translation or "").strip():
                        stats.skipped_status += 1
                        continue

                    prop_uri = URIRef(self._expand_property(entry.property))
                    new_triple = (
                        entity_uri,
                        prop_uri,
                        Literal(entry.translation, lang=target_lang),
                    )

                    # An exact repeat of something this call already added
                    # changes nothing; do not count it as added again.
                    if new_triple in added_here:
                        stats.skipped_existing += 1
                        continue

                    # Only translations present before this call count as
                    # existing.
                    existing = [
                        triple
                        for triple in self._get_existing_translation(
                            merged, entity_uri, prop_uri, target_lang
                        )
                        if triple not in added_here
                    ]

                    if existing:
                        if self.config.existing == ExistingStrategy.PRESERVE:
                            stats.skipped_existing += 1
                            continue
                        # Remove existing before adding new
                        for triple in existing:
                            merged.remove(triple)
                        stats.updated += 1
                    else:
                        stats.added += 1

                    merged.add(new_triple)
                    added_here.add(new_triple)

            return MergeResult(
                success=True,
                merged_graph=merged,
                stats=stats,
                warnings=warnings,
            )

        except Exception as e:
            # Deliberate boundary: callers receive a failed result instead
            # of an exception.
            return MergeResult(
                success=False,
                error=str(e),
            )

    def merge_multiple(
        self,
        graph: Graph,
        translation_files: list[TranslationFile],
    ) -> MergeResult:
        """Merge multiple translation files into a graph.

        Files are merged in order, each one on top of the result of the
        previous one. The first failing file aborts the run.

        Args:
            graph: RDF graph to merge into (left untouched).
            translation_files: List of translation files.

        Returns:
            Combined MergeResult with summed statistics and concatenated
            warnings, or a failed result naming the target language of the
            file that failed.
        """
        # Start with a copy so an empty file list still yields a new graph.
        merged = self._copy_graph(graph)

        combined_stats = MergeStats()
        all_warnings: list[str] = []

        for trans_file in translation_files:
            result = self.merge(merged, trans_file)

            if not result.success:
                return MergeResult(
                    success=False,
                    error=f"Failed merging {trans_file.metadata.target_language}: {result.error}",
                )

            # Use the merged graph for next iteration
            merged = result.merged_graph

            # Combine stats
            combined_stats.added += result.stats.added
            combined_stats.updated += result.stats.updated
            combined_stats.skipped_status += result.stats.skipped_status
            combined_stats.skipped_existing += result.stats.skipped_existing
            combined_stats.errors += result.stats.errors
            all_warnings.extend(result.warnings)

        return MergeResult(
            success=True,
            merged_graph=merged,
            stats=combined_stats,
            warnings=all_warnings,
        )

    @staticmethod
    def _copy_graph(graph: Graph) -> Graph:
        """Return a new graph with the same namespace bindings and triples."""
        duplicate = Graph()
        for prefix, namespace in graph.namespaces():
            duplicate.bind(prefix, namespace)
        for triple in graph:
            duplicate.add(triple)
        return duplicate

    def _meets_status(self, status: TranslationStatus) -> bool:
        """Check if status meets minimum threshold.

        Args:
            status: Translation status to check.

        Returns:
            True if status is at or above ``config.min_status`` in the
            order pending < needs-review < translated < approved. False if
            either status is not one of those four.
        """
        status_order = [
            TranslationStatus.PENDING,
            TranslationStatus.NEEDS_REVIEW,
            TranslationStatus.TRANSLATED,
            TranslationStatus.APPROVED,
        ]

        try:
            return status_order.index(status) >= status_order.index(self.config.min_status)
        except ValueError:
            # Unknown status values never qualify.
            return False

    def _entity_exists(self, graph: Graph, entity: URIRef) -> bool:
        """Check if an entity exists in the graph.

        Args:
            graph: RDF graph.
            entity: Entity URI.

        Returns:
            True if the entity is the subject of at least one triple.
        """
        # Stops at the first match instead of scanning every triple.
        return any(True for _ in graph.triples((entity, None, None)))

    def _get_existing_translation(
        self,
        graph: Graph,
        subject: URIRef,
        predicate: URIRef,
        language: str,
    ) -> list[tuple]:
        """Get existing translations for a specific language.

        Args:
            graph: RDF graph.
            subject: Subject URI.
            predicate: Predicate URI.
            language: Language code.

        Returns:
            List of matching triples: those whose object is a literal with
            exactly this language tag.
        """
        return [
            (subject, predicate, obj)
            for obj in graph.objects(subject, predicate)
            if isinstance(obj, Literal) and obj.language == language
        ]

    def _expand_property(self, prop: str) -> str:
        """Expand a CURIE to full URI.

        Args:
            prop: Property string (CURIE or full URI).

        Returns:
            Full URI string. Strings that do not start with one of the known
            prefixes (rdfs, skos, owl, rdf, dc, dcterms) are returned
            unchanged.
        """
        for prefix, namespace in self._CURIE_PREFIXES.items():
            if prop.startswith(prefix):
                return namespace + prop[len(prefix) :]

        return prop
302
+
303
+
304
def merge_translations(
    source: Path,
    translation_files: list[Path],
    output: Path | None = None,
    min_status: str = "translated",
    existing: str = "preserve",
) -> MergeResult:
    """Merge translation files into an ontology.

    Convenience function for simple merge operations: parses the source
    ontology, loads each translation file, merges them in order and
    optionally writes the result as Turtle.

    Args:
        source: Source ontology file.
        translation_files: List of translation YAML files.
        output: Output file path. When omitted nothing is written (the
            source file is not touched); the merged graph is only available
            on the returned result.
        min_status: Minimum status to include; converted with
            ``TranslationStatus(min_status)``.
        existing: How to handle existing translations; converted with
            ``ExistingStrategy(existing)``.

    Returns:
        MergeResult with merged graph.

    NOTE(review): parsing the source, loading the YAML files and building
    the config happen outside the merger's own error handling, so failures
    there presumably propagate as exceptions rather than as a failed
    MergeResult - confirm against callers.
    """
    # Load graph
    graph = Graph()
    graph.parse(source)

    # Load translation files
    trans_files = [TranslationFile.from_yaml(p) for p in translation_files]

    # Build config
    config = MergeConfig(
        min_status=TranslationStatus(min_status),
        existing=ExistingStrategy(existing),
    )

    # Merge
    result = TranslationMerger(config).merge_multiple(graph, trans_files)

    # Save if requested. The graph is compared against None explicitly
    # because an rdflib Graph with zero triples is falsy, which would
    # otherwise skip writing a legitimately empty result.
    if result.success and output and result.merged_graph is not None:
        result.merged_graph.serialize(destination=output, format="turtle")

    return result