PyPI - rdf-construct - Versions diffs - 0.2.1__py3-none-any.whl → 0.4.0__py3-none-any.whl - Mend

rdf-construct 0.2.1 (py3-none-any.whl) → 0.4.0 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (43) hide show

rdf_construct/__init__.py +1 -1
rdf_construct/cli.py +1794 -0
rdf_construct/describe/__init__.py +93 -0
rdf_construct/describe/analyzer.py +176 -0
rdf_construct/describe/documentation.py +146 -0
rdf_construct/describe/formatters/__init__.py +47 -0
rdf_construct/describe/formatters/json.py +65 -0
rdf_construct/describe/formatters/markdown.py +275 -0
rdf_construct/describe/formatters/text.py +315 -0
rdf_construct/describe/hierarchy.py +232 -0
rdf_construct/describe/imports.py +213 -0
rdf_construct/describe/metadata.py +187 -0
rdf_construct/describe/metrics.py +145 -0
rdf_construct/describe/models.py +552 -0
rdf_construct/describe/namespaces.py +180 -0
rdf_construct/describe/profiles.py +415 -0
rdf_construct/localise/__init__.py +114 -0
rdf_construct/localise/config.py +508 -0
rdf_construct/localise/extractor.py +427 -0
rdf_construct/localise/formatters/__init__.py +36 -0
rdf_construct/localise/formatters/markdown.py +229 -0
rdf_construct/localise/formatters/text.py +224 -0
rdf_construct/localise/merger.py +346 -0
rdf_construct/localise/reporter.py +356 -0
rdf_construct/merge/__init__.py +165 -0
rdf_construct/merge/config.py +354 -0
rdf_construct/merge/conflicts.py +281 -0
rdf_construct/merge/formatters.py +426 -0
rdf_construct/merge/merger.py +425 -0
rdf_construct/merge/migrator.py +339 -0
rdf_construct/merge/rules.py +377 -0
rdf_construct/merge/splitter.py +1102 -0
rdf_construct/refactor/__init__.py +72 -0
rdf_construct/refactor/config.py +362 -0
rdf_construct/refactor/deprecator.py +328 -0
rdf_construct/refactor/formatters/__init__.py +8 -0
rdf_construct/refactor/formatters/text.py +311 -0
rdf_construct/refactor/renamer.py +294 -0
{rdf_construct-0.2.1.dist-info → rdf_construct-0.4.0.dist-info}/METADATA +91 -6
{rdf_construct-0.2.1.dist-info → rdf_construct-0.4.0.dist-info}/RECORD +43 -7
{rdf_construct-0.2.1.dist-info → rdf_construct-0.4.0.dist-info}/WHEEL +0 -0
{rdf_construct-0.2.1.dist-info → rdf_construct-0.4.0.dist-info}/entry_points.txt +0 -0
{rdf_construct-0.2.1.dist-info → rdf_construct-0.4.0.dist-info}/licenses/LICENSE +0 -0

rdf_construct/localise/formatters/text.py ADDED Viewed

@@ -0,0 +1,224 @@
+"""Text formatter for console output.
+Provides formatted text output for extraction results, merge results,
+and coverage reports.
+"""
+from rdf_construct.localise.extractor import ExtractionResult
+from rdf_construct.localise.merger import MergeResult
+from rdf_construct.localise.reporter import CoverageReport
+class TextFormatter:
+    """Formats localise results for console output."""
+    def __init__(self, use_colour: bool = True):
+        """Initialise formatter.
+        Args:
+            use_colour: Whether to use ANSI colour codes.
+        """
+        self.use_colour = use_colour
+    def format_extraction_result(self, result: ExtractionResult) -> str:
+        """Format extraction result for display.
+        Args:
+            result: Extraction result.
+        Returns:
+            Formatted string.
+        """
+        lines: list[str] = []
+        if result.success:
+            lines.append(self._success("✓ Extraction complete"))
+            lines.append("")
+            lines.append(f"  Entities:        {result.total_entities}")
+            lines.append(f"  Strings:         {result.total_strings}")
+            if result.skipped_entities > 0:
+                lines.append(f"  Skipped:         {result.skipped_entities}")
+            if result.translation_file:
+                tf = result.translation_file
+                lines.append("")
+                lines.append(f"  Source language: {tf.metadata.source_language}")
+                lines.append(f"  Target language: {tf.metadata.target_language}")
+        else:
+            lines.append(self._error(f"✗ Extraction failed: {result.error}"))
+        return "\n".join(lines)
+    def format_merge_result(self, result: MergeResult) -> str:
+        """Format merge result for display.
+        Args:
+            result: Merge result.
+        Returns:
+            Formatted string.
+        """
+        lines: list[str] = []
+        if result.success:
+            lines.append(self._success("✓ Merge complete"))
+            lines.append("")
+            stats = result.stats
+            lines.append(f"  Added:           {stats.added}")
+            lines.append(f"  Updated:         {stats.updated}")
+            if stats.skipped_status > 0:
+                lines.append(f"  Skipped (status): {stats.skipped_status}")
+            if stats.skipped_existing > 0:
+                lines.append(f"  Skipped (exists): {stats.skipped_existing}")
+            if stats.errors > 0:
+                lines.append(self._warning(f"  Errors:          {stats.errors}"))
+            if result.warnings:
+                lines.append("")
+                lines.append(self._warning("Warnings:"))
+                for warning in result.warnings[:10]:  # Limit to 10
+                    lines.append(f"  - {warning}")
+                if len(result.warnings) > 10:
+                    lines.append(f"  ... and {len(result.warnings) - 10} more")
+        else:
+            lines.append(self._error(f"✗ Merge failed: {result.error}"))
+        return "\n".join(lines)
+    def format_coverage_report(
+        self,
+        report: CoverageReport,
+        verbose: bool = False,
+    ) -> str:
+        """Format coverage report for display.
+        Args:
+            report: Coverage report.
+            verbose: Include detailed missing entity list.
+        Returns:
+            Formatted string.
+        """
+        lines: list[str] = []
+        # Header
+        lines.append("Translation Coverage Report")
+        lines.append("=" * 40)
+        lines.append("")
+        lines.append(f"Source: {report.source_file}")
+        lines.append(f"Entities: {report.total_entities}")
+        lines.append(f"Properties: {', '.join(report.properties)}")
+        lines.append("")
+        # Table header
+        # Calculate column widths
+        lang_width = max(8, max(len(lang) for lang in report.languages.keys()))
+        prop_width = max(10, max(len(p) for p in report.properties))
+        # Build header row
+        header_parts = ["Language".ljust(lang_width)]
+        for prop in report.properties:
+            header_parts.append(prop.ljust(prop_width))
+        header_parts.append("Overall")
+        header_parts.append("Status")
+        lines.append("  ".join(header_parts))
+        lines.append("-" * (len("  ".join(header_parts))))
+        # Data rows
+        for lang, coverage in report.languages.items():
+            row_parts = []
+            # Language name
+            lang_display = f"{lang} (base)" if coverage.is_source else lang
+            row_parts.append(lang_display.ljust(lang_width))
+            # Property coverages
+            for prop in report.properties:
+                prop_cov = coverage.by_property.get(prop)
+                if prop_cov:
+                    pct = f"{prop_cov.coverage:.0f}%"
+                else:
+                    pct = "-"
+                row_parts.append(pct.ljust(prop_width))
+            # Overall coverage
+            overall_pct = f"{coverage.coverage:.0f}%"
+            row_parts.append(overall_pct.ljust(7))
+            # Status indicator
+            if coverage.coverage == 100:
+                status = self._success("✓ Complete")
+            elif coverage.coverage >= 75:
+                status = self._warning(f"⚠ {coverage.pending} pending")
+            elif coverage.coverage > 0:
+                status = f"✗ {coverage.pending} pending"
+            else:
+                status = "✗ Not started"
+            row_parts.append(status)
+            lines.append("  ".join(row_parts))
+        # Missing entities section
+        if verbose:
+            for lang, coverage in report.languages.items():
+                if coverage.missing_entities and not coverage.is_source:
+                    lines.append("")
+                    lines.append(f"Missing {lang} translations:")
+                    for uri in coverage.missing_entities[:20]:
+                        # Shorten URI for display
+                        short_uri = self._shorten_uri(uri)
+                        lines.append(f"  - {short_uri}")
+                    if len(coverage.missing_entities) > 20:
+                        lines.append(f"  ... and {len(coverage.missing_entities) - 20} more")
+        return "\n".join(lines)
+    def _success(self, text: str) -> str:
+        """Format as success (green)."""
+        if self.use_colour:
+            return f"\033[32m{text}\033[0m"
+        return text
+    def _warning(self, text: str) -> str:
+        """Format as warning (yellow)."""
+        if self.use_colour:
+            return f"\033[33m{text}\033[0m"
+        return text
+    def _error(self, text: str) -> str:
+        """Format as error (red)."""
+        if self.use_colour:
+            return f"\033[31m{text}\033[0m"
+        return text
+    def _shorten_uri(self, uri: str) -> str:
+        """Shorten a URI for display.
+        Args:
+            uri: Full URI.
+        Returns:
+            Shortened version.
+        """
+        # Common namespace prefixes
+        prefixes = {
+            "http://www.w3.org/2000/01/rdf-schema#": "rdfs:",
+            "http://www.w3.org/2004/02/skos/core#": "skos:",
+            "http://www.w3.org/2002/07/owl#": "owl:",
+            "http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf:",
+        }
+        for namespace, prefix in prefixes.items():
+            if uri.startswith(namespace):
+                return prefix + uri[len(namespace) :]
+        # If no known prefix, just show local name
+        if "#" in uri:
+            return uri.split("#")[-1]
+        elif "/" in uri:
+            return uri.split("/")[-1]
+        return uri

rdf_construct/localise/merger.py ADDED Viewed

@@ -0,0 +1,346 @@
+"""Merge translations back into RDF ontologies.
+Takes completed translation files and adds translated literals to the
+ontology, creating new language-tagged triples.
+"""
+from dataclasses import dataclass, field
+from pathlib import Path
+from rdflib import Graph, Literal, URIRef
+from rdf_construct.localise.config import (
+    ExistingStrategy,
+    MergeConfig,
+    TranslationFile,
+    TranslationStatus,
+)
+@dataclass
+class MergeStats:
+    """Statistics for a merge operation.
+    Attributes:
+        added: Number of translations added.
+        updated: Number of translations updated.
+        skipped_status: Translations skipped due to status.
+        skipped_existing: Translations skipped (already exist, preserve mode).
+        errors: Number of errors encountered.
+    """
+    added: int = 0
+    updated: int = 0
+    skipped_status: int = 0
+    skipped_existing: int = 0
+    errors: int = 0
+    @property
+    def total_processed(self) -> int:
+        """Total translations processed."""
+        return self.added + self.updated + self.skipped_status + self.skipped_existing
+@dataclass
+class MergeResult:
+    """Result of a translation merge operation.
+    Only ``success`` is required; every other field has a default, so a
+    failure can be reported with just ``success=False`` and ``error``.
+    Attributes:
+        success: Whether merge succeeded.
+        merged_graph: Graph with merged translations (defaults to None).
+        stats: Merge statistics (a fresh, all-zero MergeStats by default).
+        error: Error message if failed.
+        warnings: List of warning messages.
+    """
+    success: bool
+    merged_graph: Graph | None = None
+    # default_factory gives every result its own MergeStats / list instance
+    stats: MergeStats = field(default_factory=MergeStats)
+    error: str | None = None
+    warnings: list[str] = field(default_factory=list)
+class TranslationMerger:
+    """Merges translation files back into RDF ontologies.
+    The merger takes completed translation YAML files and adds the
+    translations as new language-tagged literals to the ontology.
+    """
+    def __init__(self, config: MergeConfig | None = None):
+        """Initialise the merger.
+        Args:
+            config: Merge configuration. Uses defaults if not provided.
+        """
+        self.config = config or MergeConfig()
+    def merge(
+        self,
+        graph: Graph,
+        translation_file: TranslationFile,
+    ) -> MergeResult:
+        """Merge translations into an RDF graph.
+        Args:
+            graph: RDF graph to merge into.
+            translation_file: Completed translation file.
+        Returns:
+            MergeResult with merged graph.
+        """
+        try:
+            # Create a copy of the graph to work with
+            merged = Graph()
+            for prefix, namespace in graph.namespaces():
+                merged.bind(prefix, namespace)
+            for triple in graph:
+                merged.add(triple)
+            stats = MergeStats()
+            warnings: list[str] = []
+            target_lang = translation_file.metadata.target_language
+            # Process each entity
+            for entity in translation_file.entities:
+                entity_uri = URIRef(entity.uri)
+                # Check entity exists in graph
+                if not self._entity_exists(merged, entity_uri):
+                    warnings.append(f"Entity not found in graph: {entity.uri}")
+                    stats.errors += 1
+                    continue
+                # Process each label
+                for entry in entity.labels:
+                    # Check status threshold
+                    if not self._meets_status(entry.status):
+                        stats.skipped_status += 1
+                        continue
+                    # Skip empty translations
+                    if not entry.translation.strip():
+                        stats.skipped_status += 1
+                        continue
+                    # Expand property
+                    prop_uri = URIRef(self._expand_property(entry.property))
+                    # Check for existing translation
+                    existing = self._get_existing_translation(
+                        merged, entity_uri, prop_uri, target_lang
+                    )
+                    if existing:
+                        if self.config.existing == ExistingStrategy.PRESERVE:
+                            stats.skipped_existing += 1
+                            continue
+                        else:
+                            # Remove existing before adding new
+                            for triple in existing:
+                                merged.remove(triple)
+                            stats.updated += 1
+                    else:
+                        stats.added += 1
+                    # Add translation
+                    translation_literal = Literal(entry.translation, lang=target_lang)
+                    merged.add((entity_uri, prop_uri, translation_literal))
+            return MergeResult(
+                success=True,
+                merged_graph=merged,
+                stats=stats,
+                warnings=warnings,
+            )
+        except Exception as e:
+            return MergeResult(
+                success=False,
+                error=str(e),
+            )
+    def merge_multiple(
+        self,
+        graph: Graph,
+        translation_files: list[TranslationFile],
+    ) -> MergeResult:
+        """Merge multiple translation files into a graph.
+        Args:
+            graph: RDF graph to merge into.
+            translation_files: List of translation files.
+        Returns:
+            Combined MergeResult.
+        """
+        # Start with a copy
+        merged = Graph()
+        for prefix, namespace in graph.namespaces():
+            merged.bind(prefix, namespace)
+        for triple in graph:
+            merged.add(triple)
+        combined_stats = MergeStats()
+        all_warnings: list[str] = []
+        for trans_file in translation_files:
+            result = self.merge(merged, trans_file)
+            if not result.success:
+                return MergeResult(
+                    success=False,
+                    error=f"Failed merging {trans_file.metadata.target_language}: {result.error}",
+                )
+            # Use the merged graph for next iteration
+            merged = result.merged_graph
+            # Combine stats
+            combined_stats.added += result.stats.added
+            combined_stats.updated += result.stats.updated
+            combined_stats.skipped_status += result.stats.skipped_status
+            combined_stats.skipped_existing += result.stats.skipped_existing
+            combined_stats.errors += result.stats.errors
+            all_warnings.extend(result.warnings)
+        return MergeResult(
+            success=True,
+            merged_graph=merged,
+            stats=combined_stats,
+            warnings=all_warnings,
+        )
+    def _meets_status(self, status: TranslationStatus) -> bool:
+        """Check if status meets minimum threshold.
+        Args:
+            status: Translation status to check.
+        Returns:
+            True if status meets threshold.
+        """
+        status_order = [
+            TranslationStatus.PENDING,
+            TranslationStatus.NEEDS_REVIEW,
+            TranslationStatus.TRANSLATED,
+            TranslationStatus.APPROVED,
+        ]
+        try:
+            status_level = status_order.index(status)
+            min_level = status_order.index(self.config.min_status)
+            return status_level >= min_level
+        except ValueError:
+            return False
+    def _entity_exists(self, graph: Graph, entity: URIRef) -> bool:
+        """Check if an entity exists in the graph.
+        Args:
+            graph: RDF graph.
+            entity: Entity URI.
+        Returns:
+            True if entity has any triples.
+        """
+        # Check if entity appears as subject
+        for _ in graph.triples((entity, None, None)):
+            return True
+        return False
+    def _get_existing_translation(
+        self,
+        graph: Graph,
+        subject: URIRef,
+        predicate: URIRef,
+        language: str,
+    ) -> list[tuple]:
+        """Get existing translations for a specific language.
+        Args:
+            graph: RDF graph.
+            subject: Subject URI.
+            predicate: Predicate URI.
+            language: Language code.
+        Returns:
+            List of matching triples.
+        """
+        existing = []
+        for obj in graph.objects(subject, predicate):
+            if isinstance(obj, Literal) and obj.language == language:
+                existing.append((subject, predicate, obj))
+        return existing
+    def _expand_property(self, prop: str) -> str:
+        """Expand a CURIE to full URI.
+        Args:
+            prop: Property string (CURIE or full URI).
+        Returns:
+            Full URI string.
+        """
+        prefixes = {
+            "rdfs:": "http://www.w3.org/2000/01/rdf-schema#",
+            "skos:": "http://www.w3.org/2004/02/skos/core#",
+            "owl:": "http://www.w3.org/2002/07/owl#",
+            "rdf:": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
+            "dc:": "http://purl.org/dc/elements/1.1/",
+            "dcterms:": "http://purl.org/dc/terms/",
+        }
+        for prefix, namespace in prefixes.items():
+            if prop.startswith(prefix):
+                return namespace + prop[len(prefix) :]
+        return prop
+def merge_translations(
+    source: Path,
+    translation_files: list[Path],
+    output: Path | None = None,
+    min_status: str = "translated",
+    existing: str = "preserve",
+) -> MergeResult:
+    """Merge translation files into an ontology.
+    Convenience function for simple merge operations.
+    Args:
+        source: Source ontology file.
+        translation_files: List of translation YAML files.
+        output: Output file path. Writes to source if not provided.
+        min_status: Minimum status to include.
+        existing: How to handle existing translations.
+    Returns:
+        MergeResult with merged graph.
+    """
+    # Load graph
+    graph = Graph()
+    graph.parse(source)
+    # Load translation files
+    trans_files = [TranslationFile.from_yaml(p) for p in translation_files]
+    # Build config
+    config = MergeConfig(
+        min_status=TranslationStatus(min_status),
+        existing=ExistingStrategy(existing),
+    )
+    # Merge
+    merger = TranslationMerger(config)
+    result = merger.merge_multiple(graph, trans_files)
+    # Save if requested
+    if result.success and output and result.merged_graph:
+        result.merged_graph.serialize(destination=output, format="turtle")
+    return result

rdf-construct 0.2.1__py3-none-any.whl → 0.4.0__py3-none-any.whl

rdf-construct 0.2.1 (py3-none-any.whl) → 0.4.0 (py3-none-any.whl)