rdf-construct 0.2.1__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff reflects the contents of publicly released package versions as they appear in their public registry, and is provided for informational purposes only.
- rdf_construct/__init__.py +1 -1
- rdf_construct/cli.py +1794 -0
- rdf_construct/describe/__init__.py +93 -0
- rdf_construct/describe/analyzer.py +176 -0
- rdf_construct/describe/documentation.py +146 -0
- rdf_construct/describe/formatters/__init__.py +47 -0
- rdf_construct/describe/formatters/json.py +65 -0
- rdf_construct/describe/formatters/markdown.py +275 -0
- rdf_construct/describe/formatters/text.py +315 -0
- rdf_construct/describe/hierarchy.py +232 -0
- rdf_construct/describe/imports.py +213 -0
- rdf_construct/describe/metadata.py +187 -0
- rdf_construct/describe/metrics.py +145 -0
- rdf_construct/describe/models.py +552 -0
- rdf_construct/describe/namespaces.py +180 -0
- rdf_construct/describe/profiles.py +415 -0
- rdf_construct/localise/__init__.py +114 -0
- rdf_construct/localise/config.py +508 -0
- rdf_construct/localise/extractor.py +427 -0
- rdf_construct/localise/formatters/__init__.py +36 -0
- rdf_construct/localise/formatters/markdown.py +229 -0
- rdf_construct/localise/formatters/text.py +224 -0
- rdf_construct/localise/merger.py +346 -0
- rdf_construct/localise/reporter.py +356 -0
- rdf_construct/merge/__init__.py +165 -0
- rdf_construct/merge/config.py +354 -0
- rdf_construct/merge/conflicts.py +281 -0
- rdf_construct/merge/formatters.py +426 -0
- rdf_construct/merge/merger.py +425 -0
- rdf_construct/merge/migrator.py +339 -0
- rdf_construct/merge/rules.py +377 -0
- rdf_construct/merge/splitter.py +1102 -0
- rdf_construct/refactor/__init__.py +72 -0
- rdf_construct/refactor/config.py +362 -0
- rdf_construct/refactor/deprecator.py +328 -0
- rdf_construct/refactor/formatters/__init__.py +8 -0
- rdf_construct/refactor/formatters/text.py +311 -0
- rdf_construct/refactor/renamer.py +294 -0
- {rdf_construct-0.2.1.dist-info → rdf_construct-0.4.0.dist-info}/METADATA +91 -6
- {rdf_construct-0.2.1.dist-info → rdf_construct-0.4.0.dist-info}/RECORD +43 -7
- {rdf_construct-0.2.1.dist-info → rdf_construct-0.4.0.dist-info}/WHEEL +0 -0
- {rdf_construct-0.2.1.dist-info → rdf_construct-0.4.0.dist-info}/entry_points.txt +0 -0
- {rdf_construct-0.2.1.dist-info → rdf_construct-0.4.0.dist-info}/licenses/LICENSE +0 -0
rdf_construct/localise/extractor.py

@@ -0,0 +1,427 @@
"""String extraction from RDF ontologies.

Extracts translatable strings (labels, comments, definitions) from ontology
files and generates translation files in YAML format.
"""

from dataclasses import dataclass, field
from datetime import datetime
from pathlib import Path

from rdflib import Graph, Literal, URIRef
from rdflib.namespace import OWL, RDF, RDFS

from rdf_construct.localise.config import (
    EntityTranslations,
    ExtractConfig,
    TranslationEntry,
    TranslationFile,
    TranslationFileMetadata,
    TranslationStatus,
)


# Standard property URIs
LABEL_PROPERTIES = [
    "http://www.w3.org/2000/01/rdf-schema#label",
    "http://www.w3.org/2004/02/skos/core#prefLabel",
    "http://www.w3.org/2004/02/skos/core#altLabel",
]

COMMENT_PROPERTIES = [
    "http://www.w3.org/2000/01/rdf-schema#comment",
    "http://www.w3.org/2004/02/skos/core#definition",
    "http://www.w3.org/2004/02/skos/core#example",
    "http://www.w3.org/2004/02/skos/core#note",
    "http://www.w3.org/2004/02/skos/core#scopeNote",
]

DEFAULT_PROPERTIES = LABEL_PROPERTIES[:2] + COMMENT_PROPERTIES[:2]


@dataclass
class ExtractionResult:
    """Result of a string extraction operation.

    Attributes:
        success: Whether extraction succeeded.
        translation_file: Generated translation file.
        total_entities: Number of entities processed.
        total_strings: Number of strings extracted.
        skipped_entities: Number of entities skipped.
        error: Error message if failed.
    """

    success: bool
    translation_file: TranslationFile | None = None
    total_entities: int = 0
    total_strings: int = 0
    skipped_entities: int = 0
    error: str | None = None


class StringExtractor:
    """Extracts translatable strings from RDF ontologies.

    The extractor identifies entities (classes, properties, individuals) and
    extracts text values for configured properties (rdfs:label, rdfs:comment, etc.)
    in the source language. The output is a translation file with empty
    translation fields ready for translators.
    """

    def __init__(self, config: ExtractConfig | None = None):
        """Initialise the extractor.

        Args:
            config: Extraction configuration. Uses defaults if not provided.
        """
        self.config = config or ExtractConfig()

    def extract(
        self,
        graph: Graph,
        source_file: Path | str,
        target_language: str | None = None,
    ) -> ExtractionResult:
        """Extract translatable strings from an RDF graph.

        Args:
            graph: RDF graph to extract from.
            source_file: Path to source file (for metadata).
            target_language: Override target language from config.

        Returns:
            ExtractionResult with translation file.
        """
        target_lang = target_language or self.config.target_language
        if not target_lang:
            return ExtractionResult(
                success=False,
                error="No target language specified",
            )

        try:
            # Get all entities from the graph
            entities = self._collect_entities(graph)

            # Extract translations for each entity
            entity_translations: list[EntityTranslations] = []
            total_strings = 0
            skipped = 0

            for entity_uri, entity_type in entities:
                # Check for deprecation
                if not self.config.include_deprecated and self._is_deprecated(
                    graph, entity_uri
                ):
                    skipped += 1
                    continue

                # Extract labels for this entity
                labels = self._extract_entity_labels(
                    graph,
                    entity_uri,
                    target_lang,
                )

                if not labels:
                    if self.config.include_unlabelled:
                        # Include entity with empty labels
                        pass
                    else:
                        skipped += 1
                        continue

                if labels:
                    entity_translations.append(
                        EntityTranslations(
                            uri=str(entity_uri),
                            entity_type=entity_type,
                            labels=labels,
                        )
                    )
                    total_strings += len(labels)

            # Build translation file
            metadata = TranslationFileMetadata(
                source_file=str(source_file),
                source_language=self.config.source_language,
                target_language=target_lang,
                generated=datetime.now(),
                properties=[self._shorten_property(p) for p in self.config.properties],
            )

            translation_file = TranslationFile(
                metadata=metadata,
                entities=entity_translations,
            )

            return ExtractionResult(
                success=True,
                translation_file=translation_file,
                total_entities=len(entity_translations),
                total_strings=total_strings,
                skipped_entities=skipped,
            )

        except Exception as e:
            return ExtractionResult(
                success=False,
                error=str(e),
            )

    def _collect_entities(self, graph: Graph) -> list[tuple[URIRef, str]]:
        """Collect all entities from the graph with their types.

        Args:
            graph: RDF graph.

        Returns:
            List of (URI, type_string) tuples.
        """
        entities: list[tuple[URIRef, str]] = []
        seen: set[URIRef] = set()

        # Classes
        for cls_type in [OWL.Class, RDFS.Class]:
            for s in graph.subjects(RDF.type, cls_type):
                if isinstance(s, URIRef) and s not in seen:
                    seen.add(s)
                    entities.append((s, "owl:Class"))

        # Object Properties
        for s in graph.subjects(RDF.type, OWL.ObjectProperty):
            if isinstance(s, URIRef) and s not in seen:
                seen.add(s)
                entities.append((s, "owl:ObjectProperty"))

        # Datatype Properties
        for s in graph.subjects(RDF.type, OWL.DatatypeProperty):
            if isinstance(s, URIRef) and s not in seen:
                seen.add(s)
                entities.append((s, "owl:DatatypeProperty"))

        # Annotation Properties
        for s in graph.subjects(RDF.type, OWL.AnnotationProperty):
            if isinstance(s, URIRef) and s not in seen:
                seen.add(s)
                entities.append((s, "owl:AnnotationProperty"))

        # RDF Properties
        for s in graph.subjects(RDF.type, RDF.Property):
            if isinstance(s, URIRef) and s not in seen:
                seen.add(s)
                entities.append((s, "rdf:Property"))

        # Named Individuals
        for s in graph.subjects(RDF.type, OWL.NamedIndividual):
            if isinstance(s, URIRef) and s not in seen:
                seen.add(s)
                entities.append((s, "owl:NamedIndividual"))

        # Sort by URI for consistent output
        entities.sort(key=lambda x: str(x[0]))

        return entities

    def _extract_entity_labels(
        self,
        graph: Graph,
        entity: URIRef,
        target_lang: str,
    ) -> list[TranslationEntry]:
        """Extract label properties for a single entity.

        Args:
            graph: RDF graph.
            entity: Entity URI.
            target_lang: Target language code.

        Returns:
            List of TranslationEntry objects.
        """
        labels: list[TranslationEntry] = []
        source_lang = self.config.source_language

        for prop_uri_str in self.config.properties:
            prop_uri = URIRef(self._expand_property(prop_uri_str))

            # Find source language literals
            source_literals = self._get_language_literals(
                graph, entity, prop_uri, source_lang
            )

            if not source_literals:
                continue

            # Check for existing translation if missing_only mode
            if self.config.missing_only:
                existing = self._get_language_literals(
                    graph, entity, prop_uri, target_lang
                )
                if existing:
                    continue

            for source_text in source_literals:
                labels.append(
                    TranslationEntry(
                        property=self._shorten_property(str(prop_uri)),
                        source_text=source_text,
                        translation="",
                        status=TranslationStatus.PENDING,
                    )
                )

        return labels

    def _get_language_literals(
        self,
        graph: Graph,
        subject: URIRef,
        predicate: URIRef,
        language: str,
    ) -> list[str]:
        """Get literal values for a specific language.

        Args:
            graph: RDF graph.
            subject: Subject URI.
            predicate: Predicate URI.
            language: Language code.

        Returns:
            List of literal string values.
        """
        results: list[str] = []

        for obj in graph.objects(subject, predicate):
            if isinstance(obj, Literal):
                # Match language exactly or match untagged literals for source
                obj_lang = obj.language
                if obj_lang == language:
                    results.append(str(obj))
                elif obj_lang is None and language == self.config.source_language:
                    # Treat untagged literals as source language
                    results.append(str(obj))

        return results

    def _is_deprecated(self, graph: Graph, entity: URIRef) -> bool:
        """Check if an entity is deprecated.

        Args:
            graph: RDF graph.
            entity: Entity URI.

        Returns:
            True if entity is deprecated.
        """
        # Check owl:deprecated
        for obj in graph.objects(entity, OWL.deprecated):
            if isinstance(obj, Literal) and obj.toPython() is True:
                return True

        # Check owl:DeprecatedClass / owl:DeprecatedProperty
        deprecated_types = [OWL.DeprecatedClass, OWL.DeprecatedProperty]
        for dtype in deprecated_types:
            if (entity, RDF.type, dtype) in graph:
                return True

        return False

    def _expand_property(self, prop: str) -> str:
        """Expand a CURIE to full URI.

        Args:
            prop: Property string (CURIE or full URI).

        Returns:
            Full URI string.
        """
        prefixes = {
            "rdfs:": "http://www.w3.org/2000/01/rdf-schema#",
            "skos:": "http://www.w3.org/2004/02/skos/core#",
            "owl:": "http://www.w3.org/2002/07/owl#",
            "rdf:": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
            "dc:": "http://purl.org/dc/elements/1.1/",
            "dcterms:": "http://purl.org/dc/terms/",
        }

        for prefix, namespace in prefixes.items():
            if prop.startswith(prefix):
                return namespace + prop[len(prefix) :]

        return prop

    def _shorten_property(self, prop: str) -> str:
        """Shorten a full URI to CURIE if possible.

        Args:
            prop: Full property URI.

        Returns:
            CURIE or original URI.
        """
        namespaces = {
            "http://www.w3.org/2000/01/rdf-schema#": "rdfs:",
            "http://www.w3.org/2004/02/skos/core#": "skos:",
            "http://www.w3.org/2002/07/owl#": "owl:",
            "http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf:",
            "http://purl.org/dc/elements/1.1/": "dc:",
            "http://purl.org/dc/terms/": "dcterms:",
        }

        for namespace, prefix in namespaces.items():
            if prop.startswith(namespace):
                return prefix + prop[len(namespace) :]

        return prop


def extract_strings(
    source: Path,
    target_language: str,
    output: Path | None = None,
    source_language: str = "en",
    properties: list[str] | None = None,
    include_deprecated: bool = False,
    missing_only: bool = False,
) -> ExtractionResult:
    """Extract translatable strings from an ontology file.

    Convenience function for simple extraction.

    Args:
        source: Source ontology file.
        target_language: Target language code.
        output: Output file path. Auto-generated if not provided.
        source_language: Source language code.
        properties: Properties to extract. Uses defaults if not provided.
        include_deprecated: Include deprecated entities.
        missing_only: Only extract missing translations.

    Returns:
        ExtractionResult with translation file.
    """
    # Load graph
    graph = Graph()
    graph.parse(source)

    # Build config
    config = ExtractConfig(
        source_language=source_language,
        target_language=target_language,
        properties=properties or list(DEFAULT_PROPERTIES),
        include_deprecated=include_deprecated,
        missing_only=missing_only,
    )

    # Extract
    extractor = StringExtractor(config)
    result = extractor.extract(graph, source, target_language)

    # Save if requested
    if result.success and output and result.translation_file:
        result.translation_file.save(output)

    return result
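
A minimal usage sketch of the extraction API added above, based on the extract_strings signature in this hunk. The ontology path, output path, language codes, and chosen properties are illustrative only; the translation-file format itself comes from rdf_construct.localise.config, which is not shown here.

    from pathlib import Path

    from rdf_construct.localise.extractor import extract_strings

    # Pull English labels/definitions from a (hypothetical) ontology file and
    # write a German translation stub for translators to fill in.
    result = extract_strings(
        source=Path("ontology.ttl"),
        target_language="de",
        output=Path("ontology.de.yaml"),
        properties=["rdfs:label", "skos:definition"],
    )

    if result.success:
        print(f"Extracted {result.total_strings} strings "
              f"from {result.total_entities} entities")
    else:
        print(f"Extraction failed: {result.error}")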
rdf_construct/localise/formatters/__init__.py

@@ -0,0 +1,36 @@
"""Formatters for localise command output.

Provides output formatting for:
- Console text output
- Markdown reports
"""

from rdf_construct.localise.formatters.text import TextFormatter
from rdf_construct.localise.formatters.markdown import MarkdownFormatter

__all__ = [
    "TextFormatter",
    "MarkdownFormatter",
    "get_formatter",
]


def get_formatter(format_name: str, use_colour: bool = True) -> TextFormatter | MarkdownFormatter:
    """Get a formatter by name.

    Args:
        format_name: Formatter name ("text" or "markdown").
        use_colour: Whether to use colour output (text only).

    Returns:
        Formatter instance.

    Raises:
        ValueError: If format name is unknown.
    """
    if format_name == "text":
        return TextFormatter(use_colour=use_colour)
    elif format_name in ("markdown", "md"):
        return MarkdownFormatter()
    else:
        raise ValueError(f"Unknown format: {format_name}")
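
A short sketch of how the get_formatter helper above could be used. Here `result` is assumed to be an ExtractionResult such as the one produced in the extractor sketch earlier; format_extraction_result is the MarkdownFormatter method shown in the next hunk, while the TextFormatter interface is not part of this excerpt.

    from rdf_construct.localise.formatters import get_formatter

    formatter = get_formatter("markdown")   # accepts "text", "markdown", or "md"
    print(formatter.format_extraction_result(result))

    # Console output with ANSI colour disabled:
    plain = get_formatter("text", use_colour=False)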
rdf_construct/localise/formatters/markdown.py

@@ -0,0 +1,229 @@
"""Markdown formatter for coverage reports.

Generates markdown-formatted coverage reports suitable for documentation
or inclusion in PRs/issues.
"""

from datetime import datetime

from rdf_construct.localise.extractor import ExtractionResult
from rdf_construct.localise.merger import MergeResult
from rdf_construct.localise.reporter import CoverageReport


class MarkdownFormatter:
    """Formats localise results as Markdown."""

    def format_extraction_result(self, result: ExtractionResult) -> str:
        """Format extraction result as Markdown.

        Args:
            result: Extraction result.

        Returns:
            Markdown string.
        """
        lines: list[str] = []

        lines.append("# Extraction Result")
        lines.append("")

        if result.success:
            lines.append("**Status:** ✅ Success")
            lines.append("")
            lines.append("| Metric | Value |")
            lines.append("|--------|-------|")
            lines.append(f"| Entities | {result.total_entities} |")
            lines.append(f"| Strings | {result.total_strings} |")
            lines.append(f"| Skipped | {result.skipped_entities} |")

            if result.translation_file:
                tf = result.translation_file
                lines.append("")
                lines.append("## Metadata")
                lines.append("")
                lines.append(f"- **Source file:** `{tf.metadata.source_file}`")
                lines.append(f"- **Source language:** {tf.metadata.source_language}")
                lines.append(f"- **Target language:** {tf.metadata.target_language}")
        else:
            lines.append("**Status:** ❌ Failed")
            lines.append("")
            lines.append(f"**Error:** {result.error}")

        return "\n".join(lines)

    def format_merge_result(self, result: MergeResult) -> str:
        """Format merge result as Markdown.

        Args:
            result: Merge result.

        Returns:
            Markdown string.
        """
        lines: list[str] = []

        lines.append("# Merge Result")
        lines.append("")

        if result.success:
            lines.append("**Status:** ✅ Success")
            lines.append("")

            stats = result.stats
            lines.append("| Metric | Count |")
            lines.append("|--------|-------|")
            lines.append(f"| Added | {stats.added} |")
            lines.append(f"| Updated | {stats.updated} |")
            lines.append(f"| Skipped (status) | {stats.skipped_status} |")
            lines.append(f"| Skipped (existing) | {stats.skipped_existing} |")
            lines.append(f"| Errors | {stats.errors} |")

            if result.warnings:
                lines.append("")
                lines.append("## Warnings")
                lines.append("")
                for warning in result.warnings:
                    lines.append(f"- {warning}")
        else:
            lines.append("**Status:** ❌ Failed")
            lines.append("")
            lines.append(f"**Error:** {result.error}")

        return "\n".join(lines)

    def format_coverage_report(
        self,
        report: CoverageReport,
        verbose: bool = False,
    ) -> str:
        """Format coverage report as Markdown.

        Args:
            report: Coverage report.
            verbose: Include detailed missing entity list.

        Returns:
            Markdown string.
        """
        lines: list[str] = []

        # Header
        lines.append("# Translation Coverage Report")
        lines.append("")
        lines.append(f"**Generated:** {datetime.now().strftime('%Y-%m-%d %H:%M')}")
        lines.append("")

        # Summary
        lines.append("## Summary")
        lines.append("")
        lines.append(f"- **Source file:** `{report.source_file}`")
        lines.append(f"- **Source language:** {report.source_language}")
        lines.append(f"- **Total entities:** {report.total_entities}")
        lines.append(f"- **Properties checked:** {', '.join(report.properties)}")
        lines.append("")

        # Coverage table
        lines.append("## Coverage by Language")
        lines.append("")

        # Build header
        header = ["Language"]
        header.extend(report.properties)
        header.append("Overall")
        header.append("Status")
        lines.append("| " + " | ".join(header) + " |")
        lines.append("| " + " | ".join(["---"] * len(header)) + " |")

        # Data rows
        for lang, coverage in report.languages.items():
            row = []

            # Language
            if coverage.is_source:
                row.append(f"**{lang}** (base)")
            else:
                row.append(lang)

            # Property coverages
            for prop in report.properties:
                prop_cov = coverage.by_property.get(prop)
                if prop_cov:
                    pct = f"{prop_cov.coverage:.0f}%"
                else:
                    pct = "-"
                row.append(pct)

            # Overall
            row.append(f"**{coverage.coverage:.0f}%**")

            # Status
            if coverage.coverage == 100:
                row.append("✅ Complete")
            elif coverage.coverage >= 75:
                row.append(f"⚠️ {coverage.pending} pending")
            elif coverage.coverage > 0:
                row.append(f"❌ {coverage.pending} pending")
            else:
                row.append("❌ Not started")

            lines.append("| " + " | ".join(row) + " |")

        # Missing translations section
        if verbose:
            has_missing = False
            for lang, coverage in report.languages.items():
                if coverage.missing_entities and not coverage.is_source:
                    if not has_missing:
                        lines.append("")
                        lines.append("## Missing Translations")
                        has_missing = True

                    lines.append("")
                    lines.append(f"### {lang.upper()}")
                    lines.append("")

                    # Group by entity type based on URI pattern
                    lines.append("<details>")
                    lines.append(f"<summary>{len(coverage.missing_entities)} entities missing translations</summary>")
                    lines.append("")
                    for uri in coverage.missing_entities:
                        short_uri = self._shorten_uri(uri)
                        lines.append(f"- `{short_uri}`")
                    lines.append("")
                    lines.append("</details>")

        # Footer
        lines.append("")
        lines.append("---")
        lines.append("*Generated by rdf-construct localise*")

        return "\n".join(lines)

    def _shorten_uri(self, uri: str) -> str:
        """Shorten a URI for display.

        Args:
            uri: Full URI.

        Returns:
            Shortened version.
        """
        prefixes = {
            "http://www.w3.org/2000/01/rdf-schema#": "rdfs:",
            "http://www.w3.org/2004/02/skos/core#": "skos:",
            "http://www.w3.org/2002/07/owl#": "owl:",
            "http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf:",
        }

        for namespace, prefix in prefixes.items():
            if uri.startswith(namespace):
                return prefix + uri[len(namespace) :]

        # If no known prefix, just show local name
        if "#" in uri:
            return uri.split("#")[-1]
        elif "/" in uri:
            return uri.split("/")[-1]

        return uri
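
A sketch of driving the coverage formatter above. Here `report` is assumed to be a CoverageReport obtained from rdf_construct.localise.reporter, whose construction is not shown in this diff; the output filename is illustrative. The generated document contains a summary, a per-language coverage table, and, with verbose=True, collapsible lists of entities still missing translations (✅ at 100% coverage, ⚠️ at 75% or above, ❌ below that).

    from pathlib import Path

    from rdf_construct.localise.formatters.markdown import MarkdownFormatter

    formatter = MarkdownFormatter()
    markdown = formatter.format_coverage_report(report, verbose=True)

    # Write the report somewhere it can be attached to a PR or docs page.
    Path("translation-coverage.md").write_text(markdown, encoding="utf-8")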