PyPI - lokit-python - Versions diffs - 0.1.0__tar.gz → 0.1.1__tar.gz - Mend

lokit-python 0.1.0__tar.gz → 0.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (69) hide show

{lokit_python-0.1.0 → lokit_python-0.1.1}/PKG-INFO RENAMED Viewed

@@ -1,7 +1,7 @@
 Metadata-Version: 2.4
 Name: lokit-python
-Version: 0.1.0
-Summary: Add your description here
+Version: 0.1.1
+Summary: A type-safe localization toolkit for parsing, converting, and matching TMX, XLIFF, PO, JSON, HTML, CSV, XLSX, and IDML files.
 Requires-Python: >=3.12
 Description-Content-Type: text/markdown
 Requires-Dist: lxml>=6.1.1

{lokit_python-0.1.0 → lokit_python-0.1.1}/pyproject.toml RENAMED Viewed

@@ -1,7 +1,7 @@
 [project]
 name = "lokit-python"
-version = "0.1.0"
-description = "Add your description here"
+version = "0.1.1"
+description = "A type-safe localization toolkit for parsing, converting, and matching TMX, XLIFF, PO, JSON, HTML, CSV, XLSX, and IDML files."
 readme = "README.md"
 requires-python = ">=3.12"
 dependencies = [

lokit_python-0.1.1/setup.py ADDED Viewed

@@ -0,0 +1,102 @@
+import glob
+from pathlib import Path, PurePosixPath, PureWindowsPath
+from setuptools import setup
+from setuptools.command.build_ext import build_ext
+def _build_path_replacements(src_files):
+    """Build a mapping of Windows-form paths to POSIX-form paths.
+    For each source file, compute its Windows backslash representation
+    and its POSIX forward-slash representation. Only include entries
+    where the two differ (i.e. the path contains directory separators).
+    This is used to fix mypyc-generated C files on Windows, where
+    embedded Python source paths use backslashes that MSVC interprets
+    as C escape sequences (e.g. \\x in \\xliff causes error C2153).
+    """
+    replacements = {}
+    for src_file in src_files:
+        posix_form = PurePosixPath(src_file).as_posix()
+        windows_form = str(PureWindowsPath(src_file))
+        if windows_form != posix_form:
+            replacements[windows_form] = posix_form
+    return replacements
+def _normalize_generated_c_file(path, replacements):
+    """Replace Windows backslash paths with POSIX paths in a generated C file.
+    Performs direct string replacement of known source file paths,
+    avoiding any C source parsing. This is robust against escaped
+    quotes and other C syntax that broke the previous quote-based parser.
+    """
+    if path.suffix != ".c" or not path.exists():
+        return
+    contents = path.read_text(encoding="utf-8")
+    normalized = contents
+    for windows_path, posix_path in replacements.items():
+        normalized = normalized.replace(windows_path, posix_path)
+    if normalized != contents:
+        path.write_text(normalized, encoding="utf-8")
+def _normalize_all_generated_c_files(replacements):
+    """Normalize all generated C files in the build directory."""
+    build_dir = Path("build")
+    if build_dir.exists():
+        for path in build_dir.rglob("*.c"):
+            _normalize_generated_c_file(path, replacements)
+def _normalize_ext_c_files(ext, replacements):
+    """Normalize C files listed as sources for an extension module."""
+    for source in ext.sources:
+        _normalize_generated_c_file(Path(source), replacements)
+class BuildExt(build_ext):
+    def build_extensions(self):
+        self._normalize_before_compile()
+        _normalize_all_generated_c_files(_path_replacements)
+        super().build_extensions()
+    def build_extension(self, ext):
+        _normalize_ext_c_files(ext, _path_replacements)
+        super().build_extension(ext)
+    def _normalize_before_compile(self):
+        original_compile = self.compiler.compile
+        def compile_with_normalized_sources(sources, *args, **kwargs):
+            for source in sources:
+                _normalize_generated_c_file(Path(source), _path_replacements)
+            return original_compile(sources, *args, **kwargs)
+        self.compiler.compile = compile_with_normalized_sources
+try:
+    from mypyc.build import mypycify
+    src_files = glob.glob("src/lokit/**/*.py", recursive=True)
+    src_files = [f.replace("\\", "/") for f in src_files if "importers.py" not in f]
+    _path_replacements = _build_path_replacements(src_files)
+    ext_modules = mypycify(
+        src_files,
+        opt_level="3",
+        debug_level="0",
+    )
+    _normalize_all_generated_c_files(_path_replacements)
+except ImportError:
+    _path_replacements = {}
+    ext_modules = []
+setup(
+    cmdclass={"build_ext": BuildExt},
+    ext_modules=ext_modules,
+)

{lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit/__init__.py RENAMED Viewed

@@ -3,12 +3,14 @@ from lokit.data.structure import (
     BaseStructure,
     CodePart,
     Comment,
+    ConversionStats,
     Data,
     Meta,
     Origin,
     Plural,
     PluralCategory,
     SegmentPart,
+    StreamingStructure,
     Tags,
     TextPart,
     TranslationStatus,
@@ -37,6 +39,8 @@ from lokit.exporters import (
 from lokit.importers import (
     import_csv,
     import_csv_async,
+    import_file,
+    import_file_async,
     import_idml,
     import_idml_async,
     import_html,
@@ -47,6 +51,10 @@ from lokit.importers import (
     import_po_async,
     import_tmx,
     import_tmx_async,
+    stream_tmx,
+    convert_tmx_to_csv,
+    convert_tmx_to_tmx,
+    convert_tmx_to_xliff,
     import_xliff,
     import_xliff_async,
     import_xlsx,
@@ -68,6 +76,7 @@ __all__ = [
     "BaseStructure",
     "CodePart",
     "Comment",
+    "ConversionStats",
     "Data",
     "Meta",
     "Lokit",
@@ -76,6 +85,7 @@ __all__ = [
     "Plural",
     "PluralCategory",
     "SegmentPart",
+    "StreamingStructure",
     "Tags",
     "TextPart",
     "TieData",
@@ -109,6 +119,8 @@ __all__ = [
     "export_xlsx_async",
     "import_csv",
     "import_csv_async",
+    "import_file",
+    "import_file_async",
     "import_idml",
     "import_idml_async",
     "import_html",
@@ -119,6 +131,10 @@ __all__ = [
     "import_po_async",
     "import_tmx",
     "import_tmx_async",
+    "stream_tmx",
+    "convert_tmx_to_csv",
+    "convert_tmx_to_tmx",
+    "convert_tmx_to_xliff",
     "import_xliff",
     "import_xliff_async",
     "import_xlsx",

{lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit/data/structure.py RENAMED Viewed

@@ -1,5 +1,6 @@
 from dataclasses import dataclass, field
 from enum import StrEnum
+from collections.abc import Iterable
 from typing import Optional
 from lokit.data.tag_types import TieData
@@ -116,3 +117,25 @@ class BaseStructure:
     source_language: Optional[str] = None
     target_language: Optional[str] = None
     extensions: dict[str, str] = field(default_factory=dict)
+@dataclass(slots=True)
+class StreamingStructure:
+    source_locale: str
+    target_locale: Optional[str]
+    items: Iterable[tuple[str, Data]]
+    format_version: str = "0.1"
+    export_origin: str = ""
+    export_timestamp: str = ""
+    source_language: Optional[str] = None
+    target_language: Optional[str] = None
+    extensions: dict[str, str] = field(default_factory=dict)
+@dataclass(slots=True)
+class ConversionStats:
+    units_read: int
+    units_written: int
+    input_bytes: int
+    output_bytes: int
+    seconds: float

{lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit/data/tag_types.py RENAMED Viewed

@@ -76,3 +76,4 @@ class TieData:
     order: int = 0
     pair_id: Optional[str] = None
     original_name: Optional[str] = None
+    original_text: Optional[str] = None

{lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit/exporters/csv.py RENAMED Viewed

@@ -2,20 +2,24 @@ from __future__ import annotations
 import asyncio
 import csv
+from collections.abc import Iterable
 from pathlib import Path
-from lokit.data.structure import BaseStructure, TranslationStatus
+from lokit.data.structure import BaseStructure, Data, StreamingStructure, TranslationStatus
+from lokit.io.atomic import atomic_output_path
-def export_csv(document: BaseStructure, filepath: str | Path) -> None:
+Structure = BaseStructure | StreamingStructure
+def export_csv(document: Structure, filepath: str | Path) -> None:
     path = Path(filepath)
-    path.parent.mkdir(parents=True, exist_ok=True)
-    with path.open("w", newline="", encoding="utf-8") as fh:
+    with atomic_output_path(path, "w") as fh:
         writer = csv.DictWriter(fh, fieldnames=["id", "source", "target", "status", "comment"])
         writer.writeheader()
-        for unit_id, unit in document.data.items():
+        for unit_id, unit in _iter_items(document):
             comment = "; ".join(c.context for c in unit.comments if c.context)
             status = unit.status.value if unit.status != TranslationStatus.UNKNOWN else ""
@@ -30,3 +34,9 @@ def export_csv(document: BaseStructure, filepath: str | Path) -> None:
 async def export_csv_async(document: BaseStructure, filepath: str | Path) -> None:
     await asyncio.to_thread(export_csv, document, filepath)
+def _iter_items(document: Structure) -> Iterable[tuple[str, Data]]:
+    if isinstance(document, BaseStructure):
+        return document.data.items()
+    return document.items

{lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit/exporters/html.py RENAMED Viewed

@@ -1,18 +1,22 @@
 from __future__ import annotations
 import asyncio
+from collections.abc import Iterable
 from pathlib import Path
 from typing import Any, cast
 from lxml import html as lxml_html
 from lxml.html import HtmlElement, tostring
-from lokit.data.structure import BaseStructure, CodePart, Data, TextPart
+from lokit.data.structure import BaseStructure, CodePart, Data, StreamingStructure, TextPart
 from lokit.data.tag_types import TieData, TieType
+from lokit.io.atomic import atomic_output_path
+Structure = BaseStructure | StreamingStructure
 def export_html(
-    document: BaseStructure,
+    document: Structure,
     filepath: str | Path,
     source_html: str | Path | None = None,
 ) -> None:
@@ -26,7 +30,7 @@ def export_html(
 async def export_html_async(
-    document: BaseStructure,
+    document: Structure,
     filepath: str | Path,
     source_html: str | Path | None = None,
 ) -> None:
@@ -34,7 +38,7 @@ async def export_html_async(
 def _export_from_source(
-    document: BaseStructure, output: Path, source: Path
+    document: Structure, output: Path, source: Path
 ) -> None:
     doc = lxml_html.parse(str(source))
     root = doc.getroot()
@@ -86,10 +90,11 @@ def _export_from_source(
                 index += 1
     result = tostring(root, encoding="unicode", doctype="<!DOCTYPE html>")
-    output.write_text(result, encoding="utf-8")
+    with atomic_output_path(output, "w") as f:
+        f.write(result)
-def _export_minimal(document: BaseStructure, output: Path) -> None:
+def _export_minimal(document: Structure, output: Path) -> None:
     lang = document.target_locale or document.source_locale
     lines: list[str] = [
         "<!DOCTYPE html>",
@@ -98,7 +103,7 @@ def _export_minimal(document: BaseStructure, output: Path) -> None:
         '<meta charset="utf-8">',
     ]
-    for unit_id, unit in document.data.items():
+    for unit_id, unit in _iter_items(document):
         if "meta." in unit_id:
             name = unit.extensions.get("meta_name", "")
             text = unit.target or unit.source
@@ -107,7 +112,7 @@ def _export_minimal(document: BaseStructure, output: Path) -> None:
     lines.append("</head>")
     lines.append("<body>")
-    for unit_id, unit in document.data.items():
+    for unit_id, unit in _iter_items(document):
         if "meta." in unit_id or "img.alt" in unit_id:
             continue
         text = unit.target or unit.source
@@ -120,7 +125,8 @@ def _export_minimal(document: BaseStructure, output: Path) -> None:
     lines.append("</body>")
     lines.append("</html>")
-    output.write_text("\n".join(lines), encoding="utf-8")
+    with atomic_output_path(output, "w") as f:
+        f.write("\n".join(lines))
 def _replace_element_text(element: HtmlElement, unit: Data) -> None:
@@ -197,8 +203,14 @@ def _format_attrs(attributes: dict[str, str]) -> str:
     return "".join(parts)
-def _build_unit_lookup(document: BaseStructure) -> dict[str, Data]:
-    return dict(document.data)
+def _build_unit_lookup(document: Structure) -> dict[str, Data]:
+    return dict(_iter_items(document))
+def _iter_items(document: Structure) -> Iterable[tuple[str, Data]]:
+    if isinstance(document, BaseStructure):
+        return document.data.items()
+    return document.items
 def _extract_tag_from_id(unit_id: str) -> str:

{lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit/exporters/idml.py RENAMED Viewed

@@ -1,7 +1,10 @@
 from __future__ import annotations
 import asyncio
+import contextlib
+import os
 import shutil
+import tempfile
 import zipfile
 from pathlib import Path
@@ -19,30 +22,46 @@ def export_idml(
     output_path = Path(filepath)
     source_path = Path(source_idml)
     output_path.parent.mkdir(parents=True, exist_ok=True)
+    tmp = tempfile.NamedTemporaryFile(
+        dir=output_path.parent,
+        prefix=f".{output_path.name}.",
+        suffix=".tmp",
+        delete=False,
+    )
+    tmp_path = Path(tmp.name)
+    tmp.close()
     story_units = _group_by_story(document)
-    shutil.copy2(str(source_path), str(output_path))
-    with zipfile.ZipFile(str(output_path), "a") as zf_out:
-        with zipfile.ZipFile(str(source_path), "r") as zf_in:
-            story_files = [
-                name for name in zf_in.namelist()
-                if name.startswith("Stories/Story_") and name.endswith(".xml")
-            ]
-            for story_file in story_files:
-                units = story_units.get(story_file)
-                if not units:
-                    continue
-                with zf_in.open(story_file) as stream:
-                    tree = etree.parse(stream)
-                    root = tree.getroot()
-                    _apply_translations(root, units)
-                    modified_xml = etree.tostring(
-                        root, xml_declaration=True, encoding="UTF-8"
-                    )
-                _replace_in_zip(zf_out, story_file, modified_xml)
+    shutil.copy2(str(source_path), str(tmp_path))
+    try:
+        with zipfile.ZipFile(str(tmp_path), "a") as zf_out:
+            with zipfile.ZipFile(str(source_path), "r") as zf_in:
+                story_files = [
+                    name for name in zf_in.namelist()
+                    if name.startswith("Stories/Story_") and name.endswith(".xml")
+                ]
+                for story_file in story_files:
+                    units = story_units.get(story_file)
+                    if not units:
+                        continue
+                    with zf_in.open(story_file) as stream:
+                        tree = etree.parse(stream)
+                        root = tree.getroot()
+                        _apply_translations(root, units)
+                        modified_xml = etree.tostring(
+                            root, xml_declaration=True, encoding="UTF-8"
+                        )
+                    _replace_in_zip(zf_out, story_file, modified_xml)
+        with tmp_path.open("rb") as f:
+            os.fsync(f.fileno())
+        os.replace(tmp_path, output_path)
+    except BaseException:
+        with contextlib.suppress(FileNotFoundError):
+            tmp_path.unlink()
+        raise
 async def export_idml_async(

{lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit/exporters/json_i18n.py RENAMED Viewed

@@ -2,14 +2,18 @@ from __future__ import annotations
 import asyncio
 import json
+from collections.abc import Iterable
 from pathlib import Path
 from typing import Any
-from lokit.data.structure import BaseStructure
+from lokit.data.structure import BaseStructure, Data, StreamingStructure
+from lokit.io.atomic import atomic_output_path
+Structure = BaseStructure | StreamingStructure
 def export_json_i18n(
-    document: BaseStructure,
+    document: Structure,
     filepath: str | Path,
     nested: bool = True,
 ) -> None:
@@ -17,14 +21,14 @@ def export_json_i18n(
     path.parent.mkdir(parents=True, exist_ok=True)
     output: dict[str, Any] = {}
-    for key, unit in document.data.items():
+    for key, unit in _iter_items(document):
         value = unit.target if unit.target is not None else unit.source
         if nested:
             _set_nested(output, key, value)
         else:
             output[key] = value
-    with path.open("w", encoding="utf-8") as f:
+    with atomic_output_path(path, "w") as f:
         json.dump(output, f, ensure_ascii=False, indent=2)
         f.write("\n")
@@ -45,3 +49,9 @@ def _set_nested(obj: dict[str, Any], dot_key: str, value: str) -> None:
             current[part] = {}
         current = current[part]
     current[parts[-1]] = value
+def _iter_items(document: Structure) -> Iterable[tuple[str, Data]]:
+    if isinstance(document, BaseStructure):
+        return document.data.items()
+    return document.items

{lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit/exporters/po.py RENAMED Viewed

@@ -1,18 +1,25 @@
 from __future__ import annotations
 import asyncio
+import contextlib
+import os
+import tempfile
 from collections import defaultdict
+from collections.abc import Iterable
 from pathlib import Path
 from typing import Any
 import polib
-from lokit.data.structure import BaseStructure, Data, TranslationStatus
+from lokit.data.structure import BaseStructure, Data, StreamingStructure, TranslationStatus
 _PLURAL_SUFFIX_PATTERN = "["
-def export_po(document: BaseStructure, filepath: str | Path) -> None:
+Structure = BaseStructure | StreamingStructure
+def export_po(document: Structure, filepath: str | Path) -> None:
     path = Path(filepath)
     path.parent.mkdir(parents=True, exist_ok=True)
@@ -22,7 +29,7 @@ def export_po(document: BaseStructure, filepath: str | Path) -> None:
     plural_groups: dict[str, list[tuple[str, Data]]] = defaultdict(list)
     singular_units: list[tuple[str, Data]] = []
-    for unit_id, unit in document.data.items():
+    for unit_id, unit in _iter_items(document):
         if _PLURAL_SUFFIX_PATTERN in unit_id and unit.plural is not None:
             base_id = unit_id[: unit_id.index(_PLURAL_SUFFIX_PATTERN)]
             plural_groups[base_id].append((unit_id, unit))
@@ -37,14 +44,30 @@ def export_po(document: BaseStructure, filepath: str | Path) -> None:
     for base_id, forms in plural_groups.items():
         po.append(_build_plural_entry(base_id, forms))
-    po.save(str(path))
+    tmp = tempfile.NamedTemporaryFile(
+        dir=path.parent,
+        prefix=f".{path.name}.",
+        suffix=".tmp",
+        delete=False,
+    )
+    tmp_path = Path(tmp.name)
+    tmp.close()
+    try:
+        po.save(str(tmp_path))
+        with tmp_path.open("rb") as f:
+            os.fsync(f.fileno())
+        os.replace(tmp_path, path)
+    except BaseException:
+        with contextlib.suppress(FileNotFoundError):
+            tmp_path.unlink()
+        raise
 async def export_po_async(document: BaseStructure, filepath: str | Path) -> None:
     await asyncio.to_thread(export_po, document, filepath)
-def _build_metadata(document: BaseStructure) -> dict[str, str]:
+def _build_metadata(document: Structure) -> dict[str, str]:
     meta: dict[str, str] = {
         "Content-Type": "text/plain; charset=UTF-8",
         "Content-Transfer-Encoding": "8bit",
@@ -160,3 +183,9 @@ def _apply_occurrences(entry: Any, unit: Data) -> None:
         else:
             occurrences.append((ref, ""))
     entry.occurrences = occurrences
+def _iter_items(document: Structure) -> Iterable[tuple[str, Data]]:
+    if isinstance(document, BaseStructure):
+        return document.data.items()
+    return document.items

lokit-python 0.1.0__tar.gz → 0.1.1__tar.gz

lokit-python 0.1.0__tar.gz → 0.1.1__tar.gz