PyPI - lokit-python - Versions diffs - 0.1.1__tar.gz → 0.1.3__tar.gz - Mend

lokit-python 0.1.1__tar.gz → 0.1.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (72) hide show

{lokit_python-0.1.1 → lokit_python-0.1.3}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: lokit-python
-Version: 0.1.1
+Version: 0.1.3
 Summary: A type-safe localization toolkit for parsing, converting, and matching TMX, XLIFF, PO, JSON, HTML, CSV, XLSX, and IDML files.
 Requires-Python: >=3.12
 Description-Content-Type: text/markdown

{lokit_python-0.1.1 → lokit_python-0.1.3}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "lokit-python"
-version = "0.1.1"
+version = "0.1.3"
 description = "A type-safe localization toolkit for parsing, converting, and matching TMX, XLIFF, PO, JSON, HTML, CSV, XLSX, and IDML files."
 readme = "README.md"
 requires-python = ">=3.12"

{lokit_python-0.1.1 → lokit_python-0.1.3}/src/lokit/__init__.py RENAMED Viewed

@@ -51,7 +51,11 @@ from lokit.importers import (
     import_po_async,
     import_tmx,
     import_tmx_async,
+    import_tmx_batches_async,
+    import_tmx_parallel,
+    process_tmx_async,
     stream_tmx,
+    stream_tmx_parallel,
     convert_tmx_to_csv,
     convert_tmx_to_tmx,
     convert_tmx_to_xliff,
@@ -61,6 +65,7 @@ from lokit.importers import (
     import_xlsx_async,
 )
 from lokit.io import load_lokit_json, load_lokit_json_bytes
+from lokit.io.stream_json import LokitJsonContext
 from lokit.logic import Lokit, MatchResult
 from lokit.parsers.csv.extraction import CsvExtractor
 from lokit.parsers.xlsx.extraction import XlsxExtractor
@@ -69,6 +74,8 @@ from lokit.parsers.po.extraction import PoExtractor
 from lokit.parsers.json_i18n.extraction import JsonI18nExtractor
 from lokit.parsers.idml.extraction import IdmlExtractor
 from lokit.parsers.tmx.extraction import TmxExtractor
+from lokit.parsers.tmx.models import TmxParseMode
+from lokit.parsers.tmx.parallel import TmxParallelOptions
 from lokit.parsers.xliff.extraction import XliffExtractor
 __all__ = [
@@ -80,6 +87,7 @@ __all__ = [
     "Data",
     "Meta",
     "Lokit",
+    "LokitJsonContext",
     "MatchResult",
     "Origin",
     "Plural",
@@ -91,6 +99,8 @@ __all__ = [
     "TieData",
     "TieType",
     "TmxExtractor",
+    "TmxParseMode",
+    "TmxParallelOptions",
     "TranslationStatus",
     "XliffExtractor",
     "CsvExtractor",
@@ -131,7 +141,11 @@ __all__ = [
     "import_po_async",
     "import_tmx",
     "import_tmx_async",
+    "import_tmx_batches_async",
+    "import_tmx_parallel",
+    "process_tmx_async",
     "stream_tmx",
+    "stream_tmx_parallel",
     "convert_tmx_to_csv",
     "convert_tmx_to_tmx",
     "convert_tmx_to_xliff",

{lokit_python-0.1.1 → lokit_python-0.1.3}/src/lokit/exporters/tmx.py RENAMED Viewed

@@ -1,7 +1,9 @@
 from __future__ import annotations
 from collections.abc import Iterable
+from dataclasses import dataclass
 from pathlib import Path
+from typing import Any
 from lxml import etree
 from lxml.etree import _Element
@@ -25,6 +27,13 @@ from lokit.io.atomic import atomic_output_path
 Structure = BaseStructure | StreamingStructure
+@dataclass(slots=True)
+class _CommentSummary:
+    creator_id: str | None = None
+    project: str | None = None
+    system: str | None = None
 def export_tmx(document: Structure, filepath: str | Path) -> None:
     path = Path(filepath)
     with atomic_output_path(path, "wb") as stream:
@@ -75,9 +84,9 @@ def _build_tu(unit_id: str, unit: Data, document: BaseStructure) -> _Element:
         attrs["creationdate"] = unit.meta.created
     if unit.meta.updated:
         attrs["changedate"] = unit.meta.updated
-    creator_id = _first_creator_id(unit)
-    if creator_id:
-        attrs["creationid"] = creator_id
+    comment_summary = _comment_summary(unit)
+    if comment_summary.creator_id:
+        attrs["creationid"] = comment_summary.creator_id
     change_id = unit.meta.extensions.get("change_id")
     if change_id:
         attrs["changeid"] = change_id
@@ -85,7 +94,7 @@ def _build_tu(unit_id: str, unit: Data, document: BaseStructure) -> _Element:
         attrs["usagecount"] = str(unit.meta.usage_count)
     tu = etree.Element("tu", attrs)
-    _append_unit_properties(tu, unit)
+    _append_unit_properties(tu, unit, comment_summary)
     _append_comments(tu, unit)
     tu.append(
         _build_tuv(
@@ -108,7 +117,7 @@ def _build_tu(unit_id: str, unit: Data, document: BaseStructure) -> _Element:
 def _write_tu(
-    xf: etree.xmlfile,
+    xf: Any,
     unit_id: str,
     unit: Data,
     document: Structure,
@@ -118,9 +127,9 @@ def _write_tu(
         attrs["creationdate"] = unit.meta.created
     if unit.meta.updated:
         attrs["changedate"] = unit.meta.updated
-    creator_id = _first_creator_id(unit)
-    if creator_id:
-        attrs["creationid"] = creator_id
+    comment_summary = _comment_summary(unit)
+    if comment_summary.creator_id:
+        attrs["creationid"] = comment_summary.creator_id
     change_id = unit.meta.extensions.get("change_id")
     if change_id:
         attrs["changeid"] = change_id
@@ -128,31 +137,30 @@ def _write_tu(
         attrs["usagecount"] = str(unit.meta.usage_count)
     with xf.element("tu", attrs):
-        prop_holder = etree.Element("props")
-        _append_unit_properties(prop_holder, unit)
-        _append_comments(prop_holder, unit)
-        for child in prop_holder:
-            xf.write(child)
-        xf.write(
-            _build_tuv(
-                document.source_locale,
-                unit.source,
-                unit.tags.source_parts if unit.tags else [],
-                unit.tags.source_tag_map if unit.tags else {},
-            )
+        _write_unit_properties(xf, unit, comment_summary)
+        _write_comments(xf, unit)
+        _write_tuv(
+            xf,
+            document.source_locale,
+            unit.source,
+            unit.tags.source_parts if unit.tags else [],
+            unit.tags.source_tag_map if unit.tags else {},
         )
         if document.target_locale is not None and unit.target is not None:
-            xf.write(
-                _build_tuv(
-                    document.target_locale,
-                    unit.target,
-                    unit.tags.target_parts if unit.tags else [],
-                    unit.tags.target_tag_map if unit.tags else {},
-                )
+            _write_tuv(
+                xf,
+                document.target_locale,
+                unit.target,
+                unit.tags.target_parts if unit.tags else [],
+                unit.tags.target_tag_map if unit.tags else {},
             )
-def _append_unit_properties(tu: _Element, unit: Data) -> None:
+def _append_unit_properties(
+    tu: _Element,
+    unit: Data,
+    comment_summary: _CommentSummary | None = None,
+) -> None:
     if unit.status != TranslationStatus.UNKNOWN:
         prop = etree.SubElement(tu, "prop", type="x-status")
         prop.text = unit.status.value
@@ -167,19 +175,47 @@ def _append_unit_properties(tu: _Element, unit: Data) -> None:
         _append_prop_if_present(tu, "x-next-source-text", unit.next_context.source)
         _append_prop_if_present(tu, "x-next-target-text", unit.next_context.target)
-    project = _first_project(unit)
-    if project:
-        _append_prop_if_present(tu, "x-project", project)
+    summary = comment_summary or _comment_summary(unit)
+    if summary.project:
+        _append_prop_if_present(tu, "x-project", summary.project)
-    system = _first_system(unit)
-    if system:
-        _append_prop_if_present(tu, "x-system", system)
+    if summary.system:
+        _append_prop_if_present(tu, "x-system", summary.system)
     for key, value in unit.extensions.items():
         if key.startswith("property."):
             _append_prop_if_present(tu, _property_type(key), value)
+def _write_unit_properties(
+    xf: Any,
+    unit: Data,
+    comment_summary: _CommentSummary,
+) -> None:
+    if unit.status != TranslationStatus.UNKNOWN:
+        _write_prop(xf, "x-status", unit.status.value)
+    if unit.previous_context is not None:
+        _write_prop_if_present(xf, "x-previous-id", unit.previous_context.unit_id)
+        _write_prop_if_present(xf, "x-previous-source-text", unit.previous_context.source)
+        _write_prop_if_present(xf, "x-previous-target-text", unit.previous_context.target)
+    if unit.next_context is not None:
+        _write_prop_if_present(xf, "x-next-id", unit.next_context.unit_id)
+        _write_prop_if_present(xf, "x-next-source-text", unit.next_context.source)
+        _write_prop_if_present(xf, "x-next-target-text", unit.next_context.target)
+    if comment_summary.project:
+        _write_prop(xf, "x-project", comment_summary.project)
+    if comment_summary.system:
+        _write_prop(xf, "x-system", comment_summary.system)
+    for key, value in unit.extensions.items():
+        if key.startswith("property."):
+            _write_prop_if_present(xf, _property_type(key), value)
 def _append_comments(tu: _Element, unit: Data) -> None:
     for comment in unit.comments:
         if not comment.context:
@@ -188,6 +224,13 @@ def _append_comments(tu: _Element, unit: Data) -> None:
         note.text = comment.context
+def _write_comments(xf: Any, unit: Data) -> None:
+    for comment in unit.comments:
+        if comment.context:
+            with xf.element("note"):
+                xf.write(comment.context)
 def _build_tuv(
     locale: str,
     text: str,
@@ -199,6 +242,17 @@ def _build_tuv(
     return tuv
+def _write_tuv(
+    xf: Any,
+    locale: str,
+    text: str,
+    parts: list[SegmentPart],
+    tag_map: dict[str, TieData],
+) -> None:
+    with xf.element("tuv", lang=locale):
+        _write_seg(xf, text, parts, tag_map)
 def _build_seg(
     text: str,
     parts: list[SegmentPart],
@@ -224,6 +278,25 @@ def _build_seg(
     return seg
+def _write_seg(
+    xf: Any,
+    text: str,
+    parts: list[SegmentPart],
+    tag_map: dict[str, TieData],
+) -> None:
+    effective_parts = parts if parts else [TextPart(text)]
+    pair_numbers = _pair_numbers(tag_map)
+    with xf.element("seg"):
+        for part in effective_parts:
+            if isinstance(part, TextPart):
+                xf.write(part.value)
+            elif isinstance(part, CodePart):
+                code = tag_map.get(part.ref)
+                if code is None:
+                    continue
+                xf.write(_build_code_element(code, pair_numbers))
 def _build_code_element(code: TieData, pair_numbers: dict[str, str]) -> _Element:
     if code.original_name in {"bpt", "ept", "ph", "it", "ut", "hi"}:
         attrs = dict(code.attributes)
@@ -284,25 +357,35 @@ def _append_prop_if_present(tu: _Element, prop_type: str, value: str | None) ->
     prop.text = value
-def _first_creator_id(unit: Data) -> str | None:
-    for comment in unit.comments:
-        if comment.origin is not None and comment.origin.creator_id:
-            return comment.origin.creator_id
-    return None
+def _write_prop_if_present(xf: Any, prop_type: str, value: str | None) -> None:
+    if value is not None and value != "":
+        _write_prop(xf, prop_type, value)
-def _first_project(unit: Data) -> str | None:
-    for comment in unit.comments:
-        if comment.origin is not None and comment.origin.project:
-            return comment.origin.project
-    return None
+def _write_prop(xf: Any, prop_type: str, value: str) -> None:
+    with xf.element("prop", type=prop_type):
+        xf.write(value)
-def _first_system(unit: Data) -> str | None:
+def _comment_summary(unit: Data) -> _CommentSummary:
+    summary = _CommentSummary()
     for comment in unit.comments:
-        if comment.origin is not None and comment.origin.system:
-            return comment.origin.system
-    return None
+        origin = comment.origin
+        if origin is None:
+            continue
+        if summary.creator_id is None and origin.creator_id:
+            summary.creator_id = origin.creator_id
+        if summary.project is None and origin.project:
+            summary.project = origin.project
+        if summary.system is None and origin.system:
+            summary.system = origin.system
+        if (
+            summary.creator_id is not None
+            and summary.project is not None
+            and summary.system is not None
+        ):
+            break
+    return summary
 def _property_type(key: str) -> str:

{lokit_python-0.1.1 → lokit_python-0.1.3}/src/lokit/exporters/xliff.py RENAMED Viewed

@@ -90,38 +90,54 @@ def _write_file(
     with xf.element(f"{{{XLIFF_NS}}}file", attrs):
         xf.write(etree.Element(f"{{{XLIFF_NS}}}header"))
         with xf.element(f"{{{XLIFF_NS}}}body"):
-            xf.write(_build_trans_unit(first_id, first_unit))
+            _write_trans_unit(xf, first_id, first_unit)
             for unit_id, unit in unit_iter:
-                xf.write(_build_trans_unit(unit_id, unit))
+                _write_trans_unit(xf, unit_id, unit)
-def _build_trans_unit(unit_id: str, unit: Data) -> _Element:
+def _write_trans_unit(xf: Any, unit_id: str, unit: Data) -> None:
     attrs = {"id": unit.extensions.get("unit_id", unit_id)}
     space = unit.extensions.get("space")
     if space:
         attrs["{http://www.w3.org/XML/1998/namespace}space"] = space
-    trans_unit = etree.Element(f"{{{XLIFF_NS}}}trans-unit", attrs)
-    trans_unit.append(
-        _build_segment(
+    with xf.element(f"{{{XLIFF_NS}}}trans-unit", attrs):
+        _write_segment(
+            xf,
             "source",
             unit.source,
             unit.tags.source_parts if unit.tags else [],
             unit.tags.source_tag_map if unit.tags else {},
         )
-    )
-    if unit.target is not None:
-        target = _build_segment(
-            "target",
-            unit.target,
-            unit.tags.target_parts if unit.tags else [],
-            unit.tags.target_tag_map if unit.tags else {},
-        )
-        trans_unit.append(target)
-    for comment in unit.comments:
-        if comment.context:
-            note = etree.SubElement(trans_unit, f"{{{XLIFF_NS}}}note")
-            note.text = comment.context
-    return trans_unit
+        if unit.target is not None:
+            _write_segment(
+                xf,
+                "target",
+                unit.target,
+                unit.tags.target_parts if unit.tags else [],
+                unit.tags.target_tag_map if unit.tags else {},
+            )
+        for comment in unit.comments:
+            if comment.context:
+                with xf.element(f"{{{XLIFF_NS}}}note"):
+                    xf.write(comment.context)
+def _write_segment(
+    xf: Any,
+    name: str,
+    text: str,
+    parts: list[SegmentPart],
+    tag_map: dict[str, TieData],
+) -> None:
+    with xf.element(f"{{{XLIFF_NS}}}{name}"):
+        effective_parts = parts if parts else [TextPart(text)]
+        for part in effective_parts:
+            if isinstance(part, TextPart):
+                xf.write(part.value)
+            elif isinstance(part, CodePart):
+                code = tag_map.get(part.ref)
+                if code is not None:
+                    xf.write(_build_code(code))
 def _build_segment(

{lokit_python-0.1.1 → lokit_python-0.1.3}/src/lokit/format_detection.py RENAMED Viewed

@@ -1,6 +1,7 @@
 from __future__ import annotations
 import json
+import re
 import zipfile
 from enum import StrEnum
 from io import BytesIO
@@ -8,6 +9,8 @@ from pathlib import Path
 from lokit.parsers.tmx.xml_utils import iterparse_safe, local_name
+_JSON_FORMAT_RE = re.compile(r'"(?:format_version|data)"\s*:')
 class LokitInputFormat(StrEnum):
     TMX = "tmx"
@@ -36,9 +39,9 @@ def detect_format(filepath: str | Path) -> LokitInputFormat:
         return LokitInputFormat.IDML
     if suffix == ".json":
         try:
-            with path.open("r", encoding="utf-8") as f:
-                data = json.load(f)
-            if isinstance(data, dict) and ("format_version" in data or "data" in data):
+            with path.open("rb") as f:
+                data = f.read(4096)
+            if _JSON_FORMAT_RE.search(data.decode("utf-8", errors="ignore")):
                 return LokitInputFormat.LOKIT_JSON
         except Exception:
             pass

lokit-python 0.1.1__tar.gz → 0.1.3__tar.gz

lokit-python 0.1.1__tar.gz → 0.1.3__tar.gz