lokit-python 0.1.0__tar.gz → 0.1.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {lokit_python-0.1.0 → lokit_python-0.1.1}/PKG-INFO +2 -2
- {lokit_python-0.1.0 → lokit_python-0.1.1}/pyproject.toml +2 -2
- lokit_python-0.1.1/setup.py +102 -0
- {lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit/__init__.py +16 -0
- {lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit/data/structure.py +23 -0
- {lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit/data/tag_types.py +1 -0
- {lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit/exporters/csv.py +15 -5
- {lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit/exporters/html.py +23 -11
- {lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit/exporters/idml.py +41 -22
- {lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit/exporters/json_i18n.py +14 -4
- {lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit/exporters/po.py +34 -5
- {lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit/exporters/tmx.py +71 -6
- {lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit/exporters/xliff.py +54 -21
- lokit_python-0.1.1/src/lokit/exporters/xlsx.py +69 -0
- {lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit/importers.py +172 -3
- lokit_python-0.1.1/src/lokit/io/atomic.py +39 -0
- {lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit/logic.py +46 -13
- lokit_python-0.1.1/src/lokit/parsers/async_bridge.py +81 -0
- {lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit/parsers/csv/extraction.py +3 -64
- {lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit/parsers/html/extraction.py +3 -63
- {lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit/parsers/idml/extraction.py +3 -63
- {lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit/parsers/json_i18n/extraction.py +3 -63
- {lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit/parsers/po/extraction.py +3 -63
- {lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit/parsers/tmx/base.py +3 -1
- {lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit/parsers/tmx/extraction.py +11 -74
- {lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit/parsers/tmx/props.py +133 -15
- {lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit/parsers/tmx/tags.py +3 -0
- {lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit/parsers/xliff/extraction.py +3 -62
- {lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit/parsers/xlsx/extraction.py +3 -64
- {lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit_python.egg-info/PKG-INFO +2 -2
- {lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit_python.egg-info/SOURCES.txt +3 -0
- lokit_python-0.1.1/src/lokit_python.egg-info/top_level.txt +2 -0
- lokit_python-0.1.1/tests/test_performance_safety.py +140 -0
- lokit_python-0.1.0/setup.py +0 -48
- lokit_python-0.1.0/src/lokit/exporters/xlsx.py +0 -39
- lokit_python-0.1.0/src/lokit_python.egg-info/top_level.txt +0 -2
- {lokit_python-0.1.0 → lokit_python-0.1.1}/README.md +0 -0
- {lokit_python-0.1.0 → lokit_python-0.1.1}/setup.cfg +0 -0
- {lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit/core/__init__.py +0 -0
- {lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit/core/logger.py +0 -0
- {lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit/data/__init__.py +0 -0
- {lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit/data/lang_codes.py +0 -0
- {lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit/exporters/__init__.py +0 -0
- {lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit/format_detection.py +0 -0
- {lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit/io/__init__.py +0 -0
- {lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit/io/json.py +0 -0
- {lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit/parsers/__init__.py +0 -0
- {lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit/parsers/csv/__init__.py +0 -0
- {lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit/parsers/html/__init__.py +0 -0
- {lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit/parsers/idml/__init__.py +0 -0
- {lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit/parsers/json_i18n/__init__.py +0 -0
- {lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit/parsers/po/__init__.py +0 -0
- {lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit/parsers/tmx/__init__.py +0 -0
- {lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit/parsers/tmx/header.py +0 -0
- {lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit/parsers/tmx/helpers.py +0 -0
- {lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit/parsers/tmx/models.py +0 -0
- {lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit/parsers/tmx/xml_utils.py +0 -0
- {lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit/parsers/xliff/__init__.py +0 -0
- {lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit/parsers/xliff/tags.py +0 -0
- {lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit/parsers/xlsx/__init__.py +0 -0
- {lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit/py.typed +0 -0
- {lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit_python.egg-info/dependency_links.txt +0 -0
- {lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit_python.egg-info/requires.txt +0 -0
- {lokit_python-0.1.0 → lokit_python-0.1.1}/tests/test_csv.py +0 -0
- {lokit_python-0.1.0 → lokit_python-0.1.1}/tests/test_html.py +0 -0
- {lokit_python-0.1.0 → lokit_python-0.1.1}/tests/test_idml.py +0 -0
- {lokit_python-0.1.0 → lokit_python-0.1.1}/tests/test_json_i18n.py +0 -0
- {lokit_python-0.1.0 → lokit_python-0.1.1}/tests/test_po.py +0 -0
- {lokit_python-0.1.0 → lokit_python-0.1.1}/tests/test_xlsx.py +0 -0
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: lokit-python
|
|
3
|
-
Version: 0.1.0
|
|
4
|
-
Summary:
|
|
3
|
+
Version: 0.1.1
|
|
4
|
+
Summary: A type-safe localization toolkit for parsing, converting, and matching TMX, XLIFF, PO, JSON, HTML, CSV, XLSX, and IDML files.
|
|
5
5
|
Requires-Python: >=3.12
|
|
6
6
|
Description-Content-Type: text/markdown
|
|
7
7
|
Requires-Dist: lxml>=6.1.1
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "lokit-python"
|
|
3
|
-
version = "0.1.0"
|
|
4
|
-
description = "
|
|
3
|
+
version = "0.1.1"
|
|
4
|
+
description = "A type-safe localization toolkit for parsing, converting, and matching TMX, XLIFF, PO, JSON, HTML, CSV, XLSX, and IDML files."
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
requires-python = ">=3.12"
|
|
7
7
|
dependencies = [
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
import glob
|
|
2
|
+
from pathlib import Path, PurePosixPath, PureWindowsPath
|
|
3
|
+
|
|
4
|
+
from setuptools import setup
|
|
5
|
+
from setuptools.command.build_ext import build_ext
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def _build_path_replacements(src_files):
|
|
9
|
+
"""Build a mapping of Windows-form paths to POSIX-form paths.
|
|
10
|
+
|
|
11
|
+
For each source file, compute its Windows backslash representation
|
|
12
|
+
and its POSIX forward-slash representation. Only include entries
|
|
13
|
+
where the two differ (i.e. the path contains directory separators).
|
|
14
|
+
|
|
15
|
+
This is used to fix mypyc-generated C files on Windows, where
|
|
16
|
+
embedded Python source paths use backslashes that MSVC interprets
|
|
17
|
+
as C escape sequences (e.g. \\x in \\xliff causes error C2153).
|
|
18
|
+
"""
|
|
19
|
+
replacements = {}
|
|
20
|
+
for src_file in src_files:
|
|
21
|
+
posix_form = PurePosixPath(src_file).as_posix()
|
|
22
|
+
windows_form = str(PureWindowsPath(src_file))
|
|
23
|
+
if windows_form != posix_form:
|
|
24
|
+
replacements[windows_form] = posix_form
|
|
25
|
+
return replacements
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _normalize_generated_c_file(path, replacements):
|
|
29
|
+
"""Replace Windows backslash paths with POSIX paths in a generated C file.
|
|
30
|
+
|
|
31
|
+
Performs direct string replacement of known source file paths,
|
|
32
|
+
avoiding any C source parsing. This is robust against escaped
|
|
33
|
+
quotes and other C syntax that broke the previous quote-based parser.
|
|
34
|
+
"""
|
|
35
|
+
if path.suffix != ".c" or not path.exists():
|
|
36
|
+
return
|
|
37
|
+
|
|
38
|
+
contents = path.read_text(encoding="utf-8")
|
|
39
|
+
normalized = contents
|
|
40
|
+
for windows_path, posix_path in replacements.items():
|
|
41
|
+
normalized = normalized.replace(windows_path, posix_path)
|
|
42
|
+
if normalized != contents:
|
|
43
|
+
path.write_text(normalized, encoding="utf-8")
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def _normalize_all_generated_c_files(replacements):
|
|
47
|
+
"""Normalize all generated C files in the build directory."""
|
|
48
|
+
build_dir = Path("build")
|
|
49
|
+
if build_dir.exists():
|
|
50
|
+
for path in build_dir.rglob("*.c"):
|
|
51
|
+
_normalize_generated_c_file(path, replacements)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def _normalize_ext_c_files(ext, replacements):
|
|
55
|
+
"""Normalize C files listed as sources for an extension module."""
|
|
56
|
+
for source in ext.sources:
|
|
57
|
+
_normalize_generated_c_file(Path(source), replacements)
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
class BuildExt(build_ext):
|
|
61
|
+
def build_extensions(self):
|
|
62
|
+
self._normalize_before_compile()
|
|
63
|
+
_normalize_all_generated_c_files(_path_replacements)
|
|
64
|
+
super().build_extensions()
|
|
65
|
+
|
|
66
|
+
def build_extension(self, ext):
|
|
67
|
+
_normalize_ext_c_files(ext, _path_replacements)
|
|
68
|
+
super().build_extension(ext)
|
|
69
|
+
|
|
70
|
+
def _normalize_before_compile(self):
|
|
71
|
+
original_compile = self.compiler.compile
|
|
72
|
+
|
|
73
|
+
def compile_with_normalized_sources(sources, *args, **kwargs):
|
|
74
|
+
for source in sources:
|
|
75
|
+
_normalize_generated_c_file(Path(source), _path_replacements)
|
|
76
|
+
return original_compile(sources, *args, **kwargs)
|
|
77
|
+
|
|
78
|
+
self.compiler.compile = compile_with_normalized_sources
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
try:
|
|
82
|
+
from mypyc.build import mypycify
|
|
83
|
+
|
|
84
|
+
src_files = glob.glob("src/lokit/**/*.py", recursive=True)
|
|
85
|
+
src_files = [f.replace("\\", "/") for f in src_files if "importers.py" not in f]
|
|
86
|
+
|
|
87
|
+
_path_replacements = _build_path_replacements(src_files)
|
|
88
|
+
|
|
89
|
+
ext_modules = mypycify(
|
|
90
|
+
src_files,
|
|
91
|
+
opt_level="3",
|
|
92
|
+
debug_level="0",
|
|
93
|
+
)
|
|
94
|
+
_normalize_all_generated_c_files(_path_replacements)
|
|
95
|
+
except ImportError:
|
|
96
|
+
_path_replacements = {}
|
|
97
|
+
ext_modules = []
|
|
98
|
+
|
|
99
|
+
setup(
|
|
100
|
+
cmdclass={"build_ext": BuildExt},
|
|
101
|
+
ext_modules=ext_modules,
|
|
102
|
+
)
|
|
@@ -3,12 +3,14 @@ from lokit.data.structure import (
|
|
|
3
3
|
BaseStructure,
|
|
4
4
|
CodePart,
|
|
5
5
|
Comment,
|
|
6
|
+
ConversionStats,
|
|
6
7
|
Data,
|
|
7
8
|
Meta,
|
|
8
9
|
Origin,
|
|
9
10
|
Plural,
|
|
10
11
|
PluralCategory,
|
|
11
12
|
SegmentPart,
|
|
13
|
+
StreamingStructure,
|
|
12
14
|
Tags,
|
|
13
15
|
TextPart,
|
|
14
16
|
TranslationStatus,
|
|
@@ -37,6 +39,8 @@ from lokit.exporters import (
|
|
|
37
39
|
from lokit.importers import (
|
|
38
40
|
import_csv,
|
|
39
41
|
import_csv_async,
|
|
42
|
+
import_file,
|
|
43
|
+
import_file_async,
|
|
40
44
|
import_idml,
|
|
41
45
|
import_idml_async,
|
|
42
46
|
import_html,
|
|
@@ -47,6 +51,10 @@ from lokit.importers import (
|
|
|
47
51
|
import_po_async,
|
|
48
52
|
import_tmx,
|
|
49
53
|
import_tmx_async,
|
|
54
|
+
stream_tmx,
|
|
55
|
+
convert_tmx_to_csv,
|
|
56
|
+
convert_tmx_to_tmx,
|
|
57
|
+
convert_tmx_to_xliff,
|
|
50
58
|
import_xliff,
|
|
51
59
|
import_xliff_async,
|
|
52
60
|
import_xlsx,
|
|
@@ -68,6 +76,7 @@ __all__ = [
|
|
|
68
76
|
"BaseStructure",
|
|
69
77
|
"CodePart",
|
|
70
78
|
"Comment",
|
|
79
|
+
"ConversionStats",
|
|
71
80
|
"Data",
|
|
72
81
|
"Meta",
|
|
73
82
|
"Lokit",
|
|
@@ -76,6 +85,7 @@ __all__ = [
|
|
|
76
85
|
"Plural",
|
|
77
86
|
"PluralCategory",
|
|
78
87
|
"SegmentPart",
|
|
88
|
+
"StreamingStructure",
|
|
79
89
|
"Tags",
|
|
80
90
|
"TextPart",
|
|
81
91
|
"TieData",
|
|
@@ -109,6 +119,8 @@ __all__ = [
|
|
|
109
119
|
"export_xlsx_async",
|
|
110
120
|
"import_csv",
|
|
111
121
|
"import_csv_async",
|
|
122
|
+
"import_file",
|
|
123
|
+
"import_file_async",
|
|
112
124
|
"import_idml",
|
|
113
125
|
"import_idml_async",
|
|
114
126
|
"import_html",
|
|
@@ -119,6 +131,10 @@ __all__ = [
|
|
|
119
131
|
"import_po_async",
|
|
120
132
|
"import_tmx",
|
|
121
133
|
"import_tmx_async",
|
|
134
|
+
"stream_tmx",
|
|
135
|
+
"convert_tmx_to_csv",
|
|
136
|
+
"convert_tmx_to_tmx",
|
|
137
|
+
"convert_tmx_to_xliff",
|
|
122
138
|
"import_xliff",
|
|
123
139
|
"import_xliff_async",
|
|
124
140
|
"import_xlsx",
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
from dataclasses import dataclass, field
|
|
2
2
|
from enum import StrEnum
|
|
3
|
+
from collections.abc import Iterable
|
|
3
4
|
from typing import Optional
|
|
4
5
|
|
|
5
6
|
from lokit.data.tag_types import TieData
|
|
@@ -116,3 +117,25 @@ class BaseStructure:
|
|
|
116
117
|
source_language: Optional[str] = None
|
|
117
118
|
target_language: Optional[str] = None
|
|
118
119
|
extensions: dict[str, str] = field(default_factory=dict)
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
@dataclass(slots=True)
|
|
123
|
+
class StreamingStructure:
|
|
124
|
+
source_locale: str
|
|
125
|
+
target_locale: Optional[str]
|
|
126
|
+
items: Iterable[tuple[str, Data]]
|
|
127
|
+
format_version: str = "0.1"
|
|
128
|
+
export_origin: str = ""
|
|
129
|
+
export_timestamp: str = ""
|
|
130
|
+
source_language: Optional[str] = None
|
|
131
|
+
target_language: Optional[str] = None
|
|
132
|
+
extensions: dict[str, str] = field(default_factory=dict)
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
@dataclass(slots=True)
|
|
136
|
+
class ConversionStats:
|
|
137
|
+
units_read: int
|
|
138
|
+
units_written: int
|
|
139
|
+
input_bytes: int
|
|
140
|
+
output_bytes: int
|
|
141
|
+
seconds: float
|
|
@@ -2,20 +2,24 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import asyncio
|
|
4
4
|
import csv
|
|
5
|
+
from collections.abc import Iterable
|
|
5
6
|
from pathlib import Path
|
|
6
7
|
|
|
7
|
-
from lokit.data.structure import BaseStructure, TranslationStatus
|
|
8
|
+
from lokit.data.structure import BaseStructure, Data, StreamingStructure, TranslationStatus
|
|
9
|
+
from lokit.io.atomic import atomic_output_path
|
|
8
10
|
|
|
9
11
|
|
|
10
|
-
|
|
12
|
+
Structure = BaseStructure | StreamingStructure
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def export_csv(document: Structure, filepath: str | Path) -> None:
|
|
11
16
|
path = Path(filepath)
|
|
12
|
-
path.parent.mkdir(parents=True, exist_ok=True)
|
|
13
17
|
|
|
14
|
-
with path
|
|
18
|
+
with atomic_output_path(path, "w") as fh:
|
|
15
19
|
writer = csv.DictWriter(fh, fieldnames=["id", "source", "target", "status", "comment"])
|
|
16
20
|
writer.writeheader()
|
|
17
21
|
|
|
18
|
-
for unit_id, unit in document.data.items():
|
|
22
|
+
for unit_id, unit in _iter_items(document):
|
|
19
23
|
comment = "; ".join(c.context for c in unit.comments if c.context)
|
|
20
24
|
status = unit.status.value if unit.status != TranslationStatus.UNKNOWN else ""
|
|
21
25
|
|
|
@@ -30,3 +34,9 @@ def export_csv(document: BaseStructure, filepath: str | Path) -> None:
|
|
|
30
34
|
|
|
31
35
|
async def export_csv_async(document: BaseStructure, filepath: str | Path) -> None:
|
|
32
36
|
await asyncio.to_thread(export_csv, document, filepath)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def _iter_items(document: Structure) -> Iterable[tuple[str, Data]]:
|
|
40
|
+
if isinstance(document, BaseStructure):
|
|
41
|
+
return document.data.items()
|
|
42
|
+
return document.items
|
|
@@ -1,18 +1,22 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import asyncio
|
|
4
|
+
from collections.abc import Iterable
|
|
4
5
|
from pathlib import Path
|
|
5
6
|
from typing import Any, cast
|
|
6
7
|
|
|
7
8
|
from lxml import html as lxml_html
|
|
8
9
|
from lxml.html import HtmlElement, tostring
|
|
9
10
|
|
|
10
|
-
from lokit.data.structure import BaseStructure, CodePart, Data, TextPart
|
|
11
|
+
from lokit.data.structure import BaseStructure, CodePart, Data, StreamingStructure, TextPart
|
|
11
12
|
from lokit.data.tag_types import TieData, TieType
|
|
13
|
+
from lokit.io.atomic import atomic_output_path
|
|
14
|
+
|
|
15
|
+
Structure = BaseStructure | StreamingStructure
|
|
12
16
|
|
|
13
17
|
|
|
14
18
|
def export_html(
|
|
15
|
-
document: BaseStructure,
|
|
19
|
+
document: Structure,
|
|
16
20
|
filepath: str | Path,
|
|
17
21
|
source_html: str | Path | None = None,
|
|
18
22
|
) -> None:
|
|
@@ -26,7 +30,7 @@ def export_html(
|
|
|
26
30
|
|
|
27
31
|
|
|
28
32
|
async def export_html_async(
|
|
29
|
-
document: BaseStructure,
|
|
33
|
+
document: Structure,
|
|
30
34
|
filepath: str | Path,
|
|
31
35
|
source_html: str | Path | None = None,
|
|
32
36
|
) -> None:
|
|
@@ -34,7 +38,7 @@ async def export_html_async(
|
|
|
34
38
|
|
|
35
39
|
|
|
36
40
|
def _export_from_source(
|
|
37
|
-
document: BaseStructure, output: Path, source: Path
|
|
41
|
+
document: Structure, output: Path, source: Path
|
|
38
42
|
) -> None:
|
|
39
43
|
doc = lxml_html.parse(str(source))
|
|
40
44
|
root = doc.getroot()
|
|
@@ -86,10 +90,11 @@ def _export_from_source(
|
|
|
86
90
|
index += 1
|
|
87
91
|
|
|
88
92
|
result = tostring(root, encoding="unicode", doctype="<!DOCTYPE html>")
|
|
89
|
-
output
|
|
93
|
+
with atomic_output_path(output, "w") as f:
|
|
94
|
+
f.write(result)
|
|
90
95
|
|
|
91
96
|
|
|
92
|
-
def _export_minimal(document: BaseStructure, output: Path) -> None:
|
|
97
|
+
def _export_minimal(document: Structure, output: Path) -> None:
|
|
93
98
|
lang = document.target_locale or document.source_locale
|
|
94
99
|
lines: list[str] = [
|
|
95
100
|
"<!DOCTYPE html>",
|
|
@@ -98,7 +103,7 @@ def _export_minimal(document: BaseStructure, output: Path) -> None:
|
|
|
98
103
|
'<meta charset="utf-8">',
|
|
99
104
|
]
|
|
100
105
|
|
|
101
|
-
for unit_id, unit in document.data.items():
|
|
106
|
+
for unit_id, unit in _iter_items(document):
|
|
102
107
|
if "meta." in unit_id:
|
|
103
108
|
name = unit.extensions.get("meta_name", "")
|
|
104
109
|
text = unit.target or unit.source
|
|
@@ -107,7 +112,7 @@ def _export_minimal(document: BaseStructure, output: Path) -> None:
|
|
|
107
112
|
lines.append("</head>")
|
|
108
113
|
lines.append("<body>")
|
|
109
114
|
|
|
110
|
-
for unit_id, unit in document.data.items():
|
|
115
|
+
for unit_id, unit in _iter_items(document):
|
|
111
116
|
if "meta." in unit_id or "img.alt" in unit_id:
|
|
112
117
|
continue
|
|
113
118
|
text = unit.target or unit.source
|
|
@@ -120,7 +125,8 @@ def _export_minimal(document: BaseStructure, output: Path) -> None:
|
|
|
120
125
|
|
|
121
126
|
lines.append("</body>")
|
|
122
127
|
lines.append("</html>")
|
|
123
|
-
output
|
|
128
|
+
with atomic_output_path(output, "w") as f:
|
|
129
|
+
f.write("\n".join(lines))
|
|
124
130
|
|
|
125
131
|
|
|
126
132
|
def _replace_element_text(element: HtmlElement, unit: Data) -> None:
|
|
@@ -197,8 +203,14 @@ def _format_attrs(attributes: dict[str, str]) -> str:
|
|
|
197
203
|
return "".join(parts)
|
|
198
204
|
|
|
199
205
|
|
|
200
|
-
def _build_unit_lookup(document: BaseStructure) -> dict[str, Data]:
|
|
201
|
-
return dict(document
|
|
206
|
+
def _build_unit_lookup(document: Structure) -> dict[str, Data]:
|
|
207
|
+
return dict(_iter_items(document))
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
def _iter_items(document: Structure) -> Iterable[tuple[str, Data]]:
|
|
211
|
+
if isinstance(document, BaseStructure):
|
|
212
|
+
return document.data.items()
|
|
213
|
+
return document.items
|
|
202
214
|
|
|
203
215
|
|
|
204
216
|
def _extract_tag_from_id(unit_id: str) -> str:
|
|
@@ -1,7 +1,10 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import asyncio
|
|
4
|
+
import contextlib
|
|
5
|
+
import os
|
|
4
6
|
import shutil
|
|
7
|
+
import tempfile
|
|
5
8
|
import zipfile
|
|
6
9
|
from pathlib import Path
|
|
7
10
|
|
|
@@ -19,30 +22,46 @@ def export_idml(
|
|
|
19
22
|
output_path = Path(filepath)
|
|
20
23
|
source_path = Path(source_idml)
|
|
21
24
|
output_path.parent.mkdir(parents=True, exist_ok=True)
|
|
25
|
+
tmp = tempfile.NamedTemporaryFile(
|
|
26
|
+
dir=output_path.parent,
|
|
27
|
+
prefix=f".{output_path.name}.",
|
|
28
|
+
suffix=".tmp",
|
|
29
|
+
delete=False,
|
|
30
|
+
)
|
|
31
|
+
tmp_path = Path(tmp.name)
|
|
32
|
+
tmp.close()
|
|
22
33
|
|
|
23
34
|
story_units = _group_by_story(document)
|
|
24
|
-
shutil.copy2(str(source_path), str(
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
with zipfile.ZipFile(str(
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
35
|
+
shutil.copy2(str(source_path), str(tmp_path))
|
|
36
|
+
|
|
37
|
+
try:
|
|
38
|
+
with zipfile.ZipFile(str(tmp_path), "a") as zf_out:
|
|
39
|
+
with zipfile.ZipFile(str(source_path), "r") as zf_in:
|
|
40
|
+
story_files = [
|
|
41
|
+
name for name in zf_in.namelist()
|
|
42
|
+
if name.startswith("Stories/Story_") and name.endswith(".xml")
|
|
43
|
+
]
|
|
44
|
+
for story_file in story_files:
|
|
45
|
+
units = story_units.get(story_file)
|
|
46
|
+
if not units:
|
|
47
|
+
continue
|
|
48
|
+
|
|
49
|
+
with zf_in.open(story_file) as stream:
|
|
50
|
+
tree = etree.parse(stream)
|
|
51
|
+
root = tree.getroot()
|
|
52
|
+
_apply_translations(root, units)
|
|
53
|
+
modified_xml = etree.tostring(
|
|
54
|
+
root, xml_declaration=True, encoding="UTF-8"
|
|
55
|
+
)
|
|
56
|
+
|
|
57
|
+
_replace_in_zip(zf_out, story_file, modified_xml)
|
|
58
|
+
with tmp_path.open("rb") as f:
|
|
59
|
+
os.fsync(f.fileno())
|
|
60
|
+
os.replace(tmp_path, output_path)
|
|
61
|
+
except BaseException:
|
|
62
|
+
with contextlib.suppress(FileNotFoundError):
|
|
63
|
+
tmp_path.unlink()
|
|
64
|
+
raise
|
|
46
65
|
|
|
47
66
|
|
|
48
67
|
async def export_idml_async(
|
|
@@ -2,14 +2,18 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import asyncio
|
|
4
4
|
import json
|
|
5
|
+
from collections.abc import Iterable
|
|
5
6
|
from pathlib import Path
|
|
6
7
|
from typing import Any
|
|
7
8
|
|
|
8
|
-
from lokit.data.structure import BaseStructure
|
|
9
|
+
from lokit.data.structure import BaseStructure, Data, StreamingStructure
|
|
10
|
+
from lokit.io.atomic import atomic_output_path
|
|
11
|
+
|
|
12
|
+
Structure = BaseStructure | StreamingStructure
|
|
9
13
|
|
|
10
14
|
|
|
11
15
|
def export_json_i18n(
|
|
12
|
-
document: BaseStructure,
|
|
16
|
+
document: Structure,
|
|
13
17
|
filepath: str | Path,
|
|
14
18
|
nested: bool = True,
|
|
15
19
|
) -> None:
|
|
@@ -17,14 +21,14 @@ def export_json_i18n(
|
|
|
17
21
|
path.parent.mkdir(parents=True, exist_ok=True)
|
|
18
22
|
|
|
19
23
|
output: dict[str, Any] = {}
|
|
20
|
-
for key, unit in document.data.items():
|
|
24
|
+
for key, unit in _iter_items(document):
|
|
21
25
|
value = unit.target if unit.target is not None else unit.source
|
|
22
26
|
if nested:
|
|
23
27
|
_set_nested(output, key, value)
|
|
24
28
|
else:
|
|
25
29
|
output[key] = value
|
|
26
30
|
|
|
27
|
-
with path
|
|
31
|
+
with atomic_output_path(path, "w") as f:
|
|
28
32
|
json.dump(output, f, ensure_ascii=False, indent=2)
|
|
29
33
|
f.write("\n")
|
|
30
34
|
|
|
@@ -45,3 +49,9 @@ def _set_nested(obj: dict[str, Any], dot_key: str, value: str) -> None:
|
|
|
45
49
|
current[part] = {}
|
|
46
50
|
current = current[part]
|
|
47
51
|
current[parts[-1]] = value
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def _iter_items(document: Structure) -> Iterable[tuple[str, Data]]:
|
|
55
|
+
if isinstance(document, BaseStructure):
|
|
56
|
+
return document.data.items()
|
|
57
|
+
return document.items
|
|
@@ -1,18 +1,25 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import asyncio
|
|
4
|
+
import contextlib
|
|
5
|
+
import os
|
|
6
|
+
import tempfile
|
|
4
7
|
from collections import defaultdict
|
|
8
|
+
from collections.abc import Iterable
|
|
5
9
|
from pathlib import Path
|
|
6
10
|
from typing import Any
|
|
7
11
|
|
|
8
12
|
import polib
|
|
9
13
|
|
|
10
|
-
from lokit.data.structure import BaseStructure, Data, TranslationStatus
|
|
14
|
+
from lokit.data.structure import BaseStructure, Data, StreamingStructure, TranslationStatus
|
|
11
15
|
|
|
12
16
|
_PLURAL_SUFFIX_PATTERN = "["
|
|
13
17
|
|
|
14
18
|
|
|
15
|
-
|
|
19
|
+
Structure = BaseStructure | StreamingStructure
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def export_po(document: Structure, filepath: str | Path) -> None:
|
|
16
23
|
path = Path(filepath)
|
|
17
24
|
path.parent.mkdir(parents=True, exist_ok=True)
|
|
18
25
|
|
|
@@ -22,7 +29,7 @@ def export_po(document: BaseStructure, filepath: str | Path) -> None:
|
|
|
22
29
|
plural_groups: dict[str, list[tuple[str, Data]]] = defaultdict(list)
|
|
23
30
|
singular_units: list[tuple[str, Data]] = []
|
|
24
31
|
|
|
25
|
-
for unit_id, unit in document.data.items():
|
|
32
|
+
for unit_id, unit in _iter_items(document):
|
|
26
33
|
if _PLURAL_SUFFIX_PATTERN in unit_id and unit.plural is not None:
|
|
27
34
|
base_id = unit_id[: unit_id.index(_PLURAL_SUFFIX_PATTERN)]
|
|
28
35
|
plural_groups[base_id].append((unit_id, unit))
|
|
@@ -37,14 +44,30 @@ def export_po(document: BaseStructure, filepath: str | Path) -> None:
|
|
|
37
44
|
for base_id, forms in plural_groups.items():
|
|
38
45
|
po.append(_build_plural_entry(base_id, forms))
|
|
39
46
|
|
|
40
|
-
|
|
47
|
+
tmp = tempfile.NamedTemporaryFile(
|
|
48
|
+
dir=path.parent,
|
|
49
|
+
prefix=f".{path.name}.",
|
|
50
|
+
suffix=".tmp",
|
|
51
|
+
delete=False,
|
|
52
|
+
)
|
|
53
|
+
tmp_path = Path(tmp.name)
|
|
54
|
+
tmp.close()
|
|
55
|
+
try:
|
|
56
|
+
po.save(str(tmp_path))
|
|
57
|
+
with tmp_path.open("rb") as f:
|
|
58
|
+
os.fsync(f.fileno())
|
|
59
|
+
os.replace(tmp_path, path)
|
|
60
|
+
except BaseException:
|
|
61
|
+
with contextlib.suppress(FileNotFoundError):
|
|
62
|
+
tmp_path.unlink()
|
|
63
|
+
raise
|
|
41
64
|
|
|
42
65
|
|
|
43
66
|
async def export_po_async(document: BaseStructure, filepath: str | Path) -> None:
|
|
44
67
|
await asyncio.to_thread(export_po, document, filepath)
|
|
45
68
|
|
|
46
69
|
|
|
47
|
-
def _build_metadata(document: BaseStructure) -> dict[str, str]:
|
|
70
|
+
def _build_metadata(document: Structure) -> dict[str, str]:
|
|
48
71
|
meta: dict[str, str] = {
|
|
49
72
|
"Content-Type": "text/plain; charset=UTF-8",
|
|
50
73
|
"Content-Transfer-Encoding": "8bit",
|
|
@@ -160,3 +183,9 @@ def _apply_occurrences(entry: Any, unit: Data) -> None:
|
|
|
160
183
|
else:
|
|
161
184
|
occurrences.append((ref, ""))
|
|
162
185
|
entry.occurrences = occurrences
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
def _iter_items(document: Structure) -> Iterable[tuple[str, Data]]:
|
|
189
|
+
if isinstance(document, BaseStructure):
|
|
190
|
+
return document.data.items()
|
|
191
|
+
return document.items
|