lokit-python 0.1.0 (tar.gz) → 0.1.1 (tar.gz)

This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (69)
  1. {lokit_python-0.1.0 → lokit_python-0.1.1}/PKG-INFO +2 -2
  2. {lokit_python-0.1.0 → lokit_python-0.1.1}/pyproject.toml +2 -2
  3. lokit_python-0.1.1/setup.py +102 -0
  4. {lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit/__init__.py +16 -0
  5. {lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit/data/structure.py +23 -0
  6. {lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit/data/tag_types.py +1 -0
  7. {lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit/exporters/csv.py +15 -5
  8. {lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit/exporters/html.py +23 -11
  9. {lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit/exporters/idml.py +41 -22
  10. {lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit/exporters/json_i18n.py +14 -4
  11. {lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit/exporters/po.py +34 -5
  12. {lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit/exporters/tmx.py +71 -6
  13. {lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit/exporters/xliff.py +54 -21
  14. lokit_python-0.1.1/src/lokit/exporters/xlsx.py +69 -0
  15. {lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit/importers.py +172 -3
  16. lokit_python-0.1.1/src/lokit/io/atomic.py +39 -0
  17. {lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit/logic.py +46 -13
  18. lokit_python-0.1.1/src/lokit/parsers/async_bridge.py +81 -0
  19. {lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit/parsers/csv/extraction.py +3 -64
  20. {lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit/parsers/html/extraction.py +3 -63
  21. {lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit/parsers/idml/extraction.py +3 -63
  22. {lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit/parsers/json_i18n/extraction.py +3 -63
  23. {lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit/parsers/po/extraction.py +3 -63
  24. {lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit/parsers/tmx/base.py +3 -1
  25. {lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit/parsers/tmx/extraction.py +11 -74
  26. {lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit/parsers/tmx/props.py +133 -15
  27. {lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit/parsers/tmx/tags.py +3 -0
  28. {lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit/parsers/xliff/extraction.py +3 -62
  29. {lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit/parsers/xlsx/extraction.py +3 -64
  30. {lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit_python.egg-info/PKG-INFO +2 -2
  31. {lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit_python.egg-info/SOURCES.txt +3 -0
  32. lokit_python-0.1.1/src/lokit_python.egg-info/top_level.txt +2 -0
  33. lokit_python-0.1.1/tests/test_performance_safety.py +140 -0
  34. lokit_python-0.1.0/setup.py +0 -48
  35. lokit_python-0.1.0/src/lokit/exporters/xlsx.py +0 -39
  36. lokit_python-0.1.0/src/lokit_python.egg-info/top_level.txt +0 -2
  37. {lokit_python-0.1.0 → lokit_python-0.1.1}/README.md +0 -0
  38. {lokit_python-0.1.0 → lokit_python-0.1.1}/setup.cfg +0 -0
  39. {lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit/core/__init__.py +0 -0
  40. {lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit/core/logger.py +0 -0
  41. {lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit/data/__init__.py +0 -0
  42. {lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit/data/lang_codes.py +0 -0
  43. {lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit/exporters/__init__.py +0 -0
  44. {lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit/format_detection.py +0 -0
  45. {lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit/io/__init__.py +0 -0
  46. {lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit/io/json.py +0 -0
  47. {lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit/parsers/__init__.py +0 -0
  48. {lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit/parsers/csv/__init__.py +0 -0
  49. {lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit/parsers/html/__init__.py +0 -0
  50. {lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit/parsers/idml/__init__.py +0 -0
  51. {lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit/parsers/json_i18n/__init__.py +0 -0
  52. {lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit/parsers/po/__init__.py +0 -0
  53. {lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit/parsers/tmx/__init__.py +0 -0
  54. {lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit/parsers/tmx/header.py +0 -0
  55. {lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit/parsers/tmx/helpers.py +0 -0
  56. {lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit/parsers/tmx/models.py +0 -0
  57. {lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit/parsers/tmx/xml_utils.py +0 -0
  58. {lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit/parsers/xliff/__init__.py +0 -0
  59. {lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit/parsers/xliff/tags.py +0 -0
  60. {lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit/parsers/xlsx/__init__.py +0 -0
  61. {lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit/py.typed +0 -0
  62. {lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit_python.egg-info/dependency_links.txt +0 -0
  63. {lokit_python-0.1.0 → lokit_python-0.1.1}/src/lokit_python.egg-info/requires.txt +0 -0
  64. {lokit_python-0.1.0 → lokit_python-0.1.1}/tests/test_csv.py +0 -0
  65. {lokit_python-0.1.0 → lokit_python-0.1.1}/tests/test_html.py +0 -0
  66. {lokit_python-0.1.0 → lokit_python-0.1.1}/tests/test_idml.py +0 -0
  67. {lokit_python-0.1.0 → lokit_python-0.1.1}/tests/test_json_i18n.py +0 -0
  68. {lokit_python-0.1.0 → lokit_python-0.1.1}/tests/test_po.py +0 -0
  69. {lokit_python-0.1.0 → lokit_python-0.1.1}/tests/test_xlsx.py +0 -0
@@ -1,7 +1,7 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: lokit-python
3
- Version: 0.1.0
4
- Summary: Add your description here
3
+ Version: 0.1.1
4
+ Summary: A type-safe localization toolkit for parsing, converting, and matching TMX, XLIFF, PO, JSON, HTML, CSV, XLSX, and IDML files.
5
5
  Requires-Python: >=3.12
6
6
  Description-Content-Type: text/markdown
7
7
  Requires-Dist: lxml>=6.1.1
@@ -1,7 +1,7 @@
1
1
  [project]
2
2
  name = "lokit-python"
3
- version = "0.1.0"
4
- description = "Add your description here"
3
+ version = "0.1.1"
4
+ description = "A type-safe localization toolkit for parsing, converting, and matching TMX, XLIFF, PO, JSON, HTML, CSV, XLSX, and IDML files."
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.12"
7
7
  dependencies = [
@@ -0,0 +1,102 @@
1
+ import glob
2
+ from pathlib import Path, PurePosixPath, PureWindowsPath
3
+
4
+ from setuptools import setup
5
+ from setuptools.command.build_ext import build_ext
6
+
7
+
8
+ def _build_path_replacements(src_files):
9
+ """Build a mapping of Windows-form paths to POSIX-form paths.
10
+
11
+ For each source file, compute its Windows backslash representation
12
+ and its POSIX forward-slash representation. Only include entries
13
+ where the two differ (i.e. the path contains directory separators).
14
+
15
+ This is used to fix mypyc-generated C files on Windows, where
16
+ embedded Python source paths use backslashes that MSVC interprets
17
+ as C escape sequences (e.g. \\x in \\xliff causes error C2153).
18
+ """
19
+ replacements = {}
20
+ for src_file in src_files:
21
+ posix_form = PurePosixPath(src_file).as_posix()
22
+ windows_form = str(PureWindowsPath(src_file))
23
+ if windows_form != posix_form:
24
+ replacements[windows_form] = posix_form
25
+ return replacements
26
+
27
+
28
+ def _normalize_generated_c_file(path, replacements):
29
+ """Replace Windows backslash paths with POSIX paths in a generated C file.
30
+
31
+ Performs direct string replacement of known source file paths,
32
+ avoiding any C source parsing. This is robust against escaped
33
+ quotes and other C syntax that broke the previous quote-based parser.
34
+ """
35
+ if path.suffix != ".c" or not path.exists():
36
+ return
37
+
38
+ contents = path.read_text(encoding="utf-8")
39
+ normalized = contents
40
+ for windows_path, posix_path in replacements.items():
41
+ normalized = normalized.replace(windows_path, posix_path)
42
+ if normalized != contents:
43
+ path.write_text(normalized, encoding="utf-8")
44
+
45
+
46
+ def _normalize_all_generated_c_files(replacements):
47
+ """Normalize all generated C files in the build directory."""
48
+ build_dir = Path("build")
49
+ if build_dir.exists():
50
+ for path in build_dir.rglob("*.c"):
51
+ _normalize_generated_c_file(path, replacements)
52
+
53
+
54
+ def _normalize_ext_c_files(ext, replacements):
55
+ """Normalize C files listed as sources for an extension module."""
56
+ for source in ext.sources:
57
+ _normalize_generated_c_file(Path(source), replacements)
58
+
59
+
60
+ class BuildExt(build_ext):
61
+ def build_extensions(self):
62
+ self._normalize_before_compile()
63
+ _normalize_all_generated_c_files(_path_replacements)
64
+ super().build_extensions()
65
+
66
+ def build_extension(self, ext):
67
+ _normalize_ext_c_files(ext, _path_replacements)
68
+ super().build_extension(ext)
69
+
70
+ def _normalize_before_compile(self):
71
+ original_compile = self.compiler.compile
72
+
73
+ def compile_with_normalized_sources(sources, *args, **kwargs):
74
+ for source in sources:
75
+ _normalize_generated_c_file(Path(source), _path_replacements)
76
+ return original_compile(sources, *args, **kwargs)
77
+
78
+ self.compiler.compile = compile_with_normalized_sources
79
+
80
+
81
+ try:
82
+ from mypyc.build import mypycify
83
+
84
+ src_files = glob.glob("src/lokit/**/*.py", recursive=True)
85
+ src_files = [f.replace("\\", "/") for f in src_files if "importers.py" not in f]
86
+
87
+ _path_replacements = _build_path_replacements(src_files)
88
+
89
+ ext_modules = mypycify(
90
+ src_files,
91
+ opt_level="3",
92
+ debug_level="0",
93
+ )
94
+ _normalize_all_generated_c_files(_path_replacements)
95
+ except ImportError:
96
+ _path_replacements = {}
97
+ ext_modules = []
98
+
99
+ setup(
100
+ cmdclass={"build_ext": BuildExt},
101
+ ext_modules=ext_modules,
102
+ )
@@ -3,12 +3,14 @@ from lokit.data.structure import (
3
3
  BaseStructure,
4
4
  CodePart,
5
5
  Comment,
6
+ ConversionStats,
6
7
  Data,
7
8
  Meta,
8
9
  Origin,
9
10
  Plural,
10
11
  PluralCategory,
11
12
  SegmentPart,
13
+ StreamingStructure,
12
14
  Tags,
13
15
  TextPart,
14
16
  TranslationStatus,
@@ -37,6 +39,8 @@ from lokit.exporters import (
37
39
  from lokit.importers import (
38
40
  import_csv,
39
41
  import_csv_async,
42
+ import_file,
43
+ import_file_async,
40
44
  import_idml,
41
45
  import_idml_async,
42
46
  import_html,
@@ -47,6 +51,10 @@ from lokit.importers import (
47
51
  import_po_async,
48
52
  import_tmx,
49
53
  import_tmx_async,
54
+ stream_tmx,
55
+ convert_tmx_to_csv,
56
+ convert_tmx_to_tmx,
57
+ convert_tmx_to_xliff,
50
58
  import_xliff,
51
59
  import_xliff_async,
52
60
  import_xlsx,
@@ -68,6 +76,7 @@ __all__ = [
68
76
  "BaseStructure",
69
77
  "CodePart",
70
78
  "Comment",
79
+ "ConversionStats",
71
80
  "Data",
72
81
  "Meta",
73
82
  "Lokit",
@@ -76,6 +85,7 @@ __all__ = [
76
85
  "Plural",
77
86
  "PluralCategory",
78
87
  "SegmentPart",
88
+ "StreamingStructure",
79
89
  "Tags",
80
90
  "TextPart",
81
91
  "TieData",
@@ -109,6 +119,8 @@ __all__ = [
109
119
  "export_xlsx_async",
110
120
  "import_csv",
111
121
  "import_csv_async",
122
+ "import_file",
123
+ "import_file_async",
112
124
  "import_idml",
113
125
  "import_idml_async",
114
126
  "import_html",
@@ -119,6 +131,10 @@ __all__ = [
119
131
  "import_po_async",
120
132
  "import_tmx",
121
133
  "import_tmx_async",
134
+ "stream_tmx",
135
+ "convert_tmx_to_csv",
136
+ "convert_tmx_to_tmx",
137
+ "convert_tmx_to_xliff",
122
138
  "import_xliff",
123
139
  "import_xliff_async",
124
140
  "import_xlsx",
@@ -1,5 +1,6 @@
1
1
  from dataclasses import dataclass, field
2
2
  from enum import StrEnum
3
+ from collections.abc import Iterable
3
4
  from typing import Optional
4
5
 
5
6
  from lokit.data.tag_types import TieData
@@ -116,3 +117,25 @@ class BaseStructure:
116
117
  source_language: Optional[str] = None
117
118
  target_language: Optional[str] = None
118
119
  extensions: dict[str, str] = field(default_factory=dict)
120
+
121
+
122
+ @dataclass(slots=True)
123
+ class StreamingStructure:
124
+ source_locale: str
125
+ target_locale: Optional[str]
126
+ items: Iterable[tuple[str, Data]]
127
+ format_version: str = "0.1"
128
+ export_origin: str = ""
129
+ export_timestamp: str = ""
130
+ source_language: Optional[str] = None
131
+ target_language: Optional[str] = None
132
+ extensions: dict[str, str] = field(default_factory=dict)
133
+
134
+
135
+ @dataclass(slots=True)
136
+ class ConversionStats:
137
+ units_read: int
138
+ units_written: int
139
+ input_bytes: int
140
+ output_bytes: int
141
+ seconds: float
@@ -76,3 +76,4 @@ class TieData:
76
76
  order: int = 0
77
77
  pair_id: Optional[str] = None
78
78
  original_name: Optional[str] = None
79
+ original_text: Optional[str] = None
@@ -2,20 +2,24 @@ from __future__ import annotations
2
2
 
3
3
  import asyncio
4
4
  import csv
5
+ from collections.abc import Iterable
5
6
  from pathlib import Path
6
7
 
7
- from lokit.data.structure import BaseStructure, TranslationStatus
8
+ from lokit.data.structure import BaseStructure, Data, StreamingStructure, TranslationStatus
9
+ from lokit.io.atomic import atomic_output_path
8
10
 
9
11
 
10
- def export_csv(document: BaseStructure, filepath: str | Path) -> None:
12
+ Structure = BaseStructure | StreamingStructure
13
+
14
+
15
+ def export_csv(document: Structure, filepath: str | Path) -> None:
11
16
  path = Path(filepath)
12
- path.parent.mkdir(parents=True, exist_ok=True)
13
17
 
14
- with path.open("w", newline="", encoding="utf-8") as fh:
18
+ with atomic_output_path(path, "w") as fh:
15
19
  writer = csv.DictWriter(fh, fieldnames=["id", "source", "target", "status", "comment"])
16
20
  writer.writeheader()
17
21
 
18
- for unit_id, unit in document.data.items():
22
+ for unit_id, unit in _iter_items(document):
19
23
  comment = "; ".join(c.context for c in unit.comments if c.context)
20
24
  status = unit.status.value if unit.status != TranslationStatus.UNKNOWN else ""
21
25
 
@@ -30,3 +34,9 @@ def export_csv(document: BaseStructure, filepath: str | Path) -> None:
30
34
 
31
35
  async def export_csv_async(document: BaseStructure, filepath: str | Path) -> None:
32
36
  await asyncio.to_thread(export_csv, document, filepath)
37
+
38
+
39
+ def _iter_items(document: Structure) -> Iterable[tuple[str, Data]]:
40
+ if isinstance(document, BaseStructure):
41
+ return document.data.items()
42
+ return document.items
@@ -1,18 +1,22 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import asyncio
4
+ from collections.abc import Iterable
4
5
  from pathlib import Path
5
6
  from typing import Any, cast
6
7
 
7
8
  from lxml import html as lxml_html
8
9
  from lxml.html import HtmlElement, tostring
9
10
 
10
- from lokit.data.structure import BaseStructure, CodePart, Data, TextPart
11
+ from lokit.data.structure import BaseStructure, CodePart, Data, StreamingStructure, TextPart
11
12
  from lokit.data.tag_types import TieData, TieType
13
+ from lokit.io.atomic import atomic_output_path
14
+
15
+ Structure = BaseStructure | StreamingStructure
12
16
 
13
17
 
14
18
  def export_html(
15
- document: BaseStructure,
19
+ document: Structure,
16
20
  filepath: str | Path,
17
21
  source_html: str | Path | None = None,
18
22
  ) -> None:
@@ -26,7 +30,7 @@ def export_html(
26
30
 
27
31
 
28
32
  async def export_html_async(
29
- document: BaseStructure,
33
+ document: Structure,
30
34
  filepath: str | Path,
31
35
  source_html: str | Path | None = None,
32
36
  ) -> None:
@@ -34,7 +38,7 @@ async def export_html_async(
34
38
 
35
39
 
36
40
  def _export_from_source(
37
- document: BaseStructure, output: Path, source: Path
41
+ document: Structure, output: Path, source: Path
38
42
  ) -> None:
39
43
  doc = lxml_html.parse(str(source))
40
44
  root = doc.getroot()
@@ -86,10 +90,11 @@ def _export_from_source(
86
90
  index += 1
87
91
 
88
92
  result = tostring(root, encoding="unicode", doctype="<!DOCTYPE html>")
89
- output.write_text(result, encoding="utf-8")
93
+ with atomic_output_path(output, "w") as f:
94
+ f.write(result)
90
95
 
91
96
 
92
- def _export_minimal(document: BaseStructure, output: Path) -> None:
97
+ def _export_minimal(document: Structure, output: Path) -> None:
93
98
  lang = document.target_locale or document.source_locale
94
99
  lines: list[str] = [
95
100
  "<!DOCTYPE html>",
@@ -98,7 +103,7 @@ def _export_minimal(document: BaseStructure, output: Path) -> None:
98
103
  '<meta charset="utf-8">',
99
104
  ]
100
105
 
101
- for unit_id, unit in document.data.items():
106
+ for unit_id, unit in _iter_items(document):
102
107
  if "meta." in unit_id:
103
108
  name = unit.extensions.get("meta_name", "")
104
109
  text = unit.target or unit.source
@@ -107,7 +112,7 @@ def _export_minimal(document: BaseStructure, output: Path) -> None:
107
112
  lines.append("</head>")
108
113
  lines.append("<body>")
109
114
 
110
- for unit_id, unit in document.data.items():
115
+ for unit_id, unit in _iter_items(document):
111
116
  if "meta." in unit_id or "img.alt" in unit_id:
112
117
  continue
113
118
  text = unit.target or unit.source
@@ -120,7 +125,8 @@ def _export_minimal(document: BaseStructure, output: Path) -> None:
120
125
 
121
126
  lines.append("</body>")
122
127
  lines.append("</html>")
123
- output.write_text("\n".join(lines), encoding="utf-8")
128
+ with atomic_output_path(output, "w") as f:
129
+ f.write("\n".join(lines))
124
130
 
125
131
 
126
132
  def _replace_element_text(element: HtmlElement, unit: Data) -> None:
@@ -197,8 +203,14 @@ def _format_attrs(attributes: dict[str, str]) -> str:
197
203
  return "".join(parts)
198
204
 
199
205
 
200
- def _build_unit_lookup(document: BaseStructure) -> dict[str, Data]:
201
- return dict(document.data)
206
+ def _build_unit_lookup(document: Structure) -> dict[str, Data]:
207
+ return dict(_iter_items(document))
208
+
209
+
210
+ def _iter_items(document: Structure) -> Iterable[tuple[str, Data]]:
211
+ if isinstance(document, BaseStructure):
212
+ return document.data.items()
213
+ return document.items
202
214
 
203
215
 
204
216
  def _extract_tag_from_id(unit_id: str) -> str:
@@ -1,7 +1,10 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import asyncio
4
+ import contextlib
5
+ import os
4
6
  import shutil
7
+ import tempfile
5
8
  import zipfile
6
9
  from pathlib import Path
7
10
 
@@ -19,30 +22,46 @@ def export_idml(
19
22
  output_path = Path(filepath)
20
23
  source_path = Path(source_idml)
21
24
  output_path.parent.mkdir(parents=True, exist_ok=True)
25
+ tmp = tempfile.NamedTemporaryFile(
26
+ dir=output_path.parent,
27
+ prefix=f".{output_path.name}.",
28
+ suffix=".tmp",
29
+ delete=False,
30
+ )
31
+ tmp_path = Path(tmp.name)
32
+ tmp.close()
22
33
 
23
34
  story_units = _group_by_story(document)
24
- shutil.copy2(str(source_path), str(output_path))
25
-
26
- with zipfile.ZipFile(str(output_path), "a") as zf_out:
27
- with zipfile.ZipFile(str(source_path), "r") as zf_in:
28
- story_files = [
29
- name for name in zf_in.namelist()
30
- if name.startswith("Stories/Story_") and name.endswith(".xml")
31
- ]
32
- for story_file in story_files:
33
- units = story_units.get(story_file)
34
- if not units:
35
- continue
36
-
37
- with zf_in.open(story_file) as stream:
38
- tree = etree.parse(stream)
39
- root = tree.getroot()
40
- _apply_translations(root, units)
41
- modified_xml = etree.tostring(
42
- root, xml_declaration=True, encoding="UTF-8"
43
- )
44
-
45
- _replace_in_zip(zf_out, story_file, modified_xml)
35
+ shutil.copy2(str(source_path), str(tmp_path))
36
+
37
+ try:
38
+ with zipfile.ZipFile(str(tmp_path), "a") as zf_out:
39
+ with zipfile.ZipFile(str(source_path), "r") as zf_in:
40
+ story_files = [
41
+ name for name in zf_in.namelist()
42
+ if name.startswith("Stories/Story_") and name.endswith(".xml")
43
+ ]
44
+ for story_file in story_files:
45
+ units = story_units.get(story_file)
46
+ if not units:
47
+ continue
48
+
49
+ with zf_in.open(story_file) as stream:
50
+ tree = etree.parse(stream)
51
+ root = tree.getroot()
52
+ _apply_translations(root, units)
53
+ modified_xml = etree.tostring(
54
+ root, xml_declaration=True, encoding="UTF-8"
55
+ )
56
+
57
+ _replace_in_zip(zf_out, story_file, modified_xml)
58
+ with tmp_path.open("rb") as f:
59
+ os.fsync(f.fileno())
60
+ os.replace(tmp_path, output_path)
61
+ except BaseException:
62
+ with contextlib.suppress(FileNotFoundError):
63
+ tmp_path.unlink()
64
+ raise
46
65
 
47
66
 
48
67
  async def export_idml_async(
@@ -2,14 +2,18 @@ from __future__ import annotations
2
2
 
3
3
  import asyncio
4
4
  import json
5
+ from collections.abc import Iterable
5
6
  from pathlib import Path
6
7
  from typing import Any
7
8
 
8
- from lokit.data.structure import BaseStructure
9
+ from lokit.data.structure import BaseStructure, Data, StreamingStructure
10
+ from lokit.io.atomic import atomic_output_path
11
+
12
+ Structure = BaseStructure | StreamingStructure
9
13
 
10
14
 
11
15
  def export_json_i18n(
12
- document: BaseStructure,
16
+ document: Structure,
13
17
  filepath: str | Path,
14
18
  nested: bool = True,
15
19
  ) -> None:
@@ -17,14 +21,14 @@ def export_json_i18n(
17
21
  path.parent.mkdir(parents=True, exist_ok=True)
18
22
 
19
23
  output: dict[str, Any] = {}
20
- for key, unit in document.data.items():
24
+ for key, unit in _iter_items(document):
21
25
  value = unit.target if unit.target is not None else unit.source
22
26
  if nested:
23
27
  _set_nested(output, key, value)
24
28
  else:
25
29
  output[key] = value
26
30
 
27
- with path.open("w", encoding="utf-8") as f:
31
+ with atomic_output_path(path, "w") as f:
28
32
  json.dump(output, f, ensure_ascii=False, indent=2)
29
33
  f.write("\n")
30
34
 
@@ -45,3 +49,9 @@ def _set_nested(obj: dict[str, Any], dot_key: str, value: str) -> None:
45
49
  current[part] = {}
46
50
  current = current[part]
47
51
  current[parts[-1]] = value
52
+
53
+
54
+ def _iter_items(document: Structure) -> Iterable[tuple[str, Data]]:
55
+ if isinstance(document, BaseStructure):
56
+ return document.data.items()
57
+ return document.items
@@ -1,18 +1,25 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import asyncio
4
+ import contextlib
5
+ import os
6
+ import tempfile
4
7
  from collections import defaultdict
8
+ from collections.abc import Iterable
5
9
  from pathlib import Path
6
10
  from typing import Any
7
11
 
8
12
  import polib
9
13
 
10
- from lokit.data.structure import BaseStructure, Data, TranslationStatus
14
+ from lokit.data.structure import BaseStructure, Data, StreamingStructure, TranslationStatus
11
15
 
12
16
  _PLURAL_SUFFIX_PATTERN = "["
13
17
 
14
18
 
15
- def export_po(document: BaseStructure, filepath: str | Path) -> None:
19
+ Structure = BaseStructure | StreamingStructure
20
+
21
+
22
+ def export_po(document: Structure, filepath: str | Path) -> None:
16
23
  path = Path(filepath)
17
24
  path.parent.mkdir(parents=True, exist_ok=True)
18
25
 
@@ -22,7 +29,7 @@ def export_po(document: BaseStructure, filepath: str | Path) -> None:
22
29
  plural_groups: dict[str, list[tuple[str, Data]]] = defaultdict(list)
23
30
  singular_units: list[tuple[str, Data]] = []
24
31
 
25
- for unit_id, unit in document.data.items():
32
+ for unit_id, unit in _iter_items(document):
26
33
  if _PLURAL_SUFFIX_PATTERN in unit_id and unit.plural is not None:
27
34
  base_id = unit_id[: unit_id.index(_PLURAL_SUFFIX_PATTERN)]
28
35
  plural_groups[base_id].append((unit_id, unit))
@@ -37,14 +44,30 @@ def export_po(document: BaseStructure, filepath: str | Path) -> None:
37
44
  for base_id, forms in plural_groups.items():
38
45
  po.append(_build_plural_entry(base_id, forms))
39
46
 
40
- po.save(str(path))
47
+ tmp = tempfile.NamedTemporaryFile(
48
+ dir=path.parent,
49
+ prefix=f".{path.name}.",
50
+ suffix=".tmp",
51
+ delete=False,
52
+ )
53
+ tmp_path = Path(tmp.name)
54
+ tmp.close()
55
+ try:
56
+ po.save(str(tmp_path))
57
+ with tmp_path.open("rb") as f:
58
+ os.fsync(f.fileno())
59
+ os.replace(tmp_path, path)
60
+ except BaseException:
61
+ with contextlib.suppress(FileNotFoundError):
62
+ tmp_path.unlink()
63
+ raise
41
64
 
42
65
 
43
66
  async def export_po_async(document: BaseStructure, filepath: str | Path) -> None:
44
67
  await asyncio.to_thread(export_po, document, filepath)
45
68
 
46
69
 
47
- def _build_metadata(document: BaseStructure) -> dict[str, str]:
70
+ def _build_metadata(document: Structure) -> dict[str, str]:
48
71
  meta: dict[str, str] = {
49
72
  "Content-Type": "text/plain; charset=UTF-8",
50
73
  "Content-Transfer-Encoding": "8bit",
@@ -160,3 +183,9 @@ def _apply_occurrences(entry: Any, unit: Data) -> None:
160
183
  else:
161
184
  occurrences.append((ref, ""))
162
185
  entry.occurrences = occurrences
186
+
187
+
188
+ def _iter_items(document: Structure) -> Iterable[tuple[str, Data]]:
189
+ if isinstance(document, BaseStructure):
190
+ return document.data.items()
191
+ return document.items