lokit-python 0.1.1__tar.gz → 0.1.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {lokit_python-0.1.1 → lokit_python-0.1.3}/PKG-INFO +1 -1
- {lokit_python-0.1.1 → lokit_python-0.1.3}/pyproject.toml +1 -1
- {lokit_python-0.1.1 → lokit_python-0.1.3}/src/lokit/__init__.py +14 -0
- {lokit_python-0.1.1 → lokit_python-0.1.3}/src/lokit/exporters/tmx.py +131 -48
- {lokit_python-0.1.1 → lokit_python-0.1.3}/src/lokit/exporters/xliff.py +36 -20
- {lokit_python-0.1.1 → lokit_python-0.1.3}/src/lokit/format_detection.py +6 -3
- {lokit_python-0.1.1 → lokit_python-0.1.3}/src/lokit/importers.py +175 -12
- {lokit_python-0.1.1 → lokit_python-0.1.3}/src/lokit/io/__init__.py +4 -1
- lokit_python-0.1.3/src/lokit/io/atomic.py +90 -0
- lokit_python-0.1.3/src/lokit/io/stream_json.py +158 -0
- {lokit_python-0.1.1 → lokit_python-0.1.3}/src/lokit/logic.py +21 -1
- lokit_python-0.1.3/src/lokit/parsers/async_bridge.py +107 -0
- {lokit_python-0.1.1 → lokit_python-0.1.3}/src/lokit/parsers/tmx/base.py +79 -34
- lokit_python-0.1.3/src/lokit/parsers/tmx/extraction.py +162 -0
- {lokit_python-0.1.1 → lokit_python-0.1.3}/src/lokit/parsers/tmx/models.py +7 -0
- lokit_python-0.1.3/src/lokit/parsers/tmx/parallel.py +154 -0
- {lokit_python-0.1.1 → lokit_python-0.1.3}/src/lokit/parsers/tmx/props.py +34 -12
- {lokit_python-0.1.1 → lokit_python-0.1.3}/src/lokit/parsers/tmx/tags.py +16 -6
- {lokit_python-0.1.1 → lokit_python-0.1.3}/src/lokit/parsers/tmx/xml_utils.py +13 -1
- {lokit_python-0.1.1 → lokit_python-0.1.3}/src/lokit/parsers/xliff/tags.py +20 -14
- {lokit_python-0.1.1 → lokit_python-0.1.3}/src/lokit_python.egg-info/PKG-INFO +1 -1
- {lokit_python-0.1.1 → lokit_python-0.1.3}/src/lokit_python.egg-info/SOURCES.txt +2 -0
- lokit_python-0.1.3/src/lokit_python.egg-info/top_level.txt +2 -0
- {lokit_python-0.1.1 → lokit_python-0.1.3}/tests/test_performance_safety.py +90 -5
- lokit_python-0.1.1/src/lokit/io/atomic.py +0 -39
- lokit_python-0.1.1/src/lokit/parsers/async_bridge.py +0 -81
- lokit_python-0.1.1/src/lokit/parsers/tmx/extraction.py +0 -107
- lokit_python-0.1.1/src/lokit_python.egg-info/top_level.txt +0 -2
- {lokit_python-0.1.1 → lokit_python-0.1.3}/README.md +0 -0
- {lokit_python-0.1.1 → lokit_python-0.1.3}/setup.cfg +0 -0
- {lokit_python-0.1.1 → lokit_python-0.1.3}/setup.py +0 -0
- {lokit_python-0.1.1 → lokit_python-0.1.3}/src/lokit/core/__init__.py +0 -0
- {lokit_python-0.1.1 → lokit_python-0.1.3}/src/lokit/core/logger.py +0 -0
- {lokit_python-0.1.1 → lokit_python-0.1.3}/src/lokit/data/__init__.py +0 -0
- {lokit_python-0.1.1 → lokit_python-0.1.3}/src/lokit/data/lang_codes.py +0 -0
- {lokit_python-0.1.1 → lokit_python-0.1.3}/src/lokit/data/structure.py +0 -0
- {lokit_python-0.1.1 → lokit_python-0.1.3}/src/lokit/data/tag_types.py +0 -0
- {lokit_python-0.1.1 → lokit_python-0.1.3}/src/lokit/exporters/__init__.py +0 -0
- {lokit_python-0.1.1 → lokit_python-0.1.3}/src/lokit/exporters/csv.py +0 -0
- {lokit_python-0.1.1 → lokit_python-0.1.3}/src/lokit/exporters/html.py +0 -0
- {lokit_python-0.1.1 → lokit_python-0.1.3}/src/lokit/exporters/idml.py +0 -0
- {lokit_python-0.1.1 → lokit_python-0.1.3}/src/lokit/exporters/json_i18n.py +0 -0
- {lokit_python-0.1.1 → lokit_python-0.1.3}/src/lokit/exporters/po.py +0 -0
- {lokit_python-0.1.1 → lokit_python-0.1.3}/src/lokit/exporters/xlsx.py +0 -0
- {lokit_python-0.1.1 → lokit_python-0.1.3}/src/lokit/io/json.py +0 -0
- {lokit_python-0.1.1 → lokit_python-0.1.3}/src/lokit/parsers/__init__.py +0 -0
- {lokit_python-0.1.1 → lokit_python-0.1.3}/src/lokit/parsers/csv/__init__.py +0 -0
- {lokit_python-0.1.1 → lokit_python-0.1.3}/src/lokit/parsers/csv/extraction.py +0 -0
- {lokit_python-0.1.1 → lokit_python-0.1.3}/src/lokit/parsers/html/__init__.py +0 -0
- {lokit_python-0.1.1 → lokit_python-0.1.3}/src/lokit/parsers/html/extraction.py +0 -0
- {lokit_python-0.1.1 → lokit_python-0.1.3}/src/lokit/parsers/idml/__init__.py +0 -0
- {lokit_python-0.1.1 → lokit_python-0.1.3}/src/lokit/parsers/idml/extraction.py +0 -0
- {lokit_python-0.1.1 → lokit_python-0.1.3}/src/lokit/parsers/json_i18n/__init__.py +0 -0
- {lokit_python-0.1.1 → lokit_python-0.1.3}/src/lokit/parsers/json_i18n/extraction.py +0 -0
- {lokit_python-0.1.1 → lokit_python-0.1.3}/src/lokit/parsers/po/__init__.py +0 -0
- {lokit_python-0.1.1 → lokit_python-0.1.3}/src/lokit/parsers/po/extraction.py +0 -0
- {lokit_python-0.1.1 → lokit_python-0.1.3}/src/lokit/parsers/tmx/__init__.py +0 -0
- {lokit_python-0.1.1 → lokit_python-0.1.3}/src/lokit/parsers/tmx/header.py +0 -0
- {lokit_python-0.1.1 → lokit_python-0.1.3}/src/lokit/parsers/tmx/helpers.py +0 -0
- {lokit_python-0.1.1 → lokit_python-0.1.3}/src/lokit/parsers/xliff/__init__.py +0 -0
- {lokit_python-0.1.1 → lokit_python-0.1.3}/src/lokit/parsers/xliff/extraction.py +0 -0
- {lokit_python-0.1.1 → lokit_python-0.1.3}/src/lokit/parsers/xlsx/__init__.py +0 -0
- {lokit_python-0.1.1 → lokit_python-0.1.3}/src/lokit/parsers/xlsx/extraction.py +0 -0
- {lokit_python-0.1.1 → lokit_python-0.1.3}/src/lokit/py.typed +0 -0
- {lokit_python-0.1.1 → lokit_python-0.1.3}/src/lokit_python.egg-info/dependency_links.txt +0 -0
- {lokit_python-0.1.1 → lokit_python-0.1.3}/src/lokit_python.egg-info/requires.txt +0 -0
- {lokit_python-0.1.1 → lokit_python-0.1.3}/tests/test_csv.py +0 -0
- {lokit_python-0.1.1 → lokit_python-0.1.3}/tests/test_html.py +0 -0
- {lokit_python-0.1.1 → lokit_python-0.1.3}/tests/test_idml.py +0 -0
- {lokit_python-0.1.1 → lokit_python-0.1.3}/tests/test_json_i18n.py +0 -0
- {lokit_python-0.1.1 → lokit_python-0.1.3}/tests/test_po.py +0 -0
- {lokit_python-0.1.1 → lokit_python-0.1.3}/tests/test_xlsx.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: lokit-python
|
|
3
|
-
Version: 0.1.1
|
|
3
|
+
Version: 0.1.3
|
|
4
4
|
Summary: A type-safe localization toolkit for parsing, converting, and matching TMX, XLIFF, PO, JSON, HTML, CSV, XLSX, and IDML files.
|
|
5
5
|
Requires-Python: >=3.12
|
|
6
6
|
Description-Content-Type: text/markdown
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "lokit-python"
|
|
3
|
-
version = "0.1.1"
|
|
3
|
+
version = "0.1.3"
|
|
4
4
|
description = "A type-safe localization toolkit for parsing, converting, and matching TMX, XLIFF, PO, JSON, HTML, CSV, XLSX, and IDML files."
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
requires-python = ">=3.12"
|
|
@@ -51,7 +51,11 @@ from lokit.importers import (
|
|
|
51
51
|
import_po_async,
|
|
52
52
|
import_tmx,
|
|
53
53
|
import_tmx_async,
|
|
54
|
+
import_tmx_batches_async,
|
|
55
|
+
import_tmx_parallel,
|
|
56
|
+
process_tmx_async,
|
|
54
57
|
stream_tmx,
|
|
58
|
+
stream_tmx_parallel,
|
|
55
59
|
convert_tmx_to_csv,
|
|
56
60
|
convert_tmx_to_tmx,
|
|
57
61
|
convert_tmx_to_xliff,
|
|
@@ -61,6 +65,7 @@ from lokit.importers import (
|
|
|
61
65
|
import_xlsx_async,
|
|
62
66
|
)
|
|
63
67
|
from lokit.io import load_lokit_json, load_lokit_json_bytes
|
|
68
|
+
from lokit.io.stream_json import LokitJsonContext
|
|
64
69
|
from lokit.logic import Lokit, MatchResult
|
|
65
70
|
from lokit.parsers.csv.extraction import CsvExtractor
|
|
66
71
|
from lokit.parsers.xlsx.extraction import XlsxExtractor
|
|
@@ -69,6 +74,8 @@ from lokit.parsers.po.extraction import PoExtractor
|
|
|
69
74
|
from lokit.parsers.json_i18n.extraction import JsonI18nExtractor
|
|
70
75
|
from lokit.parsers.idml.extraction import IdmlExtractor
|
|
71
76
|
from lokit.parsers.tmx.extraction import TmxExtractor
|
|
77
|
+
from lokit.parsers.tmx.models import TmxParseMode
|
|
78
|
+
from lokit.parsers.tmx.parallel import TmxParallelOptions
|
|
72
79
|
from lokit.parsers.xliff.extraction import XliffExtractor
|
|
73
80
|
|
|
74
81
|
__all__ = [
|
|
@@ -80,6 +87,7 @@ __all__ = [
|
|
|
80
87
|
"Data",
|
|
81
88
|
"Meta",
|
|
82
89
|
"Lokit",
|
|
90
|
+
"LokitJsonContext",
|
|
83
91
|
"MatchResult",
|
|
84
92
|
"Origin",
|
|
85
93
|
"Plural",
|
|
@@ -91,6 +99,8 @@ __all__ = [
|
|
|
91
99
|
"TieData",
|
|
92
100
|
"TieType",
|
|
93
101
|
"TmxExtractor",
|
|
102
|
+
"TmxParseMode",
|
|
103
|
+
"TmxParallelOptions",
|
|
94
104
|
"TranslationStatus",
|
|
95
105
|
"XliffExtractor",
|
|
96
106
|
"CsvExtractor",
|
|
@@ -131,7 +141,11 @@ __all__ = [
|
|
|
131
141
|
"import_po_async",
|
|
132
142
|
"import_tmx",
|
|
133
143
|
"import_tmx_async",
|
|
144
|
+
"import_tmx_batches_async",
|
|
145
|
+
"import_tmx_parallel",
|
|
146
|
+
"process_tmx_async",
|
|
134
147
|
"stream_tmx",
|
|
148
|
+
"stream_tmx_parallel",
|
|
135
149
|
"convert_tmx_to_csv",
|
|
136
150
|
"convert_tmx_to_tmx",
|
|
137
151
|
"convert_tmx_to_xliff",
|
|
@@ -1,7 +1,9 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
from collections.abc import Iterable
|
|
4
|
+
from dataclasses import dataclass
|
|
4
5
|
from pathlib import Path
|
|
6
|
+
from typing import Any
|
|
5
7
|
|
|
6
8
|
from lxml import etree
|
|
7
9
|
from lxml.etree import _Element
|
|
@@ -25,6 +27,13 @@ from lokit.io.atomic import atomic_output_path
|
|
|
25
27
|
Structure = BaseStructure | StreamingStructure
|
|
26
28
|
|
|
27
29
|
|
|
30
|
+
@dataclass(slots=True)
|
|
31
|
+
class _CommentSummary:
|
|
32
|
+
creator_id: str | None = None
|
|
33
|
+
project: str | None = None
|
|
34
|
+
system: str | None = None
|
|
35
|
+
|
|
36
|
+
|
|
28
37
|
def export_tmx(document: Structure, filepath: str | Path) -> None:
|
|
29
38
|
path = Path(filepath)
|
|
30
39
|
with atomic_output_path(path, "wb") as stream:
|
|
@@ -75,9 +84,9 @@ def _build_tu(unit_id: str, unit: Data, document: BaseStructure) -> _Element:
|
|
|
75
84
|
attrs["creationdate"] = unit.meta.created
|
|
76
85
|
if unit.meta.updated:
|
|
77
86
|
attrs["changedate"] = unit.meta.updated
|
|
78
|
-
|
|
79
|
-
if creator_id:
|
|
80
|
-
attrs["creationid"] = creator_id
|
|
87
|
+
comment_summary = _comment_summary(unit)
|
|
88
|
+
if comment_summary.creator_id:
|
|
89
|
+
attrs["creationid"] = comment_summary.creator_id
|
|
81
90
|
change_id = unit.meta.extensions.get("change_id")
|
|
82
91
|
if change_id:
|
|
83
92
|
attrs["changeid"] = change_id
|
|
@@ -85,7 +94,7 @@ def _build_tu(unit_id: str, unit: Data, document: BaseStructure) -> _Element:
|
|
|
85
94
|
attrs["usagecount"] = str(unit.meta.usage_count)
|
|
86
95
|
|
|
87
96
|
tu = etree.Element("tu", attrs)
|
|
88
|
-
_append_unit_properties(tu, unit)
|
|
97
|
+
_append_unit_properties(tu, unit, comment_summary)
|
|
89
98
|
_append_comments(tu, unit)
|
|
90
99
|
tu.append(
|
|
91
100
|
_build_tuv(
|
|
@@ -108,7 +117,7 @@ def _build_tu(unit_id: str, unit: Data, document: BaseStructure) -> _Element:
|
|
|
108
117
|
|
|
109
118
|
|
|
110
119
|
def _write_tu(
|
|
111
|
-
xf:
|
|
120
|
+
xf: Any,
|
|
112
121
|
unit_id: str,
|
|
113
122
|
unit: Data,
|
|
114
123
|
document: Structure,
|
|
@@ -118,9 +127,9 @@ def _write_tu(
|
|
|
118
127
|
attrs["creationdate"] = unit.meta.created
|
|
119
128
|
if unit.meta.updated:
|
|
120
129
|
attrs["changedate"] = unit.meta.updated
|
|
121
|
-
|
|
122
|
-
if creator_id:
|
|
123
|
-
attrs["creationid"] = creator_id
|
|
130
|
+
comment_summary = _comment_summary(unit)
|
|
131
|
+
if comment_summary.creator_id:
|
|
132
|
+
attrs["creationid"] = comment_summary.creator_id
|
|
124
133
|
change_id = unit.meta.extensions.get("change_id")
|
|
125
134
|
if change_id:
|
|
126
135
|
attrs["changeid"] = change_id
|
|
@@ -128,31 +137,30 @@ def _write_tu(
|
|
|
128
137
|
attrs["usagecount"] = str(unit.meta.usage_count)
|
|
129
138
|
|
|
130
139
|
with xf.element("tu", attrs):
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
unit.source,
|
|
140
|
-
unit.tags.source_parts if unit.tags else [],
|
|
141
|
-
unit.tags.source_tag_map if unit.tags else {},
|
|
142
|
-
)
|
|
140
|
+
_write_unit_properties(xf, unit, comment_summary)
|
|
141
|
+
_write_comments(xf, unit)
|
|
142
|
+
_write_tuv(
|
|
143
|
+
xf,
|
|
144
|
+
document.source_locale,
|
|
145
|
+
unit.source,
|
|
146
|
+
unit.tags.source_parts if unit.tags else [],
|
|
147
|
+
unit.tags.source_tag_map if unit.tags else {},
|
|
143
148
|
)
|
|
144
149
|
if document.target_locale is not None and unit.target is not None:
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
)
|
|
150
|
+
_write_tuv(
|
|
151
|
+
xf,
|
|
152
|
+
document.target_locale,
|
|
153
|
+
unit.target,
|
|
154
|
+
unit.tags.target_parts if unit.tags else [],
|
|
155
|
+
unit.tags.target_tag_map if unit.tags else {},
|
|
152
156
|
)
|
|
153
157
|
|
|
154
158
|
|
|
155
|
-
def _append_unit_properties(
|
|
159
|
+
def _append_unit_properties(
|
|
160
|
+
tu: _Element,
|
|
161
|
+
unit: Data,
|
|
162
|
+
comment_summary: _CommentSummary | None = None,
|
|
163
|
+
) -> None:
|
|
156
164
|
if unit.status != TranslationStatus.UNKNOWN:
|
|
157
165
|
prop = etree.SubElement(tu, "prop", type="x-status")
|
|
158
166
|
prop.text = unit.status.value
|
|
@@ -167,19 +175,47 @@ def _append_unit_properties(tu: _Element, unit: Data) -> None:
|
|
|
167
175
|
_append_prop_if_present(tu, "x-next-source-text", unit.next_context.source)
|
|
168
176
|
_append_prop_if_present(tu, "x-next-target-text", unit.next_context.target)
|
|
169
177
|
|
|
170
|
-
|
|
171
|
-
if project:
|
|
172
|
-
_append_prop_if_present(tu, "x-project", project)
|
|
178
|
+
summary = comment_summary or _comment_summary(unit)
|
|
179
|
+
if summary.project:
|
|
180
|
+
_append_prop_if_present(tu, "x-project", summary.project)
|
|
173
181
|
|
|
174
|
-
system
|
|
175
|
-
|
|
176
|
-
_append_prop_if_present(tu, "x-system", system)
|
|
182
|
+
if summary.system:
|
|
183
|
+
_append_prop_if_present(tu, "x-system", summary.system)
|
|
177
184
|
|
|
178
185
|
for key, value in unit.extensions.items():
|
|
179
186
|
if key.startswith("property."):
|
|
180
187
|
_append_prop_if_present(tu, _property_type(key), value)
|
|
181
188
|
|
|
182
189
|
|
|
190
|
+
def _write_unit_properties(
|
|
191
|
+
xf: Any,
|
|
192
|
+
unit: Data,
|
|
193
|
+
comment_summary: _CommentSummary,
|
|
194
|
+
) -> None:
|
|
195
|
+
if unit.status != TranslationStatus.UNKNOWN:
|
|
196
|
+
_write_prop(xf, "x-status", unit.status.value)
|
|
197
|
+
|
|
198
|
+
if unit.previous_context is not None:
|
|
199
|
+
_write_prop_if_present(xf, "x-previous-id", unit.previous_context.unit_id)
|
|
200
|
+
_write_prop_if_present(xf, "x-previous-source-text", unit.previous_context.source)
|
|
201
|
+
_write_prop_if_present(xf, "x-previous-target-text", unit.previous_context.target)
|
|
202
|
+
|
|
203
|
+
if unit.next_context is not None:
|
|
204
|
+
_write_prop_if_present(xf, "x-next-id", unit.next_context.unit_id)
|
|
205
|
+
_write_prop_if_present(xf, "x-next-source-text", unit.next_context.source)
|
|
206
|
+
_write_prop_if_present(xf, "x-next-target-text", unit.next_context.target)
|
|
207
|
+
|
|
208
|
+
if comment_summary.project:
|
|
209
|
+
_write_prop(xf, "x-project", comment_summary.project)
|
|
210
|
+
|
|
211
|
+
if comment_summary.system:
|
|
212
|
+
_write_prop(xf, "x-system", comment_summary.system)
|
|
213
|
+
|
|
214
|
+
for key, value in unit.extensions.items():
|
|
215
|
+
if key.startswith("property."):
|
|
216
|
+
_write_prop_if_present(xf, _property_type(key), value)
|
|
217
|
+
|
|
218
|
+
|
|
183
219
|
def _append_comments(tu: _Element, unit: Data) -> None:
|
|
184
220
|
for comment in unit.comments:
|
|
185
221
|
if not comment.context:
|
|
@@ -188,6 +224,13 @@ def _append_comments(tu: _Element, unit: Data) -> None:
|
|
|
188
224
|
note.text = comment.context
|
|
189
225
|
|
|
190
226
|
|
|
227
|
+
def _write_comments(xf: Any, unit: Data) -> None:
|
|
228
|
+
for comment in unit.comments:
|
|
229
|
+
if comment.context:
|
|
230
|
+
with xf.element("note"):
|
|
231
|
+
xf.write(comment.context)
|
|
232
|
+
|
|
233
|
+
|
|
191
234
|
def _build_tuv(
|
|
192
235
|
locale: str,
|
|
193
236
|
text: str,
|
|
@@ -199,6 +242,17 @@ def _build_tuv(
|
|
|
199
242
|
return tuv
|
|
200
243
|
|
|
201
244
|
|
|
245
|
+
def _write_tuv(
|
|
246
|
+
xf: Any,
|
|
247
|
+
locale: str,
|
|
248
|
+
text: str,
|
|
249
|
+
parts: list[SegmentPart],
|
|
250
|
+
tag_map: dict[str, TieData],
|
|
251
|
+
) -> None:
|
|
252
|
+
with xf.element("tuv", lang=locale):
|
|
253
|
+
_write_seg(xf, text, parts, tag_map)
|
|
254
|
+
|
|
255
|
+
|
|
202
256
|
def _build_seg(
|
|
203
257
|
text: str,
|
|
204
258
|
parts: list[SegmentPart],
|
|
@@ -224,6 +278,25 @@ def _build_seg(
|
|
|
224
278
|
return seg
|
|
225
279
|
|
|
226
280
|
|
|
281
|
+
def _write_seg(
|
|
282
|
+
xf: Any,
|
|
283
|
+
text: str,
|
|
284
|
+
parts: list[SegmentPart],
|
|
285
|
+
tag_map: dict[str, TieData],
|
|
286
|
+
) -> None:
|
|
287
|
+
effective_parts = parts if parts else [TextPart(text)]
|
|
288
|
+
pair_numbers = _pair_numbers(tag_map)
|
|
289
|
+
with xf.element("seg"):
|
|
290
|
+
for part in effective_parts:
|
|
291
|
+
if isinstance(part, TextPart):
|
|
292
|
+
xf.write(part.value)
|
|
293
|
+
elif isinstance(part, CodePart):
|
|
294
|
+
code = tag_map.get(part.ref)
|
|
295
|
+
if code is None:
|
|
296
|
+
continue
|
|
297
|
+
xf.write(_build_code_element(code, pair_numbers))
|
|
298
|
+
|
|
299
|
+
|
|
227
300
|
def _build_code_element(code: TieData, pair_numbers: dict[str, str]) -> _Element:
|
|
228
301
|
if code.original_name in {"bpt", "ept", "ph", "it", "ut", "hi"}:
|
|
229
302
|
attrs = dict(code.attributes)
|
|
@@ -284,25 +357,35 @@ def _append_prop_if_present(tu: _Element, prop_type: str, value: str | None) ->
|
|
|
284
357
|
prop.text = value
|
|
285
358
|
|
|
286
359
|
|
|
287
|
-
def
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
return comment.origin.creator_id
|
|
291
|
-
return None
|
|
360
|
+
def _write_prop_if_present(xf: Any, prop_type: str, value: str | None) -> None:
|
|
361
|
+
if value is not None and value != "":
|
|
362
|
+
_write_prop(xf, prop_type, value)
|
|
292
363
|
|
|
293
364
|
|
|
294
|
-
def
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
return comment.origin.project
|
|
298
|
-
return None
|
|
365
|
+
def _write_prop(xf: Any, prop_type: str, value: str) -> None:
|
|
366
|
+
with xf.element("prop", type=prop_type):
|
|
367
|
+
xf.write(value)
|
|
299
368
|
|
|
300
369
|
|
|
301
|
-
def
|
|
370
|
+
def _comment_summary(unit: Data) -> _CommentSummary:
|
|
371
|
+
summary = _CommentSummary()
|
|
302
372
|
for comment in unit.comments:
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
373
|
+
origin = comment.origin
|
|
374
|
+
if origin is None:
|
|
375
|
+
continue
|
|
376
|
+
if summary.creator_id is None and origin.creator_id:
|
|
377
|
+
summary.creator_id = origin.creator_id
|
|
378
|
+
if summary.project is None and origin.project:
|
|
379
|
+
summary.project = origin.project
|
|
380
|
+
if summary.system is None and origin.system:
|
|
381
|
+
summary.system = origin.system
|
|
382
|
+
if (
|
|
383
|
+
summary.creator_id is not None
|
|
384
|
+
and summary.project is not None
|
|
385
|
+
and summary.system is not None
|
|
386
|
+
):
|
|
387
|
+
break
|
|
388
|
+
return summary
|
|
306
389
|
|
|
307
390
|
|
|
308
391
|
def _property_type(key: str) -> str:
|
|
@@ -90,38 +90,54 @@ def _write_file(
|
|
|
90
90
|
with xf.element(f"{{{XLIFF_NS}}}file", attrs):
|
|
91
91
|
xf.write(etree.Element(f"{{{XLIFF_NS}}}header"))
|
|
92
92
|
with xf.element(f"{{{XLIFF_NS}}}body"):
|
|
93
|
-
xf
|
|
93
|
+
_write_trans_unit(xf, first_id, first_unit)
|
|
94
94
|
for unit_id, unit in unit_iter:
|
|
95
|
-
xf
|
|
95
|
+
_write_trans_unit(xf, unit_id, unit)
|
|
96
96
|
|
|
97
97
|
|
|
98
|
-
def
|
|
98
|
+
def _write_trans_unit(xf: Any, unit_id: str, unit: Data) -> None:
|
|
99
99
|
attrs = {"id": unit.extensions.get("unit_id", unit_id)}
|
|
100
100
|
space = unit.extensions.get("space")
|
|
101
101
|
if space:
|
|
102
102
|
attrs["{http://www.w3.org/XML/1998/namespace}space"] = space
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
103
|
+
with xf.element(f"{{{XLIFF_NS}}}trans-unit", attrs):
|
|
104
|
+
_write_segment(
|
|
105
|
+
xf,
|
|
106
106
|
"source",
|
|
107
107
|
unit.source,
|
|
108
108
|
unit.tags.source_parts if unit.tags else [],
|
|
109
109
|
unit.tags.source_tag_map if unit.tags else {},
|
|
110
110
|
)
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
111
|
+
if unit.target is not None:
|
|
112
|
+
_write_segment(
|
|
113
|
+
xf,
|
|
114
|
+
"target",
|
|
115
|
+
unit.target,
|
|
116
|
+
unit.tags.target_parts if unit.tags else [],
|
|
117
|
+
unit.tags.target_tag_map if unit.tags else {},
|
|
118
|
+
)
|
|
119
|
+
for comment in unit.comments:
|
|
120
|
+
if comment.context:
|
|
121
|
+
with xf.element(f"{{{XLIFF_NS}}}note"):
|
|
122
|
+
xf.write(comment.context)
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def _write_segment(
|
|
126
|
+
xf: Any,
|
|
127
|
+
name: str,
|
|
128
|
+
text: str,
|
|
129
|
+
parts: list[SegmentPart],
|
|
130
|
+
tag_map: dict[str, TieData],
|
|
131
|
+
) -> None:
|
|
132
|
+
with xf.element(f"{{{XLIFF_NS}}}{name}"):
|
|
133
|
+
effective_parts = parts if parts else [TextPart(text)]
|
|
134
|
+
for part in effective_parts:
|
|
135
|
+
if isinstance(part, TextPart):
|
|
136
|
+
xf.write(part.value)
|
|
137
|
+
elif isinstance(part, CodePart):
|
|
138
|
+
code = tag_map.get(part.ref)
|
|
139
|
+
if code is not None:
|
|
140
|
+
xf.write(_build_code(code))
|
|
125
141
|
|
|
126
142
|
|
|
127
143
|
def _build_segment(
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import json
|
|
4
|
+
import re
|
|
4
5
|
import zipfile
|
|
5
6
|
from enum import StrEnum
|
|
6
7
|
from io import BytesIO
|
|
@@ -8,6 +9,8 @@ from pathlib import Path
|
|
|
8
9
|
|
|
9
10
|
from lokit.parsers.tmx.xml_utils import iterparse_safe, local_name
|
|
10
11
|
|
|
12
|
+
_JSON_FORMAT_RE = re.compile(r'"(?:format_version|data)"\s*:')
|
|
13
|
+
|
|
11
14
|
|
|
12
15
|
class LokitInputFormat(StrEnum):
|
|
13
16
|
TMX = "tmx"
|
|
@@ -36,9 +39,9 @@ def detect_format(filepath: str | Path) -> LokitInputFormat:
|
|
|
36
39
|
return LokitInputFormat.IDML
|
|
37
40
|
if suffix == ".json":
|
|
38
41
|
try:
|
|
39
|
-
with path.open("
|
|
40
|
-
data =
|
|
41
|
-
if
|
|
42
|
+
with path.open("rb") as f:
|
|
43
|
+
data = f.read(4096)
|
|
44
|
+
if _JSON_FORMAT_RE.search(data.decode("utf-8", errors="ignore")):
|
|
42
45
|
return LokitInputFormat.LOKIT_JSON
|
|
43
46
|
except Exception:
|
|
44
47
|
pass
|