lokit-python 0.1.0__cp313-cp313-win32.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (97) hide show
  1. 821d8b73c2a02cb7980f__mypyc.cp313-win32.pyd +0 -0
  2. lokit/__init__.cp313-win32.pyd +0 -0
  3. lokit/__init__.py +128 -0
  4. lokit/core/__init__.cp313-win32.pyd +0 -0
  5. lokit/core/__init__.py +0 -0
  6. lokit/core/logger.cp313-win32.pyd +0 -0
  7. lokit/core/logger.py +20 -0
  8. lokit/data/__init__.cp313-win32.pyd +0 -0
  9. lokit/data/__init__.py +0 -0
  10. lokit/data/lang_codes.cp313-win32.pyd +0 -0
  11. lokit/data/lang_codes.py +455 -0
  12. lokit/data/structure.cp313-win32.pyd +0 -0
  13. lokit/data/structure.py +118 -0
  14. lokit/data/tag_types.cp313-win32.pyd +0 -0
  15. lokit/data/tag_types.py +78 -0
  16. lokit/exporters/__init__.cp313-win32.pyd +0 -0
  17. lokit/exporters/__init__.py +34 -0
  18. lokit/exporters/csv.cp313-win32.pyd +0 -0
  19. lokit/exporters/csv.py +32 -0
  20. lokit/exporters/html.cp313-win32.pyd +0 -0
  21. lokit/exporters/html.py +217 -0
  22. lokit/exporters/idml.cp313-win32.pyd +0 -0
  23. lokit/exporters/idml.py +178 -0
  24. lokit/exporters/json_i18n.cp313-win32.pyd +0 -0
  25. lokit/exporters/json_i18n.py +47 -0
  26. lokit/exporters/po.cp313-win32.pyd +0 -0
  27. lokit/exporters/po.py +162 -0
  28. lokit/exporters/tmx.cp313-win32.pyd +0 -0
  29. lokit/exporters/tmx.py +247 -0
  30. lokit/exporters/xliff.cp313-win32.pyd +0 -0
  31. lokit/exporters/xliff.py +152 -0
  32. lokit/exporters/xlsx.cp313-win32.pyd +0 -0
  33. lokit/exporters/xlsx.py +39 -0
  34. lokit/format_detection.cp313-win32.pyd +0 -0
  35. lokit/format_detection.py +115 -0
  36. lokit/importers.py +321 -0
  37. lokit/io/__init__.cp313-win32.pyd +0 -0
  38. lokit/io/__init__.py +3 -0
  39. lokit/io/json.cp313-win32.pyd +0 -0
  40. lokit/io/json.py +194 -0
  41. lokit/logic.cp313-win32.pyd +0 -0
  42. lokit/logic.py +324 -0
  43. lokit/parsers/__init__.cp313-win32.pyd +0 -0
  44. lokit/parsers/__init__.py +1 -0
  45. lokit/parsers/csv/__init__.cp313-win32.pyd +0 -0
  46. lokit/parsers/csv/__init__.py +1 -0
  47. lokit/parsers/csv/extraction.cp313-win32.pyd +0 -0
  48. lokit/parsers/csv/extraction.py +164 -0
  49. lokit/parsers/html/__init__.cp313-win32.pyd +0 -0
  50. lokit/parsers/html/__init__.py +3 -0
  51. lokit/parsers/html/extraction.cp313-win32.pyd +0 -0
  52. lokit/parsers/html/extraction.py +365 -0
  53. lokit/parsers/idml/__init__.cp313-win32.pyd +0 -0
  54. lokit/parsers/idml/__init__.py +3 -0
  55. lokit/parsers/idml/extraction.cp313-win32.pyd +0 -0
  56. lokit/parsers/idml/extraction.py +264 -0
  57. lokit/parsers/json_i18n/__init__.cp313-win32.pyd +0 -0
  58. lokit/parsers/json_i18n/__init__.py +3 -0
  59. lokit/parsers/json_i18n/extraction.cp313-win32.pyd +0 -0
  60. lokit/parsers/json_i18n/extraction.py +163 -0
  61. lokit/parsers/po/__init__.cp313-win32.pyd +0 -0
  62. lokit/parsers/po/__init__.py +3 -0
  63. lokit/parsers/po/extraction.cp313-win32.pyd +0 -0
  64. lokit/parsers/po/extraction.py +236 -0
  65. lokit/parsers/tmx/__init__.cp313-win32.pyd +0 -0
  66. lokit/parsers/tmx/__init__.py +0 -0
  67. lokit/parsers/tmx/base.cp313-win32.pyd +0 -0
  68. lokit/parsers/tmx/base.py +145 -0
  69. lokit/parsers/tmx/extraction.cp313-win32.pyd +0 -0
  70. lokit/parsers/tmx/extraction.py +170 -0
  71. lokit/parsers/tmx/header.cp313-win32.pyd +0 -0
  72. lokit/parsers/tmx/header.py +55 -0
  73. lokit/parsers/tmx/helpers.cp313-win32.pyd +0 -0
  74. lokit/parsers/tmx/helpers.py +9 -0
  75. lokit/parsers/tmx/models.cp313-win32.pyd +0 -0
  76. lokit/parsers/tmx/models.py +10 -0
  77. lokit/parsers/tmx/props.cp313-win32.pyd +0 -0
  78. lokit/parsers/tmx/props.py +201 -0
  79. lokit/parsers/tmx/tags.cp313-win32.pyd +0 -0
  80. lokit/parsers/tmx/tags.py +59 -0
  81. lokit/parsers/tmx/xml_utils.cp313-win32.pyd +0 -0
  82. lokit/parsers/tmx/xml_utils.py +46 -0
  83. lokit/parsers/xliff/__init__.cp313-win32.pyd +0 -0
  84. lokit/parsers/xliff/__init__.py +3 -0
  85. lokit/parsers/xliff/extraction.cp313-win32.pyd +0 -0
  86. lokit/parsers/xliff/extraction.py +229 -0
  87. lokit/parsers/xliff/tags.cp313-win32.pyd +0 -0
  88. lokit/parsers/xliff/tags.py +128 -0
  89. lokit/parsers/xlsx/__init__.cp313-win32.pyd +0 -0
  90. lokit/parsers/xlsx/__init__.py +1 -0
  91. lokit/parsers/xlsx/extraction.cp313-win32.pyd +0 -0
  92. lokit/parsers/xlsx/extraction.py +198 -0
  93. lokit/py.typed +1 -0
  94. lokit_python-0.1.0.dist-info/METADATA +149 -0
  95. lokit_python-0.1.0.dist-info/RECORD +97 -0
  96. lokit_python-0.1.0.dist-info/WHEEL +5 -0
  97. lokit_python-0.1.0.dist-info/top_level.txt +2 -0
@@ -0,0 +1,47 @@
1
+ from __future__ import annotations
2
+
3
+ import asyncio
4
+ import json
5
+ from pathlib import Path
6
+ from typing import Any
7
+
8
+ from lokit.data.structure import BaseStructure
9
+
10
+
11
def export_json_i18n(
    document: BaseStructure,
    filepath: str | Path,
    nested: bool = True,
) -> None:
    """Write the document's units to *filepath* as a UTF-8 JSON file.

    Each unit contributes one entry whose value is the unit's target, or
    its source when the target is ``None``. When ``nested`` is true the
    unit key is expanded into nested objects by ``_set_nested``; otherwise
    the key is written as-is. Missing parent directories are created, and
    the file ends with a trailing newline.
    """
    destination = Path(filepath)
    destination.parent.mkdir(parents=True, exist_ok=True)

    payload: dict[str, Any] = {}
    for key, unit in document.data.items():
        text = unit.source if unit.target is None else unit.target
        if not nested:
            payload[key] = text
            continue
        _set_nested(payload, key, text)

    with destination.open("w", encoding="utf-8") as handle:
        json.dump(payload, handle, ensure_ascii=False, indent=2)
        handle.write("\n")
30
+
31
+
32
async def export_json_i18n_async(
    document: BaseStructure,
    filepath: str | Path,
    nested: bool = True,
) -> None:
    """Await ``export_json_i18n`` executed in a worker thread."""
    pending = asyncio.to_thread(export_json_i18n, document, filepath, nested)
    await pending
38
+
39
+
40
+ def _set_nested(obj: dict[str, Any], dot_key: str, value: str) -> None:
41
+ parts = dot_key.split(".")
42
+ current = obj
43
+ for part in parts[:-1]:
44
+ if part not in current or not isinstance(current[part], dict):
45
+ current[part] = {}
46
+ current = current[part]
47
+ current[parts[-1]] = value
Binary file
lokit/exporters/po.py ADDED
@@ -0,0 +1,162 @@
1
+ from __future__ import annotations
2
+
3
+ import asyncio
4
+ from collections import defaultdict
5
+ from pathlib import Path
6
+ from typing import Any
7
+
8
+ import polib
9
+
10
+ from lokit.data.structure import BaseStructure, Data, TranslationStatus
11
+
12
+ # Character that opens the plural-form index suffix inside a unit id.
+ _PLURAL_SUFFIX_PATTERN = "["
13
+
14
+
15
def _plural_base_id(unit_id: str) -> str:
    """Return *unit_id* without a trailing ``[<digits>]`` plural-form suffix."""
    if unit_id.endswith("]"):
        start = unit_id.rfind(_PLURAL_SUFFIX_PATTERN)
        if start != -1 and unit_id[start + 1 : -1].isdecimal():
            return unit_id[:start]
    return unit_id


def export_po(document: BaseStructure, filepath: str | Path) -> None:
    """Write *document* to *filepath* as a gettext PO file.

    Units without plural data become ordinary entries. Units with plural
    data are grouped by their id minus a trailing ``[<digits>]`` suffix and
    each group becomes one plural entry. All singular entries are appended
    before the plural ones. Missing parent directories are created.
    """
    path = Path(filepath)
    path.parent.mkdir(parents=True, exist_ok=True)

    po: Any = polib.POFile()
    po.metadata = _build_metadata(document)

    plural_groups: dict[str, list[tuple[str, Data]]] = defaultdict(list)
    singular_units: list[tuple[str, Data]] = []

    for unit_id, unit in document.data.items():
        if unit.plural is None:
            singular_units.append((unit_id, unit))
            continue
        # Only a trailing "[<digits>]" counts as a form suffix, so a bracket
        # that is part of the message text no longer truncates the group id.
        plural_groups[_plural_base_id(unit_id)].append((unit_id, unit))

    for unit_id, unit in singular_units:
        po.append(_build_entry(unit_id, unit))

    for base_id, forms in plural_groups.items():
        po.append(_build_plural_entry(base_id, forms))

    po.save(str(path))
41
+
42
+
43
async def export_po_async(document: BaseStructure, filepath: str | Path) -> None:
    """Await ``export_po`` executed in a worker thread."""
    pending = asyncio.to_thread(export_po, document, filepath)
    await pending
45
+
46
+
47
+ def _build_metadata(document: BaseStructure) -> dict[str, str]:
48
+ meta: dict[str, str] = {
49
+ "Content-Type": "text/plain; charset=UTF-8",
50
+ "Content-Transfer-Encoding": "8bit",
51
+ }
52
+ if document.target_locale:
53
+ meta["Language"] = document.target_locale
54
+ if document.export_origin:
55
+ meta["X-Generator"] = document.export_origin
56
+ return meta
57
+
58
+
59
+ def _parse_unit_id(unit_id: str) -> tuple[str | None, str]:
60
+ if "\x04" in unit_id:
61
+ ctx, msgid = unit_id.split("\x04", 1)
62
+ return ctx, msgid
63
+ return None, unit_id
64
+
65
+
66
def _build_entry(unit_id: str, unit: Data) -> Any:
    """Create a singular ``polib.POEntry`` for *unit*.

    The msgid and an optional context come from *unit_id*; a context key
    found on the unit's comments takes precedence over the parsed context.
    A missing target is written as an empty msgstr. Comments, flags and
    occurrences are then applied to the entry.
    """
    parsed_context, message_id = _parse_unit_id(unit_id)
    override = _find_context_key(unit)
    context = parsed_context if override is None else override

    po_entry: Any = polib.POEntry(
        msgid=message_id,
        msgstr=unit.target or "",
        msgctxt=context,
    )

    for decorate in (_apply_comments, _apply_flags, _apply_occurrences):
        decorate(po_entry, unit)
    return po_entry
82
+
83
+
84
def _plural_form_index(unit_id: str) -> int | None:
    """Return the integer inside a trailing ``[<digits>]`` suffix, else ``None``."""
    if not unit_id.endswith("]"):
        return None
    start = unit_id.rfind(_PLURAL_SUFFIX_PATTERN)
    digits = unit_id[start + 1 : -1]
    if start == -1 or not digits.isdecimal():
        return None
    return int(digits)


def _build_plural_entry(
    base_id: str, forms: list[tuple[str, Data]]
) -> Any:
    """Create one plural ``polib.POEntry`` from a group of unit forms.

    Args:
        base_id: Group id; parsed into an optional context and the msgid.
        forms: ``(unit_id, unit)`` pairs of the group. The first unit
            supplies the plural variant (msgid_plural), the context key,
            comments, flags, occurrences and the initial form-0 text.

    Returns:
        The populated plural entry.

    A form whose id ends in ``[<digits>]`` sets the msgstr for that index.
    Ids without such a suffix are left out of the indexed forms instead of
    raising ``ValueError`` when the bracket content is not an integer.
    """
    msgctxt, msgid = _parse_unit_id(base_id)
    base_unit = forms[0][1]
    context_key = _find_context_key(base_unit)
    if context_key is not None:
        msgctxt = context_key

    variant = base_unit.plural.variant if base_unit.plural else msgid

    # Seed form 0 from the first unit; an explicit "[0]" form overrides it.
    msgstr_plural: dict[int, str] = {0: base_unit.target or ""}
    for uid, unit in forms:
        index = _plural_form_index(uid)
        if index is not None:
            msgstr_plural[index] = unit.target or ""

    entry: Any = polib.POEntry(
        msgid=msgid,
        msgid_plural=variant,
        msgstr_plural=msgstr_plural,
        msgctxt=msgctxt,
    )

    _apply_comments(entry, base_unit)
    _apply_flags(entry, base_unit)
    _apply_occurrences(entry, base_unit)
    return entry
115
+
116
+
117
+ def _find_context_key(unit: Data) -> str | None:
118
+ for comment in unit.comments:
119
+ if comment.context_key is not None:
120
+ return comment.context_key
121
+ return None
122
+
123
+
124
+ def _apply_comments(entry: Any, unit: Data) -> None:
125
+ translator_comments: list[str] = []
126
+ extracted_comments: list[str] = []
127
+ for i, comment in enumerate(unit.comments):
128
+ if i == 0 and comment.context_key is not None:
129
+ translator_comments.append(comment.context)
130
+ elif i == 0:
131
+ translator_comments.append(comment.context)
132
+ else:
133
+ extracted_comments.append(comment.context)
134
+ if translator_comments:
135
+ entry.comment = "\n".join(translator_comments)
136
+ if extracted_comments:
137
+ entry.tcomment = "\n".join(extracted_comments)
138
+
139
+
140
def _apply_flags(entry: Any, unit: Data) -> None:
    """Set ``entry.flags`` from the unit's status and stored flag list.

    A unit in ``DRAFT`` status gets the ``fuzzy`` flag. The comma-separated
    ``flags`` extension is appended after it. Blank items are dropped and a
    flag already present is not repeated, so ``fuzzy`` is emitted once even
    when both the status and the stored flags ask for it.
    """
    flags: list[str] = []
    if unit.status == TranslationStatus.DRAFT:
        flags.append("fuzzy")
    extra = unit.extensions.get("flags")
    if extra:
        for raw in extra.split(","):
            flag = raw.strip()
            if flag and flag not in flags:
                flags.append(flag)
    entry.flags = flags
148
+
149
+
150
+ def _apply_occurrences(entry: Any, unit: Data) -> None:
151
+ refs = unit.extensions.get("references")
152
+ if not refs:
153
+ return
154
+ occurrences: list[tuple[str, str]] = []
155
+ for ref in refs.split(","):
156
+ ref = ref.strip()
157
+ if ":" in ref:
158
+ path, line = ref.rsplit(":", 1)
159
+ occurrences.append((path, line))
160
+ else:
161
+ occurrences.append((ref, ""))
162
+ entry.occurrences = occurrences
Binary file
lokit/exporters/tmx.py ADDED
@@ -0,0 +1,247 @@
1
+ from __future__ import annotations
2
+
3
+ from pathlib import Path
4
+
5
+ from lxml import etree
6
+ from lxml.etree import _Element
7
+
8
+ from lokit.data.structure import (
9
+ BaseStructure,
10
+ CodePart,
11
+ Data,
12
+ SegmentPart,
13
+ TextPart,
14
+ TranslationStatus,
15
+ )
16
+ from lokit.data.tag_types import TieData, TieType
17
+ from lokit.io.json import load_lokit_json
18
+
19
+
20
def export_tmx(document: BaseStructure, filepath: str | Path) -> None:
    """Stream *document* to *filepath* as a TMX 1.4 file.

    Writes the XML declaration, a ``tmx`` root holding the header built
    from the document, and a ``body`` with one ``tu`` per unit. Missing
    parent directories are created.
    """
    destination = Path(filepath)
    destination.parent.mkdir(parents=True, exist_ok=True)
    with destination.open("wb") as raw, etree.xmlfile(raw, encoding="UTF-8") as writer:
        writer.write_declaration()
        with writer.element("tmx", version="1.4"):
            writer.write(_build_header(document))
            with writer.element("body"):
                for unit_id, unit in document.data.items():
                    writer.write(_build_tu(unit_id, unit, document))
31
+
32
+
33
def export_tmx_from_json(source_json: str | Path, target_tmx: str | Path) -> None:
    """Load a document with ``load_lokit_json`` and export it as TMX."""
    document = load_lokit_json(source_json)
    export_tmx(document, target_tmx)
35
+
36
+
37
def _build_header(document: BaseStructure) -> _Element:
    """Build the TMX ``header`` element for *document*.

    Attribute values come from the document's extensions, with fallbacks
    for each. ``creationdate`` is added when the document has an export
    timestamp. Every extension whose key starts with ``property.`` becomes
    a ``prop`` child.
    """
    ext = document.extensions
    source_lang = document.source_locale
    attributes = {
        "creationtool": ext.get("tool_name", "lokit"),
        "creationtoolversion": ext.get("tool_version", "0.1"),
        "segtype": ext.get("segmentation", "sentence"),
        "o-tmf": ext.get("translation_memory_format", "lokit"),
        "adminlang": ext.get("admin_locale", source_lang),
        "srclang": source_lang,
        "datatype": ext.get("data_type", "text"),
    }
    header = etree.Element("header", attributes)

    timestamp = document.export_timestamp
    if timestamp:
        header.attrib["creationdate"] = timestamp

    for key, value in ext.items():
        if not key.startswith("property."):
            continue
        etree.SubElement(header, "prop", type=_property_type(key)).text = value
    return header
57
+
58
+
59
def _build_tu(unit_id: str, unit: Data, document: BaseStructure) -> _Element:
    """Build the ``tu`` element for one unit.

    The element carries the unit id plus whichever optional metadata
    attributes are present, then the unit's properties and notes, then a
    source ``tuv``. A target ``tuv`` is added only when both the document's
    target locale and the unit's target are not ``None``.
    """
    meta = unit.meta
    attributes: dict[str, str] = {"tuid": unit_id}
    optional = (
        ("creationdate", meta.created),
        ("changedate", meta.updated),
        ("creationid", _first_creator_id(unit)),
        ("changeid", meta.extensions.get("change_id")),
    )
    for attr_name, attr_value in optional:
        if attr_value:
            attributes[attr_name] = attr_value
    if meta.usage_count is not None:
        attributes["usagecount"] = str(meta.usage_count)

    element = etree.Element("tu", attributes)
    _append_unit_properties(element, unit)
    _append_comments(element, unit)

    tags = unit.tags
    source_tuv = _build_tuv(
        document.source_locale,
        unit.source,
        tags.source_parts if tags else [],
        tags.source_tag_map if tags else {},
    )
    element.append(source_tuv)

    target_locale = document.target_locale
    if target_locale is not None and unit.target is not None:
        target_tuv = _build_tuv(
            target_locale,
            unit.target,
            tags.target_parts if tags else [],
            tags.target_tag_map if tags else {},
        )
        element.append(target_tuv)
    return element
95
+
96
+
97
def _append_unit_properties(tu: _Element, unit: Data) -> None:
    """Append ``prop`` children describing *unit* to *tu*.

    Emits, in order: the status (unless it is ``UNKNOWN``), the previous
    and next context fields, the first project and system found on the
    unit's comments, and every extension whose key starts with
    ``property.``.
    """
    if unit.status != TranslationStatus.UNKNOWN:
        etree.SubElement(tu, "prop", type="x-status").text = unit.status.value

    neighbours = (
        (
            unit.previous_context,
            "x-previous-id",
            "x-previous-source-text",
            "x-previous-target-text",
        ),
        (
            unit.next_context,
            "x-next-id",
            "x-next-source-text",
            "x-next-target-text",
        ),
    )
    for neighbour, id_type, source_type, target_type in neighbours:
        if neighbour is None:
            continue
        _append_prop_if_present(tu, id_type, neighbour.unit_id)
        _append_prop_if_present(tu, source_type, neighbour.source)
        _append_prop_if_present(tu, target_type, neighbour.target)

    origin_props = (
        ("x-project", _first_project(unit)),
        ("x-system", _first_system(unit)),
    )
    for prop_type, found in origin_props:
        if found:
            _append_prop_if_present(tu, prop_type, found)

    custom = [
        (key, value)
        for key, value in unit.extensions.items()
        if key.startswith("property.")
    ]
    for key, value in custom:
        _append_prop_if_present(tu, _property_type(key), value)
123
+
124
+
125
def _append_comments(tu: _Element, unit: Data) -> None:
    """Append one ``note`` child to *tu* per comment with a non-empty text."""
    for text in (comment.context for comment in unit.comments):
        if text:
            etree.SubElement(tu, "note").text = text
131
+
132
+
133
def _build_tuv(
    locale: str,
    text: str,
    parts: list[SegmentPart],
    tag_map: dict[str, TieData],
) -> _Element:
    """Build a ``tuv`` element tagged with ``xml:lang`` and holding one ``seg``."""
    lang_attribute = "{http://www.w3.org/XML/1998/namespace}lang"
    variant = etree.Element("tuv", {lang_attribute: locale})
    segment = _build_seg(text, parts, tag_map)
    variant.append(segment)
    return variant
142
+
143
+
144
def _build_seg(
    text: str,
    parts: list[SegmentPart],
    tag_map: dict[str, TieData],
) -> _Element:
    """Build a ``seg`` element from segment parts.

    When *parts* is empty the whole *text* is used as a single text part.
    Text parts are appended as character data; code parts are resolved
    through *tag_map* and appended as inline elements. A code part with no
    entry in *tag_map* contributes an empty string.
    """
    seg = etree.Element("seg")
    sequence = parts or [TextPart(text)]
    numbering = _pair_numbers(tag_map)
    previous: _Element | None = None

    for part in sequence:
        if isinstance(part, TextPart):
            previous = _append_text(seg, previous, part.value)
            continue
        if not isinstance(part, CodePart):
            continue
        code = tag_map.get(part.ref)
        if code is None:
            previous = _append_text(seg, previous, "")
            continue
        previous = _build_code_element(code, numbering)
        seg.append(previous)

    return seg
167
+
168
+
169
def _build_code_element(code: TieData, pair_numbers: dict[str, str]) -> _Element:
    """Build the inline TMX element for *code*.

    Opening codes become ``bpt``, closing codes ``ept`` and everything else
    ``ph``. Each element's text is a ``lokit`` pseudo-tag carrying the
    code's id where applicable.
    """
    if _is_open(code.type):
        node = etree.Element(
            "bpt",
            {"i": _pair_number(code, pair_numbers), "type": code.type.value},
        )
        node.text = f"<lokit id=\"{code.pair_id or code.id}\">"
    elif _is_close(code.type):
        node = etree.Element("ept", {"i": _pair_number(code, pair_numbers)})
        node.text = "</lokit>"
    else:
        node = etree.Element("ph", {"x": str(code.order), "type": code.type.value})
        node.text = f"<lokit id=\"{code.id}\"/>"
    return node
181
+
182
+
183
+ def _append_text(seg: _Element, last_child: _Element | None, value: str) -> _Element | None:
184
+ if last_child is None:
185
+ seg.text = (seg.text or "") + value
186
+ else:
187
+ last_child.tail = (last_child.tail or "") + value
188
+ return last_child
189
+
190
+
191
+ def _pair_numbers(tag_map: dict[str, TieData]) -> dict[str, str]:
192
+ pair_ids: dict[str, str] = {}
193
+ index = 0
194
+ for code in sorted(tag_map.values(), key=lambda item: item.order):
195
+ if code.pair_id is not None and code.pair_id not in pair_ids:
196
+ pair_ids[code.pair_id] = str(index)
197
+ index += 1
198
+ return pair_ids
199
+
200
+
201
+ def _pair_number(code: TieData, pair_numbers: dict[str, str]) -> str:
202
+ if code.pair_id is None:
203
+ return str(code.order)
204
+ return pair_numbers.get(code.pair_id, str(code.order))
205
+
206
+
207
+ def _is_open(tie_type: TieType) -> bool:
208
+ return tie_type.value.endswith(".open")
209
+
210
+
211
+ def _is_close(tie_type: TieType) -> bool:
212
+ return tie_type.value.endswith(".close")
213
+
214
+
215
+ def _append_prop_if_present(tu: _Element, prop_type: str, value: str | None) -> None:
216
+ if value is None or value == "":
217
+ return
218
+ prop = etree.SubElement(tu, "prop", type=prop_type)
219
+ prop.text = value
220
+
221
+
222
+ def _first_creator_id(unit: Data) -> str | None:
223
+ for comment in unit.comments:
224
+ if comment.origin is not None and comment.origin.creator_id:
225
+ return comment.origin.creator_id
226
+ return None
227
+
228
+
229
+ def _first_project(unit: Data) -> str | None:
230
+ for comment in unit.comments:
231
+ if comment.origin is not None and comment.origin.project:
232
+ return comment.origin.project
233
+ return None
234
+
235
+
236
+ def _first_system(unit: Data) -> str | None:
237
+ for comment in unit.comments:
238
+ if comment.origin is not None and comment.origin.system:
239
+ return comment.origin.system
240
+ return None
241
+
242
+
243
+ def _property_type(key: str) -> str:
244
+ prefix = "property."
245
+ if key.startswith(prefix):
246
+ return key[len(prefix) :]
247
+ return key
Binary file
@@ -0,0 +1,152 @@
1
+ from __future__ import annotations
2
+
3
+ import asyncio
4
+ from collections import OrderedDict
5
+ from pathlib import Path
6
+ from typing import cast
7
+
8
+ from lxml import etree
9
+ from lxml.etree import _Element
10
+
11
+ from lokit.data.structure import BaseStructure, CodePart, Data, SegmentPart, TextPart
12
+ from lokit.data.tag_types import TieData, TieType
13
+ from lokit.io.json import load_lokit_json
14
+
15
+ # Namespace URI applied to every XLIFF element this module creates.
+ XLIFF_NS = "urn:oasis:names:tc:xliff:document:1.2"
16
+ # Binds the default (unprefixed) namespace to XLIFF_NS; passed as nsmap for the root element.
+ NSMAP = cast(dict[str, str], {None: XLIFF_NS})
17
+
18
+
19
def export_xliff(document: BaseStructure, filepath: str | Path) -> None:
    """Stream *document* to *filepath* as an XLIFF 1.2 file.

    Units are grouped by resource; each group becomes one ``file`` element
    with an empty ``header`` and a ``body`` of ``trans-unit`` elements. The
    ``target-language`` attribute is written only when the document has a
    target locale. Missing parent directories are created.
    """
    destination = Path(filepath)
    destination.parent.mkdir(parents=True, exist_ok=True)
    with destination.open("wb") as raw, etree.xmlfile(raw, encoding="UTF-8") as writer:
        writer.write_declaration()
        with writer.element(f"{{{XLIFF_NS}}}xliff", nsmap=NSMAP, version="1.2"):
            for resource_key, units in _group_by_resource(document).items():
                file_attrs = {
                    "original": resource_key or "lokit",
                    "datatype": _first_extension(units, "data_type", "plaintext"),
                    "source-language": document.source_locale,
                }
                target_locale = document.target_locale
                if target_locale is not None:
                    file_attrs["target-language"] = target_locale
                with writer.element(f"{{{XLIFF_NS}}}file", file_attrs):
                    writer.write(etree.Element(f"{{{XLIFF_NS}}}header"))
                    with writer.element(f"{{{XLIFF_NS}}}body"):
                        for unit_id, unit in units:
                            writer.write(_build_trans_unit(unit_id, unit))
39
+
40
+
41
def export_xliff_from_json(source_json: str | Path, target_xliff: str | Path) -> None:
    """Load a document with ``load_lokit_json`` and export it as XLIFF."""
    document = load_lokit_json(source_json)
    export_xliff(document, target_xliff)
43
+
44
+
45
async def export_xliff_async(document: BaseStructure, filepath: str | Path) -> None:
    """Await ``export_xliff`` executed in a worker thread."""
    pending = asyncio.to_thread(export_xliff, document, filepath)
    await pending
47
+
48
+
49
async def export_xliff_from_json_async(
    source_json: str | Path, target_xliff: str | Path
) -> None:
    """Await ``export_xliff_from_json`` executed in a worker thread."""
    pending = asyncio.to_thread(export_xliff_from_json, source_json, target_xliff)
    await pending
53
+
54
+
55
+ def _group_by_resource(
56
+ document: BaseStructure,
57
+ ) -> OrderedDict[str, list[tuple[str, Data]]]:
58
+ groups: OrderedDict[str, list[tuple[str, Data]]] = OrderedDict()
59
+ for unit_id, unit in document.data.items():
60
+ resource = unit.extensions.get("resource", "lokit")
61
+ groups.setdefault(resource, []).append((unit_id, unit))
62
+ return groups
63
+
64
+
65
def _build_trans_unit(unit_id: str, unit: Data) -> _Element:
    """Build the ``trans-unit`` element for one unit.

    The ``id`` attribute uses the ``unit_id`` extension when present,
    otherwise *unit_id*. ``xml:space`` is set from the ``space`` extension
    when truthy. A ``source`` child is always written, a ``target`` child
    only when the unit's target is not ``None``, followed by one ``note``
    per comment with a non-empty text.
    """
    ext = unit.extensions
    attributes = {"id": ext.get("unit_id", unit_id)}
    space = ext.get("space")
    if space:
        attributes["{http://www.w3.org/XML/1998/namespace}space"] = space
    node = etree.Element(f"{{{XLIFF_NS}}}trans-unit", attributes)

    tags = unit.tags
    source_segment = _build_segment(
        "source",
        unit.source,
        tags.source_parts if tags else [],
        tags.source_tag_map if tags else {},
    )
    node.append(source_segment)

    if unit.target is not None:
        target_segment = _build_segment(
            "target",
            unit.target,
            tags.target_parts if tags else [],
            tags.target_tag_map if tags else {},
        )
        node.append(target_segment)

    for text in (comment.context for comment in unit.comments):
        if text:
            etree.SubElement(node, f"{{{XLIFF_NS}}}note").text = text
    return node
92
+
93
+
94
def _build_segment(
    name: str,
    text: str,
    parts: list[SegmentPart],
    tag_map: dict[str, TieData],
) -> _Element:
    """Build a namespaced XLIFF element called *name* from segment parts.

    When *parts* is empty the whole *text* is used as a single text part.
    Text parts become character data; code parts found in *tag_map* become
    inline code elements, and code parts missing from it are skipped.
    """
    node = etree.Element(f"{{{XLIFF_NS}}}{name}")
    previous: _Element | None = None
    for part in parts or [TextPart(text)]:
        if isinstance(part, TextPart):
            previous = _append_text(node, previous, part.value)
        elif isinstance(part, CodePart) and (code := tag_map.get(part.ref)) is not None:
            previous = _build_code(code)
            node.append(previous)
    return node
113
+
114
+
115
def _build_code(code: TieData) -> _Element:
    """Build the inline XLIFF element for *code*.

    Opening codes become ``bx``, closing codes ``ex`` and everything else
    ``x``, each with the code's id. A ``rid`` attribute holding the pair id
    is added when the code has one.
    """
    if _is_open(code.type):
        local_name = "bx"
    elif _is_close(code.type):
        local_name = "ex"
    else:
        local_name = "x"
    node = etree.Element(f"{{{XLIFF_NS}}}{local_name}", id=code.id)

    pair = code.pair_id
    if pair is not None:
        node.attrib["rid"] = pair
    return node
125
+
126
+
127
+ def _append_text(
128
+ parent: _Element, last_child: _Element | None, value: str
129
+ ) -> _Element | None:
130
+ if last_child is None:
131
+ parent.text = (parent.text or "") + value
132
+ else:
133
+ last_child.tail = (last_child.tail or "") + value
134
+ return last_child
135
+
136
+
137
+ def _is_open(tie_type: TieType) -> bool:
138
+ return tie_type.value.endswith(".open")
139
+
140
+
141
+ def _is_close(tie_type: TieType) -> bool:
142
+ return tie_type.value.endswith(".close")
143
+
144
+
145
+ def _first_extension(
146
+ units: list[tuple[str, Data]], key: str, fallback: str
147
+ ) -> str:
148
+ for _, unit in units:
149
+ value = unit.extensions.get(key)
150
+ if value:
151
+ return value
152
+ return fallback
Binary file
@@ -0,0 +1,39 @@
1
+ from __future__ import annotations
2
+
3
+ import asyncio
4
+ from pathlib import Path
5
+
6
+ from openpyxl import Workbook
7
+
8
+ from lokit.data.structure import BaseStructure, TranslationStatus
9
+
10
+ # Column titles written as the first row of the exported sheet.
+ _HEADERS = ["id", "source", "target", "status", "comment"]
11
+
12
+
13
def export_xlsx(document: BaseStructure, filepath: str | Path) -> None:
    """Write *document* to *filepath* as a single-sheet XLSX workbook.

    The sheet starts with the ``_HEADERS`` row, followed by one row per
    unit: id, source, target (empty when missing), status value (empty for
    ``UNKNOWN``) and the unit's non-empty comment texts joined with "; ".
    Missing parent directories are created.
    """
    destination = Path(filepath)
    destination.parent.mkdir(parents=True, exist_ok=True)

    workbook = Workbook(write_only=True)
    sheet = workbook.create_sheet()
    sheet.append(_HEADERS)

    for unit_id, unit in document.data.items():
        notes = "; ".join(c.context for c in unit.comments if c.context)
        if unit.status != TranslationStatus.UNKNOWN:
            status_text = unit.status.value
        else:
            status_text = ""
        row = [unit_id, unit.source, unit.target or "", status_text, notes]
        sheet.append(row)

    workbook.save(str(destination))
    workbook.close()
36
+
37
+
38
async def export_xlsx_async(document: BaseStructure, filepath: str | Path) -> None:
    """Await ``export_xlsx`` executed in a worker thread."""
    pending = asyncio.to_thread(export_xlsx, document, filepath)
    await pending
Binary file