lokit-python 0.1.0__cp313-cp313-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- 821d8b73c2a02cb7980f__mypyc.cp313-win_amd64.pyd +0 -0
- lokit/__init__.cp313-win_amd64.pyd +0 -0
- lokit/__init__.py +128 -0
- lokit/core/__init__.cp313-win_amd64.pyd +0 -0
- lokit/core/__init__.py +0 -0
- lokit/core/logger.cp313-win_amd64.pyd +0 -0
- lokit/core/logger.py +20 -0
- lokit/data/__init__.cp313-win_amd64.pyd +0 -0
- lokit/data/__init__.py +0 -0
- lokit/data/lang_codes.cp313-win_amd64.pyd +0 -0
- lokit/data/lang_codes.py +455 -0
- lokit/data/structure.cp313-win_amd64.pyd +0 -0
- lokit/data/structure.py +118 -0
- lokit/data/tag_types.cp313-win_amd64.pyd +0 -0
- lokit/data/tag_types.py +78 -0
- lokit/exporters/__init__.cp313-win_amd64.pyd +0 -0
- lokit/exporters/__init__.py +34 -0
- lokit/exporters/csv.cp313-win_amd64.pyd +0 -0
- lokit/exporters/csv.py +32 -0
- lokit/exporters/html.cp313-win_amd64.pyd +0 -0
- lokit/exporters/html.py +217 -0
- lokit/exporters/idml.cp313-win_amd64.pyd +0 -0
- lokit/exporters/idml.py +178 -0
- lokit/exporters/json_i18n.cp313-win_amd64.pyd +0 -0
- lokit/exporters/json_i18n.py +47 -0
- lokit/exporters/po.cp313-win_amd64.pyd +0 -0
- lokit/exporters/po.py +162 -0
- lokit/exporters/tmx.cp313-win_amd64.pyd +0 -0
- lokit/exporters/tmx.py +247 -0
- lokit/exporters/xliff.cp313-win_amd64.pyd +0 -0
- lokit/exporters/xliff.py +152 -0
- lokit/exporters/xlsx.cp313-win_amd64.pyd +0 -0
- lokit/exporters/xlsx.py +39 -0
- lokit/format_detection.cp313-win_amd64.pyd +0 -0
- lokit/format_detection.py +115 -0
- lokit/importers.py +321 -0
- lokit/io/__init__.cp313-win_amd64.pyd +0 -0
- lokit/io/__init__.py +3 -0
- lokit/io/json.cp313-win_amd64.pyd +0 -0
- lokit/io/json.py +194 -0
- lokit/logic.cp313-win_amd64.pyd +0 -0
- lokit/logic.py +324 -0
- lokit/parsers/__init__.cp313-win_amd64.pyd +0 -0
- lokit/parsers/__init__.py +1 -0
- lokit/parsers/csv/__init__.cp313-win_amd64.pyd +0 -0
- lokit/parsers/csv/__init__.py +1 -0
- lokit/parsers/csv/extraction.cp313-win_amd64.pyd +0 -0
- lokit/parsers/csv/extraction.py +164 -0
- lokit/parsers/html/__init__.cp313-win_amd64.pyd +0 -0
- lokit/parsers/html/__init__.py +3 -0
- lokit/parsers/html/extraction.cp313-win_amd64.pyd +0 -0
- lokit/parsers/html/extraction.py +365 -0
- lokit/parsers/idml/__init__.cp313-win_amd64.pyd +0 -0
- lokit/parsers/idml/__init__.py +3 -0
- lokit/parsers/idml/extraction.cp313-win_amd64.pyd +0 -0
- lokit/parsers/idml/extraction.py +264 -0
- lokit/parsers/json_i18n/__init__.cp313-win_amd64.pyd +0 -0
- lokit/parsers/json_i18n/__init__.py +3 -0
- lokit/parsers/json_i18n/extraction.cp313-win_amd64.pyd +0 -0
- lokit/parsers/json_i18n/extraction.py +163 -0
- lokit/parsers/po/__init__.cp313-win_amd64.pyd +0 -0
- lokit/parsers/po/__init__.py +3 -0
- lokit/parsers/po/extraction.cp313-win_amd64.pyd +0 -0
- lokit/parsers/po/extraction.py +236 -0
- lokit/parsers/tmx/__init__.cp313-win_amd64.pyd +0 -0
- lokit/parsers/tmx/__init__.py +0 -0
- lokit/parsers/tmx/base.cp313-win_amd64.pyd +0 -0
- lokit/parsers/tmx/base.py +145 -0
- lokit/parsers/tmx/extraction.cp313-win_amd64.pyd +0 -0
- lokit/parsers/tmx/extraction.py +170 -0
- lokit/parsers/tmx/header.cp313-win_amd64.pyd +0 -0
- lokit/parsers/tmx/header.py +55 -0
- lokit/parsers/tmx/helpers.cp313-win_amd64.pyd +0 -0
- lokit/parsers/tmx/helpers.py +9 -0
- lokit/parsers/tmx/models.cp313-win_amd64.pyd +0 -0
- lokit/parsers/tmx/models.py +10 -0
- lokit/parsers/tmx/props.cp313-win_amd64.pyd +0 -0
- lokit/parsers/tmx/props.py +201 -0
- lokit/parsers/tmx/tags.cp313-win_amd64.pyd +0 -0
- lokit/parsers/tmx/tags.py +59 -0
- lokit/parsers/tmx/xml_utils.cp313-win_amd64.pyd +0 -0
- lokit/parsers/tmx/xml_utils.py +46 -0
- lokit/parsers/xliff/__init__.cp313-win_amd64.pyd +0 -0
- lokit/parsers/xliff/__init__.py +3 -0
- lokit/parsers/xliff/extraction.cp313-win_amd64.pyd +0 -0
- lokit/parsers/xliff/extraction.py +229 -0
- lokit/parsers/xliff/tags.cp313-win_amd64.pyd +0 -0
- lokit/parsers/xliff/tags.py +128 -0
- lokit/parsers/xlsx/__init__.cp313-win_amd64.pyd +0 -0
- lokit/parsers/xlsx/__init__.py +1 -0
- lokit/parsers/xlsx/extraction.cp313-win_amd64.pyd +0 -0
- lokit/parsers/xlsx/extraction.py +198 -0
- lokit/py.typed +1 -0
- lokit_python-0.1.0.dist-info/METADATA +149 -0
- lokit_python-0.1.0.dist-info/RECORD +97 -0
- lokit_python-0.1.0.dist-info/WHEEL +5 -0
- lokit_python-0.1.0.dist-info/top_level.txt +2 -0
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
import json
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
from lokit.data.structure import BaseStructure
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def export_json_i18n(
    document: BaseStructure,
    filepath: str | Path,
    nested: bool = True,
) -> None:
    """Write the units of *document* to *filepath* as an i18n JSON file.

    Each unit contributes one entry whose value is the unit's target, or its
    source when the target is ``None``.  With *nested* enabled, unit ids are
    expanded on ``.`` into nested objects; otherwise they are used verbatim
    as top-level keys.  Missing parent directories are created, and the file
    is written as UTF-8 with a two-space indent and a trailing newline.
    """
    destination = Path(filepath)
    destination.parent.mkdir(parents=True, exist_ok=True)

    payload: dict[str, Any] = {}
    for unit_id, unit in document.data.items():
        text = unit.source if unit.target is None else unit.target
        if not nested:
            payload[unit_id] = text
            continue
        _set_nested(payload, unit_id, text)

    rendered = json.dumps(payload, ensure_ascii=False, indent=2)
    with destination.open("w", encoding="utf-8") as handle:
        handle.write(rendered + "\n")
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
async def export_json_i18n_async(
    document: BaseStructure,
    filepath: str | Path,
    nested: bool = True,
) -> None:
    """Asynchronous counterpart of :func:`export_json_i18n`.

    The synchronous exporter is handed to ``asyncio.to_thread`` with the
    same three arguments and awaited.
    """
    pending = asyncio.to_thread(export_json_i18n, document, filepath, nested)
    await pending
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def _set_nested(obj: dict[str, Any], dot_key: str, value: str) -> None:
    """Store *value* in *obj* under the dotted path *dot_key*.

    Every segment but the last names a nested dict.  A segment that is
    missing, or that currently holds a non-dict value, is replaced by a
    fresh dict (so an existing leaf on the path is overwritten).
    """
    *parents, leaf = dot_key.split(".")
    node = obj
    for name in parents:
        child = node.get(name)
        if not isinstance(child, dict):
            child = node[name] = {}
        node = child
    node[leaf] = value
|
|
Binary file
|
lokit/exporters/po.py
ADDED
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
from collections import defaultdict
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
import polib
|
|
9
|
+
|
|
10
|
+
from lokit.data.structure import BaseStructure, Data, TranslationStatus
|
|
11
|
+
|
|
12
|
+
# Character that opens the "[<index>]" plural-form suffix searched for in unit ids.
_PLURAL_SUFFIX_PATTERN = "["
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def export_po(document: BaseStructure, filepath: str | Path) -> None:
    """Write *document* to *filepath* as a gettext PO file.

    Units without plural data become ordinary entries.  Units carrying
    plural data are grouped into one plural entry per base id, where the
    base id is the unit id with a trailing ``[<index>]`` suffix removed.
    All singular entries are appended first, then the plural entries.
    Missing parent directories are created.
    """
    path = Path(filepath)
    path.parent.mkdir(parents=True, exist_ok=True)

    po: Any = polib.POFile()
    po.metadata = _build_metadata(document)

    plural_groups: dict[str, list[tuple[str, Data]]] = defaultdict(list)
    singular_units: list[tuple[str, Data]] = []

    for unit_id, unit in document.data.items():
        if unit.plural is None:
            singular_units.append((unit_id, unit))
            continue

        # Only a *trailing* "[<index>]" is a plural suffix.  Cutting at the
        # first "[" would truncate ids whose own text contains brackets
        # (e.g. "Open [file]" or "Open [file][1]").
        base_id = unit_id
        if unit_id.endswith("]"):
            head, marker, index_text = unit_id[:-1].rpartition(_PLURAL_SUFFIX_PATTERN)
            if marker:
                try:
                    int(index_text)
                except ValueError:
                    pass  # bracketed text, not a plural index
                else:
                    base_id = head
        plural_groups[base_id].append((unit_id, unit))

    for unit_id, unit in singular_units:
        po.append(_build_entry(unit_id, unit))

    for base_id, forms in plural_groups.items():
        po.append(_build_plural_entry(base_id, forms))

    po.save(str(path))
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
async def export_po_async(document: BaseStructure, filepath: str | Path) -> None:
    """Asynchronous counterpart of :func:`export_po`.

    The synchronous exporter is handed to ``asyncio.to_thread`` with the
    same arguments and awaited.
    """
    pending = asyncio.to_thread(export_po, document, filepath)
    await pending
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def _build_metadata(document: BaseStructure) -> dict[str, str]:
    """Return the PO header metadata for *document*.

    The content type and transfer encoding are always present.  ``Language``
    and ``X-Generator`` are added, in that order, only when the document's
    target locale / export origin is truthy.
    """
    optional = {
        "Language": document.target_locale,
        "X-Generator": document.export_origin,
    }
    header: dict[str, str] = {
        "Content-Type": "text/plain; charset=UTF-8",
        "Content-Transfer-Encoding": "8bit",
    }
    header.update({name: value for name, value in optional.items() if value})
    return header
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def _parse_unit_id(unit_id: str) -> tuple[str | None, str]:
    """Split a unit id into ``(msgctxt, msgid)``.

    The context is whatever precedes the first ``\\x04`` separator; an id
    without the separator has no context (``None``) and is returned whole.
    """
    context, separator, message = unit_id.partition("\x04")
    if not separator:
        return None, unit_id
    return context, message
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def _build_entry(unit_id: str, unit: Data) -> Any:
    """Create the ``polib.POEntry`` for a non-plural unit.

    The context encoded in *unit_id* is used as ``msgctxt`` unless one of
    the unit's comments carries a context key, which takes precedence.  A
    falsy target becomes an empty ``msgstr``.  Comments, flags and
    occurrences are then applied to the entry, in that order.
    """
    id_context, msgid = _parse_unit_id(unit_id)
    comment_context = _find_context_key(unit)

    entry: Any = polib.POEntry(
        msgid=msgid,
        msgstr=unit.target if unit.target else "",
        msgctxt=id_context if comment_context is None else comment_context,
    )

    for decorate in (_apply_comments, _apply_flags, _apply_occurrences):
        decorate(entry, unit)
    return entry
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def _build_plural_entry(
    base_id: str, forms: list[tuple[str, Data]]
) -> Any:
    """Create the plural ``polib.POEntry`` for one group of plural forms.

    *forms* holds the ``(unit_id, unit)`` pairs that share *base_id*.  The
    first unit supplies the context key, ``msgid_plural``, comments, flags,
    occurrences and the translation stored at plural index 0.  Every unit
    whose id ends in ``[<index>]`` supplies the translation for that index
    (overriding index 0 if it names it).
    """
    msgctxt, msgid = _parse_unit_id(base_id)
    base_unit = forms[0][1]
    context_key = _find_context_key(base_unit)
    if context_key is not None:
        msgctxt = context_key

    variant = base_unit.plural.variant if base_unit.plural else msgid

    msgstr_plural: dict[int, str] = {0: base_unit.target or ""}

    for uid, unit in forms:
        # Read the index from the *trailing* "[<index>]" only.  Slicing from
        # the first "[" to the last "]" breaks on ids whose text contains
        # brackets ("Open [file][1]" -> "file][1") and raised ValueError.
        if not uid.endswith("]"):
            continue
        _, marker, index_text = uid[:-1].rpartition(_PLURAL_SUFFIX_PATTERN)
        if not marker:
            continue
        try:
            index = int(index_text)
        except ValueError:
            # Bracketed text that is not a plural index: not an indexed form.
            continue
        msgstr_plural[index] = unit.target or ""

    entry: Any = polib.POEntry(
        msgid=msgid,
        msgid_plural=variant,
        msgstr_plural=msgstr_plural,
        msgctxt=msgctxt,
    )

    _apply_comments(entry, base_unit)
    _apply_flags(entry, base_unit)
    _apply_occurrences(entry, base_unit)
    return entry
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def _find_context_key(unit: Data) -> str | None:
    """Return the first ``context_key`` that is not ``None`` among the unit's comments.

    ``None`` is returned when no comment carries a context key.
    """
    keys = (comment.context_key for comment in unit.comments)
    return next((key for key in keys if key is not None), None)
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def _apply_comments(entry: Any, unit: Data) -> None:
    """Copy the unit's comment texts onto *entry*.

    The text of the first comment is assigned to ``entry.comment``; the
    texts of all later comments are joined with newlines and assigned to
    ``entry.tcomment``.  Comments without text are skipped, as the other
    exporters do; they keep their position, so a text-less first comment
    does not promote the second one.  An attribute is left untouched when
    there is nothing to write to it.
    """
    # NOTE(review): polib documents `comment` as the extracted ("#.") comment
    # and `tcomment` as the translator ("# ") comment — confirm that this
    # first/rest mapping matches what the PO parser produces.
    first_texts: list[str] = []
    later_texts: list[str] = []
    for position, comment in enumerate(unit.comments):
        if not comment.context:
            # Joining a None context would raise TypeError; an empty one
            # would only add a blank line.
            continue
        bucket = first_texts if position == 0 else later_texts
        bucket.append(comment.context)

    if first_texts:
        entry.comment = "\n".join(first_texts)
    if later_texts:
        entry.tcomment = "\n".join(later_texts)
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def _apply_flags(entry: Any, unit: Data) -> None:
    """Set ``entry.flags`` from the unit's status and its ``flags`` extension.

    A unit whose status is ``DRAFT`` is flagged ``fuzzy``.  The ``flags``
    extension is read as a comma-separated list; blank items are ignored and
    no flag is listed twice, so a stored ``fuzzy`` does not duplicate the
    one derived from the status.  ``entry.flags`` is always assigned, even
    when the resulting list is empty.
    """
    flags: list[str] = []
    if unit.status == TranslationStatus.DRAFT:
        flags.append("fuzzy")

    extra = unit.extensions.get("flags")
    if extra:
        for raw in extra.split(","):
            flag = raw.strip()
            # Skip artefacts of stray commas and repeated flags.
            if flag and flag not in flags:
                flags.append(flag)

    entry.flags = flags
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
def _apply_occurrences(entry: Any, unit: Data) -> None:
    """Set ``entry.occurrences`` from the unit's ``references`` extension.

    The extension is read as a comma-separated list of ``path`` or
    ``path:line`` references and turned into ``(path, line)`` tuples, with
    an empty line for references that have none.  Blank items are ignored.
    The text after the last ``:`` counts as a line number only when it is
    all digits, so a colon inside the path (e.g. a drive letter) is kept
    as part of the path.  Nothing is assigned when the extension is missing
    or empty.
    """
    refs = unit.extensions.get("references")
    if not refs:
        return

    occurrences: list[tuple[str, str]] = []
    for raw in refs.split(","):
        ref = raw.strip()
        if not ref:
            continue  # stray comma, not a reference
        path, separator, line = ref.rpartition(":")
        if separator and line.isdigit():
            occurrences.append((path, line))
        else:
            occurrences.append((ref, ""))
    entry.occurrences = occurrences
|
|
Binary file
|
lokit/exporters/tmx.py
ADDED
|
@@ -0,0 +1,247 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
from lxml import etree
|
|
6
|
+
from lxml.etree import _Element
|
|
7
|
+
|
|
8
|
+
from lokit.data.structure import (
|
|
9
|
+
BaseStructure,
|
|
10
|
+
CodePart,
|
|
11
|
+
Data,
|
|
12
|
+
SegmentPart,
|
|
13
|
+
TextPart,
|
|
14
|
+
TranslationStatus,
|
|
15
|
+
)
|
|
16
|
+
from lokit.data.tag_types import TieData, TieType
|
|
17
|
+
from lokit.io.json import load_lokit_json
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def export_tmx(document: BaseStructure, filepath: str | Path) -> None:
    """Write *document* to *filepath* as a TMX file.

    The file is produced incrementally with ``etree.xmlfile``: an XML
    declaration, a ``tmx`` root with ``version="1.4"``, the header element,
    and a ``body`` holding one ``tu`` per unit.  Missing parent directories
    are created.
    """
    destination = Path(filepath)
    destination.parent.mkdir(parents=True, exist_ok=True)

    with destination.open("wb") as stream, etree.xmlfile(stream, encoding="UTF-8") as writer:
        writer.write_declaration()
        with writer.element("tmx", version="1.4"):
            writer.write(_build_header(document))
            with writer.element("body"):
                for unit_id, unit in document.data.items():
                    writer.write(_build_tu(unit_id, unit, document))
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def export_tmx_from_json(source_json: str | Path, target_tmx: str | Path) -> None:
    """Load a document with ``load_lokit_json`` and export it with :func:`export_tmx`."""
    document = load_lokit_json(source_json)
    export_tmx(document, target_tmx)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def _build_header(document: BaseStructure) -> _Element:
    """Build the TMX ``header`` element for *document*.

    Attribute values come from the document extensions, with fixed
    fallbacks (``adminlang`` falls back to the source locale).
    ``creationdate`` is added when the document has an export timestamp.
    Every extension whose key starts with ``property.`` becomes a ``prop``
    child typed with the key minus that prefix.
    """
    extensions = document.extensions
    source_locale = document.source_locale

    attributes = {
        "creationtool": extensions.get("tool_name", "lokit"),
        "creationtoolversion": extensions.get("tool_version", "0.1"),
        "segtype": extensions.get("segmentation", "sentence"),
        "o-tmf": extensions.get("translation_memory_format", "lokit"),
        "adminlang": extensions.get("admin_locale", source_locale),
        "srclang": source_locale,
        "datatype": extensions.get("data_type", "text"),
    }
    header = etree.Element("header", attributes)

    if document.export_timestamp:
        header.set("creationdate", document.export_timestamp)

    for key, value in extensions.items():
        if not key.startswith("property."):
            continue
        etree.SubElement(header, "prop", type=_property_type(key)).text = value
    return header
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def _build_tu(unit_id: str, unit: Data, document: BaseStructure) -> _Element:
    """Build the ``tu`` element for one unit.

    ``tuid`` is always set.  ``creationdate``, ``changedate``,
    ``creationid`` and ``changeid`` are set when their value is truthy, and
    ``usagecount`` when the usage count is not ``None``.  Properties and
    notes are appended first, then the source ``tuv``, then a target
    ``tuv`` when both the document's target locale and the unit's target
    are present.
    """
    meta = unit.meta
    optional = (
        ("creationdate", meta.created),
        ("changedate", meta.updated),
        ("creationid", _first_creator_id(unit)),
        ("changeid", meta.extensions.get("change_id")),
    )
    attrs: dict[str, str] = {"tuid": unit_id}
    attrs.update((name, value) for name, value in optional if value)
    if meta.usage_count is not None:
        attrs["usagecount"] = str(meta.usage_count)

    tu = etree.Element("tu", attrs)
    _append_unit_properties(tu, unit)
    _append_comments(tu, unit)

    tags = unit.tags
    source_tuv = _build_tuv(
        document.source_locale,
        unit.source,
        tags.source_parts if tags else [],
        tags.source_tag_map if tags else {},
    )
    tu.append(source_tuv)

    if document.target_locale is None or unit.target is None:
        return tu

    target_tuv = _build_tuv(
        document.target_locale,
        unit.target,
        tags.target_parts if tags else [],
        tags.target_tag_map if tags else {},
    )
    tu.append(target_tuv)
    return tu
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def _append_unit_properties(tu: _Element, unit: Data) -> None:
    """Append the unit's ``prop`` children to *tu*.

    Written in order: ``x-status`` (unless the status is ``UNKNOWN``), the
    previous-context and next-context id/source/target, ``x-project``,
    ``x-system``, and one property per extension whose key starts with
    ``property.``.  Apart from ``x-status``, a property is only written
    when its value is neither ``None`` nor empty.
    """
    if unit.status != TranslationStatus.UNKNOWN:
        etree.SubElement(tu, "prop", type="x-status").text = unit.status.value

    neighbours = (
        (
            unit.previous_context,
            "x-previous-id",
            "x-previous-source-text",
            "x-previous-target-text",
        ),
        (
            unit.next_context,
            "x-next-id",
            "x-next-source-text",
            "x-next-target-text",
        ),
    )
    for context, id_prop, source_prop, target_prop in neighbours:
        if context is None:
            continue
        _append_prop_if_present(tu, id_prop, context.unit_id)
        _append_prop_if_present(tu, source_prop, context.source)
        _append_prop_if_present(tu, target_prop, context.target)

    # The helper itself drops None and empty values, so no extra guard here.
    _append_prop_if_present(tu, "x-project", _first_project(unit))
    _append_prop_if_present(tu, "x-system", _first_system(unit))

    for key, value in unit.extensions.items():
        if not key.startswith("property."):
            continue
        _append_prop_if_present(tu, _property_type(key), value)
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def _append_comments(tu: _Element, unit: Data) -> None:
    """Append one ``note`` child to *tu* per unit comment that has text."""
    for text in (comment.context for comment in unit.comments):
        if text:
            etree.SubElement(tu, "note").text = text
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def _build_tuv(
    locale: str,
    text: str,
    parts: list[SegmentPart],
    tag_map: dict[str, TieData],
) -> _Element:
    """Build a ``tuv`` element with ``xml:lang`` set to *locale* and a single ``seg`` child."""
    tuv = etree.Element("tuv")
    tuv.set("{http://www.w3.org/XML/1998/namespace}lang", locale)
    segment = _build_seg(text, parts, tag_map)
    tuv.append(segment)
    return tuv
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
def _build_seg(
    text: str,
    parts: list[SegmentPart],
    tag_map: dict[str, TieData],
) -> _Element:
    """Build the ``seg`` element for one side of a unit.

    When *parts* is empty, the plain *text* is used as a single text part.
    Text parts are appended as character data.  Code parts are looked up in
    *tag_map* by their ``ref`` and rendered as inline code elements; a code
    part with no entry contributes an empty string.  Parts of any other
    type are ignored.
    """
    seg = etree.Element("seg")
    effective_parts = parts or [TextPart(text)]
    pair_numbers = _pair_numbers(tag_map)
    previous: _Element | None = None

    for part in effective_parts:
        if isinstance(part, TextPart):
            previous = _append_text(seg, previous, part.value)
            continue
        if not isinstance(part, CodePart):
            continue

        code = tag_map.get(part.ref)
        if code is None:
            previous = _append_text(seg, previous, "")
            continue

        previous = _build_code_element(code, pair_numbers)
        seg.append(previous)

    return seg
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
def _build_code_element(code: TieData, pair_numbers: dict[str, str]) -> _Element:
    """Render one inline code as a TMX element.

    An opening code becomes ``bpt`` (with ``i`` and ``type``), a closing
    code becomes ``ept`` (with ``i``), and anything else becomes ``ph``
    (with ``x`` set to the code's order, and ``type``).  The element text is
    a ``lokit`` placeholder tag carrying the pair id or code id.
    """
    kind = code.type

    if _is_open(kind):
        element = etree.Element("bpt", i=_pair_number(code, pair_numbers), type=kind.value)
        element.text = f"<lokit id=\"{code.pair_id or code.id}\">"
    elif _is_close(kind):
        element = etree.Element("ept", i=_pair_number(code, pair_numbers))
        element.text = "</lokit>"
    else:
        element = etree.Element("ph", x=str(code.order), type=kind.value)
        element.text = f"<lokit id=\"{code.id}\"/>"
    return element
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
def _append_text(seg: _Element, last_child: _Element | None, value: str) -> _Element | None:
    """Append *value* after the most recently added child of *seg*.

    With no child yet, the text is appended to ``seg.text``; otherwise it
    is appended to the child's ``tail``.  *last_child* is returned unchanged.
    """
    if last_child is not None:
        last_child.tail = (last_child.tail or "") + value
        return last_child
    seg.text = (seg.text or "") + value
    return None
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
def _pair_numbers(tag_map: dict[str, TieData]) -> dict[str, str]:
    """Number the distinct pair ids of *tag_map*.

    Codes are visited by ascending ``order``; each pair id seen for the
    first time receives the next number, starting at 0, as a string.  Codes
    without a pair id are ignored.
    """
    ordered = sorted(tag_map.values(), key=lambda item: item.order)
    numbering: dict[str, str] = {}
    for code in ordered:
        if code.pair_id is None:
            continue
        # The next free number equals the count of ids numbered so far.
        numbering.setdefault(code.pair_id, str(len(numbering)))
    return numbering
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
def _pair_number(code: TieData, pair_numbers: dict[str, str]) -> str:
    """Return the number registered for the code's pair id.

    Falls back to the code's own ``order`` (as a string) when the code has
    no pair id or the pair id is not in *pair_numbers*.
    """
    fallback = str(code.order)
    if code.pair_id is None:
        return fallback
    return pair_numbers.get(code.pair_id, fallback)
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
def _is_open(tie_type: TieType) -> bool:
    """Return ``True`` when the tie type's value ends in ``.open``."""
    suffix = ".open"
    return tie_type.value.endswith(suffix)
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
def _is_close(tie_type: TieType) -> bool:
    """Return ``True`` when the tie type's value ends in ``.close``."""
    suffix = ".close"
    return tie_type.value.endswith(suffix)
|
|
213
|
+
|
|
214
|
+
|
|
215
|
+
def _append_prop_if_present(tu: _Element, prop_type: str, value: str | None) -> None:
    """Append a ``prop`` child of type *prop_type* holding *value* to *tu*.

    Nothing is appended when *value* is ``None`` or the empty string.
    """
    if value is not None and value != "":
        etree.SubElement(tu, "prop", type=prop_type).text = value
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
def _first_creator_id(unit: Data) -> str | None:
    """Return the first truthy ``origin.creator_id`` among the unit's comments, else ``None``."""
    origins = (comment.origin for comment in unit.comments)
    return next(
        (origin.creator_id for origin in origins if origin is not None and origin.creator_id),
        None,
    )
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
def _first_project(unit: Data) -> str | None:
    """Return the first truthy ``origin.project`` among the unit's comments, else ``None``."""
    origins = (comment.origin for comment in unit.comments)
    return next(
        (origin.project for origin in origins if origin is not None and origin.project),
        None,
    )
|
|
234
|
+
|
|
235
|
+
|
|
236
|
+
def _first_system(unit: Data) -> str | None:
    """Return the first truthy ``origin.system`` among the unit's comments, else ``None``."""
    origins = (comment.origin for comment in unit.comments)
    return next(
        (origin.system for origin in origins if origin is not None and origin.system),
        None,
    )
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
def _property_type(key: str) -> str:
    """Return *key* without a leading ``property.``; other keys are returned unchanged."""
    return key.removeprefix("property.")
|
|
Binary file
|
lokit/exporters/xliff.py
ADDED
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
from collections import OrderedDict
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import cast
|
|
7
|
+
|
|
8
|
+
from lxml import etree
|
|
9
|
+
from lxml.etree import _Element
|
|
10
|
+
|
|
11
|
+
from lokit.data.structure import BaseStructure, CodePart, Data, SegmentPart, TextPart
|
|
12
|
+
from lokit.data.tag_types import TieData, TieType
|
|
13
|
+
from lokit.io.json import load_lokit_json
|
|
14
|
+
|
|
15
|
+
# Namespace URI used to qualify every XLIFF element name built in this module.
XLIFF_NS = "urn:oasis:names:tc:xliff:document:1.2"
|
|
16
|
+
# Namespace map passed to the root element: the None key binds XLIFF_NS as the
# default (un-prefixed) namespace. The cast only satisfies the dict[str, str]
# annotation; the key stays None at runtime.
NSMAP = cast(dict[str, str], {None: XLIFF_NS})
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def export_xliff(document: BaseStructure, filepath: str | Path) -> None:
    """Write *document* to *filepath* as an XLIFF file.

    The file is produced incrementally with ``etree.xmlfile``: an XML
    declaration and an ``xliff`` root with ``version="1.2"``, containing one
    ``file`` element per resource group.  Each ``file`` holds an empty
    ``header`` and a ``body`` with one ``trans-unit`` per unit.
    ``target-language`` is only written when the document has a target
    locale.  Missing parent directories are created.
    """
    destination = Path(filepath)
    destination.parent.mkdir(parents=True, exist_ok=True)

    with destination.open("wb") as stream, etree.xmlfile(stream, encoding="UTF-8") as writer:
        writer.write_declaration()
        with writer.element(f"{{{XLIFF_NS}}}xliff", nsmap=NSMAP, version="1.2"):
            for resource_key, units in _group_by_resource(document).items():
                file_attrs = {
                    "original": resource_key or "lokit",
                    "datatype": _first_extension(units, "data_type", "plaintext"),
                    "source-language": document.source_locale,
                }
                target_locale = document.target_locale
                if target_locale is not None:
                    file_attrs["target-language"] = target_locale

                with writer.element(f"{{{XLIFF_NS}}}file", file_attrs):
                    writer.write(etree.Element(f"{{{XLIFF_NS}}}header"))
                    with writer.element(f"{{{XLIFF_NS}}}body"):
                        for unit_id, unit in units:
                            writer.write(_build_trans_unit(unit_id, unit))
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def export_xliff_from_json(source_json: str | Path, target_xliff: str | Path) -> None:
    """Load a document with ``load_lokit_json`` and export it with :func:`export_xliff`."""
    document = load_lokit_json(source_json)
    export_xliff(document, target_xliff)
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
async def export_xliff_async(document: BaseStructure, filepath: str | Path) -> None:
    """Asynchronous counterpart of :func:`export_xliff`.

    The synchronous exporter is handed to ``asyncio.to_thread`` with the
    same arguments and awaited.
    """
    pending = asyncio.to_thread(export_xliff, document, filepath)
    await pending
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
async def export_xliff_from_json_async(
    source_json: str | Path, target_xliff: str | Path
) -> None:
    """Asynchronous counterpart of :func:`export_xliff_from_json`.

    The synchronous function is handed to ``asyncio.to_thread`` with the
    same arguments and awaited.
    """
    pending = asyncio.to_thread(export_xliff_from_json, source_json, target_xliff)
    await pending
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def _group_by_resource(
    document: BaseStructure,
) -> OrderedDict[str, list[tuple[str, Data]]]:
    """Group the document's units by their ``resource`` extension.

    Units without that extension fall into the ``"lokit"`` group.  Groups
    appear in order of first occurrence, and units keep their document
    order within a group.
    """
    grouped: OrderedDict[str, list[tuple[str, Data]]] = OrderedDict()
    for unit_id, unit in document.data.items():
        resource = unit.extensions.get("resource", "lokit")
        if resource not in grouped:
            grouped[resource] = []
        grouped[resource].append((unit_id, unit))
    return grouped
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def _build_trans_unit(unit_id: str, unit: Data) -> _Element:
    """Build the ``trans-unit`` element for one unit.

    The ``id`` attribute is the ``unit_id`` extension when present,
    otherwise *unit_id*.  ``xml:space`` is set from the ``space`` extension
    when that is truthy.  Children are the ``source``, a ``target`` when
    the unit has one, and one ``note`` per comment that has text.
    """
    extensions = unit.extensions
    attrs = {"id": extensions.get("unit_id", unit_id)}
    space = extensions.get("space")
    if space:
        attrs["{http://www.w3.org/XML/1998/namespace}space"] = space

    trans_unit = etree.Element(f"{{{XLIFF_NS}}}trans-unit", attrs)

    tags = unit.tags
    source = _build_segment(
        "source",
        unit.source,
        tags.source_parts if tags else [],
        tags.source_tag_map if tags else {},
    )
    trans_unit.append(source)

    if unit.target is not None:
        target = _build_segment(
            "target",
            unit.target,
            tags.target_parts if tags else [],
            tags.target_tag_map if tags else {},
        )
        trans_unit.append(target)

    for comment in unit.comments:
        if not comment.context:
            continue
        etree.SubElement(trans_unit, f"{{{XLIFF_NS}}}note").text = comment.context
    return trans_unit
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def _build_segment(
    name: str,
    text: str,
    parts: list[SegmentPart],
    tag_map: dict[str, TieData],
) -> _Element:
    """Build the XLIFF element called *name* for one side of a unit.

    When *parts* is empty, the plain *text* is used as a single text part.
    Text parts are appended as character data.  Code parts are looked up in
    *tag_map* by their ``ref`` and rendered as inline elements; code parts
    with no entry, and parts of any other type, are skipped.
    """
    element = etree.Element(f"{{{XLIFF_NS}}}{name}")
    previous: _Element | None = None

    for part in parts or [TextPart(text)]:
        if isinstance(part, TextPart):
            previous = _append_text(element, previous, part.value)
            continue
        if not isinstance(part, CodePart):
            continue

        code = tag_map.get(part.ref)
        if code is None:
            continue
        previous = _build_code(code)
        element.append(previous)

    return element
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def _build_code(code: TieData) -> _Element:
    """Render one inline code as an XLIFF element.

    An opening code becomes ``bx``, a closing code ``ex``, anything else
    ``x``.  The element's ``id`` is the code id, and ``rid`` is set to the
    pair id when the code has one.
    """
    if _is_open(code.type):
        local_name = "bx"
    elif _is_close(code.type):
        local_name = "ex"
    else:
        local_name = "x"

    element = etree.Element(f"{{{XLIFF_NS}}}{local_name}", id=code.id)
    if code.pair_id is not None:
        element.set("rid", code.pair_id)
    return element
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def _append_text(
    parent: _Element, last_child: _Element | None, value: str
) -> _Element | None:
    """Append *value* after the most recently added child of *parent*.

    With no child yet, the text is appended to ``parent.text``; otherwise it
    is appended to the child's ``tail``.  *last_child* is returned unchanged.
    """
    if last_child is not None:
        existing_tail = last_child.tail or ""
        last_child.tail = existing_tail + value
        return last_child

    existing_text = parent.text or ""
    parent.text = existing_text + value
    return None
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def _is_open(tie_type: TieType) -> bool:
    """Return ``True`` when the tie type's value ends in ``.open``."""
    suffix = ".open"
    return tie_type.value.endswith(suffix)
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
def _is_close(tie_type: TieType) -> bool:
    """Return ``True`` when the tie type's value ends in ``.close``."""
    suffix = ".close"
    return tie_type.value.endswith(suffix)
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def _first_extension(
    units: list[tuple[str, Data]], key: str, fallback: str
) -> str:
    """Return the first truthy value of extension *key* among *units*, else *fallback*."""
    candidates = (unit.extensions.get(key) for _, unit in units)
    return next((value for value in candidates if value), fallback)
|
|
Binary file
|
lokit/exporters/xlsx.py
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
from openpyxl import Workbook
|
|
7
|
+
|
|
8
|
+
from lokit.data.structure import BaseStructure, TranslationStatus
|
|
9
|
+
|
|
10
|
+
# Column titles appended as the first row of the exported sheet; the data rows
# written by export_xlsx follow the same column order.
_HEADERS = ["id", "source", "target", "status", "comment"]
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def export_xlsx(document: BaseStructure, filepath: str | Path) -> None:
    """Write *document* to *filepath* as an XLSX workbook.

    A write-only workbook with a single sheet is created: the header row,
    then one row per unit holding the unit id, source, target (empty when
    falsy), status value (empty for ``UNKNOWN``) and the texts of the
    unit's comments joined with ``"; "``.  Missing parent directories are
    created.
    """
    destination = Path(filepath)
    destination.parent.mkdir(parents=True, exist_ok=True)

    workbook = Workbook(write_only=True)
    sheet = workbook.create_sheet()
    sheet.append(_HEADERS)

    for unit_id, unit in document.data.items():
        notes = [c.context for c in unit.comments if c.context]
        if unit.status != TranslationStatus.UNKNOWN:
            status = unit.status.value
        else:
            status = ""
        row = [unit_id, unit.source, unit.target or "", status, "; ".join(notes)]
        sheet.append(row)

    workbook.save(str(destination))
    workbook.close()
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
async def export_xlsx_async(document: BaseStructure, filepath: str | Path) -> None:
    """Asynchronous counterpart of :func:`export_xlsx`.

    The synchronous exporter is handed to ``asyncio.to_thread`` with the
    same arguments and awaited.
    """
    pending = asyncio.to_thread(export_xlsx, document, filepath)
    await pending
|
|
Binary file
|