lokit-python 0.1.0__cp313-cp313-win32.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- 821d8b73c2a02cb7980f__mypyc.cp313-win32.pyd +0 -0
- lokit/__init__.cp313-win32.pyd +0 -0
- lokit/__init__.py +128 -0
- lokit/core/__init__.cp313-win32.pyd +0 -0
- lokit/core/__init__.py +0 -0
- lokit/core/logger.cp313-win32.pyd +0 -0
- lokit/core/logger.py +20 -0
- lokit/data/__init__.cp313-win32.pyd +0 -0
- lokit/data/__init__.py +0 -0
- lokit/data/lang_codes.cp313-win32.pyd +0 -0
- lokit/data/lang_codes.py +455 -0
- lokit/data/structure.cp313-win32.pyd +0 -0
- lokit/data/structure.py +118 -0
- lokit/data/tag_types.cp313-win32.pyd +0 -0
- lokit/data/tag_types.py +78 -0
- lokit/exporters/__init__.cp313-win32.pyd +0 -0
- lokit/exporters/__init__.py +34 -0
- lokit/exporters/csv.cp313-win32.pyd +0 -0
- lokit/exporters/csv.py +32 -0
- lokit/exporters/html.cp313-win32.pyd +0 -0
- lokit/exporters/html.py +217 -0
- lokit/exporters/idml.cp313-win32.pyd +0 -0
- lokit/exporters/idml.py +178 -0
- lokit/exporters/json_i18n.cp313-win32.pyd +0 -0
- lokit/exporters/json_i18n.py +47 -0
- lokit/exporters/po.cp313-win32.pyd +0 -0
- lokit/exporters/po.py +162 -0
- lokit/exporters/tmx.cp313-win32.pyd +0 -0
- lokit/exporters/tmx.py +247 -0
- lokit/exporters/xliff.cp313-win32.pyd +0 -0
- lokit/exporters/xliff.py +152 -0
- lokit/exporters/xlsx.cp313-win32.pyd +0 -0
- lokit/exporters/xlsx.py +39 -0
- lokit/format_detection.cp313-win32.pyd +0 -0
- lokit/format_detection.py +115 -0
- lokit/importers.py +321 -0
- lokit/io/__init__.cp313-win32.pyd +0 -0
- lokit/io/__init__.py +3 -0
- lokit/io/json.cp313-win32.pyd +0 -0
- lokit/io/json.py +194 -0
- lokit/logic.cp313-win32.pyd +0 -0
- lokit/logic.py +324 -0
- lokit/parsers/__init__.cp313-win32.pyd +0 -0
- lokit/parsers/__init__.py +1 -0
- lokit/parsers/csv/__init__.cp313-win32.pyd +0 -0
- lokit/parsers/csv/__init__.py +1 -0
- lokit/parsers/csv/extraction.cp313-win32.pyd +0 -0
- lokit/parsers/csv/extraction.py +164 -0
- lokit/parsers/html/__init__.cp313-win32.pyd +0 -0
- lokit/parsers/html/__init__.py +3 -0
- lokit/parsers/html/extraction.cp313-win32.pyd +0 -0
- lokit/parsers/html/extraction.py +365 -0
- lokit/parsers/idml/__init__.cp313-win32.pyd +0 -0
- lokit/parsers/idml/__init__.py +3 -0
- lokit/parsers/idml/extraction.cp313-win32.pyd +0 -0
- lokit/parsers/idml/extraction.py +264 -0
- lokit/parsers/json_i18n/__init__.cp313-win32.pyd +0 -0
- lokit/parsers/json_i18n/__init__.py +3 -0
- lokit/parsers/json_i18n/extraction.cp313-win32.pyd +0 -0
- lokit/parsers/json_i18n/extraction.py +163 -0
- lokit/parsers/po/__init__.cp313-win32.pyd +0 -0
- lokit/parsers/po/__init__.py +3 -0
- lokit/parsers/po/extraction.cp313-win32.pyd +0 -0
- lokit/parsers/po/extraction.py +236 -0
- lokit/parsers/tmx/__init__.cp313-win32.pyd +0 -0
- lokit/parsers/tmx/__init__.py +0 -0
- lokit/parsers/tmx/base.cp313-win32.pyd +0 -0
- lokit/parsers/tmx/base.py +145 -0
- lokit/parsers/tmx/extraction.cp313-win32.pyd +0 -0
- lokit/parsers/tmx/extraction.py +170 -0
- lokit/parsers/tmx/header.cp313-win32.pyd +0 -0
- lokit/parsers/tmx/header.py +55 -0
- lokit/parsers/tmx/helpers.cp313-win32.pyd +0 -0
- lokit/parsers/tmx/helpers.py +9 -0
- lokit/parsers/tmx/models.cp313-win32.pyd +0 -0
- lokit/parsers/tmx/models.py +10 -0
- lokit/parsers/tmx/props.cp313-win32.pyd +0 -0
- lokit/parsers/tmx/props.py +201 -0
- lokit/parsers/tmx/tags.cp313-win32.pyd +0 -0
- lokit/parsers/tmx/tags.py +59 -0
- lokit/parsers/tmx/xml_utils.cp313-win32.pyd +0 -0
- lokit/parsers/tmx/xml_utils.py +46 -0
- lokit/parsers/xliff/__init__.cp313-win32.pyd +0 -0
- lokit/parsers/xliff/__init__.py +3 -0
- lokit/parsers/xliff/extraction.cp313-win32.pyd +0 -0
- lokit/parsers/xliff/extraction.py +229 -0
- lokit/parsers/xliff/tags.cp313-win32.pyd +0 -0
- lokit/parsers/xliff/tags.py +128 -0
- lokit/parsers/xlsx/__init__.cp313-win32.pyd +0 -0
- lokit/parsers/xlsx/__init__.py +1 -0
- lokit/parsers/xlsx/extraction.cp313-win32.pyd +0 -0
- lokit/parsers/xlsx/extraction.py +198 -0
- lokit/py.typed +1 -0
- lokit_python-0.1.0.dist-info/METADATA +149 -0
- lokit_python-0.1.0.dist-info/RECORD +97 -0
- lokit_python-0.1.0.dist-info/WHEEL +5 -0
- lokit_python-0.1.0.dist-info/top_level.txt +2 -0
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import zipfile
|
|
5
|
+
from enum import StrEnum
|
|
6
|
+
from io import BytesIO
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
from lokit.parsers.tmx.xml_utils import iterparse_safe, local_name
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class LokitInputFormat(StrEnum):
|
|
13
|
+
TMX = "tmx"
|
|
14
|
+
XLIFF = "xliff"
|
|
15
|
+
LOKIT_JSON = "lokit_json"
|
|
16
|
+
CSV = "csv"
|
|
17
|
+
XLSX = "xlsx"
|
|
18
|
+
HTML = "html"
|
|
19
|
+
PO = "po"
|
|
20
|
+
JSON_I18N = "json_i18n"
|
|
21
|
+
IDML = "idml"
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def detect_format(filepath: str | Path) -> LokitInputFormat:
    """Detect the localization format of the file at *filepath*.

    The file extension is checked first. ``.json`` files are parsed to tell
    Lokit JSON (a top-level object with a ``format_version`` or ``data`` key)
    apart from generic i18n JSON. Any other extension is treated as XML and
    classified by the local name of the first element the parser reports.

    Args:
        filepath: Path of the file to inspect.

    Returns:
        The detected format.

    Raises:
        ValueError: If the extension is not recognised and the file cannot be
            classified from its XML root element. The underlying failure, if
            any, is attached as ``__cause__``.
    """
    path = Path(filepath)
    suffix = path.suffix.lower()

    # Extensions that identify the format without looking at the content.
    by_suffix = {
        ".csv": LokitInputFormat.CSV,
        ".xlsx": LokitInputFormat.XLSX,
        ".html": LokitInputFormat.HTML,
        ".htm": LokitInputFormat.HTML,
        ".po": LokitInputFormat.PO,
        ".idml": LokitInputFormat.IDML,
    }
    known = by_suffix.get(suffix)
    if known is not None:
        return known

    if suffix == ".json":
        try:
            # "utf-8-sig" also accepts a leading BOM; with plain "utf-8" the
            # BOM reaches the JSON parser, which rejects it, and a Lokit JSON
            # file would be misreported as generic i18n JSON.
            with path.open("r", encoding="utf-8-sig") as f:
                data = json.load(f)
        except (OSError, ValueError, RecursionError):
            # Best effort: unreadable or malformed JSON is still reported as
            # generic i18n JSON, leaving the real error to the importer.
            return LokitInputFormat.JSON_I18N
        if isinstance(data, dict) and ("format_version" in data or "data" in data):
            return LokitInputFormat.LOKIT_JSON
        return LokitInputFormat.JSON_I18N

    failure: Exception | None = None
    try:
        context = iterparse_safe(str(path), events=("start",))
        for _, element in context:
            # Only the first start event (the root element) is inspected.
            return _format_from_root(local_name(element.tag))
    except Exception as err:
        # The parser's exception types are not visible here, so stay broad,
        # but keep the error so it can be chained below.
        failure = err

    raise ValueError(f"Could not detect input format for file: {path}") from failure
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def detect_format_from_bytes(data: bytes) -> LokitInputFormat:
    """Detect the localization format of an in-memory document.

    Only the first 1000 bytes are sniffed to choose a branch; the JSON, ZIP
    and XML branches then parse the full *data*. Checks run in this order:
    JSON object, ZIP container (IDML if it has a ``Stories/`` entry, otherwise
    XLSX), XML/HTML markup, PO (``msgid``), and finally delimiter-separated
    text.

    Args:
        data: Raw bytes of the document.

    Returns:
        The detected format.

    Raises:
        ValueError: If *data* is empty or whitespace only, or if no heuristic
            matches.
    """
    utf8_bom = b"\xef\xbb\xbf"
    head = data[:1000]
    # A UTF-8 BOM is not whitespace, so lstrip() would leave it in place and
    # the "{" / "<" prefix checks below would never match.
    if head.startswith(utf8_bom):
        head = head[len(utf8_bom):]
    stripped = head.lstrip()
    if not stripped:
        raise ValueError("Could not detect input format for empty byte input")

    if stripped.startswith(b"{"):
        try:
            # json.loads detects the encoding of bytes input, BOM included.
            parsed = json.loads(data)
        except (ValueError, RecursionError):
            # Malformed JSON is still reported as generic i18n JSON.
            return LokitInputFormat.JSON_I18N
        if isinstance(parsed, dict) and ("format_version" in parsed or "data" in parsed):
            return LokitInputFormat.LOKIT_JSON
        return LokitInputFormat.JSON_I18N

    if stripped.startswith(b"PK\x03\x04"):
        try:
            with zipfile.ZipFile(BytesIO(data)) as z:
                names = z.namelist()
                if any(n.startswith("Stories/") for n in names):
                    return LokitInputFormat.IDML
                # Any other readable ZIP archive is reported as XLSX.
                return LokitInputFormat.XLSX
        except Exception:
            # Unreadable archive: fall through to the text heuristics.
            pass

    if stripped.startswith(b"<"):
        html_tags = ("html", "head", "body", "p", "div")
        try:
            context = iterparse_safe(BytesIO(data), events=("start",))
            for _, element in context:
                # Every start event is examined, not just the root, so a
                # recognised tag nested deeper still decides the format.
                tag = local_name(element.tag).lower()
                if tag == "tmx":
                    return LokitInputFormat.TMX
                if tag == "xliff":
                    return LokitInputFormat.XLIFF
                if tag in html_tags:
                    return LokitInputFormat.HTML
        except Exception:
            # Not well-formed XML (typical for HTML): try textual markers.
            pass
        lowered = stripped.lower()
        if b"<!doctype html" in lowered or b"<html" in lowered:
            return LokitInputFormat.HTML

    if b"msgid" in stripped:
        return LokitInputFormat.PO

    if b"," in stripped or b";" in stripped or b"\t" in stripped:
        return LokitInputFormat.CSV

    raise ValueError("Could not detect input format for byte input")
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def _format_from_root(root_name: str) -> LokitInputFormat:
    """Map an XML root element name to its input format.

    The comparison is case-insensitive.

    Args:
        root_name: Local (namespace-free) name of the root element.

    Returns:
        The format associated with the root element.

    Raises:
        ValueError: If the root name is not ``tmx``, ``xliff`` or ``html``.
    """
    formats_by_root = {
        "tmx": LokitInputFormat.TMX,
        "xliff": LokitInputFormat.XLIFF,
        "html": LokitInputFormat.HTML,
    }
    detected = formats_by_root.get(root_name.lower())
    if detected is None:
        raise ValueError(f"Unsupported localization format root: {root_name}")
    return detected
|
lokit/importers.py
ADDED
|
@@ -0,0 +1,321 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from collections.abc import AsyncIterator
|
|
4
|
+
|
|
5
|
+
from lokit.data.structure import BaseStructure, Data
|
|
6
|
+
from lokit.parsers.csv.extraction import CsvExtractor
|
|
7
|
+
from lokit.parsers.xlsx.extraction import XlsxExtractor
|
|
8
|
+
from lokit.parsers.html.extraction import HtmlExtractor
|
|
9
|
+
from lokit.parsers.po.extraction import PoExtractor
|
|
10
|
+
from lokit.parsers.json_i18n.extraction import JsonI18nExtractor
|
|
11
|
+
from lokit.parsers.idml.extraction import IdmlExtractor
|
|
12
|
+
from lokit.parsers.tmx.extraction import TmxExtractor
|
|
13
|
+
from lokit.parsers.xliff.extraction import XliffExtractor
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def import_tmx(
    filepath: str,
    source_language: str | None = None,
    target_language: str | None = None,
    domain: str | None = None,
) -> BaseStructure:
    """Read a TMX file and return all of its units as one structure.

    Args:
        filepath: Path of the TMX file.
        source_language: Forwarded unchanged to ``TmxExtractor``.
        target_language: Forwarded unchanged to ``TmxExtractor``.
        domain: Forwarded unchanged to ``TmxExtractor``.

    Returns:
        A ``BaseStructure`` built from the extractor's metadata and units.
    """
    tmx = TmxExtractor(
        filepath=filepath,
        source_language=source_language,
        target_language=target_language,
        domain=domain,
    )
    # If a unit id occurs more than once, the last occurrence wins.
    units: dict[str, Data] = dict(tmx.extract())
    return _build_tmx_structure(tmx, units)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
async def import_tmx_async(
    filepath: str,
    source_language: str | None = None,
    target_language: str | None = None,
    domain: str | None = None,
) -> AsyncIterator[tuple[str, Data]]:
    """Asynchronously yield the units of a TMX file one at a time.

    Args:
        filepath: Path of the TMX file.
        source_language: Forwarded unchanged to ``TmxExtractor``.
        target_language: Forwarded unchanged to ``TmxExtractor``.
        domain: Forwarded unchanged to ``TmxExtractor``.

    Yields:
        ``(unit_id, data)`` tuples in the order ``extract_async()`` produces
        them.
    """
    tmx = TmxExtractor(
        filepath=filepath,
        source_language=source_language,
        target_language=target_language,
        domain=domain,
    )
    async for entry in tmx.extract_async():
        key, unit = entry
        yield key, unit
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def import_xliff(filepath: str) -> BaseStructure:
    """Read an XLIFF file and return all of its units as one structure.

    Args:
        filepath: Path of the XLIFF file.

    Returns:
        A ``BaseStructure`` built from the extractor's metadata and units.
    """
    xliff = XliffExtractor(filepath)
    # If a unit id occurs more than once, the last occurrence wins.
    units: dict[str, Data] = dict(xliff.extract())
    return _build_xliff_structure(xliff, units)
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
async def import_xliff_async(filepath: str) -> AsyncIterator[tuple[str, Data]]:
    """Asynchronously yield the units of an XLIFF file one at a time.

    Args:
        filepath: Path of the XLIFF file.

    Yields:
        ``(unit_id, data)`` tuples in the order ``extract_async()`` produces
        them.
    """
    xliff = XliffExtractor(filepath)
    async for entry in xliff.extract_async():
        key, unit = entry
        yield key, unit
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def import_csv(
    filepath: str,
    source_locale: str = "",
    target_locale: str | None = None,
) -> BaseStructure:
    """Read a CSV file and return all of its units as one structure.

    Args:
        filepath: Path of the CSV file.
        source_locale: Forwarded unchanged to ``CsvExtractor``.
        target_locale: Forwarded unchanged to ``CsvExtractor``.

    Returns:
        A ``BaseStructure`` built from the extractor's metadata and units.
    """
    reader = CsvExtractor(filepath, source_locale, target_locale)
    # If a unit id occurs more than once, the last occurrence wins.
    units: dict[str, Data] = dict(reader.extract())
    return _build_csv_structure(reader, units)
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
async def import_csv_async(
    filepath: str,
    source_locale: str = "",
    target_locale: str | None = None,
) -> AsyncIterator[tuple[str, Data]]:
    """Asynchronously yield the units of a CSV file one at a time.

    Args:
        filepath: Path of the CSV file.
        source_locale: Forwarded unchanged to ``CsvExtractor``.
        target_locale: Forwarded unchanged to ``CsvExtractor``.

    Yields:
        ``(unit_id, data)`` tuples in the order ``extract_async()`` produces
        them.
    """
    reader = CsvExtractor(filepath, source_locale, target_locale)
    async for entry in reader.extract_async():
        key, unit = entry
        yield key, unit
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def import_xlsx(
    filepath: str,
    source_locale: str = "",
    target_locale: str | None = None,
) -> BaseStructure:
    """Read an XLSX workbook and return all of its units as one structure.

    Args:
        filepath: Path of the XLSX file.
        source_locale: Forwarded unchanged to ``XlsxExtractor``.
        target_locale: Forwarded unchanged to ``XlsxExtractor``.

    Returns:
        A ``BaseStructure`` built from the extractor's metadata and units.
    """
    workbook = XlsxExtractor(filepath, source_locale, target_locale)
    # If a unit id occurs more than once, the last occurrence wins.
    units: dict[str, Data] = dict(workbook.extract())
    return _build_xlsx_structure(workbook, units)
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
async def import_xlsx_async(
    filepath: str,
    source_locale: str = "",
    target_locale: str | None = None,
) -> AsyncIterator[tuple[str, Data]]:
    """Asynchronously yield the units of an XLSX workbook one at a time.

    Args:
        filepath: Path of the XLSX file.
        source_locale: Forwarded unchanged to ``XlsxExtractor``.
        target_locale: Forwarded unchanged to ``XlsxExtractor``.

    Yields:
        ``(unit_id, data)`` tuples in the order ``extract_async()`` produces
        them.
    """
    workbook = XlsxExtractor(filepath, source_locale, target_locale)
    async for entry in workbook.extract_async():
        key, unit = entry
        yield key, unit
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def import_html(
    filepath: str,
    source_locale: str = "",
    target_locale: str | None = None,
) -> BaseStructure:
    """Read an HTML file and return all of its units as one structure.

    Args:
        filepath: Path of the HTML file.
        source_locale: Forwarded unchanged to ``HtmlExtractor``.
        target_locale: Forwarded unchanged to ``HtmlExtractor``.

    Returns:
        A ``BaseStructure`` built from the extractor's metadata and units.
    """
    page = HtmlExtractor(filepath, source_locale, target_locale)
    # If a unit id occurs more than once, the last occurrence wins.
    units: dict[str, Data] = dict(page.extract())
    return _build_html_structure(page, units)
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
async def import_html_async(
    filepath: str,
    source_locale: str = "",
    target_locale: str | None = None,
) -> AsyncIterator[tuple[str, Data]]:
    """Asynchronously yield the units of an HTML file one at a time.

    Args:
        filepath: Path of the HTML file.
        source_locale: Forwarded unchanged to ``HtmlExtractor``.
        target_locale: Forwarded unchanged to ``HtmlExtractor``.

    Yields:
        ``(unit_id, data)`` tuples in the order ``extract_async()`` produces
        them.
    """
    page = HtmlExtractor(filepath, source_locale, target_locale)
    async for entry in page.extract_async():
        key, unit = entry
        yield key, unit
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def import_po(
    filepath: str,
    source_locale: str = "",
    target_locale: str | None = None,
) -> BaseStructure:
    """Read a PO file and return all of its units as one structure.

    Args:
        filepath: Path of the PO file.
        source_locale: Forwarded unchanged to ``PoExtractor``.
        target_locale: Forwarded unchanged to ``PoExtractor``.

    Returns:
        A ``BaseStructure`` built from the extractor's metadata and units.
    """
    catalog = PoExtractor(filepath, source_locale, target_locale)
    # If a unit id occurs more than once, the last occurrence wins.
    units: dict[str, Data] = dict(catalog.extract())
    return _build_po_structure(catalog, units)
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
async def import_po_async(
    filepath: str,
    source_locale: str = "",
    target_locale: str | None = None,
) -> AsyncIterator[tuple[str, Data]]:
    """Asynchronously yield the units of a PO file one at a time.

    Args:
        filepath: Path of the PO file.
        source_locale: Forwarded unchanged to ``PoExtractor``.
        target_locale: Forwarded unchanged to ``PoExtractor``.

    Yields:
        ``(unit_id, data)`` tuples in the order ``extract_async()`` produces
        them.
    """
    catalog = PoExtractor(filepath, source_locale, target_locale)
    async for entry in catalog.extract_async():
        key, unit = entry
        yield key, unit
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
def import_json_i18n(
    filepath: str,
    source_locale: str = "",
    target_locale: str | None = None,
    target_filepath: str | None = None,
) -> BaseStructure:
    """Read an i18n JSON file and return all of its units as one structure.

    Args:
        filepath: Path of the JSON file.
        source_locale: Forwarded unchanged to ``JsonI18nExtractor``.
        target_locale: Forwarded unchanged to ``JsonI18nExtractor``.
        target_filepath: Forwarded unchanged to ``JsonI18nExtractor``.

    Returns:
        A ``BaseStructure`` built from the extractor's metadata and units.
    """
    resource = JsonI18nExtractor(filepath, source_locale, target_locale, target_filepath)
    # If a unit id occurs more than once, the last occurrence wins.
    units: dict[str, Data] = dict(resource.extract())
    return _build_json_i18n_structure(resource, units)
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
async def import_json_i18n_async(
    filepath: str,
    source_locale: str = "",
    target_locale: str | None = None,
    target_filepath: str | None = None,
) -> AsyncIterator[tuple[str, Data]]:
    """Asynchronously yield the units of an i18n JSON file one at a time.

    Args:
        filepath: Path of the JSON file.
        source_locale: Forwarded unchanged to ``JsonI18nExtractor``.
        target_locale: Forwarded unchanged to ``JsonI18nExtractor``.
        target_filepath: Forwarded unchanged to ``JsonI18nExtractor``.

    Yields:
        ``(unit_id, data)`` tuples in the order ``extract_async()`` produces
        them.
    """
    resource = JsonI18nExtractor(filepath, source_locale, target_locale, target_filepath)
    async for entry in resource.extract_async():
        key, unit = entry
        yield key, unit
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
def import_idml(
    filepath: str,
    source_locale: str = "",
    target_locale: str | None = None,
) -> BaseStructure:
    """Read an IDML file and return all of its units as one structure.

    Args:
        filepath: Path of the IDML file.
        source_locale: Forwarded unchanged to ``IdmlExtractor``.
        target_locale: Forwarded unchanged to ``IdmlExtractor``.

    Returns:
        A ``BaseStructure`` built from the extractor's metadata and units.
    """
    document = IdmlExtractor(filepath, source_locale, target_locale)
    # If a unit id occurs more than once, the last occurrence wins.
    units: dict[str, Data] = dict(document.extract())
    return _build_idml_structure(document, units)
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
async def import_idml_async(
    filepath: str,
    source_locale: str = "",
    target_locale: str | None = None,
) -> AsyncIterator[tuple[str, Data]]:
    """Asynchronously yield the units of an IDML file one at a time.

    Args:
        filepath: Path of the IDML file.
        source_locale: Forwarded unchanged to ``IdmlExtractor``.
        target_locale: Forwarded unchanged to ``IdmlExtractor``.

    Yields:
        ``(unit_id, data)`` tuples in the order ``extract_async()`` produces
        them.
    """
    document = IdmlExtractor(filepath, source_locale, target_locale)
    async for entry in document.extract_async():
        key, unit = entry
        yield key, unit
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
def _build_tmx_structure(
    extractor: TmxExtractor,
    parsed_data: dict[str, Data],
) -> BaseStructure:
    """Combine TMX extractor metadata and parsed units into a structure.

    Args:
        extractor: Extractor whose metadata attributes are copied.
        parsed_data: Units keyed by unit id.

    Returns:
        The assembled ``BaseStructure``.
    """
    # A falsy locale on the extractor falls back to its native_* value; a
    # target that is still falsy after the fallback is normalised to None.
    resolved_source = extractor.source_locale or extractor.native_source
    resolved_target = extractor.target_locale or extractor.native_target or None
    return BaseStructure(
        source_locale=resolved_source,
        target_locale=resolved_target,
        data=parsed_data,
        source_language=extractor.source_language,
        target_language=extractor.target_language,
        export_origin=extractor.export_origin,
        export_timestamp=extractor.export_timestamp,
        extensions=extractor.extensions,
    )
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
def _build_xliff_structure(
    extractor: XliffExtractor,
    parsed_data: dict[str, Data],
) -> BaseStructure:
    """Combine XLIFF extractor metadata and parsed units into a structure.

    Args:
        extractor: Extractor whose metadata attributes are copied.
        parsed_data: Units keyed by unit id.

    Returns:
        The assembled ``BaseStructure``.
    """
    # A falsy source locale is replaced by an empty string.
    resolved_source = extractor.source_locale or ""
    return BaseStructure(
        source_locale=resolved_source,
        target_locale=extractor.target_locale,
        data=parsed_data,
        source_language=extractor.source_language,
        target_language=extractor.target_language,
        export_origin=extractor.export_origin,
        export_timestamp=extractor.export_timestamp,
        extensions=extractor.extensions,
    )
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
def _build_csv_structure(
    extractor: CsvExtractor,
    parsed_data: dict[str, Data],
) -> BaseStructure:
    """Combine CSV extractor metadata and parsed units into a structure.

    Args:
        extractor: Extractor whose metadata attributes are copied.
        parsed_data: Units keyed by unit id.

    Returns:
        The assembled ``BaseStructure``.
    """
    source_locale = extractor.source_locale
    target_locale = extractor.target_locale
    return BaseStructure(
        source_locale=source_locale,
        target_locale=target_locale,
        data=parsed_data,
        source_language=extractor.source_language,
        target_language=extractor.target_language,
        export_origin=extractor.export_origin,
        export_timestamp=extractor.export_timestamp,
        extensions=extractor.extensions,
    )
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
def _build_xlsx_structure(
    extractor: XlsxExtractor,
    parsed_data: dict[str, Data],
) -> BaseStructure:
    """Combine XLSX extractor metadata and parsed units into a structure.

    Args:
        extractor: Extractor whose metadata attributes are copied.
        parsed_data: Units keyed by unit id.

    Returns:
        The assembled ``BaseStructure``.
    """
    source_locale = extractor.source_locale
    target_locale = extractor.target_locale
    return BaseStructure(
        source_locale=source_locale,
        target_locale=target_locale,
        data=parsed_data,
        source_language=extractor.source_language,
        target_language=extractor.target_language,
        export_origin=extractor.export_origin,
        export_timestamp=extractor.export_timestamp,
        extensions=extractor.extensions,
    )
|
|
260
|
+
|
|
261
|
+
|
|
262
|
+
def _build_html_structure(
    extractor: HtmlExtractor,
    parsed_data: dict[str, Data],
) -> BaseStructure:
    """Combine HTML extractor metadata and parsed units into a structure.

    Args:
        extractor: Extractor whose metadata attributes are copied.
        parsed_data: Units keyed by unit id.

    Returns:
        The assembled ``BaseStructure``.
    """
    source_locale = extractor.source_locale
    target_locale = extractor.target_locale
    return BaseStructure(
        source_locale=source_locale,
        target_locale=target_locale,
        data=parsed_data,
        source_language=extractor.source_language,
        target_language=extractor.target_language,
        export_origin=extractor.export_origin,
        export_timestamp=extractor.export_timestamp,
        extensions=extractor.extensions,
    )
|
|
276
|
+
|
|
277
|
+
|
|
278
|
+
def _build_po_structure(
    extractor: PoExtractor,
    parsed_data: dict[str, Data],
) -> BaseStructure:
    """Combine PO extractor metadata and parsed units into a structure.

    Args:
        extractor: Extractor whose metadata attributes are copied.
        parsed_data: Units keyed by unit id.

    Returns:
        The assembled ``BaseStructure``.
    """
    source_locale = extractor.source_locale
    target_locale = extractor.target_locale
    # NOTE(review): no export_timestamp is passed here, unlike the TMX, XLIFF,
    # CSV, XLSX, HTML and IDML builders - confirm this is intentional.
    return BaseStructure(
        source_locale=source_locale,
        target_locale=target_locale,
        data=parsed_data,
        source_language=extractor.source_language,
        target_language=extractor.target_language,
        export_origin=extractor.export_origin,
        extensions=extractor.extensions,
    )
|
|
291
|
+
|
|
292
|
+
|
|
293
|
+
def _build_json_i18n_structure(
    extractor: JsonI18nExtractor,
    parsed_data: dict[str, Data],
) -> BaseStructure:
    """Combine i18n JSON extractor metadata and parsed units into a structure.

    Args:
        extractor: Extractor whose metadata attributes are copied.
        parsed_data: Units keyed by unit id.

    Returns:
        The assembled ``BaseStructure``.
    """
    source_locale = extractor.source_locale
    target_locale = extractor.target_locale
    # NOTE(review): no export_timestamp is passed here, unlike the TMX, XLIFF,
    # CSV, XLSX, HTML and IDML builders - confirm this is intentional.
    return BaseStructure(
        source_locale=source_locale,
        target_locale=target_locale,
        data=parsed_data,
        source_language=extractor.source_language,
        target_language=extractor.target_language,
        export_origin=extractor.export_origin,
        extensions=extractor.extensions,
    )
|
|
306
|
+
|
|
307
|
+
|
|
308
|
+
def _build_idml_structure(
    extractor: IdmlExtractor,
    parsed_data: dict[str, Data],
) -> BaseStructure:
    """Combine IDML extractor metadata and parsed units into a structure.

    Args:
        extractor: Extractor whose metadata attributes are copied.
        parsed_data: Units keyed by unit id.

    Returns:
        The assembled ``BaseStructure``.
    """
    source_locale = extractor.source_locale
    target_locale = extractor.target_locale
    return BaseStructure(
        source_locale=source_locale,
        target_locale=target_locale,
        data=parsed_data,
        source_language=extractor.source_language,
        target_language=extractor.target_language,
        export_origin=extractor.export_origin,
        export_timestamp=extractor.export_timestamp,
        extensions=extractor.extensions,
    )
|
|
Binary file
|
lokit/io/__init__.py
ADDED
|
Binary file
|
lokit/io/json.py
ADDED
|
@@ -0,0 +1,194 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Any, cast
|
|
6
|
+
|
|
7
|
+
from lokit.data.structure import (
|
|
8
|
+
AdjacentContext,
|
|
9
|
+
BaseStructure,
|
|
10
|
+
CodePart,
|
|
11
|
+
Comment,
|
|
12
|
+
Data,
|
|
13
|
+
Meta,
|
|
14
|
+
Origin,
|
|
15
|
+
Plural,
|
|
16
|
+
PluralCategory,
|
|
17
|
+
SegmentPart,
|
|
18
|
+
Tags,
|
|
19
|
+
TextPart,
|
|
20
|
+
TranslationStatus,
|
|
21
|
+
)
|
|
22
|
+
from lokit.data.tag_types import TieData, TieType
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def load_lokit_json(filepath: str | Path) -> BaseStructure:
    """Load a Lokit JSON document from disk.

    Args:
        filepath: Path of the JSON file, UTF-8 encoded with or without a BOM.

    Returns:
        The parsed ``BaseStructure``.

    Raises:
        OSError: If the file cannot be read.
        ValueError: If the content is not valid UTF-8 or not valid JSON.
        KeyError: If a required key is missing from the document.
    """
    # "utf-8-sig" matches load_lokit_json_bytes: a leading BOM is dropped
    # instead of reaching json.loads, which rejects it.
    text = Path(filepath).read_text(encoding="utf-8-sig")
    raw = json.loads(text)
    return _parse_base(cast(dict[str, Any], raw))
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def load_lokit_json_bytes(data: bytes) -> BaseStructure:
    """Load a Lokit JSON document from raw bytes.

    Args:
        data: UTF-8 encoded JSON, with or without a leading BOM.

    Returns:
        The parsed ``BaseStructure``.
    """
    text = data.decode("utf-8-sig")
    document = cast(dict[str, Any], json.loads(text))
    return _parse_base(document)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def _parse_base(raw: dict[str, Any]) -> BaseStructure:
    """Build a ``BaseStructure`` from a decoded Lokit JSON document.

    A JSON ``null`` is treated the same as a missing key for ``data``,
    ``format_version``, ``export_origin`` and ``export_timestamp``.

    Args:
        raw: Top-level JSON object.

    Returns:
        The populated structure.

    Raises:
        KeyError: If ``source_locale`` (or a required key of a unit) is
            missing.
    """

    def _text(key: str, default: str) -> str:
        # str(None) would store the literal "None"; use the default instead.
        value = raw.get(key)
        return default if value is None else str(value)

    # "data": null is handled like an absent key rather than crashing on
    # None.items().
    data_raw = cast(dict[str, Any], raw.get("data") or {})
    return BaseStructure(
        source_locale=str(raw["source_locale"]),
        target_locale=_optional_str(raw.get("target_locale")),
        data={
            str(unit_id): _parse_data(cast(dict[str, Any], unit_raw))
            for unit_id, unit_raw in data_raw.items()
        },
        format_version=_text("format_version", "0.1"),
        export_origin=_text("export_origin", ""),
        export_timestamp=_text("export_timestamp", ""),
        source_language=_optional_str(raw.get("source_language")),
        target_language=_optional_str(raw.get("target_language")),
        extensions=_str_dict(raw.get("extensions")),
    )
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def _parse_data(raw: dict[str, Any]) -> Data:
    """Build one ``Data`` unit from its JSON object.

    A JSON ``null`` for ``meta``, ``comments`` or ``status`` is treated the
    same as a missing key (empty meta, no comments, ``UNKNOWN`` status).

    Args:
        raw: JSON object describing a single unit.

    Returns:
        The populated unit.

    Raises:
        KeyError: If ``source`` is missing.
        ValueError: If ``status`` is not a valid ``TranslationStatus`` value.
    """
    meta_raw = cast(dict[str, Any], raw.get("meta") or {})
    comments_raw = cast(list[Any], raw.get("comments") or [])
    status_raw = raw.get("status")
    return Data(
        source=str(raw["source"]),
        target=_optional_str(raw.get("target")),
        plural=_parse_plural(raw.get("plural")),
        tags=_parse_tags(raw.get("tags")),
        meta=_parse_meta(meta_raw),
        # Missing or null status maps straight to UNKNOWN instead of being
        # round-tripped through str().
        status=(
            TranslationStatus.UNKNOWN
            if status_raw is None
            else TranslationStatus(str(status_raw))
        ),
        comments=[
            _parse_comment(cast(dict[str, Any], item))
            for item in comments_raw
        ],
        previous_context=_parse_adjacent_context(raw.get("previous_context")),
        next_context=_parse_adjacent_context(raw.get("next_context")),
        extensions=_str_dict(raw.get("extensions")),
    )
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def _parse_plural(raw: object) -> Plural | None:
    """Build a ``Plural`` from its JSON object, or return None when absent.

    Args:
        raw: JSON object for the plural entry, or None.

    Returns:
        The parsed ``Plural``, or None if *raw* is None.

    Raises:
        KeyError: If ``variant`` is missing.
    """
    if raw is None:
        return None
    fields = cast(dict[str, Any], raw)
    category_raw = fields.get("category")
    variant = str(fields["variant"])
    count = _optional_int(fields.get("count"))
    # A missing or null category stays None instead of being coerced.
    category = None if category_raw is None else PluralCategory(str(category_raw))
    return Plural(
        variant=variant,
        count=count,
        category=category,
        extensions=_str_dict(fields.get("extensions")),
    )
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def _parse_meta(raw: dict[str, Any]) -> Meta:
    """Build a ``Meta`` from its JSON object.

    Every field is optional; missing keys and nulls become None, except
    ``extensions``, which becomes an empty dict.

    Args:
        raw: JSON object holding the unit's metadata.

    Returns:
        The populated ``Meta``.
    """
    usage_count = _optional_int(raw.get("usage_count"))
    last_used = _optional_str(raw.get("last_used"))
    first_used = _optional_str(raw.get("first_used"))
    created = _optional_str(raw.get("created"))
    updated = _optional_str(raw.get("updated"))
    max_length = _optional_int(raw.get("max_length"))
    min_length = _optional_int(raw.get("min_length"))
    return Meta(
        usage_count=usage_count,
        last_used=last_used,
        first_used=first_used,
        created=created,
        updated=updated,
        max_length=max_length,
        min_length=min_length,
        extensions=_str_dict(raw.get("extensions")),
    )
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def _parse_comment(raw: dict[str, Any]) -> Comment:
    """Build a ``Comment`` from its JSON object.

    Args:
        raw: JSON object describing one comment.

    Returns:
        The populated ``Comment``. A missing or null ``context`` becomes an
        empty string.
    """
    context_raw = raw.get("context")
    return Comment(
        # str(None) would store the literal "None" for a JSON null.
        context="" if context_raw is None else str(context_raw),
        timestamp=_optional_str(raw.get("timestamp")),
        origin=_parse_origin(raw.get("origin")),
        context_key=_optional_str(raw.get("context_key")),
        extensions=_str_dict(raw.get("extensions")),
    )
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def _parse_origin(raw: object) -> Origin | None:
    """Build an ``Origin`` from its JSON object, or return None when absent.

    Args:
        raw: JSON object for the origin, or None.

    Returns:
        The parsed ``Origin``, or None if *raw* is None.
    """
    if raw is None:
        return None
    fields = cast(dict[str, Any], raw)
    system = _optional_str(fields.get("system"))
    project = _optional_str(fields.get("project"))
    creator_id = _optional_str(fields.get("creator_id"))
    return Origin(
        system=system,
        project=project,
        creator_id=creator_id,
        extensions=_str_dict(fields.get("extensions")),
    )
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def _parse_adjacent_context(raw: object) -> AdjacentContext | None:
    """Build an ``AdjacentContext`` from its JSON object, or None when absent.

    Args:
        raw: JSON object for the neighbouring unit, or None.

    Returns:
        The parsed ``AdjacentContext``, or None if *raw* is None.
    """
    if raw is None:
        return None
    fields = cast(dict[str, Any], raw)
    unit_id = _optional_str(fields.get("unit_id"))
    source = _optional_str(fields.get("source"))
    target = _optional_str(fields.get("target"))
    return AdjacentContext(
        unit_id=unit_id,
        source=source,
        target=target,
        extensions=_str_dict(fields.get("extensions")),
    )
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def _parse_tags(raw: object) -> Tags | None:
    """Build a ``Tags`` container from its JSON object, or None when absent.

    Args:
        raw: JSON object holding tag maps and segment parts, or None.

    Returns:
        The parsed ``Tags``, or None if *raw* is None.
    """
    if raw is None:
        return None
    fields = cast(dict[str, Any], raw)
    source_map = _parse_tag_map(fields.get("source_tag_map"))
    target_map = _parse_tag_map(fields.get("target_tag_map"))
    source_parts = _parse_parts(fields.get("source_parts"))
    target_parts = _parse_parts(fields.get("target_parts"))
    return Tags(
        source_tag_map=source_map,
        target_tag_map=target_map,
        source_parts=source_parts,
        target_parts=target_parts,
    )
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def _parse_tag_map(raw: object) -> dict[str, TieData]:
    """Parse a mapping of tag id to tag description.

    Args:
        raw: JSON object keyed by tag id; any falsy value (None, empty
            object) yields an empty mapping.

    Returns:
        Parsed tags keyed by the string form of their id.
    """
    entries = cast(dict[str, Any], raw or {})
    tag_map: dict[str, TieData] = {}
    for tag_id, tag_raw in entries.items():
        tag_map[str(tag_id)] = _parse_tie_data(cast(dict[str, Any], tag_raw))
    return tag_map
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def _parse_tie_data(raw: dict[str, Any]) -> TieData:
    """Build a ``TieData`` from its JSON object.

    A JSON ``null`` for ``attribute_data``, ``position`` or ``order`` is
    treated the same as a missing key (``""``, ``0`` and ``0``).

    Args:
        raw: JSON object describing one tag.

    Returns:
        The populated ``TieData``.

    Raises:
        KeyError: If ``id`` or ``type`` is missing.
        ValueError: If ``type`` is not a valid ``TieType`` value.
    """
    attribute_data = raw.get("attribute_data")
    position = raw.get("position")
    order = raw.get("order")
    return TieData(
        id=str(raw["id"]),
        type=TieType(str(raw["type"])),
        attributes=_str_dict(raw.get("attributes")),
        # str(None) would store "None" and int(None) would raise TypeError.
        attribute_data="" if attribute_data is None else str(attribute_data),
        position=0 if position is None else int(position),
        order=0 if order is None else int(order),
        pair_id=_optional_str(raw.get("pair_id")),
        original_name=_optional_str(raw.get("original_name")),
    )
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def _parse_parts(raw: object) -> list[SegmentPart]:
    """Parse the ordered parts of a segment.

    An item with a ``ref`` key becomes a ``CodePart``; any other item becomes
    a ``TextPart`` whose value defaults to an empty string.

    Args:
        raw: JSON array of part objects; any falsy value yields an empty
            list.

    Returns:
        The parts in their original order.
    """
    entries = [cast(dict[str, Any], item) for item in cast(list[Any], raw or [])]
    parts: list[SegmentPart] = [
        CodePart(ref=str(entry["ref"]))
        if "ref" in entry
        else TextPart(value=str(entry.get("value", "")))
        for entry in entries
    ]
    return parts
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
def _optional_str(value: object) -> str | None:
|
|
176
|
+
if value is None:
|
|
177
|
+
return None
|
|
178
|
+
return str(value)
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
def _optional_int(value: object) -> int | None:
|
|
182
|
+
if value is None:
|
|
183
|
+
return None
|
|
184
|
+
if isinstance(value, int):
|
|
185
|
+
return value
|
|
186
|
+
if isinstance(value, str):
|
|
187
|
+
return int(value)
|
|
188
|
+
raise TypeError(f"Expected int-compatible value, got {type(value).__name__}")
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
def _str_dict(value: object) -> dict[str, str]:
|
|
192
|
+
if value is None:
|
|
193
|
+
return {}
|
|
194
|
+
return {str(key): str(item) for key, item in cast(dict[Any, Any], value).items()}
|
|
Binary file
|