lokit-python 0.1.0__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (97) hide show
  1. 821d8b73c2a02cb7980f__mypyc.cp313-win_amd64.pyd +0 -0
  2. lokit/__init__.cp313-win_amd64.pyd +0 -0
  3. lokit/__init__.py +128 -0
  4. lokit/core/__init__.cp313-win_amd64.pyd +0 -0
  5. lokit/core/__init__.py +0 -0
  6. lokit/core/logger.cp313-win_amd64.pyd +0 -0
  7. lokit/core/logger.py +20 -0
  8. lokit/data/__init__.cp313-win_amd64.pyd +0 -0
  9. lokit/data/__init__.py +0 -0
  10. lokit/data/lang_codes.cp313-win_amd64.pyd +0 -0
  11. lokit/data/lang_codes.py +455 -0
  12. lokit/data/structure.cp313-win_amd64.pyd +0 -0
  13. lokit/data/structure.py +118 -0
  14. lokit/data/tag_types.cp313-win_amd64.pyd +0 -0
  15. lokit/data/tag_types.py +78 -0
  16. lokit/exporters/__init__.cp313-win_amd64.pyd +0 -0
  17. lokit/exporters/__init__.py +34 -0
  18. lokit/exporters/csv.cp313-win_amd64.pyd +0 -0
  19. lokit/exporters/csv.py +32 -0
  20. lokit/exporters/html.cp313-win_amd64.pyd +0 -0
  21. lokit/exporters/html.py +217 -0
  22. lokit/exporters/idml.cp313-win_amd64.pyd +0 -0
  23. lokit/exporters/idml.py +178 -0
  24. lokit/exporters/json_i18n.cp313-win_amd64.pyd +0 -0
  25. lokit/exporters/json_i18n.py +47 -0
  26. lokit/exporters/po.cp313-win_amd64.pyd +0 -0
  27. lokit/exporters/po.py +162 -0
  28. lokit/exporters/tmx.cp313-win_amd64.pyd +0 -0
  29. lokit/exporters/tmx.py +247 -0
  30. lokit/exporters/xliff.cp313-win_amd64.pyd +0 -0
  31. lokit/exporters/xliff.py +152 -0
  32. lokit/exporters/xlsx.cp313-win_amd64.pyd +0 -0
  33. lokit/exporters/xlsx.py +39 -0
  34. lokit/format_detection.cp313-win_amd64.pyd +0 -0
  35. lokit/format_detection.py +115 -0
  36. lokit/importers.py +321 -0
  37. lokit/io/__init__.cp313-win_amd64.pyd +0 -0
  38. lokit/io/__init__.py +3 -0
  39. lokit/io/json.cp313-win_amd64.pyd +0 -0
  40. lokit/io/json.py +194 -0
  41. lokit/logic.cp313-win_amd64.pyd +0 -0
  42. lokit/logic.py +324 -0
  43. lokit/parsers/__init__.cp313-win_amd64.pyd +0 -0
  44. lokit/parsers/__init__.py +1 -0
  45. lokit/parsers/csv/__init__.cp313-win_amd64.pyd +0 -0
  46. lokit/parsers/csv/__init__.py +1 -0
  47. lokit/parsers/csv/extraction.cp313-win_amd64.pyd +0 -0
  48. lokit/parsers/csv/extraction.py +164 -0
  49. lokit/parsers/html/__init__.cp313-win_amd64.pyd +0 -0
  50. lokit/parsers/html/__init__.py +3 -0
  51. lokit/parsers/html/extraction.cp313-win_amd64.pyd +0 -0
  52. lokit/parsers/html/extraction.py +365 -0
  53. lokit/parsers/idml/__init__.cp313-win_amd64.pyd +0 -0
  54. lokit/parsers/idml/__init__.py +3 -0
  55. lokit/parsers/idml/extraction.cp313-win_amd64.pyd +0 -0
  56. lokit/parsers/idml/extraction.py +264 -0
  57. lokit/parsers/json_i18n/__init__.cp313-win_amd64.pyd +0 -0
  58. lokit/parsers/json_i18n/__init__.py +3 -0
  59. lokit/parsers/json_i18n/extraction.cp313-win_amd64.pyd +0 -0
  60. lokit/parsers/json_i18n/extraction.py +163 -0
  61. lokit/parsers/po/__init__.cp313-win_amd64.pyd +0 -0
  62. lokit/parsers/po/__init__.py +3 -0
  63. lokit/parsers/po/extraction.cp313-win_amd64.pyd +0 -0
  64. lokit/parsers/po/extraction.py +236 -0
  65. lokit/parsers/tmx/__init__.cp313-win_amd64.pyd +0 -0
  66. lokit/parsers/tmx/__init__.py +0 -0
  67. lokit/parsers/tmx/base.cp313-win_amd64.pyd +0 -0
  68. lokit/parsers/tmx/base.py +145 -0
  69. lokit/parsers/tmx/extraction.cp313-win_amd64.pyd +0 -0
  70. lokit/parsers/tmx/extraction.py +170 -0
  71. lokit/parsers/tmx/header.cp313-win_amd64.pyd +0 -0
  72. lokit/parsers/tmx/header.py +55 -0
  73. lokit/parsers/tmx/helpers.cp313-win_amd64.pyd +0 -0
  74. lokit/parsers/tmx/helpers.py +9 -0
  75. lokit/parsers/tmx/models.cp313-win_amd64.pyd +0 -0
  76. lokit/parsers/tmx/models.py +10 -0
  77. lokit/parsers/tmx/props.cp313-win_amd64.pyd +0 -0
  78. lokit/parsers/tmx/props.py +201 -0
  79. lokit/parsers/tmx/tags.cp313-win_amd64.pyd +0 -0
  80. lokit/parsers/tmx/tags.py +59 -0
  81. lokit/parsers/tmx/xml_utils.cp313-win_amd64.pyd +0 -0
  82. lokit/parsers/tmx/xml_utils.py +46 -0
  83. lokit/parsers/xliff/__init__.cp313-win_amd64.pyd +0 -0
  84. lokit/parsers/xliff/__init__.py +3 -0
  85. lokit/parsers/xliff/extraction.cp313-win_amd64.pyd +0 -0
  86. lokit/parsers/xliff/extraction.py +229 -0
  87. lokit/parsers/xliff/tags.cp313-win_amd64.pyd +0 -0
  88. lokit/parsers/xliff/tags.py +128 -0
  89. lokit/parsers/xlsx/__init__.cp313-win_amd64.pyd +0 -0
  90. lokit/parsers/xlsx/__init__.py +1 -0
  91. lokit/parsers/xlsx/extraction.cp313-win_amd64.pyd +0 -0
  92. lokit/parsers/xlsx/extraction.py +198 -0
  93. lokit/py.typed +1 -0
  94. lokit_python-0.1.0.dist-info/METADATA +149 -0
  95. lokit_python-0.1.0.dist-info/RECORD +97 -0
  96. lokit_python-0.1.0.dist-info/WHEEL +5 -0
  97. lokit_python-0.1.0.dist-info/top_level.txt +2 -0
@@ -0,0 +1,115 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import zipfile
5
+ from enum import StrEnum
6
+ from io import BytesIO
7
+ from pathlib import Path
8
+
9
+ from lokit.parsers.tmx.xml_utils import iterparse_safe, local_name
10
+
11
+
12
+ class LokitInputFormat(StrEnum):
13
+ TMX = "tmx"
14
+ XLIFF = "xliff"
15
+ LOKIT_JSON = "lokit_json"
16
+ CSV = "csv"
17
+ XLSX = "xlsx"
18
+ HTML = "html"
19
+ PO = "po"
20
+ JSON_I18N = "json_i18n"
21
+ IDML = "idml"
22
+
23
+
24
def detect_format(filepath: str | Path) -> LokitInputFormat:
    """Detect the localization format of the file at *filepath*.

    The file suffix (case-insensitive) decides CSV, XLSX, HTML, PO and IDML
    directly. A ``.json`` file is parsed: a top-level object containing a
    ``"format_version"`` or ``"data"`` key is reported as ``LOKIT_JSON``,
    anything else (including unreadable or invalid JSON) as ``JSON_I18N``.
    Every other suffix is treated as XML and classified by its root element.

    Args:
        filepath: Path of the file to inspect.

    Returns:
        The detected ``LokitInputFormat`` member.

    Raises:
        ValueError: If the file is not matched by suffix and its root element
            cannot be read or is not a supported format.
    """
    path = Path(filepath)
    suffix = path.suffix.lower()
    if suffix == ".csv":
        return LokitInputFormat.CSV
    if suffix == ".xlsx":
        return LokitInputFormat.XLSX
    if suffix in (".html", ".htm"):
        return LokitInputFormat.HTML
    if suffix == ".po":
        return LokitInputFormat.PO
    if suffix == ".idml":
        return LokitInputFormat.IDML
    if suffix == ".json":
        data: object = None
        try:
            # "utf-8-sig" skips a leading byte-order mark. With plain "utf-8"
            # json.load rejects BOM-prefixed files, so a Lokit JSON export
            # saved with a BOM was silently misreported as JSON_I18N.
            with path.open("r", encoding="utf-8-sig") as f:
                data = json.load(f)
        except Exception:
            # Deliberate best effort: an unreadable or malformed file is still
            # classified by its suffix and left for the importer to reject.
            pass
        if isinstance(data, dict) and ("format_version" in data or "data" in data):
            return LokitInputFormat.LOKIT_JSON
        return LokitInputFormat.JSON_I18N

    try:
        context = iterparse_safe(str(path), events=("start",))
        # Only the first "start" event (the root element) is inspected.
        for _, element in context:
            return _format_from_root(local_name(element.tag))
    except Exception:
        # Parse failures and unsupported roots both end in the generic error
        # raised below.
        pass

    raise ValueError(f"Could not detect input format for file: {path}")
55
+
56
+
57
def detect_format_from_bytes(data: bytes) -> LokitInputFormat:
    """Detect the localization format of an in-memory document.

    Only the first 1000 bytes are used for sniffing; the full payload is
    handed to the JSON, ZIP and XML parsers when a candidate matches. Checks
    run in this order: JSON object, ZIP container (IDML when it has a
    ``Stories/`` entry, otherwise XLSX), XML/HTML markup, PO (``msgid``), and
    finally a delimiter heuristic for CSV.

    Args:
        data: Raw bytes of the document.

    Returns:
        The detected ``LokitInputFormat`` member.

    Raises:
        ValueError: If the input is empty (after removing a UTF-8 BOM and
            leading whitespace) or no check matches.
    """
    utf8_bom = b"\xef\xbb\xbf"
    chunk = data[:1000]
    # A UTF-8 byte-order mark is not whitespace, so lstrip() alone left it in
    # place and BOM-prefixed JSON/XML skipped the "{" / "<" sniffs below
    # (typically ending up classified as CSV). Drop it before sniffing.
    stripped = chunk.removeprefix(utf8_bom).lstrip()
    if not stripped:
        raise ValueError("Could not detect input format for empty byte input")

    if stripped.startswith(b"{"):
        parsed: object = None
        try:
            # json.loads detects and skips a BOM itself for bytes input.
            parsed = json.loads(data)
        except Exception:
            # Best effort: invalid JSON is still reported as JSON_I18N.
            pass
        if isinstance(parsed, dict) and ("format_version" in parsed or "data" in parsed):
            return LokitInputFormat.LOKIT_JSON
        return LokitInputFormat.JSON_I18N

    if stripped.startswith(b"PK\x03\x04"):
        try:
            with zipfile.ZipFile(BytesIO(data)) as z:
                names = z.namelist()
            if any(n.startswith("Stories/") for n in names):
                return LokitInputFormat.IDML
            return LokitInputFormat.XLSX
        except Exception:
            # Unreadable archive: fall through to the remaining heuristics.
            pass

    if stripped.startswith(b"<"):
        try:
            context = iterparse_safe(BytesIO(data), events=("start",))
            # Return on the first element whose local name is recognised.
            for _, element in context:
                tag = local_name(element.tag).lower()
                if tag == "tmx":
                    return LokitInputFormat.TMX
                if tag == "xliff":
                    return LokitInputFormat.XLIFF
                if tag in ("html", "head", "body", "p", "div"):
                    return LokitInputFormat.HTML
        except Exception:
            # Markup that is not well-formed XML may still be HTML; see below.
            pass
        lowered = stripped.lower()
        if b"<!doctype html" in lowered or b"<html" in lowered:
            return LokitInputFormat.HTML

    if b"msgid" in stripped:
        return LokitInputFormat.PO

    if b"," in stripped or b";" in stripped or b"\t" in stripped:
        return LokitInputFormat.CSV

    raise ValueError("Could not detect input format for byte input")
105
+
106
+
107
+ def _format_from_root(root_name: str) -> LokitInputFormat:
108
+ root_name_lower = root_name.lower()
109
+ if root_name_lower == "tmx":
110
+ return LokitInputFormat.TMX
111
+ if root_name_lower == "xliff":
112
+ return LokitInputFormat.XLIFF
113
+ if root_name_lower == "html":
114
+ return LokitInputFormat.HTML
115
+ raise ValueError(f"Unsupported localization format root: {root_name}")
lokit/importers.py ADDED
@@ -0,0 +1,321 @@
1
+ from __future__ import annotations
2
+
3
+ from collections.abc import AsyncIterator
4
+
5
+ from lokit.data.structure import BaseStructure, Data
6
+ from lokit.parsers.csv.extraction import CsvExtractor
7
+ from lokit.parsers.xlsx.extraction import XlsxExtractor
8
+ from lokit.parsers.html.extraction import HtmlExtractor
9
+ from lokit.parsers.po.extraction import PoExtractor
10
+ from lokit.parsers.json_i18n.extraction import JsonI18nExtractor
11
+ from lokit.parsers.idml.extraction import IdmlExtractor
12
+ from lokit.parsers.tmx.extraction import TmxExtractor
13
+ from lokit.parsers.xliff.extraction import XliffExtractor
14
+
15
+
16
def import_tmx(
    filepath: str,
    source_language: str | None = None,
    target_language: str | None = None,
    domain: str | None = None,
) -> BaseStructure:
    """Parse a TMX file eagerly and return it as a ``BaseStructure``.

    All arguments are forwarded by keyword to ``TmxExtractor``. Every
    ``(unit_id, data)`` pair produced by ``extract()`` is collected into a
    dict before ``_build_tmx_structure`` assembles the result.
    """
    tmx = TmxExtractor(
        filepath=filepath,
        source_language=source_language,
        target_language=target_language,
        domain=domain,
    )
    units: dict[str, Data] = {}
    for unit_id, data in tmx.extract():
        units[unit_id] = data
    return _build_tmx_structure(tmx, units)
32
+
33
+
34
async def import_tmx_async(
    filepath: str,
    source_language: str | None = None,
    target_language: str | None = None,
    domain: str | None = None,
) -> AsyncIterator[tuple[str, Data]]:
    """Asynchronously yield ``(unit_id, data)`` pairs from a TMX file.

    The arguments are forwarded by keyword to ``TmxExtractor`` and the pairs
    come from its ``extract_async()`` stream, one at a time.
    """
    tmx = TmxExtractor(
        filepath=filepath,
        source_language=source_language,
        target_language=target_language,
        domain=domain,
    )
    stream = tmx.extract_async()
    async for unit_id, data in stream:
        yield (unit_id, data)
48
+
49
+
50
def import_xliff(filepath: str) -> BaseStructure:
    """Parse an XLIFF file eagerly and return it as a ``BaseStructure``.

    Every ``(unit_id, data)`` pair from ``XliffExtractor.extract()`` is
    collected into a dict, then wrapped by ``_build_xliff_structure``.
    """
    xliff = XliffExtractor(filepath)
    units: dict[str, Data] = {}
    for unit_id, data in xliff.extract():
        units[unit_id] = data
    return _build_xliff_structure(xliff, units)
56
+
57
+
58
async def import_xliff_async(filepath: str) -> AsyncIterator[tuple[str, Data]]:
    """Asynchronously yield ``(unit_id, data)`` pairs from an XLIFF file."""
    stream = XliffExtractor(filepath).extract_async()
    async for unit_id, data in stream:
        yield (unit_id, data)
62
+
63
+
64
def import_csv(
    filepath: str,
    source_locale: str = "",
    target_locale: str | None = None,
) -> BaseStructure:
    """Parse a CSV file eagerly and return it as a ``BaseStructure``.

    The three arguments are passed positionally to ``CsvExtractor``; its
    ``extract()`` pairs are collected into a dict keyed by unit id.
    """
    csv_source = CsvExtractor(filepath, source_locale, target_locale)
    units: dict[str, Data] = {}
    for unit_id, data in csv_source.extract():
        units[unit_id] = data
    return _build_csv_structure(csv_source, units)
74
+
75
+
76
async def import_csv_async(
    filepath: str,
    source_locale: str = "",
    target_locale: str | None = None,
) -> AsyncIterator[tuple[str, Data]]:
    """Asynchronously yield ``(unit_id, data)`` pairs from a CSV file."""
    csv_source = CsvExtractor(filepath, source_locale, target_locale)
    stream = csv_source.extract_async()
    async for unit_id, data in stream:
        yield (unit_id, data)
84
+
85
+
86
def import_xlsx(
    filepath: str,
    source_locale: str = "",
    target_locale: str | None = None,
) -> BaseStructure:
    """Parse an XLSX file eagerly and return it as a ``BaseStructure``.

    The three arguments are passed positionally to ``XlsxExtractor``; its
    ``extract()`` pairs are collected into a dict keyed by unit id.
    """
    workbook = XlsxExtractor(filepath, source_locale, target_locale)
    units: dict[str, Data] = {}
    for unit_id, data in workbook.extract():
        units[unit_id] = data
    return _build_xlsx_structure(workbook, units)
96
+
97
+
98
async def import_xlsx_async(
    filepath: str,
    source_locale: str = "",
    target_locale: str | None = None,
) -> AsyncIterator[tuple[str, Data]]:
    """Asynchronously yield ``(unit_id, data)`` pairs from an XLSX file."""
    workbook = XlsxExtractor(filepath, source_locale, target_locale)
    stream = workbook.extract_async()
    async for unit_id, data in stream:
        yield (unit_id, data)
106
+
107
+
108
def import_html(
    filepath: str,
    source_locale: str = "",
    target_locale: str | None = None,
) -> BaseStructure:
    """Parse an HTML file eagerly and return it as a ``BaseStructure``.

    The three arguments are passed positionally to ``HtmlExtractor``; its
    ``extract()`` pairs are collected into a dict keyed by unit id.
    """
    page = HtmlExtractor(filepath, source_locale, target_locale)
    units: dict[str, Data] = {}
    for unit_id, data in page.extract():
        units[unit_id] = data
    return _build_html_structure(page, units)
118
+
119
+
120
async def import_html_async(
    filepath: str,
    source_locale: str = "",
    target_locale: str | None = None,
) -> AsyncIterator[tuple[str, Data]]:
    """Asynchronously yield ``(unit_id, data)`` pairs from an HTML file."""
    page = HtmlExtractor(filepath, source_locale, target_locale)
    stream = page.extract_async()
    async for unit_id, data in stream:
        yield (unit_id, data)
128
+
129
+
130
def import_po(
    filepath: str,
    source_locale: str = "",
    target_locale: str | None = None,
) -> BaseStructure:
    """Parse a PO file eagerly and return it as a ``BaseStructure``.

    The three arguments are passed positionally to ``PoExtractor``; its
    ``extract()`` pairs are collected into a dict keyed by unit id.
    """
    catalog = PoExtractor(filepath, source_locale, target_locale)
    units: dict[str, Data] = {}
    for unit_id, data in catalog.extract():
        units[unit_id] = data
    return _build_po_structure(catalog, units)
140
+
141
+
142
async def import_po_async(
    filepath: str,
    source_locale: str = "",
    target_locale: str | None = None,
) -> AsyncIterator[tuple[str, Data]]:
    """Asynchronously yield ``(unit_id, data)`` pairs from a PO file."""
    catalog = PoExtractor(filepath, source_locale, target_locale)
    stream = catalog.extract_async()
    async for unit_id, data in stream:
        yield (unit_id, data)
150
+
151
+
152
def import_json_i18n(
    filepath: str,
    source_locale: str = "",
    target_locale: str | None = None,
    target_filepath: str | None = None,
) -> BaseStructure:
    """Parse an i18n JSON file eagerly and return it as a ``BaseStructure``.

    The four arguments are passed positionally to ``JsonI18nExtractor``; its
    ``extract()`` pairs are collected into a dict keyed by unit id.
    """
    bundle = JsonI18nExtractor(filepath, source_locale, target_locale, target_filepath)
    units: dict[str, Data] = {}
    for unit_id, data in bundle.extract():
        units[unit_id] = data
    return _build_json_i18n_structure(bundle, units)
163
+
164
+
165
async def import_json_i18n_async(
    filepath: str,
    source_locale: str = "",
    target_locale: str | None = None,
    target_filepath: str | None = None,
) -> AsyncIterator[tuple[str, Data]]:
    """Asynchronously yield ``(unit_id, data)`` pairs from an i18n JSON file."""
    bundle = JsonI18nExtractor(filepath, source_locale, target_locale, target_filepath)
    stream = bundle.extract_async()
    async for unit_id, data in stream:
        yield (unit_id, data)
174
+
175
+
176
def import_idml(
    filepath: str,
    source_locale: str = "",
    target_locale: str | None = None,
) -> BaseStructure:
    """Parse an IDML file eagerly and return it as a ``BaseStructure``.

    The three arguments are passed positionally to ``IdmlExtractor``; its
    ``extract()`` pairs are collected into a dict keyed by unit id.
    """
    document = IdmlExtractor(filepath, source_locale, target_locale)
    units: dict[str, Data] = {}
    for unit_id, data in document.extract():
        units[unit_id] = data
    return _build_idml_structure(document, units)
186
+
187
+
188
async def import_idml_async(
    filepath: str,
    source_locale: str = "",
    target_locale: str | None = None,
) -> AsyncIterator[tuple[str, Data]]:
    """Asynchronously yield ``(unit_id, data)`` pairs from an IDML file."""
    document = IdmlExtractor(filepath, source_locale, target_locale)
    stream = document.extract_async()
    async for unit_id, data in stream:
        yield (unit_id, data)
196
+
197
+
198
def _build_tmx_structure(
    extractor: TmxExtractor,
    parsed_data: dict[str, Data],
) -> BaseStructure:
    """Wrap parsed TMX units and the extractor's metadata in a ``BaseStructure``.

    The extractor's ``source_locale`` / ``target_locale`` win when truthy;
    otherwise ``native_source`` / ``native_target`` are used, with a falsy
    target collapsing to ``None``.
    """
    source = extractor.source_locale or extractor.native_source
    target = extractor.target_locale or extractor.native_target or None
    return BaseStructure(
        source_locale=source,
        target_locale=target,
        data=parsed_data,
        source_language=extractor.source_language,
        target_language=extractor.target_language,
        export_origin=extractor.export_origin,
        export_timestamp=extractor.export_timestamp,
        extensions=extractor.extensions,
    )
212
+
213
+
214
def _build_xliff_structure(
    extractor: XliffExtractor,
    parsed_data: dict[str, Data],
) -> BaseStructure:
    """Wrap parsed XLIFF units and the extractor's metadata in a ``BaseStructure``.

    A falsy ``source_locale`` on the extractor is replaced by ``""``.
    """
    source = extractor.source_locale or ""
    target = extractor.target_locale
    return BaseStructure(
        source_locale=source,
        target_locale=target,
        data=parsed_data,
        source_language=extractor.source_language,
        target_language=extractor.target_language,
        export_origin=extractor.export_origin,
        export_timestamp=extractor.export_timestamp,
        extensions=extractor.extensions,
    )
228
+
229
+
230
def _build_csv_structure(
    extractor: CsvExtractor,
    parsed_data: dict[str, Data],
) -> BaseStructure:
    """Wrap parsed CSV units and the extractor's metadata in a ``BaseStructure``."""
    source = extractor.source_locale
    target = extractor.target_locale
    return BaseStructure(
        source_locale=source,
        target_locale=target,
        data=parsed_data,
        source_language=extractor.source_language,
        target_language=extractor.target_language,
        export_origin=extractor.export_origin,
        export_timestamp=extractor.export_timestamp,
        extensions=extractor.extensions,
    )
244
+
245
+
246
def _build_xlsx_structure(
    extractor: XlsxExtractor,
    parsed_data: dict[str, Data],
) -> BaseStructure:
    """Wrap parsed XLSX units and the extractor's metadata in a ``BaseStructure``."""
    source = extractor.source_locale
    target = extractor.target_locale
    return BaseStructure(
        source_locale=source,
        target_locale=target,
        data=parsed_data,
        source_language=extractor.source_language,
        target_language=extractor.target_language,
        export_origin=extractor.export_origin,
        export_timestamp=extractor.export_timestamp,
        extensions=extractor.extensions,
    )
260
+
261
+
262
def _build_html_structure(
    extractor: HtmlExtractor,
    parsed_data: dict[str, Data],
) -> BaseStructure:
    """Wrap parsed HTML units and the extractor's metadata in a ``BaseStructure``."""
    source = extractor.source_locale
    target = extractor.target_locale
    return BaseStructure(
        source_locale=source,
        target_locale=target,
        data=parsed_data,
        source_language=extractor.source_language,
        target_language=extractor.target_language,
        export_origin=extractor.export_origin,
        export_timestamp=extractor.export_timestamp,
        extensions=extractor.extensions,
    )
276
+
277
+
278
def _build_po_structure(
    extractor: PoExtractor,
    parsed_data: dict[str, Data],
) -> BaseStructure:
    """Wrap parsed PO units and the extractor's metadata in a ``BaseStructure``.

    No ``export_timestamp`` is passed, so ``BaseStructure`` applies its own
    default for that field.
    """
    source = extractor.source_locale
    target = extractor.target_locale
    return BaseStructure(
        source_locale=source,
        target_locale=target,
        data=parsed_data,
        source_language=extractor.source_language,
        target_language=extractor.target_language,
        export_origin=extractor.export_origin,
        extensions=extractor.extensions,
    )
291
+
292
+
293
def _build_json_i18n_structure(
    extractor: JsonI18nExtractor,
    parsed_data: dict[str, Data],
) -> BaseStructure:
    """Wrap parsed i18n JSON units and the extractor's metadata in a ``BaseStructure``.

    No ``export_timestamp`` is passed, so ``BaseStructure`` applies its own
    default for that field.
    """
    source = extractor.source_locale
    target = extractor.target_locale
    return BaseStructure(
        source_locale=source,
        target_locale=target,
        data=parsed_data,
        source_language=extractor.source_language,
        target_language=extractor.target_language,
        export_origin=extractor.export_origin,
        extensions=extractor.extensions,
    )
306
+
307
+
308
def _build_idml_structure(
    extractor: IdmlExtractor,
    parsed_data: dict[str, Data],
) -> BaseStructure:
    """Wrap parsed IDML units and the extractor's metadata in a ``BaseStructure``."""
    source = extractor.source_locale
    target = extractor.target_locale
    return BaseStructure(
        source_locale=source,
        target_locale=target,
        data=parsed_data,
        source_language=extractor.source_language,
        target_language=extractor.target_language,
        export_origin=extractor.export_origin,
        export_timestamp=extractor.export_timestamp,
        extensions=extractor.extensions,
    )
Binary file
lokit/io/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ from lokit.io.json import load_lokit_json, load_lokit_json_bytes
2
+
3
+ __all__ = ["load_lokit_json", "load_lokit_json_bytes"]
Binary file
lokit/io/json.py ADDED
@@ -0,0 +1,194 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ from pathlib import Path
5
+ from typing import Any, cast
6
+
7
+ from lokit.data.structure import (
8
+ AdjacentContext,
9
+ BaseStructure,
10
+ CodePart,
11
+ Comment,
12
+ Data,
13
+ Meta,
14
+ Origin,
15
+ Plural,
16
+ PluralCategory,
17
+ SegmentPart,
18
+ Tags,
19
+ TextPart,
20
+ TranslationStatus,
21
+ )
22
+ from lokit.data.tag_types import TieData, TieType
23
+
24
+
25
def load_lokit_json(filepath: str | Path) -> BaseStructure:
    """Load a Lokit JSON document from *filepath* into a ``BaseStructure``.

    Args:
        filepath: Path of the JSON file to read.

    Returns:
        The structure built by ``_parse_base`` from the decoded document.

    Raises:
        OSError: If the file cannot be read.
        json.JSONDecodeError: If the content is not valid JSON.
        KeyError: If a required key is missing (raised by the parse helpers).
    """
    # "utf-8-sig" strips a leading byte-order mark, matching
    # load_lokit_json_bytes. With plain "utf-8" the BOM reaches json.loads as
    # "\ufeff" and a BOM-prefixed file is rejected.
    text = Path(filepath).read_text(encoding="utf-8-sig")
    raw = json.loads(text)
    return _parse_base(cast(dict[str, Any], raw))
28
+
29
+
30
def load_lokit_json_bytes(data: bytes) -> BaseStructure:
    """Load a Lokit JSON document from raw bytes into a ``BaseStructure``.

    The bytes are decoded as ``utf-8-sig``, so a leading BOM is tolerated.
    """
    text = data.decode("utf-8-sig")
    document = cast(dict[str, Any], json.loads(text))
    return _parse_base(document)
33
+
34
+
35
def _parse_base(raw: dict[str, Any]) -> BaseStructure:
    """Build a ``BaseStructure`` from a decoded Lokit JSON document.

    ``source_locale`` is required. A key that is present with a JSON ``null``
    value is treated exactly like a missing key: ``data`` becomes empty and
    ``format_version`` / ``export_origin`` / ``export_timestamp`` fall back to
    their defaults instead of turning into the literal string ``"None"``.

    Raises:
        KeyError: If ``"source_locale"`` (or a unit's required key) is missing.
    """

    def _text(key: str, default: str) -> str:
        # Stringify raw[key], using *default* for a missing or null value.
        value = raw.get(key)
        return default if value is None else str(value)

    # "or {}" mirrors _parse_tag_map: a null "data" would otherwise crash on
    # .items().
    data_raw = cast(dict[str, Any], raw.get("data") or {})
    return BaseStructure(
        source_locale=str(raw["source_locale"]),
        target_locale=_optional_str(raw.get("target_locale")),
        data={
            str(unit_id): _parse_data(cast(dict[str, Any], unit_raw))
            for unit_id, unit_raw in data_raw.items()
        },
        format_version=_text("format_version", "0.1"),
        export_origin=_text("export_origin", ""),
        export_timestamp=_text("export_timestamp", ""),
        source_language=_optional_str(raw.get("source_language")),
        target_language=_optional_str(raw.get("target_language")),
        extensions=_str_dict(raw.get("extensions")),
    )
51
+
52
+
53
def _parse_data(raw: dict[str, Any]) -> Data:
    """Build one ``Data`` unit from its decoded JSON object.

    ``source`` is required. ``meta``, ``comments`` and ``status`` may be
    missing or JSON ``null``; both cases yield an empty meta, no comments and
    ``TranslationStatus.UNKNOWN`` respectively.

    Raises:
        KeyError: If ``"source"`` is missing.
        ValueError: If ``status`` is not a valid ``TranslationStatus`` value.
    """
    # A null value is handled like a missing key, as _parse_tag_map and
    # _parse_parts already do; previously null meta/comments/status crashed.
    meta_raw = cast(dict[str, Any], raw.get("meta") or {})
    comments_raw = cast(list[Any], raw.get("comments") or [])
    status_raw = raw.get("status")
    if status_raw is None:
        status_raw = TranslationStatus.UNKNOWN
    return Data(
        source=str(raw["source"]),
        target=_optional_str(raw.get("target")),
        plural=_parse_plural(raw.get("plural")),
        tags=_parse_tags(raw.get("tags")),
        meta=_parse_meta(meta_raw),
        status=TranslationStatus(str(status_raw)),
        comments=[
            _parse_comment(cast(dict[str, Any], item))
            for item in comments_raw
        ],
        previous_context=_parse_adjacent_context(raw.get("previous_context")),
        next_context=_parse_adjacent_context(raw.get("next_context")),
        extensions=_str_dict(raw.get("extensions")),
    )
69
+
70
+
71
+ def _parse_plural(raw: object) -> Plural | None:
72
+ if raw is None:
73
+ return None
74
+ data = cast(dict[str, Any], raw)
75
+ category = data.get("category")
76
+ return Plural(
77
+ variant=str(data["variant"]),
78
+ count=_optional_int(data.get("count")),
79
+ category=PluralCategory(str(category)) if category is not None else None,
80
+ extensions=_str_dict(data.get("extensions")),
81
+ )
82
+
83
+
84
def _parse_meta(raw: dict[str, Any]) -> Meta:
    """Build a ``Meta`` from its JSON object; every field is optional."""
    lookup = raw.get
    return Meta(
        usage_count=_optional_int(lookup("usage_count")),
        last_used=_optional_str(lookup("last_used")),
        first_used=_optional_str(lookup("first_used")),
        created=_optional_str(lookup("created")),
        updated=_optional_str(lookup("updated")),
        max_length=_optional_int(lookup("max_length")),
        min_length=_optional_int(lookup("min_length")),
        extensions=_str_dict(lookup("extensions")),
    )
95
+
96
+
97
def _parse_comment(raw: dict[str, Any]) -> Comment:
    """Build a ``Comment`` from its JSON object.

    A missing ``context`` becomes ``""``; the other fields are optional.
    """
    context_text = str(raw.get("context", ""))
    return Comment(
        context=context_text,
        timestamp=_optional_str(raw.get("timestamp")),
        origin=_parse_origin(raw.get("origin")),
        context_key=_optional_str(raw.get("context_key")),
        extensions=_str_dict(raw.get("extensions")),
    )
105
+
106
+
107
+ def _parse_origin(raw: object) -> Origin | None:
108
+ if raw is None:
109
+ return None
110
+ data = cast(dict[str, Any], raw)
111
+ return Origin(
112
+ system=_optional_str(data.get("system")),
113
+ project=_optional_str(data.get("project")),
114
+ creator_id=_optional_str(data.get("creator_id")),
115
+ extensions=_str_dict(data.get("extensions")),
116
+ )
117
+
118
+
119
+ def _parse_adjacent_context(raw: object) -> AdjacentContext | None:
120
+ if raw is None:
121
+ return None
122
+ data = cast(dict[str, Any], raw)
123
+ return AdjacentContext(
124
+ unit_id=_optional_str(data.get("unit_id")),
125
+ source=_optional_str(data.get("source")),
126
+ target=_optional_str(data.get("target")),
127
+ extensions=_str_dict(data.get("extensions")),
128
+ )
129
+
130
+
131
+ def _parse_tags(raw: object) -> Tags | None:
132
+ if raw is None:
133
+ return None
134
+ data = cast(dict[str, Any], raw)
135
+ return Tags(
136
+ source_tag_map=_parse_tag_map(data.get("source_tag_map")),
137
+ target_tag_map=_parse_tag_map(data.get("target_tag_map")),
138
+ source_parts=_parse_parts(data.get("source_parts")),
139
+ target_parts=_parse_parts(data.get("target_parts")),
140
+ )
141
+
142
+
143
+ def _parse_tag_map(raw: object) -> dict[str, TieData]:
144
+ data = cast(dict[str, Any], raw or {})
145
+ return {
146
+ str(tag_id): _parse_tie_data(cast(dict[str, Any], tag_raw))
147
+ for tag_id, tag_raw in data.items()
148
+ }
149
+
150
+
151
def _parse_tie_data(raw: dict[str, Any]) -> TieData:
    """Build a ``TieData`` from its JSON object.

    ``id`` and ``type`` are required; ``attribute_data`` defaults to ``""``
    and ``position`` / ``order`` default to ``0`` when absent.
    """
    tie_id = str(raw["id"])
    tie_type = TieType(str(raw["type"]))
    return TieData(
        id=tie_id,
        type=tie_type,
        attributes=_str_dict(raw.get("attributes")),
        attribute_data=str(raw.get("attribute_data", "")),
        position=int(raw.get("position", 0)),
        order=int(raw.get("order", 0)),
        pair_id=_optional_str(raw.get("pair_id")),
        original_name=_optional_str(raw.get("original_name")),
    )
162
+
163
+
164
+ def _parse_parts(raw: object) -> list[SegmentPart]:
165
+ parts: list[SegmentPart] = []
166
+ for item in cast(list[Any], raw or []):
167
+ data = cast(dict[str, Any], item)
168
+ if "ref" in data:
169
+ parts.append(CodePart(ref=str(data["ref"])))
170
+ else:
171
+ parts.append(TextPart(value=str(data.get("value", ""))))
172
+ return parts
173
+
174
+
175
+ def _optional_str(value: object) -> str | None:
176
+ if value is None:
177
+ return None
178
+ return str(value)
179
+
180
+
181
+ def _optional_int(value: object) -> int | None:
182
+ if value is None:
183
+ return None
184
+ if isinstance(value, int):
185
+ return value
186
+ if isinstance(value, str):
187
+ return int(value)
188
+ raise TypeError(f"Expected int-compatible value, got {type(value).__name__}")
189
+
190
+
191
+ def _str_dict(value: object) -> dict[str, str]:
192
+ if value is None:
193
+ return {}
194
+ return {str(key): str(item) for key, item in cast(dict[Any, Any], value).items()}
Binary file