lokit-python 0.1.0__cp313-cp313-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- 821d8b73c2a02cb7980f__mypyc.cp313-win_amd64.pyd +0 -0
- lokit/__init__.cp313-win_amd64.pyd +0 -0
- lokit/__init__.py +128 -0
- lokit/core/__init__.cp313-win_amd64.pyd +0 -0
- lokit/core/__init__.py +0 -0
- lokit/core/logger.cp313-win_amd64.pyd +0 -0
- lokit/core/logger.py +20 -0
- lokit/data/__init__.cp313-win_amd64.pyd +0 -0
- lokit/data/__init__.py +0 -0
- lokit/data/lang_codes.cp313-win_amd64.pyd +0 -0
- lokit/data/lang_codes.py +455 -0
- lokit/data/structure.cp313-win_amd64.pyd +0 -0
- lokit/data/structure.py +118 -0
- lokit/data/tag_types.cp313-win_amd64.pyd +0 -0
- lokit/data/tag_types.py +78 -0
- lokit/exporters/__init__.cp313-win_amd64.pyd +0 -0
- lokit/exporters/__init__.py +34 -0
- lokit/exporters/csv.cp313-win_amd64.pyd +0 -0
- lokit/exporters/csv.py +32 -0
- lokit/exporters/html.cp313-win_amd64.pyd +0 -0
- lokit/exporters/html.py +217 -0
- lokit/exporters/idml.cp313-win_amd64.pyd +0 -0
- lokit/exporters/idml.py +178 -0
- lokit/exporters/json_i18n.cp313-win_amd64.pyd +0 -0
- lokit/exporters/json_i18n.py +47 -0
- lokit/exporters/po.cp313-win_amd64.pyd +0 -0
- lokit/exporters/po.py +162 -0
- lokit/exporters/tmx.cp313-win_amd64.pyd +0 -0
- lokit/exporters/tmx.py +247 -0
- lokit/exporters/xliff.cp313-win_amd64.pyd +0 -0
- lokit/exporters/xliff.py +152 -0
- lokit/exporters/xlsx.cp313-win_amd64.pyd +0 -0
- lokit/exporters/xlsx.py +39 -0
- lokit/format_detection.cp313-win_amd64.pyd +0 -0
- lokit/format_detection.py +115 -0
- lokit/importers.py +321 -0
- lokit/io/__init__.cp313-win_amd64.pyd +0 -0
- lokit/io/__init__.py +3 -0
- lokit/io/json.cp313-win_amd64.pyd +0 -0
- lokit/io/json.py +194 -0
- lokit/logic.cp313-win_amd64.pyd +0 -0
- lokit/logic.py +324 -0
- lokit/parsers/__init__.cp313-win_amd64.pyd +0 -0
- lokit/parsers/__init__.py +1 -0
- lokit/parsers/csv/__init__.cp313-win_amd64.pyd +0 -0
- lokit/parsers/csv/__init__.py +1 -0
- lokit/parsers/csv/extraction.cp313-win_amd64.pyd +0 -0
- lokit/parsers/csv/extraction.py +164 -0
- lokit/parsers/html/__init__.cp313-win_amd64.pyd +0 -0
- lokit/parsers/html/__init__.py +3 -0
- lokit/parsers/html/extraction.cp313-win_amd64.pyd +0 -0
- lokit/parsers/html/extraction.py +365 -0
- lokit/parsers/idml/__init__.cp313-win_amd64.pyd +0 -0
- lokit/parsers/idml/__init__.py +3 -0
- lokit/parsers/idml/extraction.cp313-win_amd64.pyd +0 -0
- lokit/parsers/idml/extraction.py +264 -0
- lokit/parsers/json_i18n/__init__.cp313-win_amd64.pyd +0 -0
- lokit/parsers/json_i18n/__init__.py +3 -0
- lokit/parsers/json_i18n/extraction.cp313-win_amd64.pyd +0 -0
- lokit/parsers/json_i18n/extraction.py +163 -0
- lokit/parsers/po/__init__.cp313-win_amd64.pyd +0 -0
- lokit/parsers/po/__init__.py +3 -0
- lokit/parsers/po/extraction.cp313-win_amd64.pyd +0 -0
- lokit/parsers/po/extraction.py +236 -0
- lokit/parsers/tmx/__init__.cp313-win_amd64.pyd +0 -0
- lokit/parsers/tmx/__init__.py +0 -0
- lokit/parsers/tmx/base.cp313-win_amd64.pyd +0 -0
- lokit/parsers/tmx/base.py +145 -0
- lokit/parsers/tmx/extraction.cp313-win_amd64.pyd +0 -0
- lokit/parsers/tmx/extraction.py +170 -0
- lokit/parsers/tmx/header.cp313-win_amd64.pyd +0 -0
- lokit/parsers/tmx/header.py +55 -0
- lokit/parsers/tmx/helpers.cp313-win_amd64.pyd +0 -0
- lokit/parsers/tmx/helpers.py +9 -0
- lokit/parsers/tmx/models.cp313-win_amd64.pyd +0 -0
- lokit/parsers/tmx/models.py +10 -0
- lokit/parsers/tmx/props.cp313-win_amd64.pyd +0 -0
- lokit/parsers/tmx/props.py +201 -0
- lokit/parsers/tmx/tags.cp313-win_amd64.pyd +0 -0
- lokit/parsers/tmx/tags.py +59 -0
- lokit/parsers/tmx/xml_utils.cp313-win_amd64.pyd +0 -0
- lokit/parsers/tmx/xml_utils.py +46 -0
- lokit/parsers/xliff/__init__.cp313-win_amd64.pyd +0 -0
- lokit/parsers/xliff/__init__.py +3 -0
- lokit/parsers/xliff/extraction.cp313-win_amd64.pyd +0 -0
- lokit/parsers/xliff/extraction.py +229 -0
- lokit/parsers/xliff/tags.cp313-win_amd64.pyd +0 -0
- lokit/parsers/xliff/tags.py +128 -0
- lokit/parsers/xlsx/__init__.cp313-win_amd64.pyd +0 -0
- lokit/parsers/xlsx/__init__.py +1 -0
- lokit/parsers/xlsx/extraction.cp313-win_amd64.pyd +0 -0
- lokit/parsers/xlsx/extraction.py +198 -0
- lokit/py.typed +1 -0
- lokit_python-0.1.0.dist-info/METADATA +149 -0
- lokit_python-0.1.0.dist-info/RECORD +97 -0
- lokit_python-0.1.0.dist-info/WHEEL +5 -0
- lokit_python-0.1.0.dist-info/top_level.txt +2 -0
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
import json
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Any, AsyncIterator, Iterator, Optional
|
|
8
|
+
|
|
9
|
+
from lokit.data.structure import Data, Meta, TranslationStatus
|
|
10
|
+
|
|
11
|
+
ExtractItem = tuple[str, Data]
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@dataclass(slots=True)
|
|
15
|
+
class _AsyncExtractionResult:
|
|
16
|
+
item: Optional[ExtractItem] = None
|
|
17
|
+
error: Optional[BaseException] = None
|
|
18
|
+
done: bool = False
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class _AsyncJsonI18nExtraction:
|
|
22
|
+
def __init__(self, extractor: JsonI18nExtractor) -> None:
|
|
23
|
+
self._extractor = extractor
|
|
24
|
+
self._queue: asyncio.Queue[_AsyncExtractionResult] = asyncio.Queue()
|
|
25
|
+
self._producer: asyncio.Task[None] | None = None
|
|
26
|
+
|
|
27
|
+
def __aiter__(self) -> _AsyncJsonI18nExtraction:
|
|
28
|
+
return self
|
|
29
|
+
|
|
30
|
+
async def __anext__(self) -> ExtractItem:
|
|
31
|
+
if self._producer is None:
|
|
32
|
+
self._start()
|
|
33
|
+
result = await self._queue.get()
|
|
34
|
+
if result.done:
|
|
35
|
+
await self._finish()
|
|
36
|
+
raise StopAsyncIteration
|
|
37
|
+
if result.error is not None:
|
|
38
|
+
await self._finish()
|
|
39
|
+
raise result.error
|
|
40
|
+
if result.item is None:
|
|
41
|
+
await self._finish()
|
|
42
|
+
raise StopAsyncIteration
|
|
43
|
+
return result.item
|
|
44
|
+
|
|
45
|
+
def _start(self) -> None:
|
|
46
|
+
loop = asyncio.get_running_loop()
|
|
47
|
+
|
|
48
|
+
def produce() -> None:
|
|
49
|
+
try:
|
|
50
|
+
for item in self._extractor.extract():
|
|
51
|
+
loop.call_soon_threadsafe(
|
|
52
|
+
self._queue.put_nowait,
|
|
53
|
+
_AsyncExtractionResult(item=item),
|
|
54
|
+
)
|
|
55
|
+
except BaseException as exc:
|
|
56
|
+
loop.call_soon_threadsafe(
|
|
57
|
+
self._queue.put_nowait,
|
|
58
|
+
_AsyncExtractionResult(error=exc),
|
|
59
|
+
)
|
|
60
|
+
finally:
|
|
61
|
+
loop.call_soon_threadsafe(
|
|
62
|
+
self._queue.put_nowait,
|
|
63
|
+
_AsyncExtractionResult(done=True),
|
|
64
|
+
)
|
|
65
|
+
|
|
66
|
+
self._producer = asyncio.create_task(asyncio.to_thread(produce))
|
|
67
|
+
|
|
68
|
+
async def _finish(self) -> None:
|
|
69
|
+
if self._producer is not None:
|
|
70
|
+
await self._producer
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
class JsonI18nExtractor:
    """Extract translation units from (optionally nested) JSON i18n files.

    Nested objects are flattened into dot-separated keys. A second file can
    be supplied as the translated counterpart; its values are matched to the
    source by flattened key.
    """

    def __init__(
        self,
        filepath: str,
        source_locale: str = "",
        target_locale: str | None = None,
        target_filepath: str | None = None,
    ) -> None:
        """Store the inputs; no file is read until ``extract()`` runs.

        Args:
            filepath: Path of the source-language JSON file.
            source_locale: Source locale; inferred from the file name during
                extraction when empty.
            target_locale: Target locale; inferred from ``target_filepath``
                during extraction when not given.
            target_filepath: Optional path of the translated JSON file.
        """
        self.filepath = filepath
        self.source_locale = source_locale
        self.target_locale = target_locale
        self.target_filepath = target_filepath
        self.source_language: str | None = None
        self.target_language: str | None = None
        self.export_origin = ""
        self.extensions: dict[str, str] = {"input_format": "json_i18n"}

    def extract(self) -> Iterator[ExtractItem]:
        """Yield ``(key, Data)`` for every string value of the source file.

        The status is ``TRANSLATED`` when the target file holds a non-empty
        string for the same key, otherwise ``NEW``.

        Raises:
            ValueError: if a file is not valid JSON or its root is not an
                object.
        """
        source_flat = self._flatten(self._load_json(self.filepath))

        target_flat: dict[str, str] = {}
        if self.target_filepath is not None:
            target_flat = self._flatten(self._load_json(self.target_filepath))

        self._infer_locale()

        for key, source_value in source_flat.items():
            target_value = target_flat.get(key)
            status = (
                TranslationStatus.TRANSLATED
                if target_value
                else TranslationStatus.NEW
            )
            data = Data(
                source=source_value,
                target=target_value,
                meta=Meta(),
                status=status,
                extensions={"input_format": "json_i18n"},
            )
            yield key, data

    def extract_async(self) -> AsyncIterator[ExtractItem]:
        """Return an async iterator over the same items as ``extract()``."""
        return _AsyncJsonI18nExtraction(self)

    def _load_json(self, filepath: str) -> dict[str, Any]:
        """Read *filepath* as UTF-8 JSON and return its top-level object.

        Raises:
            ValueError: if the document root is not a JSON object. Without
                this check a list or scalar root would only fail later, in
                ``_flatten``, with an unrelated ``AttributeError``.
        """
        with Path(filepath).open("r", encoding="utf-8") as f:
            result = json.load(f)
        if not isinstance(result, dict):
            raise ValueError(
                f"Expected a JSON object at the top level of {filepath!r}, "
                f"got {type(result).__name__}"
            )
        return result

    def _flatten(
        self, obj: dict[str, Any], prefix: str = ""
    ) -> dict[str, str]:
        """Flatten nested objects into ``{"a.b.c": "text"}``.

        Only string leaves are kept; values of any other type (numbers,
        booleans, lists, null) are skipped.
        """
        flat: dict[str, str] = {}
        for key, value in obj.items():
            full_key = f"{prefix}.{key}" if prefix else key
            if isinstance(value, dict):
                flat.update(self._flatten(value, full_key))
            elif isinstance(value, str):
                flat[full_key] = value
        return flat

    def _infer_locale(self) -> None:
        """Fill in missing locales from file names and derive base languages."""
        if not self.source_locale:
            inferred = self._locale_from_filename(self.filepath)
            if inferred:
                self.source_locale = inferred
        if self.source_locale:
            self.source_language = self._base_language(self.source_locale)
        if not self.target_locale and self.target_filepath:
            inferred = self._locale_from_filename(self.target_filepath)
            if inferred:
                self.target_locale = inferred
        if self.target_locale:
            self.target_language = self._base_language(self.target_locale)

    def _locale_from_filename(self, filepath: str) -> str | None:
        """Return the file stem when it is shaped like a locale tag.

        Accepted: a 2-3 letter primary subtag, optionally followed by
        ``-``/``_`` separated alphanumeric subtags of 1-8 characters
        (``en``, ``pt_BR``, ``zh-Hans-CN``, ``es-419``). Namespace-style
        names such as ``common.json`` or ``messages.json`` are rejected so
        they are not mistaken for a locale.
        """
        path = Path(filepath)
        if path.suffix.lower() != ".json":
            return None
        stem = path.stem
        primary, *subtags = stem.replace("_", "-").split("-")
        if not (2 <= len(primary) <= 3 and primary.isascii() and primary.isalpha()):
            return None
        for subtag in subtags:
            # An empty subtag (doubled separator) fails the length check.
            if not (1 <= len(subtag) <= 8 and subtag.isascii() and subtag.isalnum()):
                return None
        return stem

    def _base_language(self, locale: str) -> str:
        """Return the lower-cased primary subtag, e.g. ``pt_BR`` -> ``pt``."""
        return locale.replace("_", "-").split("-")[0].lower()
|
|
Binary file
|
|
Binary file
|
|
@@ -0,0 +1,236 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
from typing import Any, AsyncIterator, Iterator, Optional
|
|
6
|
+
|
|
7
|
+
import polib
|
|
8
|
+
|
|
9
|
+
from lokit.data.structure import (
|
|
10
|
+
Comment,
|
|
11
|
+
Data,
|
|
12
|
+
Meta,
|
|
13
|
+
Plural,
|
|
14
|
+
PluralCategory,
|
|
15
|
+
TranslationStatus,
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
ExtractItem = tuple[str, Data]
|
|
19
|
+
|
|
20
|
+
_PLURAL_CATEGORIES: tuple[PluralCategory, ...] = (
|
|
21
|
+
PluralCategory.ONE,
|
|
22
|
+
PluralCategory.TWO,
|
|
23
|
+
PluralCategory.FEW,
|
|
24
|
+
PluralCategory.MANY,
|
|
25
|
+
PluralCategory.OTHER,
|
|
26
|
+
)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def _category_from_index(index: int) -> PluralCategory:
    """Map a PO plural-form index to a plural category.

    Indexes inside ``_PLURAL_CATEGORIES`` return the entry at that position;
    any index at or beyond its length returns ``PluralCategory.OTHER``.
    """
    # NOTE(review): gettext plural indexes are language-specific, so a purely
    # positional mapping may not match the language's real categories -
    # confirm against the Plural-Forms header handling elsewhere.
    within_table = index < len(_PLURAL_CATEGORIES)
    return _PLURAL_CATEGORIES[index] if within_table else PluralCategory.OTHER
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
@dataclass(slots=True)
|
|
36
|
+
class _AsyncExtractionResult:
|
|
37
|
+
item: Optional[ExtractItem] = None
|
|
38
|
+
error: Optional[BaseException] = None
|
|
39
|
+
done: bool = False
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class _AsyncPoExtraction:
|
|
43
|
+
def __init__(self, extractor: PoExtractor) -> None:
|
|
44
|
+
self._extractor = extractor
|
|
45
|
+
self._queue: asyncio.Queue[_AsyncExtractionResult] = asyncio.Queue()
|
|
46
|
+
self._producer: asyncio.Task[None] | None = None
|
|
47
|
+
|
|
48
|
+
def __aiter__(self) -> _AsyncPoExtraction:
|
|
49
|
+
return self
|
|
50
|
+
|
|
51
|
+
async def __anext__(self) -> ExtractItem:
|
|
52
|
+
if self._producer is None:
|
|
53
|
+
self._start()
|
|
54
|
+
result = await self._queue.get()
|
|
55
|
+
if result.done:
|
|
56
|
+
await self._finish()
|
|
57
|
+
raise StopAsyncIteration
|
|
58
|
+
if result.error is not None:
|
|
59
|
+
await self._finish()
|
|
60
|
+
raise result.error
|
|
61
|
+
if result.item is None:
|
|
62
|
+
await self._finish()
|
|
63
|
+
raise StopAsyncIteration
|
|
64
|
+
return result.item
|
|
65
|
+
|
|
66
|
+
def _start(self) -> None:
|
|
67
|
+
loop = asyncio.get_running_loop()
|
|
68
|
+
|
|
69
|
+
def produce() -> None:
|
|
70
|
+
try:
|
|
71
|
+
for item in self._extractor.extract():
|
|
72
|
+
loop.call_soon_threadsafe(
|
|
73
|
+
self._queue.put_nowait,
|
|
74
|
+
_AsyncExtractionResult(item=item),
|
|
75
|
+
)
|
|
76
|
+
except BaseException as exc:
|
|
77
|
+
loop.call_soon_threadsafe(
|
|
78
|
+
self._queue.put_nowait,
|
|
79
|
+
_AsyncExtractionResult(error=exc),
|
|
80
|
+
)
|
|
81
|
+
finally:
|
|
82
|
+
loop.call_soon_threadsafe(
|
|
83
|
+
self._queue.put_nowait,
|
|
84
|
+
_AsyncExtractionResult(done=True),
|
|
85
|
+
)
|
|
86
|
+
|
|
87
|
+
self._producer = asyncio.create_task(asyncio.to_thread(produce))
|
|
88
|
+
|
|
89
|
+
async def _finish(self) -> None:
|
|
90
|
+
if self._producer is not None:
|
|
91
|
+
await self._producer
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
class PoExtractor:
    """Extract translation units from a gettext PO file using ``polib``."""

    def __init__(
        self,
        filepath: str,
        source_locale: str = "",
        target_locale: str | None = None,
    ) -> None:
        """Store the inputs; the file is not read until ``extract()`` runs.

        Args:
            filepath: Path of the PO file.
            source_locale: Locale of the ``msgid`` text, if known.
            target_locale: Locale of the translations; when not given it is
                taken from the file's ``Language`` metadata during extraction.
        """
        self.filepath = filepath
        self.source_locale = source_locale
        self.target_locale = target_locale
        self.source_language: str | None = None
        self.target_language: str | None = None
        self.export_origin = ""
        self.extensions: dict[str, str] = {"input_format": "po"}

    def extract(self) -> Iterator[ExtractItem]:
        """Yield ``(unit_id, Data)`` for every non-obsolete entry.

        Plural entries yield one item for form 0 followed by one item per
        additional plural form.
        """
        po: Any = polib.pofile(self.filepath)
        self._read_metadata(po)

        for entry in po:
            if entry.obsolete:
                continue

            if entry.msgid_plural:
                yield from self._extract_plural(entry)
            else:
                yield self._extract_singular(entry)

    def extract_async(self) -> AsyncIterator[ExtractItem]:
        """Return an async iterator over the same items as ``extract()``."""
        return _AsyncPoExtraction(self)

    def _read_metadata(self, po: Any) -> None:
        """Populate locale, language and origin fields from the PO header."""
        metadata: dict[str, str] = po.metadata or {}
        lang = metadata.get("Language", "")
        # An explicitly supplied target locale wins over the header value.
        if lang and not self.target_locale:
            self.target_locale = lang
        if self.target_locale:
            self.target_language = self._base_language(self.target_locale)
        if self.source_locale:
            self.source_language = self._base_language(self.source_locale)
        self.export_origin = metadata.get("X-Generator", "")

    def _extract_singular(self, entry: Any) -> ExtractItem:
        """Build the item for an entry without plural forms."""
        unit_id = self._unit_id(entry)
        # An empty msgstr means "untranslated"; represent it as None.
        target = entry.msgstr if entry.msgstr else None
        status = self._status(entry)
        comments = self._comments(entry)
        extensions = self._extensions(entry)
        data = Data(
            source=entry.msgid,
            target=target,
            meta=Meta(),
            status=status,
            comments=comments,
            extensions=extensions,
        )
        return unit_id, data

    def _extract_plural(self, entry: Any) -> Iterator[ExtractItem]:
        """Yield the items for an entry that has ``msgid_plural``.

        Form 0 is emitted under the plain unit id and carries the entry's
        comments. Every other form ``n`` is emitted as ``"<unit_id>[n]"``
        with an empty comment list and its own copy of the extensions.
        """
        unit_id = self._unit_id(entry)
        plural_dict: dict[int, str] = entry.msgstr_plural or {}
        base_target = plural_dict.get(0) or None
        status = self._status(entry)
        comments = self._comments(entry)
        extensions = self._extensions(entry)
        data = Data(
            source=entry.msgid,
            target=base_target,
            plural=Plural(variant=entry.msgid_plural),
            meta=Meta(),
            status=status,
            comments=comments,
            extensions=extensions,
        )
        yield unit_id, data

        for n in sorted(plural_dict):
            if n == 0:
                continue
            plural_target = plural_dict[n] if plural_dict[n] else None
            plural_data = Data(
                source=entry.msgid,
                target=plural_target,
                plural=Plural(
                    variant=entry.msgid_plural,
                    category=_category_from_index(n),
                ),
                meta=Meta(),
                status=self._plural_form_status(plural_target, entry),
                comments=[],
                extensions=extensions.copy(),
            )
            yield f"{unit_id}[{n}]", plural_data

    def _unit_id(self, entry: Any) -> str:
        """Return ``msgid``, prefixed by ``msgctxt`` and ``\\x04`` if present."""
        if entry.msgctxt:
            return f"{entry.msgctxt}\x04{entry.msgid}"
        return str(entry.msgid)

    def _status(self, entry: Any) -> TranslationStatus:
        """Derive the entry status from the fuzzy flag and form-0 text."""
        if "fuzzy" in entry.flags:
            return TranslationStatus.DRAFT
        # For plural entries the first plural form decides the status.
        target = entry.msgstr if not entry.msgid_plural else (entry.msgstr_plural or {}).get(0, "")
        if target:
            return TranslationStatus.TRANSLATED
        return TranslationStatus.NEW

    def _plural_form_status(
        self, target: str | None, entry: Any
    ) -> TranslationStatus:
        """Derive the status of one plural form from its own text."""
        if "fuzzy" in entry.flags:
            return TranslationStatus.DRAFT
        if target:
            return TranslationStatus.TRANSLATED
        return TranslationStatus.NEW

    def _comments(self, entry: Any) -> list[Comment]:
        """Collect ``entry.comment`` and ``entry.tcomment`` as ``Comment``s.

        Only the comment built from ``entry.comment`` carries the entry's
        ``msgctxt`` as ``context_key``.
        """
        comments: list[Comment] = []
        if entry.comment:
            comments.append(
                Comment(
                    context=entry.comment,
                    context_key=entry.msgctxt or None,
                )
            )
        if entry.tcomment:
            comments.append(Comment(context=entry.tcomment))
        return comments

    def _extensions(self, entry: Any) -> dict[str, str]:
        """Return per-entry extras: source references and non-fuzzy flags.

        ``references`` is a comma-separated list of ``path:line``; an
        occurrence without a line number is rendered as just ``path`` rather
        than ``path:`` with a dangling colon. ``flags`` lists every flag
        except ``fuzzy``, which is already expressed through the status.
        """
        extensions: dict[str, str] = {}
        if entry.occurrences:
            refs = ", ".join(
                f"{path}:{line}" if line else str(path)
                for path, line in entry.occurrences
            )
            extensions["references"] = refs
        non_fuzzy = [f for f in entry.flags if f != "fuzzy"]
        if non_fuzzy:
            extensions["flags"] = ", ".join(non_fuzzy)
        return extensions

    def _base_language(self, locale: str) -> str:
        """Return the lower-cased primary subtag, e.g. ``pt_BR`` -> ``pt``."""
        return locale.replace("_", "-").split("-")[0].lower()
|
|
Binary file
|
|
File without changes
|
|
Binary file
|
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
from typing import Optional
|
|
2
|
+
|
|
3
|
+
from lxml import etree
|
|
4
|
+
|
|
5
|
+
from lokit.core.logger import logger
|
|
6
|
+
from lokit.parsers.tmx.header import TmxHeaderParser
|
|
7
|
+
from lokit.parsers.tmx.models import HeaderData
|
|
8
|
+
from lokit.parsers.tmx.xml_utils import clear_element, element_children, iterparse_safe, local_name
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class TmxParser:
    """Base TMX reader that resolves the file's header data and languages.

    On construction the file is scanned for its ``<header>`` and, when the
    source or target language is still unknown, for ``<tu>`` elements from
    which the missing language is inferred.
    """

    def __init__(
        self,
        tmx_file_path: str,
        source_language: Optional[str] = None,
        target_language: Optional[str] = None,
        domain: Optional[str] = None,
    ) -> None:
        """Read header information from *tmx_file_path*.

        Args:
            tmx_file_path: Path of the TMX file.
            source_language: Source language tag; taken from the header or
                the translation units when omitted.
            target_language: Target language tag; resolved the same way.
            domain: Optional domain label, stored as ``""`` when omitted.
        """
        self.filepath: str = tmx_file_path
        self.domain: str = domain or ""

        # Language tags as supplied by the caller or found in the file.
        self.native_source: str = source_language or ""
        self.native_target: str = target_language or ""

        # Derived from the native tags by _validate_and_set_languages().
        self.source_language: Optional[str] = None
        self.source_locale: Optional[str] = None
        self.target_language: Optional[str] = None
        self.target_locale: Optional[str] = None

        self.export_origin: str = ""
        self.export_timestamp: str = ""
        self.extensions: dict[str, str] = {}

        self.header_parser: TmxHeaderParser = TmxHeaderParser()

        self._initialize_from_file()
        self._validate_and_set_languages()

    def _initialize_from_file(self) -> None:
        """Scan the file until the header is read and both languages are known."""
        context = iterparse_safe(self.filepath, events=("end",))

        for _, elem in context:
            elem_name = local_name(elem.tag)
            if elem_name == "header":
                header_data: HeaderData = self.header_parser.parse(elem)
                self.export_origin = header_data.origin
                self.export_timestamp = header_data.timestamp
                self.extensions.update(header_data.extensions)

                # "*all*" / missing values count as "not specified" so they
                # neither trigger a mismatch warning nor become the language.
                header_src = self._normalize_header_lang(header_data.srclang)
                header_tgt = self._normalize_header_lang(header_data.tgtlang)

                if self.native_source:
                    if header_src and not self._compare_base_lang(
                        self.native_source, header_src
                    ):
                        logger.warning(
                            f"Provided source '{self.native_source}' mismatches header '{header_src}'"
                        )
                else:
                    self.native_source = header_src

                if self.native_target:
                    if header_tgt and not self._compare_base_lang(
                        self.native_target, header_tgt
                    ):
                        logger.warning(
                            f"Provided target '{self.native_target}' mismatches header '{header_tgt}'"
                        )
                else:
                    self.native_target = header_tgt

                clear_element(elem)
                if self.native_source and self.native_target:
                    break

            elif elem_name == "tu" and not (self.native_source and self.native_target):
                self._initialize_missing_languages_from_tu(elem)
                clear_element(elem)
                if self.native_source and self.native_target:
                    break

    def _normalize_header_lang(self, value: Optional[str]) -> str:
        """Return a usable header language tag, or ``""`` if unspecified.

        TMX allows ``srclang="*all*"`` to mean that any language may act as
        the source; that wildcard, like a missing value, is reported as
        ``""`` so the language is inferred from the translation units.
        """
        if not value or value.strip().lower() == "*all*":
            return ""
        return value

    def _compare_base_lang(self, lang1: str, lang2: str) -> bool:
        """Return True when both tags share the same primary subtag.

        The comparison ignores case and treats ``_`` like ``-``; an empty
        tag never matches.
        """
        if not lang1 or not lang2:
            return False
        l1 = lang1.replace("_", "-").split("-")[0].lower()
        l2 = lang2.replace("_", "-").split("-")[0].lower()
        return l1 == l2

    def _initialize_missing_languages_from_tu(self, element: etree._Element) -> None:
        """Fill unknown languages from the ``<tuv>`` children of one ``<tu>``.

        The first tagged variant becomes the source when it is unknown; the
        target becomes the first variant whose base language differs from
        the source, or ``""`` when there is none.
        """
        langs: list[str] = []
        for tuv in element_children(element, "tuv"):
            lang = self._get_xml_lang(tuv)
            if lang:
                langs.append(lang)

        if not self.native_source and langs:
            self.native_source = langs[0]

        if not self.native_target:
            self.native_target = next(
                (
                    lang
                    for lang in langs
                    if not self._compare_base_lang(lang, self.native_source)
                ),
                "",
            )

    def _validate_and_set_languages(self) -> None:
        """Derive ``*_language`` / ``*_locale`` from the native tags, if set."""
        if self.native_source:
            self.source_language, self.source_locale = self._parse_locale_string(
                self.native_source
            )
        if self.native_target:
            self.target_language, self.target_locale = self._parse_locale_string(
                self.native_target
            )

    def _parse_locale_string(self, lang_string: str) -> tuple[str, str]:
        """Split a tag into ``(language, canonical_locale)``.

        Example: ``"zh_hant_tw"`` -> ``("zh", "zh-Hant-TW")``.

        Raises:
            ValueError: if *lang_string* is empty.
        """
        if not lang_string:
            raise ValueError("Cannot parse empty language string")

        normalized: str = lang_string.replace("_", "-")
        parts: list[str] = normalized.split("-")

        lang_code: str = parts[0].lower()
        canonical_parts = [lang_code]
        if len(parts) > 1:
            canonical_parts.extend(
                self._canonicalize_subtag(part) for part in parts[1:]
            )

        return lang_code, "-".join(canonical_parts)

    def _canonicalize_subtag(self, subtag: str) -> str:
        """Apply conventional casing to a non-primary subtag.

        Two-letter subtags are upper-cased, four-letter subtags are
        title-cased, and anything else is returned unchanged.
        """
        if len(subtag) == 2 and subtag.isalpha():
            return subtag.upper()
        if len(subtag) == 4 and subtag.isalpha():
            return subtag.title()
        return subtag

    def _get_xml_lang(self, element: etree._Element) -> str:
        """Return the element's ``xml:lang``, else ``lang``, else ``""``."""
        return (
            element.get("{http://www.w3.org/XML/1998/namespace}lang")
            or element.get("lang")
            or ""
        )
|
|
Binary file
|