lokit-python 0.1.0__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (97) hide show
  1. 821d8b73c2a02cb7980f__mypyc.cp313-win_amd64.pyd +0 -0
  2. lokit/__init__.cp313-win_amd64.pyd +0 -0
  3. lokit/__init__.py +128 -0
  4. lokit/core/__init__.cp313-win_amd64.pyd +0 -0
  5. lokit/core/__init__.py +0 -0
  6. lokit/core/logger.cp313-win_amd64.pyd +0 -0
  7. lokit/core/logger.py +20 -0
  8. lokit/data/__init__.cp313-win_amd64.pyd +0 -0
  9. lokit/data/__init__.py +0 -0
  10. lokit/data/lang_codes.cp313-win_amd64.pyd +0 -0
  11. lokit/data/lang_codes.py +455 -0
  12. lokit/data/structure.cp313-win_amd64.pyd +0 -0
  13. lokit/data/structure.py +118 -0
  14. lokit/data/tag_types.cp313-win_amd64.pyd +0 -0
  15. lokit/data/tag_types.py +78 -0
  16. lokit/exporters/__init__.cp313-win_amd64.pyd +0 -0
  17. lokit/exporters/__init__.py +34 -0
  18. lokit/exporters/csv.cp313-win_amd64.pyd +0 -0
  19. lokit/exporters/csv.py +32 -0
  20. lokit/exporters/html.cp313-win_amd64.pyd +0 -0
  21. lokit/exporters/html.py +217 -0
  22. lokit/exporters/idml.cp313-win_amd64.pyd +0 -0
  23. lokit/exporters/idml.py +178 -0
  24. lokit/exporters/json_i18n.cp313-win_amd64.pyd +0 -0
  25. lokit/exporters/json_i18n.py +47 -0
  26. lokit/exporters/po.cp313-win_amd64.pyd +0 -0
  27. lokit/exporters/po.py +162 -0
  28. lokit/exporters/tmx.cp313-win_amd64.pyd +0 -0
  29. lokit/exporters/tmx.py +247 -0
  30. lokit/exporters/xliff.cp313-win_amd64.pyd +0 -0
  31. lokit/exporters/xliff.py +152 -0
  32. lokit/exporters/xlsx.cp313-win_amd64.pyd +0 -0
  33. lokit/exporters/xlsx.py +39 -0
  34. lokit/format_detection.cp313-win_amd64.pyd +0 -0
  35. lokit/format_detection.py +115 -0
  36. lokit/importers.py +321 -0
  37. lokit/io/__init__.cp313-win_amd64.pyd +0 -0
  38. lokit/io/__init__.py +3 -0
  39. lokit/io/json.cp313-win_amd64.pyd +0 -0
  40. lokit/io/json.py +194 -0
  41. lokit/logic.cp313-win_amd64.pyd +0 -0
  42. lokit/logic.py +324 -0
  43. lokit/parsers/__init__.cp313-win_amd64.pyd +0 -0
  44. lokit/parsers/__init__.py +1 -0
  45. lokit/parsers/csv/__init__.cp313-win_amd64.pyd +0 -0
  46. lokit/parsers/csv/__init__.py +1 -0
  47. lokit/parsers/csv/extraction.cp313-win_amd64.pyd +0 -0
  48. lokit/parsers/csv/extraction.py +164 -0
  49. lokit/parsers/html/__init__.cp313-win_amd64.pyd +0 -0
  50. lokit/parsers/html/__init__.py +3 -0
  51. lokit/parsers/html/extraction.cp313-win_amd64.pyd +0 -0
  52. lokit/parsers/html/extraction.py +365 -0
  53. lokit/parsers/idml/__init__.cp313-win_amd64.pyd +0 -0
  54. lokit/parsers/idml/__init__.py +3 -0
  55. lokit/parsers/idml/extraction.cp313-win_amd64.pyd +0 -0
  56. lokit/parsers/idml/extraction.py +264 -0
  57. lokit/parsers/json_i18n/__init__.cp313-win_amd64.pyd +0 -0
  58. lokit/parsers/json_i18n/__init__.py +3 -0
  59. lokit/parsers/json_i18n/extraction.cp313-win_amd64.pyd +0 -0
  60. lokit/parsers/json_i18n/extraction.py +163 -0
  61. lokit/parsers/po/__init__.cp313-win_amd64.pyd +0 -0
  62. lokit/parsers/po/__init__.py +3 -0
  63. lokit/parsers/po/extraction.cp313-win_amd64.pyd +0 -0
  64. lokit/parsers/po/extraction.py +236 -0
  65. lokit/parsers/tmx/__init__.cp313-win_amd64.pyd +0 -0
  66. lokit/parsers/tmx/__init__.py +0 -0
  67. lokit/parsers/tmx/base.cp313-win_amd64.pyd +0 -0
  68. lokit/parsers/tmx/base.py +145 -0
  69. lokit/parsers/tmx/extraction.cp313-win_amd64.pyd +0 -0
  70. lokit/parsers/tmx/extraction.py +170 -0
  71. lokit/parsers/tmx/header.cp313-win_amd64.pyd +0 -0
  72. lokit/parsers/tmx/header.py +55 -0
  73. lokit/parsers/tmx/helpers.cp313-win_amd64.pyd +0 -0
  74. lokit/parsers/tmx/helpers.py +9 -0
  75. lokit/parsers/tmx/models.cp313-win_amd64.pyd +0 -0
  76. lokit/parsers/tmx/models.py +10 -0
  77. lokit/parsers/tmx/props.cp313-win_amd64.pyd +0 -0
  78. lokit/parsers/tmx/props.py +201 -0
  79. lokit/parsers/tmx/tags.cp313-win_amd64.pyd +0 -0
  80. lokit/parsers/tmx/tags.py +59 -0
  81. lokit/parsers/tmx/xml_utils.cp313-win_amd64.pyd +0 -0
  82. lokit/parsers/tmx/xml_utils.py +46 -0
  83. lokit/parsers/xliff/__init__.cp313-win_amd64.pyd +0 -0
  84. lokit/parsers/xliff/__init__.py +3 -0
  85. lokit/parsers/xliff/extraction.cp313-win_amd64.pyd +0 -0
  86. lokit/parsers/xliff/extraction.py +229 -0
  87. lokit/parsers/xliff/tags.cp313-win_amd64.pyd +0 -0
  88. lokit/parsers/xliff/tags.py +128 -0
  89. lokit/parsers/xlsx/__init__.cp313-win_amd64.pyd +0 -0
  90. lokit/parsers/xlsx/__init__.py +1 -0
  91. lokit/parsers/xlsx/extraction.cp313-win_amd64.pyd +0 -0
  92. lokit/parsers/xlsx/extraction.py +198 -0
  93. lokit/py.typed +1 -0
  94. lokit_python-0.1.0.dist-info/METADATA +149 -0
  95. lokit_python-0.1.0.dist-info/RECORD +97 -0
  96. lokit_python-0.1.0.dist-info/WHEEL +5 -0
  97. lokit_python-0.1.0.dist-info/top_level.txt +2 -0
@@ -0,0 +1,229 @@
1
+ from __future__ import annotations
2
+
3
+ import asyncio
4
+ from dataclasses import dataclass
5
+ from typing import AsyncIterator, Iterator, Optional
6
+
7
+ from lxml.etree import _Element
8
+
9
+ from lokit.data.structure import Comment, Data, Meta, SegmentPart, Tags, TranslationStatus
10
+ from lokit.data.tag_types import TieData
11
+ from lokit.parsers.tmx.xml_utils import (
12
+ clear_element,
13
+ element_children,
14
+ find_child,
15
+ iterparse_safe,
16
+ local_name,
17
+ )
18
+ from lokit.parsers.xliff.tags import XliffTagParser
19
+
20
+ ExtractItem = tuple[str, Data]
21
+
22
+
23
+ @dataclass(slots=True)
24
+ class XliffFileContext:
25
+ index: int
26
+ original: str
27
+ source_locale: str
28
+ target_locale: str | None
29
+ data_type: str
30
+ tool_name: str | None = None
31
+ tool_version: str | None = None
32
+
33
+
34
+ @dataclass(slots=True)
35
+ class _AsyncExtractionResult:
36
+ item: Optional[ExtractItem] = None
37
+ error: Optional[BaseException] = None
38
+ done: bool = False
39
+
40
+
41
+ class AsyncXliffExtraction:
42
+ def __init__(self, extractor: XliffExtractor) -> None:
43
+ self._extractor = extractor
44
+ self._queue: asyncio.Queue[_AsyncExtractionResult] = asyncio.Queue()
45
+ self._producer: asyncio.Task[None] | None = None
46
+
47
+ def __aiter__(self) -> AsyncXliffExtraction:
48
+ return self
49
+
50
+ async def __anext__(self) -> ExtractItem:
51
+ if self._producer is None:
52
+ self._start()
53
+ result = await self._queue.get()
54
+ if result.done:
55
+ await self._finish()
56
+ raise StopAsyncIteration
57
+ if result.error is not None:
58
+ await self._finish()
59
+ raise result.error
60
+ if result.item is None:
61
+ await self._finish()
62
+ raise StopAsyncIteration
63
+ return result.item
64
+
65
+ def _start(self) -> None:
66
+ loop = asyncio.get_running_loop()
67
+
68
+ def produce() -> None:
69
+ try:
70
+ for item in self._extractor.extract():
71
+ loop.call_soon_threadsafe(
72
+ self._queue.put_nowait,
73
+ _AsyncExtractionResult(item=item),
74
+ )
75
+ except BaseException as exc:
76
+ loop.call_soon_threadsafe(
77
+ self._queue.put_nowait,
78
+ _AsyncExtractionResult(error=exc),
79
+ )
80
+ finally:
81
+ loop.call_soon_threadsafe(
82
+ self._queue.put_nowait,
83
+ _AsyncExtractionResult(done=True),
84
+ )
85
+
86
+ self._producer = asyncio.create_task(asyncio.to_thread(produce))
87
+
88
+ async def _finish(self) -> None:
89
+ if self._producer is not None:
90
+ await self._producer
91
+
92
+
93
class XliffExtractor:
    """Stream translation units out of an XLIFF file.

    Only ``<trans-unit>`` elements that close while a ``<file>`` element is
    open are extracted. Document-level locales are taken from the first
    ``<file>`` element that declares them.
    """

    def __init__(self, filepath: str) -> None:
        """Remember *filepath* and initialise document-level metadata.

        Parsing does not start here; it starts when ``extract()`` is iterated.
        """
        self.filepath = filepath
        # Reported until the root <xliff> element states its own version.
        self.version = "1.2"
        self.source_locale: str | None = None
        self.target_locale: str | None = None
        self.source_language: str | None = None
        self.target_language: str | None = None
        self.export_origin = ""
        self.export_timestamp = ""
        self.extensions: dict[str, str] = {"input_format": "xliff"}
        self.tag_parser = XliffTagParser()

    def extract(self) -> Iterator[ExtractItem]:
        """Yield ``(stable_id, Data)`` for every translation unit in the file.

        The document is consumed incrementally; each ``<trans-unit>`` and
        ``<file>`` element is cleared once it has been handled.
        """
        file_stack: list[XliffFileContext] = []
        file_index = 0

        for event, elem in iterparse_safe(self.filepath, events=("start", "end")):
            name = local_name(elem.tag)
            if event == "start":
                if name == "xliff":
                    self.version = elem.attrib.get("version", "1.2")
                    self.extensions["xliff_version"] = self.version
                elif name == "file":
                    # Attributes are already available on the start event, so
                    # the context exists before any nested unit is closed.
                    current = self._file_context(elem, file_index)
                    file_index += 1
                    file_stack.append(current)
                    self._set_document_languages(current)
            elif event == "end":
                if name == "file":
                    if file_stack:
                        file_stack.pop()
                    clear_element(elem)
                elif name == "trans-unit" and file_stack:
                    yield self._parse_unit(elem, file_stack[-1])
                    clear_element(elem)

    def extract_async(self) -> AsyncIterator[ExtractItem]:
        """Return an async iterator producing the same items as ``extract()``."""
        return AsyncXliffExtraction(self)

    def _file_context(self, element: _Element, index: int) -> XliffFileContext:
        """Build the context for a ``<file>`` element from its attributes."""
        attrib = element.attrib
        return XliffFileContext(
            index=index,
            original=attrib.get("original", ""),
            source_locale=attrib.get("source-language", ""),
            target_locale=attrib.get("target-language"),
            data_type=attrib.get("datatype", ""),
        )

    def _set_document_languages(self, context: XliffFileContext) -> None:
        """Adopt the locales of *context* unless earlier ones were recorded."""
        if self.source_locale is None and context.source_locale:
            self.source_locale = context.source_locale
            self.source_language = self._base_language(context.source_locale)
        if self.target_locale is None and context.target_locale:
            self.target_locale = context.target_locale
            self.target_language = self._base_language(context.target_locale)

    def _parse_unit(
        self,
        element: _Element,
        file_context: XliffFileContext,
    ) -> ExtractItem:
        """Convert one ``<trans-unit>`` element into ``(stable_id, Data)``.

        The stable id is ``"<file index>:<unit id>"``, or just the file index
        when the unit carries no ``id`` attribute.
        """
        source = find_child(element, "source")
        target = find_child(element, "target")
        source_text, source_tags, source_parts = self._parse_segment(source)
        target_text, target_tags, target_parts = self._parse_segment(target)
        unit_id = element.attrib.get("id", "")
        # NOTE(review): units without an ``id`` all share the bare file index
        # as their key - confirm callers tolerate duplicate ids.
        stable_id = f"{file_context.index}:{unit_id}" if unit_id else f"{file_context.index}"
        tags = Tags(
            source_tag_map=source_tags,
            target_tag_map=target_tags,
            source_parts=source_parts,
            target_parts=target_parts,
        )
        data = Data(
            source=source_text,
            # A missing <target> is reported as None, an empty one as "".
            target=target_text if target is not None else None,
            tags=tags if source_tags or target_tags else None,
            meta=Meta(),
            status=self._status(target),
            comments=self._comments(element),
            extensions=self._extensions(element, file_context, unit_id),
        )
        return stable_id, data

    def _parse_segment(
        self, element: _Element | None
    ) -> tuple[str, dict[str, TieData], list[SegmentPart]]:
        """Return ``(text, tag_map, parts)`` for *element*; empty when None."""
        if element is None:
            return "", {}, []
        return self.tag_parser.parse(element)

    def _status(self, target: _Element | None) -> TranslationStatus:
        """Map the ``state`` attribute of ``<target>`` to a status.

        A missing ``<target>`` is NEW; an absent or unrecognised state is
        UNKNOWN. The comparison is case-insensitive.
        """
        if target is None:
            return TranslationStatus.NEW
        state = (target.attrib.get("state") or "").lower()
        if state in ("final", "signed-off"):
            return TranslationStatus.APPROVED
        if state in ("translated", "needs-review-translation"):
            return TranslationStatus.TRANSLATED
        if state in ("needs-review-adaptation", "needs-review-l10n"):
            return TranslationStatus.REVIEWED
        if state in ("new", "needs-translation"):
            return TranslationStatus.NEW
        return TranslationStatus.UNKNOWN

    def _comments(self, element: _Element) -> list[Comment]:
        """Collect the non-blank ``<note>`` children of *element*."""
        comments: list[Comment] = []
        for child in element_children(element, "note"):
            # Strip before testing so that a whitespace-only note does not
            # produce a comment with an empty context.
            text = (child.text or "").strip()
            if text:
                comments.append(Comment(context=text))
        return comments

    def _extensions(
        self,
        element: _Element,
        file_context: XliffFileContext,
        unit_id: str,
    ) -> dict[str, str]:
        """Build the per-unit extension dictionary.

        ``data_type`` and ``space`` are only present when the corresponding
        value is non-empty.
        """
        extensions = {
            "resource": file_context.original,
            "resource_index": str(file_context.index),
            "unit_id": unit_id,
        }
        if file_context.data_type:
            extensions["data_type"] = file_context.data_type
        xml_space = element.attrib.get("{http://www.w3.org/XML/1998/namespace}space")
        if xml_space:
            extensions["space"] = xml_space
        return extensions

    def _base_language(self, locale: str) -> str:
        """Return the lowercase primary subtag of *locale* (``en_US`` -> ``en``)."""
        return locale.replace("_", "-").split("-")[0].lower()
@@ -0,0 +1,128 @@
1
+ from __future__ import annotations
2
+
3
+ from lxml.etree import _Element
4
+
5
+ from lokit.data.structure import CodePart, SegmentPart, TextPart
6
+ from lokit.data.tag_types import TieData, TieType
7
+ from lokit.parsers.tmx.xml_utils import element_children, local_name
8
+
9
+
10
class XliffTagParser:
    """Flatten the inline content of an XLIFF segment element.

    The result is the concatenated text, a map of generated code ids
    (``c0``, ``c1``, ...) to their ``TieData``, and the ordered list of text
    and code parts.
    """

    def parse(
        self, element: _Element
    ) -> tuple[str, dict[str, TieData], list[SegmentPart]]:
        """Return ``(raw_text, tag_map, parts)`` for the content of *element*.

        The element itself contributes no code; only its descendants do.
        """
        tag_map: dict[str, TieData] = {}
        parts: list[SegmentPart] = []
        raw_text, _ = self._append_content(
            element,
            "",
            parts,
            tag_map,
            {},
            0,
            include_element_code=False,
        )
        return raw_text, tag_map, parts

    def _append_content(
        self,
        element: _Element,
        raw_text: str,
        parts: list[SegmentPart],
        tag_map: dict[str, TieData],
        pair_ids: dict[str, str],
        order: int,
        include_element_code: bool,
    ) -> tuple[str, int]:
        """Append the content of *element* and return ``(raw_text, order)``.

        ``g``, ``mrk`` and ``sub`` children are descended into and wrapped in
        an opening and a closing code; every other child becomes a single
        code and its own content is not added to the text. ``parts`` and
        ``tag_map`` are extended in place.
        """
        if include_element_code:
            order = self._register_code(
                element, self._open_type(element), len(raw_text), order,
                parts, tag_map, pair_ids,
            )

        leading = element.text
        if leading:
            raw_text += leading
            parts.append(TextPart(leading))

        for child in element_children(element):
            if local_name(child.tag) in ("g", "mrk", "sub"):
                raw_text, order = self._append_content(
                    child,
                    raw_text,
                    parts,
                    tag_map,
                    pair_ids,
                    order,
                    include_element_code=True,
                )
            else:
                order = self._register_code(
                    child, self._inline_type(child), len(raw_text), order,
                    parts, tag_map, pair_ids,
                )

            # Text following a child belongs to the enclosing element.
            trailing = child.tail
            if trailing:
                raw_text += trailing
                parts.append(TextPart(trailing))

        if include_element_code:
            order = self._register_code(
                element, self._close_type(element), len(raw_text), order,
                parts, tag_map, pair_ids,
            )

        return raw_text, order

    def _register_code(
        self,
        element: _Element,
        tie_type: TieType,
        position: int,
        order: int,
        parts: list[SegmentPart],
        tag_map: dict[str, TieData],
        pair_ids: dict[str, str],
    ) -> int:
        """Record one code for *element* and return the next order value."""
        code_id = f"c{order}"
        tag_map[code_id] = TieData(
            id=code_id,
            type=tie_type,
            position=position,
            order=order,
            pair_id=self._pair_id(element, pair_ids),
        )
        parts.append(CodePart(code_id))
        return order + 1

    def _pair_id(self, element: _Element, pair_ids: dict[str, str]) -> str | None:
        """Return the normalised pair id (``p0``, ``p1``, ...) for *element*.

        The first non-empty attribute among ``rid``, ``id``, ``xid`` and
        ``ctype`` is the lookup key; elements sharing that key share a pair
        id. Returns None when no key is available.
        """
        source_id = None
        for attribute in ("rid", "id", "xid", "ctype"):
            source_id = element.attrib.get(attribute)
            if source_id:
                break
        if source_id is None:
            return None

        known = pair_ids.get(source_id)
        if known is None:
            known = f"p{len(pair_ids)}"
            pair_ids[source_id] = known
        return known

    def _inline_type(self, element: _Element) -> TieType:
        """Classify a non-recursed inline element by its tag name."""
        tag = local_name(element.tag)
        if tag == "bpt" or tag == "bx":
            return TieType.CUSTOM_OPEN
        if tag == "ept" or tag == "ex":
            return TieType.CUSTOM_CLOSE
        return TieType.CUSTOM_STANDALONE

    def _open_type(self, element: _Element) -> TieType:
        """Type used for the opening code of a recursed element."""
        return TieType.CUSTOM_OPEN

    def _close_type(self, element: _Element) -> TieType:
        """Type used for the closing code of a recursed element."""
        return TieType.CUSTOM_CLOSE
@@ -0,0 +1 @@
1
+
@@ -0,0 +1,198 @@
1
+ from __future__ import annotations
2
+
3
+ import asyncio
4
+ from dataclasses import dataclass
5
+ from pathlib import Path
6
+ from typing import AsyncIterator, Iterator, Optional
7
+
8
+ from openpyxl import load_workbook
9
+ from openpyxl.cell.cell import Cell, MergedCell
10
+
11
+ from lokit.data.structure import Comment, Data, TranslationStatus
12
+
13
+ ExtractItem = tuple[str, Data]
14
+
15
+ _KNOWN_COLUMNS = frozenset({"id", "source", "target", "status", "comment"})
16
+
17
+
18
+ def _parse_base_lang(locale: str) -> str:
19
+ return locale.replace("_", "-").split("-")[0].lower()
20
+
21
+
22
def _parse_status(value: str) -> TranslationStatus:
    """Convert a status cell into a ``TranslationStatus``.

    The text is trimmed and lowercased before the lookup; a value the enum
    rejects with ``ValueError`` maps to ``TranslationStatus.UNKNOWN``.
    """
    try:
        status = TranslationStatus(value.strip().lower())
    except ValueError:
        status = TranslationStatus.UNKNOWN
    return status
28
+
29
+
30
+ def _cell_str(cell: Cell | MergedCell) -> str:
31
+ if cell.value is None:
32
+ return ""
33
+ return str(cell.value)
34
+
35
+
36
+ def _infer_locales_from_filename(filepath: str) -> tuple[str, str | None]:
37
+ stem = Path(filepath).stem
38
+ if "-" in stem:
39
+ parts = stem.split("-")
40
+ if len(parts) == 2:
41
+ return parts[0], parts[1]
42
+ if len(parts) == 4:
43
+ return f"{parts[0]}-{parts[1]}", f"{parts[2]}-{parts[3]}"
44
+ if "_" in stem:
45
+ parts = stem.split("_")
46
+ if len(parts) == 2:
47
+ return parts[0], parts[1]
48
+ if len(parts) == 4:
49
+ return f"{parts[0]}_{parts[1]}", f"{parts[2]}_{parts[3]}"
50
+ return "", None
51
+
52
+
53
+ @dataclass(slots=True)
54
+ class _AsyncExtractionResult:
55
+ item: Optional[ExtractItem] = None
56
+ error: Optional[BaseException] = None
57
+ done: bool = False
58
+
59
+
60
+ class AsyncXlsxExtraction:
61
+ def __init__(self, extractor: XlsxExtractor) -> None:
62
+ self._extractor = extractor
63
+ self._queue: asyncio.Queue[_AsyncExtractionResult] = asyncio.Queue()
64
+ self._producer: asyncio.Task[None] | None = None
65
+
66
+ def __aiter__(self) -> AsyncXlsxExtraction:
67
+ return self
68
+
69
+ async def __anext__(self) -> ExtractItem:
70
+ if self._producer is None:
71
+ self._start()
72
+
73
+ result = await self._queue.get()
74
+ if result.done:
75
+ await self._finish()
76
+ raise StopAsyncIteration
77
+ if result.error is not None:
78
+ await self._finish()
79
+ raise result.error
80
+ if result.item is None:
81
+ await self._finish()
82
+ raise StopAsyncIteration
83
+ return result.item
84
+
85
+ def _start(self) -> None:
86
+ loop = asyncio.get_running_loop()
87
+
88
+ def produce() -> None:
89
+ try:
90
+ for item in self._extractor.extract():
91
+ loop.call_soon_threadsafe(
92
+ self._queue.put_nowait,
93
+ _AsyncExtractionResult(item=item),
94
+ )
95
+ except BaseException as exc:
96
+ loop.call_soon_threadsafe(
97
+ self._queue.put_nowait,
98
+ _AsyncExtractionResult(error=exc),
99
+ )
100
+ finally:
101
+ loop.call_soon_threadsafe(
102
+ self._queue.put_nowait,
103
+ _AsyncExtractionResult(done=True),
104
+ )
105
+
106
+ self._producer = asyncio.create_task(asyncio.to_thread(produce))
107
+
108
+ async def _finish(self) -> None:
109
+ if self._producer is not None:
110
+ await self._producer
111
+
112
+
113
class XlsxExtractor:
    """Read translation units from the active sheet of an XLSX workbook.

    The first row is the header. The columns ``id``, ``source``, ``target``,
    ``status`` and ``comment`` (matched case-insensitively) are mapped to
    fields; every other named column ends up in the unit's extensions.
    """

    def __init__(
        self,
        filepath: str,
        source_locale: str = "",
        target_locale: str | None = None,
    ) -> None:
        """Store the path and resolve the locales.

        When *source_locale* is empty, both locales are inferred from the
        file name; an explicit *target_locale* still takes precedence over
        the inferred one.
        """
        self.filepath: str = filepath

        if not source_locale:
            guessed_source, guessed_target = _infer_locales_from_filename(filepath)
            source_locale = guessed_source
            target_locale = target_locale or guessed_target
        self.source_locale: str = source_locale
        self.target_locale: str | None = target_locale

        source_language: str | None = None
        if self.source_locale:
            source_language = _parse_base_lang(self.source_locale)
        self.source_language: str | None = source_language

        target_language: str | None = None
        if self.target_locale:
            target_language = _parse_base_lang(self.target_locale)
        self.target_language: str | None = target_language

        self.export_origin: str = ""
        self.export_timestamp: str = ""
        self.extensions: dict[str, str] = {"input_format": "xlsx"}

    def extract(self) -> Iterator[ExtractItem]:
        """Yield ``(unit_id, Data)`` for every row below the header.

        Rows without an id get ``xlsx:<n>``, where ``n`` is the zero-based
        index of the row among the data rows. The workbook is closed when
        the generator finishes or is closed.
        """
        workbook = load_workbook(self.filepath, read_only=True, data_only=True)
        try:
            sheet = workbook.active
            if sheet is None:
                return

            row_iter = sheet.iter_rows()
            header_cells = next(row_iter, None)
            if header_cells is None:
                return

            headers: list[str] = [
                _cell_str(cell).strip().lower() for cell in header_cells
            ]
            # Later columns win when a header name is repeated.
            col_map: dict[str, int] = {}
            for position, header in enumerate(headers):
                if header:
                    col_map[header] = position
            extra_columns = [
                header for header in headers
                if header and header not in _KNOWN_COLUMNS
            ]

            def text_of(cells: list[Cell | MergedCell], column: str) -> str:
                # "" for an unknown column or a row shorter than the header.
                position = col_map.get(column)
                if position is None or position >= len(cells):
                    return ""
                return _cell_str(cells[position])

            for index, row in enumerate(row_iter):
                cells = list(row)

                unit_id = text_of(cells, "id") or f"xlsx:{index}"
                source = text_of(cells, "source")
                # An empty target cell is reported as None, not "".
                target = text_of(cells, "target") or None

                status_text = text_of(cells, "status")
                if status_text:
                    status = _parse_status(status_text)
                else:
                    status = TranslationStatus.UNKNOWN

                note = text_of(cells, "comment").strip()
                comments: list[Comment] = [Comment(context=note)] if note else []

                extensions: dict[str, str] = {}
                for column in extra_columns:
                    extra = text_of(cells, column)
                    if extra:
                        extensions[column] = extra

                yield unit_id, Data(
                    source=source,
                    target=target,
                    status=status,
                    comments=comments,
                    extensions=extensions,
                )
        finally:
            workbook.close()

    def extract_async(self) -> AsyncIterator[ExtractItem]:
        """Return an async iterator producing the same items as ``extract()``."""
        return AsyncXlsxExtraction(self)
lokit/py.typed ADDED
@@ -0,0 +1 @@
1
+