lokit-python 0.1.0__cp312-cp312-win32.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (97) hide show
  1. 821d8b73c2a02cb7980f__mypyc.cp312-win32.pyd +0 -0
  2. lokit/__init__.cp312-win32.pyd +0 -0
  3. lokit/__init__.py +128 -0
  4. lokit/core/__init__.cp312-win32.pyd +0 -0
  5. lokit/core/__init__.py +0 -0
  6. lokit/core/logger.cp312-win32.pyd +0 -0
  7. lokit/core/logger.py +20 -0
  8. lokit/data/__init__.cp312-win32.pyd +0 -0
  9. lokit/data/__init__.py +0 -0
  10. lokit/data/lang_codes.cp312-win32.pyd +0 -0
  11. lokit/data/lang_codes.py +455 -0
  12. lokit/data/structure.cp312-win32.pyd +0 -0
  13. lokit/data/structure.py +118 -0
  14. lokit/data/tag_types.cp312-win32.pyd +0 -0
  15. lokit/data/tag_types.py +78 -0
  16. lokit/exporters/__init__.cp312-win32.pyd +0 -0
  17. lokit/exporters/__init__.py +34 -0
  18. lokit/exporters/csv.cp312-win32.pyd +0 -0
  19. lokit/exporters/csv.py +32 -0
  20. lokit/exporters/html.cp312-win32.pyd +0 -0
  21. lokit/exporters/html.py +217 -0
  22. lokit/exporters/idml.cp312-win32.pyd +0 -0
  23. lokit/exporters/idml.py +178 -0
  24. lokit/exporters/json_i18n.cp312-win32.pyd +0 -0
  25. lokit/exporters/json_i18n.py +47 -0
  26. lokit/exporters/po.cp312-win32.pyd +0 -0
  27. lokit/exporters/po.py +162 -0
  28. lokit/exporters/tmx.cp312-win32.pyd +0 -0
  29. lokit/exporters/tmx.py +247 -0
  30. lokit/exporters/xliff.cp312-win32.pyd +0 -0
  31. lokit/exporters/xliff.py +152 -0
  32. lokit/exporters/xlsx.cp312-win32.pyd +0 -0
  33. lokit/exporters/xlsx.py +39 -0
  34. lokit/format_detection.cp312-win32.pyd +0 -0
  35. lokit/format_detection.py +115 -0
  36. lokit/importers.py +321 -0
  37. lokit/io/__init__.cp312-win32.pyd +0 -0
  38. lokit/io/__init__.py +3 -0
  39. lokit/io/json.cp312-win32.pyd +0 -0
  40. lokit/io/json.py +194 -0
  41. lokit/logic.cp312-win32.pyd +0 -0
  42. lokit/logic.py +324 -0
  43. lokit/parsers/__init__.cp312-win32.pyd +0 -0
  44. lokit/parsers/__init__.py +1 -0
  45. lokit/parsers/csv/__init__.cp312-win32.pyd +0 -0
  46. lokit/parsers/csv/__init__.py +1 -0
  47. lokit/parsers/csv/extraction.cp312-win32.pyd +0 -0
  48. lokit/parsers/csv/extraction.py +164 -0
  49. lokit/parsers/html/__init__.cp312-win32.pyd +0 -0
  50. lokit/parsers/html/__init__.py +3 -0
  51. lokit/parsers/html/extraction.cp312-win32.pyd +0 -0
  52. lokit/parsers/html/extraction.py +365 -0
  53. lokit/parsers/idml/__init__.cp312-win32.pyd +0 -0
  54. lokit/parsers/idml/__init__.py +3 -0
  55. lokit/parsers/idml/extraction.cp312-win32.pyd +0 -0
  56. lokit/parsers/idml/extraction.py +264 -0
  57. lokit/parsers/json_i18n/__init__.cp312-win32.pyd +0 -0
  58. lokit/parsers/json_i18n/__init__.py +3 -0
  59. lokit/parsers/json_i18n/extraction.cp312-win32.pyd +0 -0
  60. lokit/parsers/json_i18n/extraction.py +163 -0
  61. lokit/parsers/po/__init__.cp312-win32.pyd +0 -0
  62. lokit/parsers/po/__init__.py +3 -0
  63. lokit/parsers/po/extraction.cp312-win32.pyd +0 -0
  64. lokit/parsers/po/extraction.py +236 -0
  65. lokit/parsers/tmx/__init__.cp312-win32.pyd +0 -0
  66. lokit/parsers/tmx/__init__.py +0 -0
  67. lokit/parsers/tmx/base.cp312-win32.pyd +0 -0
  68. lokit/parsers/tmx/base.py +145 -0
  69. lokit/parsers/tmx/extraction.cp312-win32.pyd +0 -0
  70. lokit/parsers/tmx/extraction.py +170 -0
  71. lokit/parsers/tmx/header.cp312-win32.pyd +0 -0
  72. lokit/parsers/tmx/header.py +55 -0
  73. lokit/parsers/tmx/helpers.cp312-win32.pyd +0 -0
  74. lokit/parsers/tmx/helpers.py +9 -0
  75. lokit/parsers/tmx/models.cp312-win32.pyd +0 -0
  76. lokit/parsers/tmx/models.py +10 -0
  77. lokit/parsers/tmx/props.cp312-win32.pyd +0 -0
  78. lokit/parsers/tmx/props.py +201 -0
  79. lokit/parsers/tmx/tags.cp312-win32.pyd +0 -0
  80. lokit/parsers/tmx/tags.py +59 -0
  81. lokit/parsers/tmx/xml_utils.cp312-win32.pyd +0 -0
  82. lokit/parsers/tmx/xml_utils.py +46 -0
  83. lokit/parsers/xliff/__init__.cp312-win32.pyd +0 -0
  84. lokit/parsers/xliff/__init__.py +3 -0
  85. lokit/parsers/xliff/extraction.cp312-win32.pyd +0 -0
  86. lokit/parsers/xliff/extraction.py +229 -0
  87. lokit/parsers/xliff/tags.cp312-win32.pyd +0 -0
  88. lokit/parsers/xliff/tags.py +128 -0
  89. lokit/parsers/xlsx/__init__.cp312-win32.pyd +0 -0
  90. lokit/parsers/xlsx/__init__.py +1 -0
  91. lokit/parsers/xlsx/extraction.cp312-win32.pyd +0 -0
  92. lokit/parsers/xlsx/extraction.py +198 -0
  93. lokit/py.typed +1 -0
  94. lokit_python-0.1.0.dist-info/METADATA +149 -0
  95. lokit_python-0.1.0.dist-info/RECORD +97 -0
  96. lokit_python-0.1.0.dist-info/WHEEL +5 -0
  97. lokit_python-0.1.0.dist-info/top_level.txt +2 -0
@@ -0,0 +1,118 @@
1
+ from dataclasses import dataclass, field
2
+ from enum import StrEnum
3
+ from typing import Optional
4
+
5
+ from lokit.data.tag_types import TieData
6
+
7
+
8
class TranslationStatus(StrEnum):
    """Workflow state recorded for a translation unit (see ``Data.status``).

    Every member's value is the lower-case form of its name.
    """

    NEW = "new"
    DRAFT = "draft"
    TRANSLATED = "translated"
    REVIEWED = "reviewed"
    APPROVED = "approved"
    REJECTED = "rejected"
    # Default for ``Data.status``.
    UNKNOWN = "unknown"
16
+
17
+
18
class PluralCategory(StrEnum):
    """Plural-form category attached to a ``Plural`` (see ``Plural.category``).

    NOTE(review): apart from GENERIC the names look like the CLDR plural
    categories -- confirm against the parsers that populate this field.
    """

    GENERIC = "generic"
    ZERO = "zero"
    ONE = "one"
    TWO = "two"
    FEW = "few"
    MANY = "many"
    OTHER = "other"
26
+
27
+
28
+ @dataclass(slots=True)
29
+ class Plural:
30
+ variant: str
31
+ count: Optional[int] = None
32
+ category: Optional[PluralCategory] = None
33
+ extensions: dict[str, str] = field(default_factory=dict)
34
+
35
+
36
+ @dataclass(slots=True)
37
+ class Meta:
38
+ usage_count: Optional[int] = None
39
+ last_used: Optional[str] = None
40
+ first_used: Optional[str] = None
41
+ created: Optional[str] = None
42
+ updated: Optional[str] = None
43
+ max_length: Optional[int] = None
44
+ min_length: Optional[int] = None
45
+ extensions: dict[str, str] = field(default_factory=dict)
46
+
47
+
48
+ @dataclass(slots=True)
49
+ class Origin:
50
+ system: Optional[str] = None
51
+ project: Optional[str] = None
52
+ creator_id: Optional[str] = None
53
+ extensions: dict[str, str] = field(default_factory=dict)
54
+
55
+
56
+ @dataclass(slots=True)
57
+ class Comment:
58
+ context: str
59
+ timestamp: Optional[str] = None
60
+ origin: Optional[Origin] = None
61
+ context_key: Optional[str] = None
62
+ extensions: dict[str, str] = field(default_factory=dict)
63
+
64
+
65
@dataclass(slots=True)
class TextPart:
    """Literal text piece of a segment (one alternative of ``SegmentPart``)."""

    # The text itself; exporters escape it before writing markup.
    value: str
68
+
69
+
70
@dataclass(slots=True)
class CodePart:
    """Inline-tag placeholder inside a segment (one alternative of ``SegmentPart``)."""

    # Key looked up in the matching ``Tags`` tag map to find the ``TieData``.
    ref: str
73
+
74
+
75
# A segment is an ordered list of literal text pieces and inline-tag references.
SegmentPart = TextPart | CodePart
76
+
77
+
78
+ @dataclass(slots=True)
79
+ class Tags:
80
+ source_tag_map: dict[str, TieData] = field(default_factory=dict)
81
+ target_tag_map: dict[str, TieData] = field(default_factory=dict)
82
+ source_parts: list[SegmentPart] = field(default_factory=list)
83
+ target_parts: list[SegmentPart] = field(default_factory=list)
84
+
85
+
86
+ @dataclass(slots=True)
87
+ class AdjacentContext:
88
+ unit_id: Optional[str] = None
89
+ source: Optional[str] = None
90
+ target: Optional[str] = None
91
+ extensions: dict[str, str] = field(default_factory=dict)
92
+
93
+
94
@dataclass(slots=True)
class Data:
    """One translation unit: source text plus optional target and metadata.

    ``source`` is the only required field. ``meta``, ``comments`` and
    ``extensions`` default to fresh, empty instances. ``status`` defaults to
    ``TranslationStatus.UNKNOWN``.
    """

    source: str
    target: str | None = None
    plural: Plural | None = None
    tags: Tags | None = None
    meta: Meta = field(default_factory=Meta)
    status: TranslationStatus = TranslationStatus.UNKNOWN
    comments: list[Comment] = field(default_factory=list)
    previous_context: AdjacentContext | None = None
    next_context: AdjacentContext | None = None
    # Free-form string pairs (the exporters read keys such as "story" and
    # "meta_name" from here); each instance gets its own dict.
    extensions: dict[str, str] = field(default_factory=dict)
106
+
107
+
108
+ @dataclass(slots=True)
109
+ class BaseStructure:
110
+ source_locale: str
111
+ target_locale: Optional[str]
112
+ data: dict[str, Data]
113
+ format_version: str = "0.1"
114
+ export_origin: str = ""
115
+ export_timestamp: str = ""
116
+ source_language: Optional[str] = None
117
+ target_language: Optional[str] = None
118
+ extensions: dict[str, str] = field(default_factory=dict)
Binary file
@@ -0,0 +1,78 @@
1
+ from dataclasses import dataclass, field
2
+ from enum import StrEnum
3
+ from typing import Optional
4
+
5
+
6
class TieType(StrEnum):
    """Kind of an inline tag (see ``TieData.type``).

    Each value has the form ``<element>.<role>`` where the role is ``open``,
    ``close`` or ``standalone``. The exporters branch on the ``.open`` and
    ``.close`` suffixes of the value.
    """

    A_OPEN = "a.open"
    A_CLOSE = "a.close"
    ABBR_OPEN = "abbr.open"
    ABBR_CLOSE = "abbr.close"
    B_OPEN = "b.open"
    B_CLOSE = "b.close"
    BDI_OPEN = "bdi.open"
    BDI_CLOSE = "bdi.close"
    BDO_OPEN = "bdo.open"
    BDO_CLOSE = "bdo.close"
    BR = "br.standalone"
    CITE_OPEN = "cite.open"
    CITE_CLOSE = "cite.close"
    CODE_OPEN = "code.open"
    CODE_CLOSE = "code.close"
    DATA_OPEN = "data.open"
    DATA_CLOSE = "data.close"
    DFN_OPEN = "dfn.open"
    DFN_CLOSE = "dfn.close"
    EM_OPEN = "em.open"
    EM_CLOSE = "em.close"
    I_OPEN = "i.open"
    I_CLOSE = "i.close"
    IMG = "img.standalone"
    KBD_OPEN = "kbd.open"
    KBD_CLOSE = "kbd.close"
    MARK_OPEN = "mark.open"
    MARK_CLOSE = "mark.close"
    Q_OPEN = "q.open"
    Q_CLOSE = "q.close"
    RP_OPEN = "rp.open"
    RP_CLOSE = "rp.close"
    RT_OPEN = "rt.open"
    RT_CLOSE = "rt.close"
    RUBY_OPEN = "ruby.open"
    RUBY_CLOSE = "ruby.close"
    S_OPEN = "s.open"
    S_CLOSE = "s.close"
    SAMP_OPEN = "samp.open"
    SAMP_CLOSE = "samp.close"
    SMALL_OPEN = "small.open"
    SMALL_CLOSE = "small.close"
    SPAN_OPEN = "span.open"
    SPAN_CLOSE = "span.close"
    STRONG_OPEN = "strong.open"
    STRONG_CLOSE = "strong.close"
    SUB_OPEN = "sub.open"
    SUB_CLOSE = "sub.close"
    SUP_OPEN = "sup.open"
    SUP_CLOSE = "sup.close"
    TIME_OPEN = "time.open"
    TIME_CLOSE = "time.close"
    U_OPEN = "u.open"
    U_CLOSE = "u.close"
    VAR_OPEN = "var.open"
    VAR_CLOSE = "var.close"
    WBR = "wbr.standalone"
    # Catch-all kinds for tags without a dedicated member above.
    CUSTOM_OPEN = "custom.open"
    CUSTOM_CLOSE = "custom.close"
    CUSTOM_STANDALONE = "custom.standalone"
67
+
68
+
69
+ @dataclass(slots=True)
70
+ class TieData:
71
+ id: str
72
+ type: TieType
73
+ attributes: dict[str, str] = field(default_factory=dict)
74
+ attribute_data: str = ""
75
+ position: int = 0
76
+ order: int = 0
77
+ pair_id: Optional[str] = None
78
+ original_name: Optional[str] = None
Binary file
@@ -0,0 +1,34 @@
1
+ from lokit.exporters.csv import export_csv, export_csv_async
2
+ from lokit.exporters.html import export_html, export_html_async
3
+ from lokit.exporters.idml import export_idml, export_idml_async
4
+ from lokit.exporters.json_i18n import export_json_i18n, export_json_i18n_async
5
+ from lokit.exporters.po import export_po, export_po_async
6
+ from lokit.exporters.tmx import export_tmx, export_tmx_from_json
7
+ from lokit.exporters.xliff import (
8
+ export_xliff,
9
+ export_xliff_async,
10
+ export_xliff_from_json,
11
+ export_xliff_from_json_async,
12
+ )
13
+ from lokit.exporters.xlsx import export_xlsx, export_xlsx_async
14
+
15
# Public exporter API, re-exported from the format-specific submodules.
# NOTE(review): TMX is the only format without an ``*_async`` name here --
# confirm whether that is intentional.
__all__ = [
    "export_csv",
    "export_csv_async",
    "export_html",
    "export_html_async",
    "export_idml",
    "export_idml_async",
    "export_json_i18n",
    "export_json_i18n_async",
    "export_po",
    "export_po_async",
    "export_tmx",
    "export_tmx_from_json",
    "export_xliff",
    "export_xliff_async",
    "export_xliff_from_json",
    "export_xliff_from_json_async",
    "export_xlsx",
    "export_xlsx_async",
]
Binary file
lokit/exporters/csv.py ADDED
@@ -0,0 +1,32 @@
1
+ from __future__ import annotations
2
+
3
+ import asyncio
4
+ import csv
5
+ from pathlib import Path
6
+
7
+ from lokit.data.structure import BaseStructure, TranslationStatus
8
+
9
+
10
def export_csv(document: BaseStructure, filepath: str | Path) -> None:
    """Write every unit of *document* to a UTF-8 CSV file at *filepath*.

    The columns are ``id``, ``source``, ``target``, ``status`` and
    ``comment``. Missing parent directories are created. A status of
    ``UNKNOWN`` is written as an empty cell. Non-empty comment texts are
    joined with ``"; "``.
    """
    columns = ["id", "source", "target", "status", "comment"]

    def as_row(unit_id, unit):
        notes = [note.context for note in unit.comments if note.context]
        if unit.status == TranslationStatus.UNKNOWN:
            status_text = ""
        else:
            status_text = unit.status.value
        return {
            "id": unit_id,
            "source": unit.source,
            "target": unit.target or "",
            "status": status_text,
            "comment": "; ".join(notes),
        }

    destination = Path(filepath)
    destination.parent.mkdir(parents=True, exist_ok=True)

    # newline="" leaves line-ending handling to the csv module.
    with destination.open("w", newline="", encoding="utf-8") as handle:
        writer = csv.DictWriter(handle, fieldnames=columns)
        writer.writeheader()
        writer.writerows(
            as_row(unit_id, unit) for unit_id, unit in document.data.items()
        )
29
+
30
+
31
async def export_csv_async(document: BaseStructure, filepath: str | Path) -> None:
    """Run ``export_csv`` in a worker thread and wait for it to finish."""
    pending = asyncio.to_thread(export_csv, document, filepath)
    await pending
Binary file
@@ -0,0 +1,217 @@
1
+ from __future__ import annotations
2
+
3
+ import asyncio
4
+ from pathlib import Path
5
+ from typing import Any, cast
6
+
7
+ from lxml import html as lxml_html
8
+ from lxml.html import HtmlElement, tostring
9
+
10
+ from lokit.data.structure import BaseStructure, CodePart, Data, TextPart
11
+ from lokit.data.tag_types import TieData, TieType
12
+
13
+
14
def export_html(
    document: BaseStructure,
    filepath: str | Path,
    source_html: str | Path | None = None,
) -> None:
    """Export *document* as HTML to *filepath*.

    When *source_html* is given, that file is used as the template and its
    translatable content is replaced. Otherwise a minimal standalone page is
    generated. Missing parent directories of *filepath* are created.
    """
    destination = Path(filepath)
    destination.parent.mkdir(parents=True, exist_ok=True)

    if source_html is None:
        _export_minimal(document, destination)
        return

    _export_from_source(document, destination, Path(source_html))
26
+
27
+
28
async def export_html_async(
    document: BaseStructure,
    filepath: str | Path,
    source_html: str | Path | None = None,
) -> None:
    """Run ``export_html`` in a worker thread and wait for it to finish."""
    pending = asyncio.to_thread(export_html, document, filepath, source_html)
    await pending
34
+
35
+
36
def _export_from_source(
    document: BaseStructure, output: Path, source: Path
) -> None:
    """Fill the template *source* with translations and write it to *output*.

    Units are looked up by keys of the form ``html:<kind>:<n>``. ``<n>`` is a
    single counter shared by description/keywords meta tags, block elements
    and images with a non-blank ``alt``. It advances for every such element
    whether or not a translation exists. Falls back to the minimal export
    when the parsed template has no root element.
    """
    parsed = lxml_html.parse(str(source))
    root = parsed.getroot()
    if root is None:
        _export_minimal(document, output)
        return

    if document.target_locale:
        root.set("lang", document.target_locale)

    units = _build_unit_lookup(document)

    def translated_unit(key):
        # Only units that actually carry a non-empty target are applied.
        candidate = units.get(key)
        if candidate is not None and candidate.target:
            return candidate
        return None

    counter = 0

    head = root.find(".//head")
    meta_elements = () if head is None else head.iterfind(".//meta")
    for meta_el in meta_elements:
        meta_name = (meta_el.get("name") or "").lower()
        if meta_name not in ("description", "keywords"):
            continue
        unit = translated_unit(f"html:meta.{meta_name}:{counter}")
        if unit is not None:
            meta_el.set("content", unit.target)
        counter += 1

    block_tags = {
        "p", "h1", "h2", "h3", "h4", "h5", "h6",
        "li", "td", "th", "dt", "dd", "caption",
        "figcaption", "blockquote", "label", "option", "title",
    }

    # Snapshot the tree first: replacing an element's content detaches its
    # children, but they must still be visited so the counter keeps advancing.
    for el in list(root.iter()):
        tag_name = el.tag.lower() if isinstance(el.tag, str) else ""

        if tag_name in block_tags:
            unit = translated_unit(f"html:{tag_name}:{counter}")
            if unit is not None:
                _replace_element_text(el, unit)
            counter += 1

        if tag_name == "img":
            alt_text = el.get("alt")
            if alt_text and alt_text.strip():
                unit = translated_unit(f"html:img.alt:{counter}")
                if unit is not None:
                    el.set("alt", unit.target)
                counter += 1

    markup = tostring(root, encoding="unicode", doctype="<!DOCTYPE html>")
    output.write_text(markup, encoding="utf-8")
90
+
91
+
92
def _export_minimal(document: BaseStructure, output: Path) -> None:
    """Write a standalone HTML page built only from the units of *document*.

    Units whose id contains ``"meta."`` become ``<meta>`` tags in the head.
    Units whose id contains ``"img.alt"`` are skipped. Every other unit
    becomes one element in the body, named after the second ``:``-separated
    field of its id. The target text is used when present, otherwise the
    source text.
    """
    lang = document.target_locale or document.source_locale
    html_open = f'<html lang="{_escape(lang)}">'

    head_lines: list[str] = []
    body_lines: list[str] = []

    for unit_id, unit in document.data.items():
        if "meta." in unit_id:
            meta_name = unit.extensions.get("meta_name", "")
            meta_text = unit.target or unit.source
            head_lines.append(
                f'<meta name="{_escape(meta_name)}" content="{_escape(meta_text)}">'
            )
            continue
        if "img.alt" in unit_id:
            continue

        tag = _extract_tag_from_id(unit_id)
        if unit.tags and unit.tags.source_parts:
            inner = _rebuild_inline(unit, is_target=unit.target is not None)
        else:
            inner = _escape(unit.target or unit.source)
        body_lines.append(f"<{tag}>{inner}</{tag}>")

    page = [
        "<!DOCTYPE html>",
        html_open,
        "<head>",
        '<meta charset="utf-8">',
        *head_lines,
        "</head>",
        "<body>",
        *body_lines,
        "</body>",
        "</html>",
    ]
    output.write_text("\n".join(page), encoding="utf-8")
124
+
125
+
126
def _replace_element_text(element: HtmlElement, unit: Data) -> None:
    """Replace the content of *element* with the translation held by *unit*.

    Existing children are removed. For units with inline tags the markup is
    rebuilt and parsed into child nodes. Otherwise the plain target text
    becomes the element text. The element's attributes and tail are left
    alone.
    """
    if unit.tags and unit.tags.source_parts:
        # Build the markup first so a failure leaves the element untouched.
        content = _rebuild_inline(unit, is_target=True)
        for child in list(element):
            element.remove(child)
        element.text = None

        fragment: list[Any] = cast(
            "list[Any]", lxml_html.fragments_fromstring(content)
        )
        # fragments_fromstring() returns an empty list for empty or
        # whitespace-only markup, so the first item must not be assumed.
        if fragment and isinstance(fragment[0], str):
            element.text = fragment[0]
            children = fragment[1:]
        else:
            children = fragment

        for child in children:
            if isinstance(child, HtmlElement):
                element.append(child)
            elif isinstance(child, str):
                # Stray text goes after the last appended child, or into the
                # element text when there is no child yet.
                if len(element):
                    last = element[-1]
                    last.tail = (last.tail or "") + child
                else:
                    element.text = (element.text or "") + child
    else:
        for child in list(element):
            element.remove(child)
        element.text = unit.target
151
+
152
+
153
def _rebuild_inline(unit: Data, is_target: bool) -> str:
    """Return the HTML markup for *unit* rebuilt from its segment parts.

    Args:
        unit: The translation unit to render.
        is_target: Render the translation when true, the source otherwise.

    Returns:
        Escaped text with inline tags re-inserted. Code parts whose reference
        is missing from the tag map are skipped.
    """
    tags = unit.tags
    if not tags:
        return _escape(unit.target or unit.source)

    if is_target and tags.target_parts:
        parts = tags.target_parts
        tag_map = tags.target_tag_map
    elif is_target and unit.target:
        # A translation exists but was never split into parts. Emit it as
        # plain text rather than falling back to the source segment, which
        # would silently drop the translation.
        return _escape(unit.target)
    else:
        parts = tags.source_parts
        tag_map = tags.source_tag_map

    result: list[str] = []
    for part in parts:
        if isinstance(part, TextPart):
            result.append(_escape(part.value))
        elif isinstance(part, CodePart):
            tie = tag_map.get(part.ref)
            if tie is None:
                continue
            result.append(_tie_to_html(tie))
    return "".join(result)
173
+
174
+
175
def _tie_to_html(tie: TieData) -> str:
    """Render one inline tag as HTML markup.

    Opening tags carry their attributes and closing tags do not. ``br``,
    ``wbr`` and ``img`` are written as void elements. Any other standalone
    tag is written in self-closing form.
    """
    kind = tie.type.value
    # Without a recorded original name, fall back to the element name encoded
    # in the type value ("b.open" -> "b") so the output is never "<>" or "</>".
    name = tie.original_name or kind.partition(".")[0]

    if kind.endswith(".open"):
        return f"<{name}{_format_attrs(tie.attributes)}>"
    if kind.endswith(".close"):
        return f"</{name}>"
    if tie.type == TieType.BR:
        return "<br>"
    if tie.type == TieType.WBR:
        return "<wbr>"

    attrs = _format_attrs(tie.attributes)
    if tie.type == TieType.IMG:
        return f"<img{attrs}>"
    return f"<{name}{attrs}/>"
191
+
192
+
193
def _format_attrs(attributes: dict[str, str]) -> str:
    """Serialise *attributes* as `` key="value"`` pairs with a leading space each.

    Values are escaped; keys are written as given. An empty mapping yields
    an empty string.
    """
    if not attributes:
        return ""
    return "".join(
        f' {key}="{_escape(value)}"' for key, value in attributes.items()
    )
198
+
199
+
200
def _build_unit_lookup(document: BaseStructure) -> dict[str, Data]:
    """Return a shallow copy of the document's id-to-unit mapping."""
    lookup: dict[str, Data] = {}
    lookup.update(document.data)
    return lookup
202
+
203
+
204
def _extract_tag_from_id(unit_id: str) -> str:
    """Return the second ``:``-separated field of *unit_id*.

    Ids without a ``:`` yield ``"p"``.
    """
    _, separator, remainder = unit_id.partition(":")
    if not separator:
        return "p"
    return remainder.partition(":")[0]
209
+
210
+
211
def _escape(text: str) -> str:
    """Escape ``&``, ``<``, ``>`` and ``"`` for use in HTML text or attributes.

    Single quotes are left unchanged.
    """
    replacements = str.maketrans(
        {
            "&": "&amp;",
            "<": "&lt;",
            ">": "&gt;",
            '"': "&quot;",
        }
    )
    return text.translate(replacements)
Binary file
@@ -0,0 +1,178 @@
1
+ from __future__ import annotations
2
+
3
+ import asyncio
4
+ import shutil
5
+ import zipfile
6
+ from pathlib import Path
7
+
8
+ from lxml import etree
9
+ from lxml.etree import _Element
10
+
11
+ from lokit.data.structure import BaseStructure, CodePart, Data, TextPart
12
+
13
+
14
def export_idml(
    document: BaseStructure,
    filepath: str | Path,
    source_idml: str | Path,
) -> None:
    """Write a translated copy of the IDML package *source_idml* to *filepath*.

    The archive is rewritten entry by entry. Story files that have units in
    *document* get their text replaced, and every other entry is copied
    unchanged. Entry order and per-entry compression are preserved. Each
    name appears exactly once in the output, so no stale duplicate of a
    translated story is left behind.

    Raises:
        shutil.SameFileError: If *filepath* and *source_idml* are the same file.
    """
    output_path = Path(filepath)
    source_path = Path(source_idml)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    # Opening the output for writing would truncate the archive being read.
    if output_path.exists() and output_path.samefile(source_path):
        raise shutil.SameFileError(
            f"{source_path!r} and {output_path!r} are the same file"
        )

    story_units = _group_by_story(document)

    with zipfile.ZipFile(str(source_path), "r") as zf_in:
        try:
            with zipfile.ZipFile(str(output_path), "w") as zf_out:
                zf_out.comment = zf_in.comment
                for info in zf_in.infolist():
                    name = info.filename
                    units = None
                    if name.startswith("Stories/Story_") and name.endswith(".xml"):
                        units = story_units.get(name)

                    if units:
                        with zf_in.open(info) as stream:
                            root = etree.parse(stream).getroot()
                        _apply_translations(root, units)
                        payload = etree.tostring(
                            root, xml_declaration=True, encoding="UTF-8"
                        )
                    else:
                        payload = zf_in.read(info)

                    # Use a fresh ZipInfo so the reader's bookkeeping is not
                    # mutated while the original metadata is carried over.
                    entry = zipfile.ZipInfo(name, date_time=info.date_time)
                    entry.compress_type = info.compress_type
                    entry.external_attr = info.external_attr
                    entry.comment = info.comment
                    zf_out.writestr(entry, payload)
        except BaseException:
            # Do not leave a truncated package behind.
            output_path.unlink(missing_ok=True)
            raise
46
+
47
+
48
async def export_idml_async(
    document: BaseStructure,
    filepath: str | Path,
    source_idml: str | Path,
) -> None:
    """Run ``export_idml`` in a worker thread and wait for it to finish."""
    pending = asyncio.to_thread(export_idml, document, filepath, source_idml)
    await pending
54
+
55
+
56
def _group_by_story(
    document: BaseStructure,
) -> dict[str, dict[str, Data]]:
    """Group the units of *document* by their ``"story"`` extension value.

    Units without that extension, or with an empty value, are left out.
    """
    grouped: dict[str, dict[str, Data]] = {}
    for unit_id, unit in document.data.items():
        story_key = unit.extensions.get("story", "")
        if not story_key:
            continue
        if story_key not in grouped:
            grouped[story_key] = {}
        grouped[story_key][unit_id] = unit
    return grouped
65
+
66
+
67
def _apply_translations(root: _Element, units: dict[str, Data]) -> None:
    """Apply *units* to the ``ParagraphStyleRange`` elements below *root*.

    Paragraphs are numbered in document order. Paragraph ``n`` receives the
    unit whose id is ``<story>:p<n>``, where ``<story>`` is taken from the
    ids in *units*. Paragraphs with no unit, or with an empty target, are
    left unchanged.
    """
    story_name = _story_name_from_units(units)
    paragraphs = (
        node
        for node in root.iter()
        if _local_name(node.tag) == "ParagraphStyleRange"
    )
    for position, paragraph in enumerate(paragraphs):
        unit = units.get(f"{story_name}:p{position}")
        if unit is not None and unit.target:
            _replace_paragraph_text(paragraph, unit)
80
+
81
+
82
def _replace_paragraph_text(psr: _Element, unit: Data) -> None:
    """Write the translation of *unit* into the character ranges of *psr*.

    Only direct ``CharacterStyleRange`` children are considered. A paragraph
    without any is left untouched. Units with target parts are mapped range
    by range. Otherwise the plain target text is distributed over the ranges.
    """
    ranges = []
    for child in psr:
        if _local_name(child.tag) == "CharacterStyleRange":
            ranges.append(child)
    if not ranges:
        return

    tags = unit.tags
    if tags and tags.target_parts:
        _replace_with_tagged_parts(ranges, unit)
        return

    _distribute_text(ranges, unit.target or "")
95
+
96
+
97
def _replace_with_tagged_parts(
    char_ranges: list[_Element], unit: Data
) -> None:
    """Rewrite the text of *char_ranges* from the target parts of *unit*.

    The target parts are folded into one text per style, keyed by the
    ``style`` attribute of each opening tag. Each range then receives the
    text for its ``AppliedCharacterStyle``. Text left over after the last
    tag goes to the first unstyled range. Every other range is emptied.
    Does nothing when the unit has no tag data.
    """
    if unit.tags is None:
        return

    parts = unit.tags.target_parts
    tag_map = unit.tags.target_tag_map

    # Style name -> text collected for that style. A later run with the same
    # style overwrites an earlier one.
    range_texts: dict[str, str] = {}
    current_style: str | None = None
    current_text_parts: list[str] = []

    for part in parts:
        if isinstance(part, TextPart):
            current_text_parts.append(part.value)
        elif isinstance(part, CodePart):
            tie = tag_map.get(part.ref)
            if tie is None:
                # Unknown reference: skip the tag, keep collecting text.
                continue
            if tie.type.value.endswith(".open"):
                style = tie.attributes.get("style", "")
                # NOTE(review): text collected while no style is active is not
                # flushed here, so it ends up in the text of the style that
                # opens next -- confirm this is intended.
                if current_text_parts and current_style is not None:
                    range_texts[current_style] = "".join(current_text_parts)
                    current_text_parts = []
                current_style = style
            elif tie.type.value.endswith(".close"):
                if current_style is not None:
                    range_texts[current_style] = "".join(current_text_parts)
                    current_text_parts = []
                current_style = None

    # Whatever is still buffered after the loop is treated as unstyled text.
    plain_text = "".join(current_text_parts) if current_text_parts else None

    for csr in char_ranges:
        style = csr.get("AppliedCharacterStyle") or ""
        if style in range_texts:
            _set_content_text(csr, range_texts[style])
        elif plain_text is not None and (
            not style or style == "CharacterStyle/$ID/[No character style]"
        ):
            _set_content_text(csr, plain_text)
            # Only the first unstyled range receives the plain text.
            plain_text = None
        else:
            _set_content_text(csr, "")
142
+
143
+
144
def _distribute_text(char_ranges: list[_Element], text: str) -> None:
    """Put *text* into the first character range and empty all the others."""
    _set_content_text(char_ranges[0], text)
    for leftover in char_ranges[1:]:
        _set_content_text(leftover, "")
153
+
154
+
155
def _set_content_text(csr: _Element, text: str) -> None:
    """Store *text* in the first ``Content`` element of *csr*.

    Every further ``Content`` element found below *csr* is set to an empty
    string.
    """
    pending = text
    for node in csr.iter():
        if _local_name(node.tag) != "Content":
            continue
        node.text, pending = pending, ""
160
+
161
+
162
def _replace_in_zip(zf: zipfile.ZipFile, name: str, data: bytes) -> None:
    """Write *data* to the archive *zf* under the entry name *name*.

    NOTE(review): this only calls ``writestr``; nothing here removes an
    entry of the same name that the archive may already contain.
    """
    zf.writestr(zinfo_or_arcname=name, data=data)
164
+
165
+
166
def _story_name_from_units(units: dict[str, Data]) -> str:
    """Return the text before the first ``:`` of the first unit id.

    An empty mapping yields an empty string. Only the first key is examined.
    """
    first_id = next(iter(units), None)
    if first_id is None:
        return ""
    return first_id.partition(":")[0]
172
+
173
+
174
def _local_name(tag: object) -> str:
    """Return the local part of an element tag, without its ``{namespace}``.

    Args:
        tag: A tag as found on a tree node. ``str`` and ``bytes`` tags are
            supported. Anything else yields ``""``, because comments and
            processing instructions carry a callable instead of a name.

    Returns:
        The text after the first ``}``, or the whole name when the tag has
        no namespace prefix.
    """
    if isinstance(tag, bytes):
        name = tag.decode("utf-8")
    elif isinstance(tag, str):
        name = tag
    else:
        # Not an element: it can never match an element name.
        return ""

    _, brace, local = name.partition("}")
    return local if brace else name