lokit-python 0.1.0__cp312-cp312-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- 821d8b73c2a02cb7980f__mypyc.cp312-win_amd64.pyd +0 -0
- lokit/__init__.cp312-win_amd64.pyd +0 -0
- lokit/__init__.py +128 -0
- lokit/core/__init__.cp312-win_amd64.pyd +0 -0
- lokit/core/__init__.py +0 -0
- lokit/core/logger.cp312-win_amd64.pyd +0 -0
- lokit/core/logger.py +20 -0
- lokit/data/__init__.cp312-win_amd64.pyd +0 -0
- lokit/data/__init__.py +0 -0
- lokit/data/lang_codes.cp312-win_amd64.pyd +0 -0
- lokit/data/lang_codes.py +455 -0
- lokit/data/structure.cp312-win_amd64.pyd +0 -0
- lokit/data/structure.py +118 -0
- lokit/data/tag_types.cp312-win_amd64.pyd +0 -0
- lokit/data/tag_types.py +78 -0
- lokit/exporters/__init__.cp312-win_amd64.pyd +0 -0
- lokit/exporters/__init__.py +34 -0
- lokit/exporters/csv.cp312-win_amd64.pyd +0 -0
- lokit/exporters/csv.py +32 -0
- lokit/exporters/html.cp312-win_amd64.pyd +0 -0
- lokit/exporters/html.py +217 -0
- lokit/exporters/idml.cp312-win_amd64.pyd +0 -0
- lokit/exporters/idml.py +178 -0
- lokit/exporters/json_i18n.cp312-win_amd64.pyd +0 -0
- lokit/exporters/json_i18n.py +47 -0
- lokit/exporters/po.cp312-win_amd64.pyd +0 -0
- lokit/exporters/po.py +162 -0
- lokit/exporters/tmx.cp312-win_amd64.pyd +0 -0
- lokit/exporters/tmx.py +247 -0
- lokit/exporters/xliff.cp312-win_amd64.pyd +0 -0
- lokit/exporters/xliff.py +152 -0
- lokit/exporters/xlsx.cp312-win_amd64.pyd +0 -0
- lokit/exporters/xlsx.py +39 -0
- lokit/format_detection.cp312-win_amd64.pyd +0 -0
- lokit/format_detection.py +115 -0
- lokit/importers.py +321 -0
- lokit/io/__init__.cp312-win_amd64.pyd +0 -0
- lokit/io/__init__.py +3 -0
- lokit/io/json.cp312-win_amd64.pyd +0 -0
- lokit/io/json.py +194 -0
- lokit/logic.cp312-win_amd64.pyd +0 -0
- lokit/logic.py +324 -0
- lokit/parsers/__init__.cp312-win_amd64.pyd +0 -0
- lokit/parsers/__init__.py +1 -0
- lokit/parsers/csv/__init__.cp312-win_amd64.pyd +0 -0
- lokit/parsers/csv/__init__.py +1 -0
- lokit/parsers/csv/extraction.cp312-win_amd64.pyd +0 -0
- lokit/parsers/csv/extraction.py +164 -0
- lokit/parsers/html/__init__.cp312-win_amd64.pyd +0 -0
- lokit/parsers/html/__init__.py +3 -0
- lokit/parsers/html/extraction.cp312-win_amd64.pyd +0 -0
- lokit/parsers/html/extraction.py +365 -0
- lokit/parsers/idml/__init__.cp312-win_amd64.pyd +0 -0
- lokit/parsers/idml/__init__.py +3 -0
- lokit/parsers/idml/extraction.cp312-win_amd64.pyd +0 -0
- lokit/parsers/idml/extraction.py +264 -0
- lokit/parsers/json_i18n/__init__.cp312-win_amd64.pyd +0 -0
- lokit/parsers/json_i18n/__init__.py +3 -0
- lokit/parsers/json_i18n/extraction.cp312-win_amd64.pyd +0 -0
- lokit/parsers/json_i18n/extraction.py +163 -0
- lokit/parsers/po/__init__.cp312-win_amd64.pyd +0 -0
- lokit/parsers/po/__init__.py +3 -0
- lokit/parsers/po/extraction.cp312-win_amd64.pyd +0 -0
- lokit/parsers/po/extraction.py +236 -0
- lokit/parsers/tmx/__init__.cp312-win_amd64.pyd +0 -0
- lokit/parsers/tmx/__init__.py +0 -0
- lokit/parsers/tmx/base.cp312-win_amd64.pyd +0 -0
- lokit/parsers/tmx/base.py +145 -0
- lokit/parsers/tmx/extraction.cp312-win_amd64.pyd +0 -0
- lokit/parsers/tmx/extraction.py +170 -0
- lokit/parsers/tmx/header.cp312-win_amd64.pyd +0 -0
- lokit/parsers/tmx/header.py +55 -0
- lokit/parsers/tmx/helpers.cp312-win_amd64.pyd +0 -0
- lokit/parsers/tmx/helpers.py +9 -0
- lokit/parsers/tmx/models.cp312-win_amd64.pyd +0 -0
- lokit/parsers/tmx/models.py +10 -0
- lokit/parsers/tmx/props.cp312-win_amd64.pyd +0 -0
- lokit/parsers/tmx/props.py +201 -0
- lokit/parsers/tmx/tags.cp312-win_amd64.pyd +0 -0
- lokit/parsers/tmx/tags.py +59 -0
- lokit/parsers/tmx/xml_utils.cp312-win_amd64.pyd +0 -0
- lokit/parsers/tmx/xml_utils.py +46 -0
- lokit/parsers/xliff/__init__.cp312-win_amd64.pyd +0 -0
- lokit/parsers/xliff/__init__.py +3 -0
- lokit/parsers/xliff/extraction.cp312-win_amd64.pyd +0 -0
- lokit/parsers/xliff/extraction.py +229 -0
- lokit/parsers/xliff/tags.cp312-win_amd64.pyd +0 -0
- lokit/parsers/xliff/tags.py +128 -0
- lokit/parsers/xlsx/__init__.cp312-win_amd64.pyd +0 -0
- lokit/parsers/xlsx/__init__.py +1 -0
- lokit/parsers/xlsx/extraction.cp312-win_amd64.pyd +0 -0
- lokit/parsers/xlsx/extraction.py +198 -0
- lokit/py.typed +1 -0
- lokit_python-0.1.0.dist-info/METADATA +149 -0
- lokit_python-0.1.0.dist-info/RECORD +97 -0
- lokit_python-0.1.0.dist-info/WHEEL +5 -0
- lokit_python-0.1.0.dist-info/top_level.txt +2 -0
lokit/data/structure.py
ADDED
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
from dataclasses import dataclass, field
|
|
2
|
+
from enum import StrEnum
|
|
3
|
+
from typing import Optional
|
|
4
|
+
|
|
5
|
+
from lokit.data.tag_types import TieData
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class TranslationStatus(StrEnum):
    """String-valued workflow state of a translation unit.

    ``UNKNOWN`` is the default assigned to ``Data.status`` in this module.
    """

    NEW = "new"
    DRAFT = "draft"
    TRANSLATED = "translated"
    REVIEWED = "reviewed"
    APPROVED = "approved"
    REJECTED = "rejected"
    UNKNOWN = "unknown"
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class PluralCategory(StrEnum):
    """String-valued plural form category used by ``Plural.category``.

    NOTE(review): zero/one/two/few/many/other look like the CLDR plural
    categories; ``GENERIC`` appears to be a project-specific addition - confirm.
    """

    GENERIC = "generic"
    ZERO = "zero"
    ONE = "one"
    TWO = "two"
    FEW = "few"
    MANY = "many"
    OTHER = "other"
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@dataclass(slots=True)
class Plural:
    """Plural information attached to a translation unit via ``Data.plural``."""

    # Required; the only field without a default.
    variant: str
    # Optional numeric count; None when not provided.
    count: Optional[int] = None
    # Optional plural category; None when not provided.
    category: Optional[PluralCategory] = None
    # Free-form string key/value pairs; a fresh dict per instance.
    extensions: dict[str, str] = field(default_factory=dict)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
@dataclass(slots=True)
class Meta:
    """Optional bookkeeping metadata for a translation unit (see ``Data.meta``).

    Every field defaults to ``None`` (or an empty dict), so ``Meta()`` is a
    valid "nothing known" value.
    """

    usage_count: Optional[int] = None
    # The date-like fields below are plain strings; no format is enforced here.
    last_used: Optional[str] = None
    first_used: Optional[str] = None
    created: Optional[str] = None
    updated: Optional[str] = None
    # NOTE(review): presumably length limits for the translated text - confirm.
    max_length: Optional[int] = None
    min_length: Optional[int] = None
    # Free-form string key/value pairs; a fresh dict per instance.
    extensions: dict[str, str] = field(default_factory=dict)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
@dataclass(slots=True)
class Origin:
    """Optional provenance record; referenced by ``Comment.origin``."""

    system: Optional[str] = None
    project: Optional[str] = None
    creator_id: Optional[str] = None
    # Free-form string key/value pairs; a fresh dict per instance.
    extensions: dict[str, str] = field(default_factory=dict)
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
@dataclass(slots=True)
class Comment:
    """A comment attached to a translation unit (see ``Data.comments``)."""

    # Required comment text; the only field without a default.
    context: str
    # Stored as a plain string; no format is enforced here.
    timestamp: Optional[str] = None
    origin: Optional[Origin] = None
    context_key: Optional[str] = None
    # Free-form string key/value pairs; a fresh dict per instance.
    extensions: dict[str, str] = field(default_factory=dict)
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
@dataclass(slots=True)
class TextPart:
    """A run of literal text inside a segment (one arm of ``SegmentPart``)."""

    # The text itself.
    value: str
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
@dataclass(slots=True)
class CodePart:
    """A reference to an inline code inside a segment (one arm of ``SegmentPart``)."""

    # Reference string for the inline code.
    # NOTE(review): presumably a key into a ``Tags`` tag map - confirm.
    ref: str
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
# A segment is an ordered list of these parts: literal text or an inline-code reference.
SegmentPart = TextPart | CodePart
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
@dataclass(slots=True)
class Tags:
    """Inline-tag information for the source and target side of a unit.

    Each side has a tag map (string key -> ``TieData``) and an ordered list of
    ``SegmentPart`` items. All four containers default to empty.
    """

    source_tag_map: dict[str, TieData] = field(default_factory=dict)
    target_tag_map: dict[str, TieData] = field(default_factory=dict)
    source_parts: list[SegmentPart] = field(default_factory=list)
    target_parts: list[SegmentPart] = field(default_factory=list)
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
@dataclass(slots=True)
class AdjacentContext:
    """Snapshot of a neighbouring unit.

    Used by ``Data.previous_context`` and ``Data.next_context``; every field is
    optional.
    """

    unit_id: Optional[str] = None
    source: Optional[str] = None
    target: Optional[str] = None
    # Free-form string key/value pairs; a fresh dict per instance.
    extensions: dict[str, str] = field(default_factory=dict)
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
@dataclass(slots=True)
class Data:
    """One translation unit: source text plus optional target and metadata.

    Only ``source`` is required. Mutable defaults are created per instance via
    ``default_factory``.
    """

    source: str
    # None when there is no translation.
    target: Optional[str] = None
    plural: Optional[Plural] = None
    # Inline-tag breakdown of source/target; None when absent.
    tags: Optional[Tags] = None
    meta: Meta = field(default_factory=Meta)
    status: TranslationStatus = TranslationStatus.UNKNOWN
    comments: list[Comment] = field(default_factory=list)
    previous_context: Optional[AdjacentContext] = None
    next_context: Optional[AdjacentContext] = None
    # Free-form string key/value pairs; a fresh dict per instance.
    extensions: dict[str, str] = field(default_factory=dict)
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
@dataclass(slots=True)
class BaseStructure:
    """Top-level document: locale information plus the translation units.

    ``source_locale``, ``target_locale`` and ``data`` have no defaults and must
    be supplied by the caller.
    """

    source_locale: str
    # Has no default, but may be None.
    target_locale: Optional[str]
    # Translation units keyed by a string identifier.
    data: dict[str, Data]
    format_version: str = "0.1"
    export_origin: str = ""
    # Stored as a plain string; no format is enforced here.
    export_timestamp: str = ""
    # NOTE(review): relationship to source_locale/target_locale is not visible here - confirm.
    source_language: Optional[str] = None
    target_language: Optional[str] = None
    # Free-form string key/value pairs; a fresh dict per instance.
    extensions: dict[str, str] = field(default_factory=dict)
|
|
Binary file
|
lokit/data/tag_types.py
ADDED
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
from dataclasses import dataclass, field
|
|
2
|
+
from enum import StrEnum
|
|
3
|
+
from typing import Optional
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class TieType(StrEnum):
    """String-valued kind of an inline tag.

    Every value has the shape ``"<name>.<role>"`` where the role is ``open``,
    ``close`` or ``standalone``. The ``CUSTOM_*`` members use the literal name
    ``custom`` instead of a concrete element name.
    """

    A_OPEN = "a.open"
    A_CLOSE = "a.close"
    ABBR_OPEN = "abbr.open"
    ABBR_CLOSE = "abbr.close"
    B_OPEN = "b.open"
    B_CLOSE = "b.close"
    BDI_OPEN = "bdi.open"
    BDI_CLOSE = "bdi.close"
    BDO_OPEN = "bdo.open"
    BDO_CLOSE = "bdo.close"
    BR = "br.standalone"
    CITE_OPEN = "cite.open"
    CITE_CLOSE = "cite.close"
    CODE_OPEN = "code.open"
    CODE_CLOSE = "code.close"
    DATA_OPEN = "data.open"
    DATA_CLOSE = "data.close"
    DFN_OPEN = "dfn.open"
    DFN_CLOSE = "dfn.close"
    EM_OPEN = "em.open"
    EM_CLOSE = "em.close"
    I_OPEN = "i.open"
    I_CLOSE = "i.close"
    IMG = "img.standalone"
    KBD_OPEN = "kbd.open"
    KBD_CLOSE = "kbd.close"
    MARK_OPEN = "mark.open"
    MARK_CLOSE = "mark.close"
    Q_OPEN = "q.open"
    Q_CLOSE = "q.close"
    RP_OPEN = "rp.open"
    RP_CLOSE = "rp.close"
    RT_OPEN = "rt.open"
    RT_CLOSE = "rt.close"
    RUBY_OPEN = "ruby.open"
    RUBY_CLOSE = "ruby.close"
    S_OPEN = "s.open"
    S_CLOSE = "s.close"
    SAMP_OPEN = "samp.open"
    SAMP_CLOSE = "samp.close"
    SMALL_OPEN = "small.open"
    SMALL_CLOSE = "small.close"
    SPAN_OPEN = "span.open"
    SPAN_CLOSE = "span.close"
    STRONG_OPEN = "strong.open"
    STRONG_CLOSE = "strong.close"
    SUB_OPEN = "sub.open"
    SUB_CLOSE = "sub.close"
    SUP_OPEN = "sup.open"
    SUP_CLOSE = "sup.close"
    TIME_OPEN = "time.open"
    TIME_CLOSE = "time.close"
    U_OPEN = "u.open"
    U_CLOSE = "u.close"
    VAR_OPEN = "var.open"
    VAR_CLOSE = "var.close"
    WBR = "wbr.standalone"
    CUSTOM_OPEN = "custom.open"
    CUSTOM_CLOSE = "custom.close"
    CUSTOM_STANDALONE = "custom.standalone"
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
@dataclass(slots=True)
class TieData:
    """Description of one inline tag; stored as the values of the ``Tags`` tag maps."""

    # ``id`` and ``type`` shadow builtins inside the class body only; they are
    # part of the public constructor signature and must keep these names.
    id: str
    type: TieType
    # Attribute name -> value pairs of the tag.
    attributes: dict[str, str] = field(default_factory=dict)
    # NOTE(review): presumably a raw/serialised form of the attributes - confirm.
    attribute_data: str = ""
    # NOTE(review): units and base (character offset? index?) are not visible here - confirm.
    position: int = 0
    order: int = 0
    # NOTE(review): presumably the id of the matching open/close tag - confirm.
    pair_id: Optional[str] = None
    # Element name as it appeared in the original markup, when known.
    original_name: Optional[str] = None
|
|
Binary file
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
from lokit.exporters.csv import export_csv, export_csv_async
|
|
2
|
+
from lokit.exporters.html import export_html, export_html_async
|
|
3
|
+
from lokit.exporters.idml import export_idml, export_idml_async
|
|
4
|
+
from lokit.exporters.json_i18n import export_json_i18n, export_json_i18n_async
|
|
5
|
+
from lokit.exporters.po import export_po, export_po_async
|
|
6
|
+
from lokit.exporters.tmx import export_tmx, export_tmx_from_json
|
|
7
|
+
from lokit.exporters.xliff import (
|
|
8
|
+
export_xliff,
|
|
9
|
+
export_xliff_async,
|
|
10
|
+
export_xliff_from_json,
|
|
11
|
+
export_xliff_from_json_async,
|
|
12
|
+
)
|
|
13
|
+
from lokit.exporters.xlsx import export_xlsx, export_xlsx_async
|
|
14
|
+
|
|
15
|
+
# Public API of this package: exactly the names imported above, in the same
# order. TMX exposes no ``*_async`` variants here because none are imported.
__all__ = [
    "export_csv",
    "export_csv_async",
    "export_html",
    "export_html_async",
    "export_idml",
    "export_idml_async",
    "export_json_i18n",
    "export_json_i18n_async",
    "export_po",
    "export_po_async",
    "export_tmx",
    "export_tmx_from_json",
    "export_xliff",
    "export_xliff_async",
    "export_xliff_from_json",
    "export_xliff_from_json_async",
    "export_xlsx",
    "export_xlsx_async",
]
|
|
Binary file
|
lokit/exporters/csv.py
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
import csv
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
from lokit.data.structure import BaseStructure, TranslationStatus
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def export_csv(document: BaseStructure, filepath: str | Path) -> None:
    """Write every unit of *document* to a UTF-8 CSV file at *filepath*.

    Columns are ``id, source, target, status, comment``. Missing parent
    directories are created. An ``UNKNOWN`` status and a missing target are
    written as empty cells; non-empty comment texts are joined with ``"; "``.
    """
    destination = Path(filepath)
    destination.parent.mkdir(parents=True, exist_ok=True)

    columns = ["id", "source", "target", "status", "comment"]
    with destination.open("w", newline="", encoding="utf-8") as handle:
        sheet = csv.DictWriter(handle, fieldnames=columns)
        sheet.writeheader()

        for unit_id, unit in document.data.items():
            notes = [c.context for c in unit.comments if c.context]
            if unit.status == TranslationStatus.UNKNOWN:
                status_text = ""
            else:
                status_text = unit.status.value

            row = {
                "id": unit_id,
                "source": unit.source,
                "target": unit.target or "",
                "status": status_text,
                "comment": "; ".join(notes),
            }
            sheet.writerow(row)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
async def export_csv_async(document: BaseStructure, filepath: str | Path) -> None:
    """Run :func:`export_csv` in a worker thread via ``asyncio.to_thread``."""
    await asyncio.to_thread(export_csv, document, filepath)
|
|
Binary file
|
lokit/exporters/html.py
ADDED
|
@@ -0,0 +1,217 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Any, cast
|
|
6
|
+
|
|
7
|
+
from lxml import html as lxml_html
|
|
8
|
+
from lxml.html import HtmlElement, tostring
|
|
9
|
+
|
|
10
|
+
from lokit.data.structure import BaseStructure, CodePart, Data, TextPart
|
|
11
|
+
from lokit.data.tag_types import TieData, TieType
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def export_html(
    document: BaseStructure,
    filepath: str | Path,
    source_html: str | Path | None = None,
) -> None:
    """Export *document* as HTML to *filepath*, creating parent directories.

    With *source_html* the translations are merged into that existing page;
    without it a minimal standalone page is generated.
    """
    destination = Path(filepath)
    destination.parent.mkdir(parents=True, exist_ok=True)

    if source_html is None:
        _export_minimal(document, destination)
        return
    _export_from_source(document, destination, Path(source_html))
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
async def export_html_async(
    document: BaseStructure,
    filepath: str | Path,
    source_html: str | Path | None = None,
) -> None:
    """Run :func:`export_html` in a worker thread via ``asyncio.to_thread``."""
    await asyncio.to_thread(export_html, document, filepath, source_html)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _export_from_source(
    document: BaseStructure, output: Path, source: Path
) -> None:
    """Merge the translations of *document* into the HTML file *source*.

    The source page is parsed with lxml and walked in document order. Unit
    keys are rebuilt as ``html:<kind>:<index>`` using a single counter shared
    by meta tags, block elements and image alt texts. Units that have a target
    replace the matching content. The result is written to *output* as UTF-8
    with an HTML5 doctype. Falls back to ``_export_minimal`` when the parsed
    document has no root element.

    NOTE(review): the nesting of the three ``index += 1`` statements was
    reconstructed (one increment per candidate element, whether or not a
    translation exists) - confirm against the extractor that assigns the keys.
    """
    doc = lxml_html.parse(str(source))
    root = doc.getroot()
    if root is None:
        _export_minimal(document, output)
        return

    if document.target_locale:
        root.set("lang", document.target_locale)

    unit_lookup = _build_unit_lookup(document)
    # Shared running counter used in every generated unit key.
    index = 0

    head = root.find(".//head")
    if head is not None:
        for meta_el in head.iterfind(".//meta"):
            name = (meta_el.get("name") or "").lower()
            # Only these two meta tags are treated as translatable.
            if name in ("description", "keywords"):
                key = f"html:meta.{name}:{index}"
                unit = unit_lookup.get(key)
                if unit is not None and unit.target:
                    meta_el.set("content", unit.target)
                index += 1

    block_tags = {
        "p", "h1", "h2", "h3", "h4", "h5", "h6",
        "li", "td", "th", "dt", "dd", "caption",
        "figcaption", "blockquote", "label", "option", "title",
    }

    # Snapshot the traversal: _replace_element_text removes children while we
    # walk, which must not disturb the iteration.
    for el in list(root.iter()):
        # Comments and processing instructions have a non-string tag.
        tag = el.tag if isinstance(el.tag, str) else ""
        tag_lower = tag.lower()

        if tag_lower in block_tags:
            key = f"html:{tag_lower}:{index}"
            unit = unit_lookup.get(key)
            if unit is not None and unit.target:
                _replace_element_text(el, unit)
            index += 1

        if tag_lower == "img":
            alt = el.get("alt")
            # Images without a non-blank alt text are skipped and not counted.
            if alt and alt.strip():
                key = f"html:img.alt:{index}"
                unit = unit_lookup.get(key)
                if unit is not None and unit.target:
                    el.set("alt", unit.target)
                index += 1

    result = tostring(root, encoding="unicode", doctype="<!DOCTYPE html>")
    output.write_text(result, encoding="utf-8")
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def _export_minimal(document: BaseStructure, output: Path) -> None:
    """Write a bare-bones HTML page built only from the units of *document*.

    Units whose id contains ``"meta."`` become ``<meta>`` tags in the head,
    units whose id contains ``"img.alt"`` are skipped, and every other unit
    becomes one element in the body. The element name is taken from the unit
    id. Target text is used when present, otherwise the source text.
    """
    lang = document.target_locale or document.source_locale
    markup: list[str] = [
        "<!DOCTYPE html>",
        f'<html lang="{_escape(lang)}">',
        "<head>",
        '<meta charset="utf-8">',
    ]
    body: list[str] = []

    # Single pass: head entries go straight into ``markup``, body entries are
    # collected and appended afterwards, so the relative order is unchanged.
    for unit_id, unit in document.data.items():
        if "meta." in unit_id:
            meta_name = _escape(unit.extensions.get("meta_name", ""))
            meta_content = _escape(unit.target or unit.source)
            markup.append(f'<meta name="{meta_name}" content="{meta_content}">')
            continue
        if "img.alt" in unit_id:
            continue

        tag = _extract_tag_from_id(unit_id)
        if unit.tags and unit.tags.source_parts:
            inner = _rebuild_inline(unit, is_target=unit.target is not None)
        else:
            inner = _escape(unit.target or unit.source)
        body.append(f"<{tag}>{inner}</{tag}>")

    markup += ["</head>", "<body>", *body, "</body>", "</html>"]
    output.write_text("\n".join(markup), encoding="utf-8")
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def _replace_element_text(element: HtmlElement, unit: Data) -> None:
    """Replace the content of *element* with the translation held by *unit*.

    All existing children are removed. Units without inline tags get
    ``unit.target`` as plain text. Units with inline tags are rebuilt as an
    HTML fragment, parsed, and attached to *element*.
    """
    for child in list(element):
        element.remove(child)

    if not (unit.tags and unit.tags.source_parts):
        element.text = unit.target
        return

    content = _rebuild_inline(unit, is_target=True)
    element.text = None
    fragment: list[Any] = cast("list[Any]", lxml_html.fragments_fromstring(content))

    if not fragment:
        # The parser returns an empty list when the rebuilt markup has no
        # elements and no non-blank leading text, e.g. every code reference was
        # unresolved. Fall back to the plain target instead of raising
        # IndexError on ``fragment[0]``.
        element.text = unit.target
        return

    for node in fragment:
        if isinstance(node, HtmlElement):
            element.append(node)
        elif isinstance(node, str):
            if len(element):
                # Text that follows an element is stored as that element's tail.
                last = element[-1]
                last.tail = (last.tail or "") + node
            else:
                # Leading text before any child element.
                element.text = (element.text or "") + node
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def _rebuild_inline(unit: Data, is_target: bool) -> str:
    """Serialise the segment parts of *unit* back into an HTML string.

    The target parts are used when *is_target* is true and they are non-empty;
    otherwise the source parts are used. Units without tag information yield
    their escaped target (or source) text. Code references missing from the
    tag map are dropped.
    """
    tags = unit.tags
    if not tags:
        return _escape(unit.target or unit.source)

    if is_target and tags.target_parts:
        parts, tag_map = tags.target_parts, tags.target_tag_map
    else:
        parts, tag_map = tags.source_parts, tags.source_tag_map

    pieces: list[str] = []
    for part in parts:
        if isinstance(part, TextPart):
            pieces.append(_escape(part.value))
        elif isinstance(part, CodePart):
            tie = tag_map.get(part.ref)
            if tie is not None:
                pieces.append(_tie_to_html(tie))
    return "".join(pieces)
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
def _tie_to_html(tie: TieData) -> str:
|
|
176
|
+
name = tie.original_name or ""
|
|
177
|
+
if tie.type.value.endswith(".open"):
|
|
178
|
+
attrs = _format_attrs(tie.attributes)
|
|
179
|
+
return f"<{name}{attrs}>"
|
|
180
|
+
if tie.type.value.endswith(".close"):
|
|
181
|
+
return f"</{name}>"
|
|
182
|
+
if tie.type == TieType.BR:
|
|
183
|
+
return "<br>"
|
|
184
|
+
if tie.type == TieType.WBR:
|
|
185
|
+
return "<wbr>"
|
|
186
|
+
if tie.type == TieType.IMG:
|
|
187
|
+
attrs = _format_attrs(tie.attributes)
|
|
188
|
+
return f"<img{attrs}>"
|
|
189
|
+
attrs = _format_attrs(tie.attributes)
|
|
190
|
+
return f"<{name}{attrs}/>"
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
def _format_attrs(attributes: dict[str, str]) -> str:
|
|
194
|
+
if not attributes:
|
|
195
|
+
return ""
|
|
196
|
+
parts = [f' {k}="{_escape(v)}"' for k, v in attributes.items()]
|
|
197
|
+
return "".join(parts)
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
def _build_unit_lookup(document: BaseStructure) -> dict[str, Data]:
|
|
201
|
+
return dict(document.data)
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
def _extract_tag_from_id(unit_id: str) -> str:
|
|
205
|
+
parts = unit_id.split(":")
|
|
206
|
+
if len(parts) >= 2:
|
|
207
|
+
return parts[1]
|
|
208
|
+
return "p"
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
def _escape(text: str) -> str:
|
|
212
|
+
return (
|
|
213
|
+
text.replace("&", "&")
|
|
214
|
+
.replace("<", "<")
|
|
215
|
+
.replace(">", ">")
|
|
216
|
+
.replace('"', """)
|
|
217
|
+
)
|
|
Binary file
|
lokit/exporters/idml.py
ADDED
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
import shutil
|
|
5
|
+
import zipfile
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
from lxml import etree
|
|
9
|
+
from lxml.etree import _Element
|
|
10
|
+
|
|
11
|
+
from lokit.data.structure import BaseStructure, CodePart, Data, TextPart
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def export_idml(
    document: BaseStructure,
    filepath: str | Path,
    source_idml: str | Path,
) -> None:
    """Write a translated copy of the IDML package *source_idml* to *filepath*.

    Every entry of the source archive is copied to the output in its original
    order with its original ``ZipInfo`` (name, compression method, timestamp).
    ``Stories/Story_*.xml`` entries that have units in *document* are replaced
    by their translated XML.

    The archive is rebuilt rather than copied and appended to. Appending with
    ``writestr`` does not replace an existing member; it adds a second entry
    under the same name and leaves the untranslated story in the package.

    The source is read completely before the output is opened, so *filepath*
    may be the same file as *source_idml*. The whole archive is held in memory
    while it is rewritten.
    """
    output_path = Path(filepath)
    source_path = Path(source_idml)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    story_units = _group_by_story(document)

    entries: list[tuple[zipfile.ZipInfo, bytes]] = []
    with zipfile.ZipFile(str(source_path), "r") as zf_in:
        for info in zf_in.infolist():
            payload = zf_in.read(info)
            name = info.filename
            units = story_units.get(name)
            if (
                units
                and name.startswith("Stories/Story_")
                and name.endswith(".xml")
            ):
                root = etree.fromstring(payload)
                _apply_translations(root, units)
                payload = etree.tostring(
                    root, xml_declaration=True, encoding="UTF-8"
                )
            entries.append((info, payload))

    with zipfile.ZipFile(str(output_path), "w") as zf_out:
        for info, payload in entries:
            # Passing the ZipInfo keeps each member's own compression method.
            zf_out.writestr(info, payload)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
async def export_idml_async(
    document: BaseStructure,
    filepath: str | Path,
    source_idml: str | Path,
) -> None:
    """Run :func:`export_idml` in a worker thread via ``asyncio.to_thread``."""
    await asyncio.to_thread(export_idml, document, filepath, source_idml)
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def _group_by_story(
|
|
57
|
+
document: BaseStructure,
|
|
58
|
+
) -> dict[str, dict[str, Data]]:
|
|
59
|
+
groups: dict[str, dict[str, Data]] = {}
|
|
60
|
+
for unit_id, unit in document.data.items():
|
|
61
|
+
story = unit.extensions.get("story", "")
|
|
62
|
+
if story:
|
|
63
|
+
groups.setdefault(story, {})[unit_id] = unit
|
|
64
|
+
return groups
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def _apply_translations(root: _Element, units: dict[str, Data]) -> None:
    """Write the targets of *units* into the story tree rooted at *root*.

    ``ParagraphStyleRange`` elements are numbered in document order. The n-th
    one is matched to the unit id ``"<story>:p<n>"``, where ``<story>`` comes
    from the unit ids themselves. Paragraphs without a unit, or whose unit has
    no target, are left untouched but still counted.
    """
    prefix = _story_name_from_units(units)
    paragraphs = (
        node for node in root.iter()
        if _local_name(node.tag) == "ParagraphStyleRange"
    )
    for position, paragraph in enumerate(paragraphs):
        unit = units.get(f"{prefix}:p{position}")
        if unit is None or not unit.target:
            continue
        _replace_paragraph_text(paragraph, unit)
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def _replace_paragraph_text(psr: _Element, unit: Data) -> None:
    """Put the target text of *unit* into the character ranges of paragraph *psr*.

    Only direct ``CharacterStyleRange`` children are considered; a paragraph
    without any is left unchanged. Units with target inline parts are spread
    over the ranges by style, all others get the plain target text.
    """
    ranges = [
        child for child in psr
        if _local_name(child.tag) == "CharacterStyleRange"
    ]
    if not ranges:
        return

    tags = unit.tags
    if tags and tags.target_parts:
        _replace_with_tagged_parts(ranges, unit)
        return
    _distribute_text(ranges, unit.target or "")
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def _replace_with_tagged_parts(
    char_ranges: list[_Element], unit: Data
) -> None:
    """Distribute the tagged target parts of *unit* over *char_ranges*.

    The target parts are scanned in order. Text between an ``.open`` code and
    the next ``.open``/``.close`` code is stored under the ``"style"``
    attribute of the opening tag. Text left over after the last code is kept
    as plain text. Each range then receives the text stored for its
    ``AppliedCharacterStyle``. The first unstyled range receives the plain
    text, and every other range is emptied.

    NOTE(review): texts are keyed by style, so a later run with the same style
    overwrites an earlier one. The nesting of the ``current_text_parts = []``
    resets was reconstructed - confirm.
    """
    if unit.tags is None:
        return

    parts = unit.tags.target_parts
    tag_map = unit.tags.target_tag_map

    # style name -> text collected for that style
    range_texts: dict[str, str] = {}
    # Style of the currently open tag, or None outside any tag.
    current_style: str | None = None
    current_text_parts: list[str] = []

    for part in parts:
        if isinstance(part, TextPart):
            current_text_parts.append(part.value)
        elif isinstance(part, CodePart):
            tie = tag_map.get(part.ref)
            if tie is None:
                # Unresolvable code reference: ignored.
                continue
            if tie.type.value.endswith(".open"):
                style = tie.attributes.get("style", "")
                # Flush text of a still-open style before switching to the new one.
                if current_text_parts and current_style is not None:
                    range_texts[current_style] = "".join(current_text_parts)
                    current_text_parts = []
                current_style = style
            elif tie.type.value.endswith(".close"):
                if current_style is not None:
                    range_texts[current_style] = "".join(current_text_parts)
                    current_text_parts = []
                current_style = None

    # Whatever text is still pending after the last code.
    plain_text = "".join(current_text_parts) if current_text_parts else None

    for csr in char_ranges:
        style = csr.get("AppliedCharacterStyle") or ""
        if style in range_texts:
            _set_content_text(csr, range_texts[style])
        elif plain_text is not None and (
            not style or style == "CharacterStyle/$ID/[No character style]"
        ):
            _set_content_text(csr, plain_text)
            # Consume the plain text so only the first unstyled range gets it.
            plain_text = None
        else:
            _set_content_text(csr, "")
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
def _distribute_text(char_ranges: list[_Element], text: str) -> None:
    """Put *text* into the first character range and blank out the others.

    *char_ranges* must not be empty.
    """
    leading = char_ranges[0]
    _set_content_text(leading, text)
    # With a single range this slice is empty and nothing else is touched.
    for trailing in char_ranges[1:]:
        _set_content_text(trailing, "")
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
def _set_content_text(csr: _Element, text: str) -> None:
    """Store *text* in the first ``Content`` node under *csr*; empty the rest."""
    pending = text
    for node in csr.iter():
        if _local_name(node.tag) != "Content":
            continue
        node.text = pending
        # Every further Content node receives an empty string.
        pending = ""
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
def _replace_in_zip(zf: zipfile.ZipFile, name: str, data: bytes) -> None:
    """Write *data* to *zf* as a member called *name*.

    NOTE(review): despite the function name, ``writestr`` only adds a member.
    If the archive already contains *name*, the old entry is kept and a
    second entry with the same name is added.
    """
    zf.writestr(name, data)
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
def _story_name_from_units(units: dict[str, Data]) -> str:
|
|
167
|
+
for unit_id in units:
|
|
168
|
+
parts = unit_id.split(":")
|
|
169
|
+
if parts:
|
|
170
|
+
return parts[0]
|
|
171
|
+
return ""
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
def _local_name(tag: str | bytes) -> str:
|
|
175
|
+
name = tag if isinstance(tag, str) else tag.decode("utf-8")
|
|
176
|
+
if "}" in name:
|
|
177
|
+
return name.split("}", 1)[1]
|
|
178
|
+
return name
|
|
Binary file
|