epub-translator 0.0.6__py3-none-any.whl → 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82)
  1. epub_translator/__init__.py +3 -1
  2. epub_translator/data/fill.jinja +66 -0
  3. epub_translator/data/mmltex/README.md +67 -0
  4. epub_translator/data/mmltex/cmarkup.xsl +1106 -0
  5. epub_translator/data/mmltex/entities.xsl +459 -0
  6. epub_translator/data/mmltex/glayout.xsl +222 -0
  7. epub_translator/data/mmltex/mmltex.xsl +36 -0
  8. epub_translator/data/mmltex/scripts.xsl +375 -0
  9. epub_translator/data/mmltex/tables.xsl +130 -0
  10. epub_translator/data/mmltex/tokens.xsl +328 -0
  11. epub_translator/data/translate.jinja +15 -12
  12. epub_translator/epub/__init__.py +4 -2
  13. epub_translator/epub/common.py +43 -0
  14. epub_translator/epub/math.py +193 -0
  15. epub_translator/epub/placeholder.py +53 -0
  16. epub_translator/epub/spines.py +42 -0
  17. epub_translator/epub/toc.py +505 -0
  18. epub_translator/epub/zip.py +67 -0
  19. epub_translator/iter_sync.py +24 -0
  20. epub_translator/language.py +23 -0
  21. epub_translator/llm/__init__.py +2 -1
  22. epub_translator/llm/core.py +175 -0
  23. epub_translator/llm/error.py +38 -35
  24. epub_translator/llm/executor.py +159 -136
  25. epub_translator/llm/increasable.py +28 -28
  26. epub_translator/llm/types.py +17 -0
  27. epub_translator/serial/__init__.py +2 -0
  28. epub_translator/serial/chunk.py +52 -0
  29. epub_translator/serial/segment.py +17 -0
  30. epub_translator/serial/splitter.py +50 -0
  31. epub_translator/template.py +35 -33
  32. epub_translator/translator.py +205 -168
  33. epub_translator/utils.py +7 -0
  34. epub_translator/xml/__init__.py +4 -3
  35. epub_translator/xml/deduplication.py +38 -0
  36. epub_translator/xml/firendly/__init__.py +2 -0
  37. epub_translator/xml/firendly/decoder.py +75 -0
  38. epub_translator/xml/firendly/encoder.py +84 -0
  39. epub_translator/xml/firendly/parser.py +177 -0
  40. epub_translator/xml/firendly/tag.py +118 -0
  41. epub_translator/xml/firendly/transform.py +36 -0
  42. epub_translator/xml/xml.py +52 -0
  43. epub_translator/xml/xml_like.py +176 -0
  44. epub_translator/xml_translator/__init__.py +3 -0
  45. epub_translator/xml_translator/const.py +2 -0
  46. epub_translator/xml_translator/fill.py +128 -0
  47. epub_translator/xml_translator/format.py +282 -0
  48. epub_translator/xml_translator/fragmented.py +125 -0
  49. epub_translator/xml_translator/group.py +183 -0
  50. epub_translator/xml_translator/progressive_locking.py +256 -0
  51. epub_translator/xml_translator/submitter.py +102 -0
  52. epub_translator/xml_translator/text_segment.py +263 -0
  53. epub_translator/xml_translator/translator.py +178 -0
  54. epub_translator/xml_translator/utils.py +29 -0
  55. epub_translator-0.1.0.dist-info/METADATA +283 -0
  56. epub_translator-0.1.0.dist-info/RECORD +58 -0
  57. epub_translator/data/format.jinja +0 -33
  58. epub_translator/epub/content_parser.py +0 -162
  59. epub_translator/epub/html/__init__.py +0 -1
  60. epub_translator/epub/html/dom_operator.py +0 -62
  61. epub_translator/epub/html/empty_tags.py +0 -23
  62. epub_translator/epub/html/file.py +0 -80
  63. epub_translator/epub/html/texts_searcher.py +0 -46
  64. epub_translator/llm/node.py +0 -201
  65. epub_translator/translation/__init__.py +0 -2
  66. epub_translator/translation/chunk.py +0 -118
  67. epub_translator/translation/splitter.py +0 -78
  68. epub_translator/translation/store.py +0 -36
  69. epub_translator/translation/translation.py +0 -231
  70. epub_translator/translation/types.py +0 -45
  71. epub_translator/translation/utils.py +0 -11
  72. epub_translator/xml/decoder.py +0 -71
  73. epub_translator/xml/encoder.py +0 -95
  74. epub_translator/xml/parser.py +0 -172
  75. epub_translator/xml/tag.py +0 -93
  76. epub_translator/xml/transform.py +0 -34
  77. epub_translator/xml/utils.py +0 -12
  78. epub_translator/zip_context.py +0 -74
  79. epub_translator-0.0.6.dist-info/METADATA +0 -170
  80. epub_translator-0.0.6.dist-info/RECORD +0 -36
  81. {epub_translator-0.0.6.dist-info → epub_translator-0.1.0.dist-info}/LICENSE +0 -0
  82. {epub_translator-0.0.6.dist-info → epub_translator-0.1.0.dist-info}/WHEEL +0 -0
@@ -0,0 +1,193 @@
1
+ from xml.etree.ElementTree import Element
2
+
3
# Unicode symbol -> LaTeX command table.
# Besides operators it also lists Greek letters; those normally appear inside
# <mi> elements, so the table is consulted for both <mo> and <mi>.
_OPERATOR_MAP = {
    "→": r"\rightarrow",
    "←": r"\leftarrow",
    "↔": r"\leftrightarrow",
    "×": r"\times",
    "·": r"\cdot",
    "÷": r"\div",
    "±": r"\pm",
    "∓": r"\mp",
    "≤": r"\leq",
    "≥": r"\geq",
    "≠": r"\neq",
    "≈": r"\approx",
    "∞": r"\infty",
    "∫": r"\int",
    "∑": r"\sum",
    "∏": r"\prod",
    "√": r"\sqrt",
    "∂": r"\partial",
    "∇": r"\nabla",
    "∈": r"\in",
    "∉": r"\notin",
    "⊂": r"\subset",
    "⊃": r"\supset",
    "⊆": r"\subseteq",
    "⊇": r"\supseteq",
    "∪": r"\cup",
    "∩": r"\cap",
    "∅": r"\emptyset",
    "∀": r"\forall",
    "∃": r"\exists",
    "¬": r"\neg",
    "∧": r"\land",
    "∨": r"\lor",
    "α": r"\alpha",
    "β": r"\beta",
    "γ": r"\gamma",
    "δ": r"\delta",
    "ε": r"\epsilon",
    "θ": r"\theta",
    "λ": r"\lambda",
    "μ": r"\mu",
    "π": r"\pi",
    "σ": r"\sigma",
    "φ": r"\phi",
    "ω": r"\omega",
    "Δ": r"\Delta",
    "Σ": r"\Sigma",
    "Ω": r"\Omega",
}

# Elements that take a fixed number of children: tag -> (arity, template).
# Template slots are numbered by child position, so {0} is always the first
# child (the base / numerator) regardless of where LaTeX wants it printed.
_FIXED_ARITY_TEMPLATES = {
    "mfrac": (2, "\\frac{{{0}}}{{{1}}}"),
    "msub": (2, "{0}_{{{1}}}"),
    "msup": (2, "{0}^{{{1}}}"),
    "msubsup": (3, "{0}_{{{1}}}^{{{2}}}"),
    "mroot": (2, "\\sqrt[{1}]{{{0}}}"),
    "munder": (2, "\\underset{{{1}}}{{{0}}}"),
    "mover": (2, "\\overset{{{1}}}{{{0}}}"),
}

# Bases that take their limits as plain sub/superscripts in <munderover>.
_BIG_OPERATORS = (r"\sum", r"\int", r"\prod")


def _is_ascii_letter(char: str) -> bool:
    """Return True for the characters that may form a LaTeX control word."""
    return char.isascii() and char.isalpha()


def _ends_with_control_word(latex: str) -> bool:
    """Return True when *latex* ends in a control word such as ``\\times``."""
    start = len(latex)
    while start > 0 and _is_ascii_letter(latex[start - 1]):
        start -= 1
    # Need at least one letter, immediately preceded by a backslash.
    return 0 < start < len(latex) and latex[start - 1] == "\\"


def _join_latex(parts) -> str:
    """Concatenate LaTeX fragments without fusing control words and letters.

    A plain concatenation of ``\\times`` and ``b`` yields ``\\timesb``, which
    LaTeX reads as a single (undefined) command.  A space is inserted only in
    that situation; every other pair of fragments is joined unchanged.
    """
    pieces = []
    previous = ""
    for part in parts:
        if not part:
            continue
        if _is_ascii_letter(part[0]) and _ends_with_control_word(previous):
            pieces.append(" ")
        pieces.append(part)
        previous = part
    return "".join(pieces)


def _children_to_latex(element: Element) -> str:
    """Convert every child of *element* and join the results in order."""
    return _join_latex(xml_to_latex(child) for child in element)


def _row_cells(row: Element) -> list[str]:
    """Return the converted ``mtd`` cells of a table row, in document order."""
    return [xml_to_latex(cell) for cell in row if cell.tag.endswith("mtd")]


def xml_to_latex(element: Element) -> str:
    """Convert a MathML element tree into a LaTeX string.

    Tags are matched by their bare names (``"math"``, ``"mrow"`` ...), so a
    namespaced tag is treated like an unknown element.  Unknown elements are
    transparent: their children are converted and concatenated.

    Args:
        element: The MathML element to convert; it is not modified.

    Returns:
        The LaTeX source for *element*.  Elements that require a fixed number
        of children (``mfrac``, ``msub``, ``mroot`` ...) yield ``""`` when too
        few children are present, and so does an ``mtable`` without rows.
    """
    tag = element.tag

    # --- token elements -------------------------------------------------
    if tag == "mi":
        # Identifier.  Multi-character names are set upright; a single symbol
        # goes through the table so that e.g. a Greek letter becomes \alpha.
        text = (element.text or "").strip()
        if len(text) > 1:
            return f"\\mathrm{{{text}}}"
        return _OPERATOR_MAP.get(text, text)

    if tag == "mn":
        # Number.
        return (element.text or "").strip()

    if tag == "mo":
        # Operator.
        text = (element.text or "").strip()
        return _OPERATOR_MAP.get(text, text)

    if tag == "mtext":
        # Literal text; whitespace is kept because it can be meaningful here.
        text = element.text or ""
        return f"\\text{{{text}}}"

    if tag == "mspace":
        return r"\,"

    # --- layout elements ------------------------------------------------
    if tag == "msqrt":
        return f"\\sqrt{{{_children_to_latex(element)}}}"

    fixed = _FIXED_ARITY_TEMPLATES.get(tag)
    if fixed is not None:
        arity, template = fixed
        children = list(element)
        if len(children) < arity:
            return ""
        return template.format(*(xml_to_latex(child) for child in children[:arity]))

    if tag == "munderover":
        children = list(element)
        if len(children) < 3:
            return ""
        base, under, over = (xml_to_latex(child) for child in children[:3])
        # Sums, integrals and products carry their limits as scripts.
        if base.strip() in _BIG_OPERATORS:
            return f"{base}_{{{under}}}^{{{over}}}"
        return f"\\overset{{{over}}}{{\\underset{{{under}}}{{{base}}}}}"

    # --- tables ---------------------------------------------------------
    if tag == "mtable":
        rows = [_row_cells(row) for row in element if row.tag.endswith("mtr")]
        if not rows:
            # Nothing to lay out; also avoids indexing into an empty list.
            return ""
        # The array preamble needs one alignment letter per column.  Count
        # real cells instead of "&" characters so cell content cannot skew it.
        column_spec = "c" * max(1, max(len(cells) for cells in rows))
        body = "\\\\\n".join(" & ".join(cells) for cells in rows)
        return f"\\begin{{array}}{{{column_spec}}}\n{body}\n\\end{{array}}"

    if tag == "mtr":
        return " & ".join(_row_cells(element))

    # math, mrow, mtd and any unknown element: just convert the children.
    return _children_to_latex(element)
@@ -0,0 +1,53 @@
1
+ from collections.abc import Callable
2
+ from xml.etree.ElementTree import Element
3
+
4
+ from .math import xml_to_latex
5
+
6
# Tag of the elements that Placeholder swaps out of the tree.
_MATH_TAG = "math"
# Tag of the stand-in element inserted in their place.
_EXPRESSION_TAG = "expression"

# Every tag that denotes a stand-in element.
_PLACEHOLDER_TAGS = frozenset({_EXPRESSION_TAG})


def is_placeholder_tag(tag: str) -> bool:
    """Return True when *tag* is one of the placeholder element tags."""
    return tag in _PLACEHOLDER_TAGS
14
+
15
+
16
+ class Placeholder:
17
+ def __init__(self, root: Element):
18
+ self._raw_elements: dict[int, Element] = {}
19
+ self._root: Element = self._replace(
20
+ element=root,
21
+ replace=self._replace_raw,
22
+ )
23
+ assert id(self._root) == id(root)
24
+
25
+ def recover(self) -> None:
26
+ self._replace(
27
+ element=self._root,
28
+ replace=self._recover_to_raw,
29
+ )
30
+
31
+ def _replace(self, element: Element, replace: Callable[[Element], Element | None]) -> Element:
32
+ replaced = replace(element)
33
+ if replaced is not None:
34
+ return replaced
35
+ if len(element):
36
+ element[:] = [self._replace(child, replace) for child in element]
37
+ return element
38
+
39
+ def _replace_raw(self, element: Element) -> Element | None:
40
+ if element.tag == _MATH_TAG:
41
+ replaced = Element(_EXPRESSION_TAG)
42
+ replaced.text = xml_to_latex(element)
43
+ replaced.tail = element.tail
44
+ self._raw_elements[id(replaced)] = element
45
+ return replaced
46
+ return None
47
+
48
+ def _recover_to_raw(self, replaced: Element) -> Element | None:
49
+ raw = self._raw_elements.get(id(replaced))
50
+ if raw is not None:
51
+ del self._raw_elements[id(replaced)]
52
+ return raw
53
+ return None
@@ -0,0 +1,42 @@
1
+ from collections.abc import Generator
2
+ from pathlib import Path
3
+ from xml.etree import ElementTree as ET
4
+
5
+ from .common import find_opf_path, strip_namespace
6
+ from .zip import Zip
7
+
8
+
9
def search_spine_paths(zip: Zip) -> Generator[Path, None, None]:
    """Yield the paths of the HTML/XHTML documents listed in the spine.

    The OPF package document is located with ``find_opf_path``, parsed, and
    passed through ``strip_namespace``.  Each spine ``itemref`` is resolved
    against the manifest, and the matching ``href`` is yielded joined to the
    OPF file's directory, in spine order.

    Nothing is yielded when the package has no ``manifest`` or no ``spine``.
    Entries whose ``idref`` is missing, unknown to the manifest, or whose
    media type is neither ``application/xhtml+xml`` nor ``text/html`` are
    skipped.
    """
    opf_path = find_opf_path(zip)
    base_dir = opf_path.parent

    with zip.read(opf_path) as opf_file:
        package = ET.fromstring(opf_file.read())
        strip_namespace(package)

    manifest = package.find(".//manifest")
    if manifest is None:
        return

    # id -> (href, media type); items lacking an id or an href are ignored.
    resources = {
        item_id: (item_href, item.get("media-type", ""))
        for item in manifest.findall("item")
        if (item_id := item.get("id")) and (item_href := item.get("href"))
    }

    spine = package.find(".//spine")
    if spine is None:
        return

    html_types = ("application/xhtml+xml", "text/html")
    for itemref in spine.findall("itemref"):
        # Manifest keys are always non-empty strings, so a missing or empty
        # idref simply fails the lookup.
        resource = resources.get(itemref.get("idref"))
        if resource is None:
            continue
        href, media_type = resource
        if media_type in html_types:
            yield base_dir / href