epub-translator 0.1.7__py3-none-any.whl → 0.1.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- epub_translator/epub/__init__.py +2 -2
- epub_translator/epub/metadata.py +29 -66
- epub_translator/epub/toc.py +76 -94
- epub_translator/llm/core.py +19 -1
- epub_translator/llm/executor.py +5 -0
- epub_translator/llm/statistics.py +25 -0
- epub_translator/segment/text_segment.py +6 -1
- epub_translator/translation/translator.py +16 -6
- epub_translator/translation/xml_interrupter.py +10 -7
- epub_translator/xml/const.py +1 -0
- epub_translator/xml/inline.py +10 -3
- epub_translator/xml/self_closing.py +5 -4
- epub_translator/xml/xml_like.py +23 -1
- {epub_translator-0.1.7.dist-info → epub_translator-0.1.9.dist-info}/METADATA +106 -16
- {epub_translator-0.1.7.dist-info → epub_translator-0.1.9.dist-info}/RECORD +17 -16
- {epub_translator-0.1.7.dist-info → epub_translator-0.1.9.dist-info}/LICENSE +0 -0
- {epub_translator-0.1.7.dist-info → epub_translator-0.1.9.dist-info}/WHEEL +0 -0
epub_translator/epub/__init__.py
CHANGED
@@ -1,4 +1,4 @@
-from .metadata import read_metadata, write_metadata
+from .metadata import MetadataContext, read_metadata, write_metadata
 from .spines import search_spine_paths
-from .toc import Toc, read_toc, write_toc
+from .toc import Toc, TocContext, read_toc, write_toc
 from .zip import Zip
epub_translator/epub/metadata.py
CHANGED
@@ -1,91 +1,63 @@
 from dataclasses import dataclass
+from pathlib import Path
 
+from ..xml import XMLLikeNode
 from .common import find_opf_path
 from .zip import Zip
 
 
 @dataclass
 class MetadataField:
-    """
-    Represents a metadata field in the EPUB OPF file.
-
-    - tag_name: the tag name (without namespace)
-    - text: the text content
-    """
-
     tag_name: str
     text: str
 
 
-
-SKIP_FIELDS = {
-    "language",
-    "identifier",
-    "date",
-    "meta",
-    "contributor",  # Usually technical information
-}
+@dataclass
+class MetadataContext:
+    opf_path: Path  # path of the OPF file
+    xml_node: XMLLikeNode  # XMLLikeNode object, preserving the original file information
+
 
+SKIP_FIELDS = frozenset(
+    (
+        "language",
+        "identifier",
+        "date",
+        "meta",
+        "contributor",  # Usually technical information
+    )
+)
 
-def read_metadata(zip: Zip) -> list[MetadataField]:
-    """
-    Read all translatable metadata fields from the EPUB's OPF file.
 
-    Automatically filters out fields that should not be translated
-    (language, identifier, date, meta, contributor, etc.).
-    """
+def read_metadata(zip: Zip) -> tuple[list[MetadataField], MetadataContext]:
     opf_path = find_opf_path(zip)
 
     with zip.read(opf_path) as f:
-        content = f.read()
+        xml_node = XMLLikeNode(f, is_html_like=False)
 
-    from xml.etree import ElementTree as ET
-
-    root = ET.fromstring(content)
-
-    # Find metadata element
     metadata_elem = None
-    for child in root:
+    for child in xml_node.element:
         if child.tag.endswith("metadata"):
             metadata_elem = child
             break
 
     if metadata_elem is None:
-        return []
+        context = MetadataContext(opf_path=opf_path, xml_node=xml_node)
+        return [], context
 
-    # Collect metadata fields to translate
     fields: list[MetadataField] = []
-
     for elem in metadata_elem:
-
-        tag_name = elem.tag.split("}")[-1] if "}" in elem.tag else elem.tag
-
-        # Check if element has text content and should be translated
+        tag_name = elem.tag
         if elem.text and elem.text.strip() and tag_name not in SKIP_FIELDS:
             fields.append(MetadataField(tag_name=tag_name, text=elem.text.strip()))
 
-    return fields
+    context = MetadataContext(opf_path=opf_path, xml_node=xml_node)
+    return fields, context
 
 
-def write_metadata(zip: Zip, fields: list[MetadataField]) -> None:
-    """
-    Write the translated metadata fields back to the EPUB's OPF file.
-
-    Matches the corresponding elements by tag_name and updates their text content.
-    Matching strategy: match by tag_name, in the order they appear in the file.
-    """
-    opf_path = find_opf_path(zip)
-
-    with zip.read(opf_path) as f:
-        content = f.read()
-
-    from xml.etree import ElementTree as ET
-
-    root = ET.fromstring(content)
-
-    # Find metadata element
+def write_metadata(zip: Zip, fields: list[MetadataField], context: MetadataContext) -> None:
     metadata_elem = None
-    for child in root:
+    for child in context.xml_node.element:
         if child.tag.endswith("metadata"):
             metadata_elem = child
             break
@@ -93,30 +65,21 @@ def write_metadata(zip: Zip, fields: list[MetadataField]) -> None:
     if metadata_elem is None:
         return
 
-    # Build a mapping: tag_name -> list of fields with that tag_name
     fields_by_tag: dict[str, list[str]] = {}
     for field in fields:
         if field.tag_name not in fields_by_tag:
            fields_by_tag[field.tag_name] = []
         fields_by_tag[field.tag_name].append(field.text)
 
-    # Create a counter for each tag to track which occurrence we're at
    tag_counters: dict[str, int] = {tag: 0 for tag in fields_by_tag}
 
-    # Update elements in metadata
     for elem in metadata_elem:
-
-        tag_name = elem.tag.split("}")[-1] if "}" in elem.tag else elem.tag
-
-        # Check if this tag has translated text
+        tag_name = elem.tag
         if tag_name in fields_by_tag and elem.text and elem.text.strip():
             counter = tag_counters[tag_name]
             if counter < len(fields_by_tag[tag_name]):
-                # Update the text with translated version
                 elem.text = fields_by_tag[tag_name][counter]
                 tag_counters[tag_name] += 1
 
-
-
-    with zip.replace(opf_path) as f:
-        tree.write(f, encoding="utf-8", xml_declaration=True)
+    with zip.replace(context.opf_path) as f:
+        context.xml_node.save(f)
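With this change, read_metadata and write_metadata share one parsed document instead of each re-reading and re-parsing the OPF. A minimal round-trip sketch under that API (the `translate_text` callable and wrapper function are hypothetical; only Zip, read_metadata, write_metadata, and MetadataField come from this diff):

```python
from dataclasses import replace
from typing import Callable

from epub_translator.epub import Zip, read_metadata, write_metadata


def translate_opf_metadata(zip: Zip, translate_text: Callable[[str], str]) -> None:
    # Read once: the fields plus a context holding opf_path and the parsed XMLLikeNode.
    fields, context = read_metadata(zip)
    # MetadataField is a dataclass, so dataclasses.replace builds translated copies.
    translated = [replace(f, text=translate_text(f.text)) for f in fields]
    # Write back through the same parsed node via context.xml_node.save().
    write_metadata(zip, translated, context)
```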
epub_translator/epub/toc.py
CHANGED
@@ -3,8 +3,8 @@ from pathlib import Path
 from xml.etree import ElementTree as ET
 from xml.etree.ElementTree import Element
 
-from ..xml import plain_text
-from .common import
+from ..xml import XMLLikeNode, plain_text
+from .common import find_opf_path, strip_namespace
 from .zip import Zip
 
 
@@ -41,30 +41,40 @@ class Toc:
         return self.href
 
 
-def read_toc(zip: Zip) -> list[Toc]:
+@dataclass
+class TocContext:
+    version: int
+    toc_path: Path
+    xml_node: XMLLikeNode
+
+
+def read_toc(zip: Zip) -> tuple[list[Toc], TocContext]:
     version = _detect_epub_version(zip)
     toc_path = _find_toc_path(zip, version)
 
     if toc_path is None:
-
+        raise ValueError("Cannot find TOC file in EPUB")
 
-    if version == 2:
-        return _read_ncx_toc(zip, toc_path)
-    else:
-        return _read_nav_toc(zip, toc_path)
+    with zip.read(toc_path) as f:
+        xml_node = XMLLikeNode(f, is_html_like=False)
 
+    if version == 3:
+        toc_list = _read_nav_toc(xml_node.element)
+    else:
+        toc_list = _read_ncx_toc(xml_node.element)
 
-
-
-    toc_path = _find_toc_path(zip, version)
+    context = TocContext(version=version, toc_path=toc_path, xml_node=xml_node)
+    return toc_list, context
 
-    if toc_path is None:
-        raise ValueError("Cannot find TOC file in EPUB")
 
-
-
+def write_toc(zip: Zip, toc: list[Toc], context: TocContext) -> None:
+    if context.version == 2:
+        _update_ncx_toc(context.xml_node.element, toc)
     else:
-
+        _update_nav_toc(context.xml_node.element, toc)
+
+    with zip.replace(context.toc_path) as f:
+        context.xml_node.save(f)
 
 
 def _detect_epub_version(zip: Zip) -> int:
@@ -72,8 +82,6 @@ def _detect_epub_version(zip: Zip) -> int:
     with zip.read(opf_path) as f:
         content = f.read()
     root = ET.fromstring(content)
-
-    # Check the version attribute of the package element
     version_str = root.get("version", "2.0")
 
     if version_str.startswith("3"):
@@ -89,7 +97,7 @@ def _find_toc_path(zip: Zip, version: int) -> Path | None:
     with zip.read(opf_path) as f:
         content = f.read()
     root = ET.fromstring(content)
-    strip_namespace(root)
+    strip_namespace(root)
 
     manifest = root.find(".//manifest")
     if manifest is None:
@@ -115,23 +123,18 @@ def _find_toc_path(zip: Zip, version: int) -> Path | None:
     return None
 
 
-def _read_ncx_toc(
-
-
-
-    strip_namespace(root)  # Remove namespace prefixes to simplify XPath
-
-    nav_map = root.find(".//navMap")
-    if nav_map is None:
-        return []
+def _read_ncx_toc(root: Element) -> list[Toc]:
+    nav_map = root.find(".//navMap")
+    if nav_map is None:
+        return []
 
-
-
-
-
-
+    result = []
+    for nav_point in nav_map.findall("navPoint"):
+        toc_item = _parse_nav_point(nav_point)
+        if toc_item:
+            result.append(toc_item)
 
-
+    return result
 
 
 def _parse_nav_point(nav_point: Element) -> Toc | None:
@@ -172,18 +175,11 @@ def _parse_nav_point(nav_point: Element) -> Toc | None:
     )
 
 
-def
-
-
-
-
-    nav_map = root.find(f".//{{{ns}}}navMap" if ns else ".//navMap")
-    if nav_map is None:
-        raise ValueError("Cannot find navMap in NCX file")
-    _update_nav_points(nav_map, toc_list, ns)
-    tree = ET.ElementTree(root)
-    with zip.replace(ncx_path) as out:
-        tree.write(out, encoding="utf-8", xml_declaration=True)
+def _update_ncx_toc(root: Element, toc_list: list[Toc]) -> None:
+    nav_map = root.find(".//navMap")
+    if nav_map is None:
+        raise ValueError("Cannot find navMap in NCX file")
+    _update_nav_points(nav_map, toc_list, None)
 
 
 def _update_nav_points(parent: Element, toc_list: list[Toc], ns: str | None, start_play_order: int = 1) -> int:
@@ -255,34 +251,28 @@ def _create_nav_point(toc: Toc, ns: str | None, play_order: int) -> Element:
     return nav_point
 
 
-def _read_nav_toc(
-
-
-
-
-
-    nav_elem = None
-    for nav in root.findall(".//nav"):
-        epub_type = nav.get("{http://www.idpf.org/2007/ops}type") or nav.get("type")
-        if epub_type == "toc":
-            nav_elem = nav
-            break
+def _read_nav_toc(root: Element) -> list[Toc]:
+    nav_elem = None
+    for nav in root.findall(".//nav"):
+        epub_type = nav.get("type")
+        if epub_type == "toc":
+            nav_elem = nav
+            break
 
-
-
+    if nav_elem is None:
+        return []
 
-
-
-
+    ol = nav_elem.find(".//ol")
+    if ol is None:
+        return []
 
-
-
-
-
-
+    result = []
+    for li in ol.findall("li"):
+        toc_item = _parse_nav_li(li)
+        if toc_item:
+            result.append(toc_item)
 
-
+    return result
 
 
 def _parse_nav_li(li: Element) -> Toc | None:
@@ -331,30 +321,22 @@ def _parse_nav_li(li: Element) -> Toc | None:
     )
 
 
-def
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-    if ol is None:
-        raise ValueError("Cannot find ol in nav element")
-
-    _update_nav_lis(ol, toc_list, ns)
-
-    tree = ET.ElementTree(root)
-    with zip.replace(nav_path) as out:
-        tree.write(out, encoding="utf-8", xml_declaration=True)
+def _update_nav_toc(root: Element, toc_list: list[Toc]) -> None:
+    nav_elem = None
+    for nav in root.findall(".//nav"):
+        epub_type = nav.get("type")
+        if epub_type == "toc":
+            nav_elem = nav
+            break
+
+    if nav_elem is None:
+        raise ValueError("Cannot find nav element with type='toc'")
+
+    ol = nav_elem.find(".//ol")
+    if ol is None:
+        raise ValueError("Cannot find ol in nav element")
+
+    _update_nav_lis(ol, toc_list, None)
 
 
 def _update_nav_lis(ol: Element, toc_list: list[Toc], ns: str | None) -> None:
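read_toc now parses the TOC file once into an XMLLikeNode and dispatches on the detected EPUB version; write_toc re-serializes that same node. Roughly, these are the two document shapes the dispatch distinguishes (illustrative inputs only, heavily trimmed from the EPUB 2 NCX and EPUB 3 nav conventions):

```python
# Illustrative TOC fragments, not package data.
NCX_EPUB2 = """<ncx><navMap>
  <navPoint playOrder="1">
    <navLabel><text>Chapter 1</text></navLabel>
    <content src="ch1.xhtml"/>
  </navPoint>
</navMap></ncx>"""  # parsed by _read_ncx_toc, updated by _update_ncx_toc

NAV_EPUB3 = """<nav epub:type="toc"><ol>
  <li><a href="ch1.xhtml">Chapter 1</a></li>
</ol></nav>"""  # parsed by _read_nav_toc, updated by _update_nav_toc
```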
epub_translator/llm/core.py
CHANGED
@@ -13,6 +13,7 @@ from ..template import create_env
 from .context import LLMContext
 from .executor import LLMExecutor
 from .increasable import Increasable
+from .statistics import Statistics
 from .types import Message
 
 # Global state for logger filename generation
@@ -44,7 +45,7 @@ class LLM:
         self._temperature: Increasable = Increasable(temperature)
         self._cache_path: Path | None = self._ensure_dir_path(cache_path)
         self._logger_save_path: Path | None = self._ensure_dir_path(log_dir_path)
-
+        self._statistics = Statistics()
         self._executor = LLMExecutor(
             url=url,
             model=model,
@@ -53,12 +54,29 @@ class LLM:
             retry_times=retry_times,
             retry_interval_seconds=retry_interval_seconds,
             create_logger=self._create_logger,
+            statistics=self._statistics,
         )
 
     @property
     def encoding(self) -> Encoding:
         return self._encoding
 
+    @property
+    def total_tokens(self) -> int:
+        return self._statistics.total_tokens
+
+    @property
+    def input_tokens(self) -> int:
+        return self._statistics.input_tokens
+
+    @property
+    def input_cache_tokens(self) -> int:
+        return self._statistics.input_cache_tokens
+
+    @property
+    def output_tokens(self) -> int:
+        return self._statistics.output_tokens
+
     def context(self, cache_seed_content: str | None = None) -> LLMContext:
         return LLMContext(
             executor=self._executor,
epub_translator/llm/executor.py
CHANGED
@@ -7,6 +7,7 @@ from openai import OpenAI
 from openai.types.chat import ChatCompletionMessageParam
 
 from .error import is_retry_error
+from .statistics import Statistics
 from .types import Message, MessageRole
 
 
@@ -20,12 +21,14 @@ class LLMExecutor:
         retry_times: int,
         retry_interval_seconds: float,
         create_logger: Callable[[], Logger | None],
+        statistics: Statistics,
     ) -> None:
         self._model_name: str = model
         self._timeout: float | None = timeout
         self._retry_times: int = retry_times
         self._retry_interval_seconds: float = retry_interval_seconds
         self._create_logger: Callable[[], Logger | None] = create_logger
+        self._statistics = statistics
         self._client = OpenAI(
             api_key=api_key,
             base_url=url,
@@ -156,6 +159,7 @@ class LLMExecutor:
             model=self._model_name,
             messages=messages,
             stream=True,
+            stream_options={"include_usage": True},
             top_p=top_p,
             temperature=temperature,
             max_tokens=max_tokens,
@@ -164,4 +168,5 @@ class LLMExecutor:
         for chunk in stream:
             if chunk.choices and chunk.choices[0].delta.content:
                 buffer.write(chunk.choices[0].delta.content)
+            self._statistics.submit_usage(chunk.usage)
         return buffer.getvalue()
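Background for the stream_options change, as a standalone sketch: with include_usage enabled, the OpenAI streaming API delivers usage only on the final chunk (whose choices list is empty) and usage=None on all earlier chunks, which is why submit_usage() is called for every chunk. The model name below is a placeholder.

```python
from openai import OpenAI

client = OpenAI()  # assumes OPENAI_API_KEY in the environment
stream = client.chat.completions.create(
    model="gpt-4o-mini",  # placeholder model name
    messages=[{"role": "user", "content": "Say hi"}],
    stream=True,
    stream_options={"include_usage": True},
)
for chunk in stream:
    if chunk.choices and chunk.choices[0].delta.content:
        print(chunk.choices[0].delta.content, end="")
    if chunk.usage is not None:  # populated only on the last chunk
        print(f"\nprompt={chunk.usage.prompt_tokens} completion={chunk.usage.completion_tokens}")
```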
epub_translator/llm/statistics.py
ADDED
@@ -0,0 +1,25 @@
+from threading import Lock
+
+from openai.types import CompletionUsage
+
+
+class Statistics:
+    def __init__(self) -> None:
+        self._lock = Lock()
+        self.total_tokens = 0
+        self.input_tokens = 0
+        self.input_cache_tokens = 0
+        self.output_tokens = 0
+
+    def submit_usage(self, usage: CompletionUsage | None) -> None:
+        if usage is None:
+            return
+        with self._lock:
+            if usage.total_tokens:
+                self.total_tokens += usage.total_tokens
+            if usage.prompt_tokens:
+                self.input_tokens += usage.prompt_tokens
+            if usage.prompt_tokens_details and usage.prompt_tokens_details.cached_tokens:
+                self.input_cache_tokens += usage.prompt_tokens_details.cached_tokens
+            if usage.completion_tokens:
+                self.output_tokens += usage.completion_tokens
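Because every counter update in submit_usage() happens under a single Lock, totals stay exact when chunks arrive from concurrent translation workers. A toy check (not part of the package) of that property:

```python
from concurrent.futures import ThreadPoolExecutor

from openai.types import CompletionUsage

from epub_translator.llm.statistics import Statistics  # module path per this diff

stats = Statistics()
usage = CompletionUsage(prompt_tokens=90, completion_tokens=10, total_tokens=100)

# Submit the same usage record 1000 times from 8 threads.
with ThreadPoolExecutor(max_workers=8) as pool:
    for _ in range(1_000):
        pool.submit(stats.submit_usage, usage)

assert stats.total_tokens == 100_000
assert stats.input_tokens == 90_000 and stats.output_tokens == 10_000
```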
epub_translator/segment/text_segment.py
CHANGED
@@ -4,7 +4,12 @@ from enum import Enum, auto
 from typing import Self
 from xml.etree.ElementTree import Element
 
-from ..xml import
+from ..xml import (
+    expand_left_element_texts,
+    expand_right_element_texts,
+    is_inline_element,
+    normalize_text_in_element,
+)
 
 
 class TextPosition(Enum):
epub_translator/translation/translator.py
CHANGED
@@ -6,6 +6,8 @@ from os import PathLike
 from pathlib import Path
 
 from ..epub import (
+    MetadataContext,
+    TocContext,
     Zip,
     read_metadata,
     read_toc,
@@ -31,6 +33,8 @@ class _ElementType(Enum):
 class _ElementContext:
     element_type: _ElementType
     chapter_data: tuple[Path, XMLLikeNode] | None = None
+    toc_context: TocContext | None = None
+    metadata_context: MetadataContext | None = None
 
 
 def translate(
@@ -74,8 +78,8 @@ def translate(
     zip.migrate(Path("mimetype"))
 
     total_chapters = sum(1 for _, _ in search_spine_paths(zip))
-    toc_list = read_toc(zip)
-    metadata_fields = read_metadata(zip)
+    toc_list, toc_context = read_toc(zip)
+    metadata_fields, metadata_context = read_metadata(zip)
 
     # Calculate weights: TOC (5%), Metadata (5%), Chapters (90%)
     toc_has_items = len(toc_list) > 0
@@ -101,14 +105,17 @@ def translate(
         tasks=_generate_tasks_from_book(
             zip=zip,
             toc_list=toc_list,
+            toc_context=toc_context,
             metadata_fields=metadata_fields,
+            metadata_context=metadata_context,
             submit=submit,
         ),
     ):
         if context.element_type == _ElementType.TOC:
             translated_elem = unwrap_french_quotes(translated_elem)
             decoded_toc = decode_toc_list(translated_elem)
-
+            if context.toc_context is not None:
+                write_toc(zip, decoded_toc, context.toc_context)
 
             current_progress += toc_weight
             if on_progress:
@@ -117,7 +124,8 @@ def translate(
         elif context.element_type == _ElementType.METADATA:
             translated_elem = unwrap_french_quotes(translated_elem)
             decoded_metadata = decode_metadata(translated_elem)
-
+            if context.metadata_context is not None:
+                write_metadata(zip, decoded_metadata, context.metadata_context)
 
             current_progress += metadata_weight
             if on_progress:
@@ -138,7 +146,9 @@ def translate(
 def _generate_tasks_from_book(
     zip: Zip,
     toc_list: list,
+    toc_context: TocContext,
     metadata_fields: list,
+    metadata_context: MetadataContext,
     submit: SubmitKind,
 ) -> Generator[TranslationTask[_ElementContext], None, None]:
     head_submit = submit
@@ -149,14 +159,14 @@ def _generate_tasks_from_book(
     yield TranslationTask(
         element=encode_toc_list(toc_list),
         action=head_submit,
-        payload=_ElementContext(element_type=_ElementType.TOC),
+        payload=_ElementContext(element_type=_ElementType.TOC, toc_context=toc_context),
     )
 
     if metadata_fields:
         yield TranslationTask(
             element=encode_metadata(metadata_fields),
             action=head_submit,
-            payload=_ElementContext(element_type=_ElementType.METADATA),
+            payload=_ElementContext(element_type=_ElementType.METADATA, metadata_context=metadata_context),
         )
 
     for chapter_path, media_type in search_spine_paths(zip):
epub_translator/translation/xml_interrupter.py
CHANGED
@@ -6,8 +6,8 @@ from bs4 import BeautifulSoup
 from mathml2latex.mathml import process_mathml
 
 from ..segment import TextSegment, combine_text_segments, find_block_depth
-from ..utils import ensure_list
-from ..xml import clone_element
+from ..utils import ensure_list, normalize_whitespace
+from ..xml import DISPLAY_ATTRIBUTE, clone_element, is_inline_element
 
 _ID_KEY = "__XML_INTERRUPTER_ID"
 _MATH_TAG = "math"
@@ -87,9 +87,9 @@ class XMLInterrupter:
                 _ID_KEY: cast(str, interrupted_element.get(_ID_KEY)),
             },
         )
-        interrupted_display = interrupted_element.get(
+        interrupted_display = interrupted_element.get(DISPLAY_ATTRIBUTE, None)
         if interrupted_display is not None:
-            placeholder_element.set(
+            placeholder_element.set(DISPLAY_ATTRIBUTE, interrupted_display)
 
         raw_parent_stack = text_segment.parent_stack[:interrupted_index]
         parent_stack = raw_parent_stack + [placeholder_element]
@@ -159,10 +159,13 @@ class XMLInterrupter:
 
         if latex is None:
             latex = "".join(t.text for t in text_segments)
-
-            latex = f"${latex}$"
+            latex = normalize_whitespace(latex).strip()
         else:
-            latex =
+            latex = normalize_whitespace(latex).strip()
+        if is_inline_element(math_element):
+            latex = f"${latex}$"
+        else:
+            latex = f"$${latex}$$"
 
         return f" {latex} "
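The math-handling change normalizes whitespace in both branches and then picks the LaTeX delimiter by display mode: inline MathML becomes $...$, display math becomes $$...$$. A self-contained sketch of that rule (the `normalize_whitespace` stand-in below approximates the package helper from ..utils; the real code decides `inline` via is_inline_element on the math element):

```python
import re


def normalize_whitespace(text: str) -> str:  # stand-in for ..utils.normalize_whitespace
    return re.sub(r"\s+", " ", text)


def wrap_latex(latex: str, inline: bool) -> str:
    latex = normalize_whitespace(latex).strip()
    return f"${latex}$" if inline else f"$${latex}$$"


print(wrap_latex("  E = m c^2 ", inline=True))            # $E = m c^2$
print(wrap_latex("\n\\int_0^1 x\\,dx\n", inline=False))   # $$\int_0^1 x\,dx$$
```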
epub_translator/xml/const.py
CHANGED
epub_translator/xml/inline.py
CHANGED
@@ -1,5 +1,7 @@
 from xml.etree.ElementTree import Element
 
+from .const import DISPLAY_ATTRIBUTE
+
 # HTML inline-level elements
 # Reference: https://developer.mozilla.org/en-US/docs/Web/HTML/Inline_elements
 # Reference: https://developer.mozilla.org/en-US/docs/Glossary/Inline-level_content
@@ -105,9 +107,14 @@ _HTML_INLINE_TAGS = frozenset(
 
 
 def is_inline_element(element: Element) -> bool:
-    if element.tag.lower() in _HTML_INLINE_TAGS:
+    tag = element.tag.lower()
+    if tag in _HTML_INLINE_TAGS:
         return True
-    display = element.get(
-    if display is not None
+    display = element.get(DISPLAY_ATTRIBUTE, None)
+    if display is not None:
+        display = display.lower()
+        if display == "inline":
+            return True
+    if tag == "math" and display != "block":
         return True
     return False
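A rough behavior check for the new predicate. This assumes DISPLAY_ATTRIBUTE resolves to the literal `display` attribute; its actual value lives in const.py, whose hunk is not shown in this diff.

```python
from xml.etree.ElementTree import fromstring

from epub_translator.xml import is_inline_element  # re-exported per this diff

print(is_inline_element(fromstring("<span/>")))                  # True: listed inline tag
print(is_inline_element(fromstring('<div display="inline"/>')))  # True: explicit display
print(is_inline_element(fromstring("<math/>")))                  # True: math defaults to inline
print(is_inline_element(fromstring('<math display="block"/>')))  # False: display math
print(is_inline_element(fromstring("<div/>")))                   # False: block by default
```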
epub_translator/xml/self_closing.py
CHANGED
@@ -3,6 +3,8 @@ import re
 # Some non-standard EPUB generators use HTML-style tags without self-closing syntax
 # We need to convert them to XML-compatible format before parsing
 # These are HTML5 void elements that must be self-closing in XHTML
+# Note: "meta" is excluded because OPF files have <meta property="...">content</meta>
+# which is NOT a void element (different namespace, different rules)
 _VOID_TAGS = (
     "area",
     "base",
@@ -13,7 +15,6 @@ _VOID_TAGS = (
     "img",
     "input",
     "link",
-    "meta",
     "param",
     "source",
     "track",
@@ -26,7 +27,8 @@ def self_close_void_elements(xml_content: str) -> str:
     Convert void HTML elements to self-closing format for XML parsing.
 
     This function handles non-standard HTML where void elements are not self-closed.
-
+    Note: "meta" is excluded from processing because EPUB OPF files have
+    <meta property="...">content</meta> which is NOT a void element.
 
     Args:
         xml_content: HTML/XHTML content string
@@ -35,9 +37,8 @@ def self_close_void_elements(xml_content: str) -> str:
         Content with void elements in self-closing format
 
     Example:
-        <meta charset="utf-8"> → <meta charset="utf-8" />
         <br> → <br />
-        <
+        <link rel="stylesheet" href="style.css"> → <link rel="stylesheet" href="style.css" />
     """
     for tag in _VOID_TAGS:
         xml_content = _fix_void_element(xml_content, tag)
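Expected behavior, sketched. The import path is inferred from this diff's file layout, and the output spacing follows the `<br />` style in the docstring above; both are assumptions, not verified output.

```python
from epub_translator.xml.self_closing import self_close_void_elements  # path assumed

src = '<link rel="stylesheet" href="style.css"><meta property="dcterms:modified">2024</meta>'
print(self_close_void_elements(src))
# Void <link> gets self-closed; the paired OPF-style <meta> keeps its text content:
# '<link rel="stylesheet" href="style.css" /><meta property="dcterms:modified">2024</meta>'
```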
epub_translator/xml/xml_like.py
CHANGED
@@ -32,6 +32,25 @@ _ENCODING_PATTERN = re.compile(r'encoding\s*=\s*["\']([^"\']+)["\']', re.IGNORECASE)
 _FIRST_ELEMENT_PATTERN = re.compile(r"<(?![?!])[a-zA-Z]")
 _NAMESPACE_IN_TAG = re.compile(r"\{([^}]+)\}")
 
+# When an attribute name exists in multiple namespaces (e.g., 'type' in XHTML and EPUB ops),
+# _attr_to_namespace only records ONE namespace per attribute name. During serialization,
+# the global string replacement wrongly adds namespace prefixes to ALL occurrences of that
+# attribute, including ones that should remain unprefixed (e.g., <link type="text/css">).
+#
+# Example problem:
+#   Original file has:
+#     - <link type="text/css">  (no namespace, standard HTML attribute)
+#     - <nav epub:type="toc">   (EPUB ops namespace)
+#   After parsing, _attr_to_namespace records: {'type': 'http://www.idpf.org/2007/ops'}
+#   During serialization, ALL ' type="' get replaced to ' epub:type="', breaking <link>
+#
+# This workaround fixes specific known cases where HTML standard attributes should not
+# be prefixed, even if the same attribute name appears with a namespace elsewhere.
+_STANDARD_HTML_ATTRS = (
+    (re.compile(r'<link([^>]*?) epub:type="'), r'<link\1 type="'),  # <link type="...">
+    (re.compile(r'<link([^>]*?) epub:rel="'), r'<link\1 rel="'),  # <link rel="...">
+)
+
 
 class XMLLikeNode:
     def __init__(self, file: IO[bytes], is_html_like: bool = False) -> None:
@@ -197,10 +216,13 @@ class XMLLikeNode:
             xml_string = xml_string.replace(f"</{tag_name}>", f"</{prefix}:{tag_name}>")
             xml_string = xml_string.replace(f"<{tag_name}/>", f"<{prefix}:{tag_name}/>")
 
-        # Similarly for attributes (though less common in EPUB)
         for attr_name, namespace_uri in self._attr_to_namespace.items():
             if namespace_uri not in _ROOT_NAMESPACES:
                 prefix = self._namespaces[namespace_uri]
                 xml_string = xml_string.replace(f' {attr_name}="', f' {prefix}:{attr_name}="')
 
+        # Apply workaround to fix standard HTML attributes (see _STANDARD_HTML_ATTRS comment)
+        for pattern, replacement in _STANDARD_HTML_ATTRS:
+            xml_string = pattern.sub(replacement, xml_string)
+
         return xml_string
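The workaround can be exercised in isolation. The patterns below are copied from this diff; the sample input is made up: after the global attribute-prefixing pass has wrongly produced epub:type on a <link>, the regex pass reverts it while leaving <nav epub:type="toc"> untouched.

```python
import re

_STANDARD_HTML_ATTRS = (
    (re.compile(r'<link([^>]*?) epub:type="'), r'<link\1 type="'),
    (re.compile(r'<link([^>]*?) epub:rel="'), r'<link\1 rel="'),
)

xml_string = '<link rel="x" epub:type="text/css"/><nav epub:type="toc"/>'
for pattern, replacement in _STANDARD_HTML_ATTRS:
    xml_string = pattern.sub(replacement, xml_string)
print(xml_string)  # <link rel="x" type="text/css"/><nav epub:type="toc"/>
```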
{epub_translator-0.1.7.dist-info → epub_translator-0.1.9.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: epub-translator
-Version: 0.1.7
+Version: 0.1.9
 Summary: Translate the epub book using LLM. The translated book will retain the original text and list the translated text side by side with the original text.
 License: MIT
 Keywords: epub,llm,translation,translator
@@ -46,26 +46,17 @@ Description-Content-Type: text/markdown
 </div>
 
 
-
+Want to read a book in a foreign language without losing the original context? EPUB Translator transforms any EPUB into a bilingual edition with AI-powered translations displayed side-by-side with the original text.
 
-
+Whether you're learning a new language, conducting academic research, or simply enjoying foreign literature, you get both versions in one book - preserving all formatting, images, and structure.
 
-
+
 
-
-- **LLM-Powered**: Leverages large language models for high-quality, context-aware translations
-- **Format Preservation**: Maintains EPUB structure, styles, images, and formatting
-- **Complete Translation**: Translates chapter content, table of contents, and metadata
-- **Progress Tracking**: Monitor translation progress with built-in callbacks
-- **Flexible LLM Support**: Works with any OpenAI-compatible API endpoint
-- **Caching**: Built-in caching for progress recovery when translation fails
+### Online Demo
 
-
+We provide an [online demo platform](https://hub.oomol.com/package/books-translator) where you can try EPUB Translator's bilingual translation capabilities without any installation. Simply upload your EPUB file and get a translated bilingual edition.
 
-
-- **Academic Research**: Access foreign literature with bilingual references
-- **Content Localization**: Prepare books for international audiences
-- **Cross-Cultural Reading**: Enjoy literature while understanding cultural nuances
+[](https://hub.oomol.com/package/books-translator)
 
 ## Installation
 
@@ -422,6 +413,105 @@ translate(
 )
 
 When using `concurrency > 1`, ensure that any custom callback functions (`on_progress`, `on_fill_failed`) are thread-safe. Built-in callbacks are thread-safe by default.
 
+### Token Usage Monitoring
+
+Track token consumption during translation to monitor API costs and usage:
+
+```python
+from epub_translator import LLM, translate, language, SubmitKind
+
+llm = LLM(
+    key="your-api-key",
+    url="https://api.openai.com/v1",
+    model="gpt-4",
+    token_encoding="o200k_base",
+)
+
+translate(
+    source_path="source.epub",
+    target_path="translated.epub",
+    target_language=language.ENGLISH,
+    submit=SubmitKind.APPEND_BLOCK,
+    llm=llm,
+)
+
+# Access token statistics after translation
+print(f"Total tokens: {llm.total_tokens}")
+print(f"Input tokens: {llm.input_tokens}")
+print(f"Input cache tokens: {llm.input_cache_tokens}")
+print(f"Output tokens: {llm.output_tokens}")
+```
+
+**Available Statistics:**
+
+- `total_tokens` - Total number of tokens used (input + output)
+- `input_tokens` - Number of prompt/input tokens
+- `input_cache_tokens` - Number of cached input tokens (when using prompt caching)
+- `output_tokens` - Number of generated/completion tokens
+
+**Real-time Monitoring:**
+
+You can also monitor token usage in real-time during translation:
+
+```python
+from tqdm import tqdm
+import time
+
+with tqdm(total=100, desc="Translating", unit="%") as pbar:
+    last_progress = 0.0
+    start_time = time.time()
+
+    def on_progress(progress: float):
+        nonlocal last_progress
+        increment = (progress - last_progress) * 100
+        pbar.update(increment)
+        last_progress = progress
+
+        # Update token stats in progress bar
+        pbar.set_postfix({
+            'tokens': llm.total_tokens,
+            'cost_est': f'${llm.total_tokens * 0.00001:.4f}'  # Estimate based on your pricing
+        })
+
+    translate(
+        source_path="source.epub",
+        target_path="translated.epub",
+        target_language=language.ENGLISH,
+        submit=SubmitKind.APPEND_BLOCK,
+        llm=llm,
+        on_progress=on_progress,
+    )
+
+    elapsed = time.time() - start_time
+    print(f"\nTranslation completed in {elapsed:.1f}s")
+    print(f"Total tokens used: {llm.total_tokens:,}")
+    print(f"Average tokens/second: {llm.total_tokens/elapsed:.1f}")
+```
+
+**Dual-LLM Token Tracking:**
+
+When using separate LLMs for translation and filling, each LLM tracks its own statistics:
+
+```python
+translation_llm = LLM(key="...", url="...", model="gpt-4", token_encoding="o200k_base")
+fill_llm = LLM(key="...", url="...", model="gpt-4", token_encoding="o200k_base")
+
+translate(
+    source_path="source.epub",
+    target_path="translated.epub",
+    target_language=language.ENGLISH,
+    submit=SubmitKind.APPEND_BLOCK,
+    translation_llm=translation_llm,
+    fill_llm=fill_llm,
+)
+
+print(f"Translation tokens: {translation_llm.total_tokens}")
+print(f"Fill tokens: {fill_llm.total_tokens}")
+print(f"Combined total: {translation_llm.total_tokens + fill_llm.total_tokens}")
+```
+
+**Note:** Token statistics are cumulative across all API calls made by the LLM instance. The counts only increase and are thread-safe when using concurrent translation.
+
 ## Related Projects
 
 ### PDF Craft
{epub_translator-0.1.7.dist-info → epub_translator-0.1.9.dist-info}/RECORD
CHANGED
@@ -1,25 +1,26 @@
 epub_translator/__init__.py,sha256=JsiOUPpk5k7q8mXIgnRQWdVVnkJww_KDTg7jXsP7_C4,222
 epub_translator/data/fill.jinja,sha256=zSytA8Vhp2i6YBZ09F1z9iPJq1-jUaiphoXqTNZwnvo,6964
 epub_translator/data/translate.jinja,sha256=MVAWvgO9kybEFi0zMiZLEWwuRUL3l8PrwJdsoueQeCs,855
-epub_translator/epub/__init__.py,sha256=
+epub_translator/epub/__init__.py,sha256=4kIHrFUvFBemqg4lpjOpa9mmvWZGycgWdiQUaJ4JmL4,183
 epub_translator/epub/common.py,sha256=4-SpTe8iot9hMfyXILmlUFvYVNYqPAHL5hn1fr2wgis,1180
 epub_translator/epub/math.py,sha256=-Q2LJQxxjgQZQUe_WlJA9tjzLqgqtw2ZmbGbHsPRp2U,5422
-epub_translator/epub/metadata.py,sha256=
+epub_translator/epub/metadata.py,sha256=Ddhq-kDtYz2yy41ayXtFxEL-_Lsvn-_vf8hm76HUbRE,2387
 epub_translator/epub/spines.py,sha256=bP2IsobZm7zs4z10iXGc9SmgAFSIq9pJc8HE-V0aW9Y,1331
-epub_translator/epub/toc.py,sha256=
+epub_translator/epub/toc.py,sha256=N-tBR9Pv0FyCWq8swtSI93mCywN7mIXqweDBBmcDYJ8,13815
 epub_translator/epub/zip.py,sha256=-3LI8f-ksgU8xCy28NjBOKyQPE8PhPEUPqIKZE1p8dw,2364
 epub_translator/llm/__init__.py,sha256=YcFYYnXmXyX0RUyC-PDbj5k7Woygp_XOpTI3vDiNSPM,75
 epub_translator/llm/context.py,sha256=8-0UnrZIaNshR_imy_ed_UpOK7H1a6dOsG-boaYOX8k,4186
-epub_translator/llm/core.py,sha256=
+epub_translator/llm/core.py,sha256=MnToX8Zhr_r4sj9B3s54bclesojQEFarzl0VqHGDKlo,6488
 epub_translator/llm/error.py,sha256=4efAIQL14DFSvAnSTUfgdAbZRqaWBqOfUGsSfvxa5zM,1503
-epub_translator/llm/executor.py,sha256=
+epub_translator/llm/executor.py,sha256=wxgFwWaLmuqAvctO3lcQX4U52aiw7EdaFw9Ut0v-ZzU,5745
 epub_translator/llm/increasable.py,sha256=8XkKeI1hiHlpMHj8dQ4fW0BkViSx4hH8QfbQsy-5SDw,1297
+epub_translator/llm/statistics.py,sha256=BX75qVWJ9aWbMoFtaQzoE8oVCLh7wiHoR06dX-AAl3E,875
 epub_translator/llm/types.py,sha256=c-dMAIvlG4R3la3mUTWEw5xei-sIYKmQeBja7mirxcI,219
 epub_translator/segment/__init__.py,sha256=nCHNaHASElKTbC8HEAQkI1Y12m6kEdX5uJVvVvHKtFg,595
 epub_translator/segment/block_segment.py,sha256=psNKA_HMIcwZtoug8AtnAcV9_mQ2WXLnXqFsekHzt2g,4570
 epub_translator/segment/common.py,sha256=gGWYQaJ0tGnWCuF1me9TOo-Q_DrZVakCu2patyFIOs0,714
 epub_translator/segment/inline_segment.py,sha256=nrRKoJ-vblsNITJeixrCgIOkVQyUXrchMg0XYU_8pLo,14563
-epub_translator/segment/text_segment.py,sha256=
+epub_translator/segment/text_segment.py,sha256=E_qgPI09sCV_-PsJtgwcloTa0tpOP3wl0pw5gV9dDNY,6288
 epub_translator/segment/utils.py,sha256=_tlIA1I7rYz9_q-oQ5cPZWPmhTObCXjksQzRtX3beXY,636
 epub_translator/serial/__init__.py,sha256=b3IMVmWcUwEqHKcGmey88b057pyz5ct946CaUZi4LB4,67
 epub_translator/serial/chunk.py,sha256=FrTaHikVOd6bLYumnEriTaAQ_DIDLjHm16gh-wBVR9k,1495
@@ -30,11 +31,11 @@ epub_translator/translation/__init__.py,sha256=R0c0ZngocOC-Qczs0a8JYAdAcCu2gv3FL
 epub_translator/translation/epub_transcode.py,sha256=_pRzmQgDrlfsibalkUogVi0F0Qy_uuYfKhZk3nP5pkA,2747
 epub_translator/translation/language.py,sha256=88osG0JNYxOkxBjg5Pm-P0Mhiyxf6GqdxoPW12HW0PE,493
 epub_translator/translation/punctuation.py,sha256=TPCGjEmlAyN3G11VuXdHn-pvUkuWDwWqbTNzw-ij60E,813
-epub_translator/translation/translator.py,sha256=
-epub_translator/translation/xml_interrupter.py,sha256=
+epub_translator/translation/translator.py,sha256=rly6hXwZ0bylV0-5LVeEEHrZSJ6xKaZlEbrjnG4kkOE,7033
+epub_translator/translation/xml_interrupter.py,sha256=7TRGskn_OxRZT5mvKfjL0VMtU2VCgl1d9ElmfhFG0pM,8628
 epub_translator/utils.py,sha256=BfZWrYjzDNQ4cFrgvRNzd4i1CKLtPxS8Z4LBHhqEV78,914
 epub_translator/xml/__init__.py,sha256=qluFTfZYlPmOie8nR2C5O0tZ3UbCQEoEoR-Fq-__79c,160
-epub_translator/xml/const.py,sha256=
+epub_translator/xml/const.py,sha256=tCdeJfGwH5xgS4uOmR-pXSfyWXGxOHMJyZKE46BVkJU,54
 epub_translator/xml/deduplication.py,sha256=TaMbzeA70VvUQV0X1wcQFVbuMEPJUtj9Hq6iWlUmtAQ,1152
 epub_translator/xml/friendly/__init__.py,sha256=I5jhnhFWoHvojLsYXH4jfR4Gi8lKFZ3yQ56ze5hEe1M,74
 epub_translator/xml/friendly/decoder.py,sha256=xRQ5LnSunmYbba_0oT39oUr86-sLYAHYMUGmlseIu2U,2467
@@ -42,11 +43,11 @@ epub_translator/xml/friendly/encoder.py,sha256=evjvw6oE-oCud44IsJ-YZVHn6dtUzjNYX
 epub_translator/xml/friendly/parser.py,sha256=QlMHA0nfPJbNyx6IwRFrYVw7okuvzDB42NXCauIFV-o,6560
 epub_translator/xml/friendly/tag.py,sha256=ahaGoYttuAlnFxLFFgTV51KUZSpUiHho-COZX14nxN8,3308
 epub_translator/xml/friendly/transform.py,sha256=5tG1MJmzrXIR_Z5gmRxwcoKvXBzJBVH0ELeaRsG-8w0,1201
-epub_translator/xml/inline.py,sha256=
-epub_translator/xml/self_closing.py,sha256=
+epub_translator/xml/inline.py,sha256=VcaNEF2ebVl2fogVk2yV3f4vOP4rePsPTV_qU3fJCE0,3108
+epub_translator/xml/self_closing.py,sha256=gA3wI4axhx281iMnK7Eu81mSXfOhaGsHAVYCYKTXAoQ,5446
 epub_translator/xml/utils.py,sha256=7tQ6L5P0_JXhxONeG64hEeeL5mKjA6NKS1H1Q9B1Cac,1062
 epub_translator/xml/xml.py,sha256=qQ5Wk1-KVVHE4TX25zGOR7fINsGkXnoq-qyKKNl5no4,1675
-epub_translator/xml/xml_like.py,sha256=
+epub_translator/xml/xml_like.py,sha256=oW8JhpdihlayOxDLFlM29uA2HTjpHk7r85jxTcqajME,10142
 epub_translator/xml_translator/__init__.py,sha256=lqts1mJL_WfojDnMAQ5OM7TbT6u9X3H-X4C_avHzvXM,128
 epub_translator/xml_translator/callbacks.py,sha256=IoZrsaivd2W76cHFupwv6auVxgEWHcBN2MHQJYcWoJ8,1324
 epub_translator/xml_translator/common.py,sha256=hSPptgPp7j6dm47imELB5DgmEbzTEyJD6WEeELOOc50,38
@@ -57,7 +58,7 @@ epub_translator/xml_translator/stream_mapper.py,sha256=nk8iRUHAUQA2B35_y-JOCo6il
 epub_translator/xml_translator/submitter.py,sha256=_ic2_JBPdEd2nMSu2mtQ5OzqpGv0zGrvYaicVUXAiUQ,14159
 epub_translator/xml_translator/translator.py,sha256=7Ja1jFbmjIgHcmI9V6gg_K0t7qb6in9mhRn54a7qhZ8,9497
 epub_translator/xml_translator/validation.py,sha256=-OKlSZuD__sjAiEpGAO93YQme4ZDSPmoPjRsAMOCEjc,16668
-epub_translator-0.1.
-epub_translator-0.1.
-epub_translator-0.1.
-epub_translator-0.1.
+epub_translator-0.1.9.dist-info/LICENSE,sha256=5RF32sL3LtMOJIErdDKp1ZEYPGXS8WPpsiSz_jMBnGI,1066
+epub_translator-0.1.9.dist-info/METADATA,sha256=0Av_UtT49b-yCrurxxzXxMS-KGnraqPLzQCOdxzLh9U,18274
+epub_translator-0.1.9.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
+epub_translator-0.1.9.dist-info/RECORD,,
{epub_translator-0.1.7.dist-info → epub_translator-0.1.9.dist-info}/LICENSE
File without changes
{epub_translator-0.1.7.dist-info → epub_translator-0.1.9.dist-info}/WHEEL
File without changes