epub-translator 0.0.6__tar.gz → 0.0.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {epub_translator-0.0.6 → epub_translator-0.0.7}/PKG-INFO +1 -1
- epub_translator-0.0.7/epub_translator/__init__.py +3 -0
- {epub_translator-0.0.6 → epub_translator-0.0.7}/epub_translator/epub/html/dom_operator.py +23 -17
- {epub_translator-0.0.6 → epub_translator-0.0.7}/epub_translator/epub/html/file.py +3 -3
- {epub_translator-0.0.6 → epub_translator-0.0.7}/epub_translator/translator.py +12 -2
- {epub_translator-0.0.6 → epub_translator-0.0.7}/pyproject.toml +1 -1
- epub_translator-0.0.6/epub_translator/__init__.py +0 -3
- {epub_translator-0.0.6 → epub_translator-0.0.7}/LICENSE +0 -0
- {epub_translator-0.0.6 → epub_translator-0.0.7}/README.md +0 -0
- {epub_translator-0.0.6 → epub_translator-0.0.7}/epub_translator/data/format.jinja +0 -0
- {epub_translator-0.0.6 → epub_translator-0.0.7}/epub_translator/data/translate.jinja +0 -0
- {epub_translator-0.0.6 → epub_translator-0.0.7}/epub_translator/epub/__init__.py +0 -0
- {epub_translator-0.0.6 → epub_translator-0.0.7}/epub_translator/epub/content_parser.py +0 -0
- {epub_translator-0.0.6 → epub_translator-0.0.7}/epub_translator/epub/html/__init__.py +0 -0
- {epub_translator-0.0.6 → epub_translator-0.0.7}/epub_translator/epub/html/empty_tags.py +0 -0
- {epub_translator-0.0.6 → epub_translator-0.0.7}/epub_translator/epub/html/texts_searcher.py +0 -0
- {epub_translator-0.0.6 → epub_translator-0.0.7}/epub_translator/llm/__init__.py +0 -0
- {epub_translator-0.0.6 → epub_translator-0.0.7}/epub_translator/llm/error.py +0 -0
- {epub_translator-0.0.6 → epub_translator-0.0.7}/epub_translator/llm/executor.py +0 -0
- {epub_translator-0.0.6 → epub_translator-0.0.7}/epub_translator/llm/increasable.py +0 -0
- {epub_translator-0.0.6 → epub_translator-0.0.7}/epub_translator/llm/node.py +0 -0
- {epub_translator-0.0.6 → epub_translator-0.0.7}/epub_translator/template.py +0 -0
- {epub_translator-0.0.6 → epub_translator-0.0.7}/epub_translator/translation/__init__.py +0 -0
- {epub_translator-0.0.6 → epub_translator-0.0.7}/epub_translator/translation/chunk.py +0 -0
- {epub_translator-0.0.6 → epub_translator-0.0.7}/epub_translator/translation/splitter.py +0 -0
- {epub_translator-0.0.6 → epub_translator-0.0.7}/epub_translator/translation/store.py +0 -0
- {epub_translator-0.0.6 → epub_translator-0.0.7}/epub_translator/translation/translation.py +0 -0
- {epub_translator-0.0.6 → epub_translator-0.0.7}/epub_translator/translation/types.py +0 -0
- {epub_translator-0.0.6 → epub_translator-0.0.7}/epub_translator/translation/utils.py +0 -0
- {epub_translator-0.0.6 → epub_translator-0.0.7}/epub_translator/xml/__init__.py +0 -0
- {epub_translator-0.0.6 → epub_translator-0.0.7}/epub_translator/xml/decoder.py +0 -0
- {epub_translator-0.0.6 → epub_translator-0.0.7}/epub_translator/xml/encoder.py +0 -0
- {epub_translator-0.0.6 → epub_translator-0.0.7}/epub_translator/xml/parser.py +0 -0
- {epub_translator-0.0.6 → epub_translator-0.0.7}/epub_translator/xml/tag.py +0 -0
- {epub_translator-0.0.6 → epub_translator-0.0.7}/epub_translator/xml/transform.py +0 -0
- {epub_translator-0.0.6 → epub_translator-0.0.7}/epub_translator/xml/utils.py +0 -0
- {epub_translator-0.0.6 → epub_translator-0.0.7}/epub_translator/zip_context.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: epub-translator
|
|
3
|
-
Version: 0.0.6
|
|
3
|
+
Version: 0.0.7
|
|
4
4
|
Summary: Translate the epub book using LLM. The translated book will retain the original text and list the translated text side by side with the original text.
|
|
5
5
|
License: MIT
|
|
6
6
|
Keywords: epub,llm,translation,translator
|
|
@@ -13,7 +13,7 @@ def read_texts(root: Element) -> Generator[str, None, None]:
|
|
|
13
13
|
elif position == TextPosition.TAIL:
|
|
14
14
|
yield cast(str, element.tail)
|
|
15
15
|
|
|
16
|
-
def append_texts(root: Element, texts: Iterable[str | Iterable[str] | None]):
|
|
16
|
+
def write_texts(root: Element, texts: Iterable[str | Iterable[str] | None], append: bool):
|
|
17
17
|
zip_list = list(zip(texts, search_texts(root)))
|
|
18
18
|
for text, (element, position, parent) in reversed(zip_list):
|
|
19
19
|
if text is None:
|
|
@@ -23,26 +23,32 @@ def append_texts(root: Element, texts: Iterable[str | Iterable[str] | None]):
|
|
|
23
23
|
text = "".join(text)
|
|
24
24
|
if position == TextPosition.WHOLE_DOM:
|
|
25
25
|
if parent is not None:
|
|
26
|
-
|
|
26
|
+
_write_dom(parent, element, text, append)
|
|
27
27
|
elif position == TextPosition.TEXT:
|
|
28
|
-
element.text =
|
|
28
|
+
element.text = _write_text(element.text, text, append)
|
|
29
29
|
elif position == TextPosition.TAIL:
|
|
30
|
-
element.tail =
|
|
30
|
+
element.tail = _write_text(element.tail, text, append)
|
|
31
31
|
|
|
32
|
-
def
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
32
|
+
def _write_dom(parent: Element, origin: Element, text: str, append: bool):
|
|
33
|
+
if append:
|
|
34
|
+
appended = Element(origin.tag, {**origin.attrib})
|
|
35
|
+
for index, child in enumerate(parent):
|
|
36
|
+
if child == origin:
|
|
37
|
+
parent.insert(index + 1, appended)
|
|
38
|
+
break
|
|
39
|
+
appended.attrib.pop("id", None)
|
|
40
|
+
appended.text = text
|
|
41
|
+
appended.tail = origin.tail
|
|
42
|
+
origin.tail = None
|
|
43
|
+
else:
|
|
44
|
+
for child in origin:
|
|
45
|
+
origin.remove(child)
|
|
46
|
+
origin.text = text
|
|
43
47
|
|
|
44
|
-
def
|
|
45
|
-
if
|
|
48
|
+
def _write_text(left: str | None, right: str, append: bool) -> str:
|
|
49
|
+
if not append:
|
|
50
|
+
return right
|
|
51
|
+
elif left is None:
|
|
46
52
|
return right
|
|
47
53
|
else:
|
|
48
54
|
return left + right
|
|
@@ -2,7 +2,7 @@ import re
|
|
|
2
2
|
|
|
3
3
|
from typing import Iterable
|
|
4
4
|
from xml.etree.ElementTree import fromstring, tostring, Element
|
|
5
|
-
from .dom_operator import read_texts, append_texts
|
|
5
|
+
from .dom_operator import read_texts, write_texts
|
|
6
6
|
from .empty_tags import to_xml, to_html
|
|
7
7
|
|
|
8
8
|
|
|
@@ -41,8 +41,8 @@ class HTMLFile:
|
|
|
41
41
|
self._texts_length = len(texts)
|
|
42
42
|
return texts
|
|
43
43
|
|
|
44
|
-
def write_texts(self, texts: Iterable[str]):
|
|
45
|
-
|
|
44
|
+
def write_texts(self, texts: Iterable[str], append: bool):
|
|
45
|
+
write_texts(self._root, texts, append)
|
|
46
46
|
|
|
47
47
|
@property
|
|
48
48
|
def texts_length(self) -> int:
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
from os import PathLike
|
|
2
2
|
from pathlib import Path
|
|
3
|
+
from enum import auto, Enum
|
|
3
4
|
from tempfile import mkdtemp
|
|
4
5
|
from shutil import rmtree
|
|
5
6
|
|
|
@@ -9,11 +10,16 @@ from .zip_context import ZipContext
|
|
|
9
10
|
from .translation import translate as _translate, Incision, Fragment, Language, ProgressReporter
|
|
10
11
|
|
|
11
12
|
|
|
13
|
+
class TranslatedWriteMode(Enum):
|
|
14
|
+
APPEND = auto()
|
|
15
|
+
REPLACE = auto()
|
|
16
|
+
|
|
12
17
|
def translate(
|
|
13
18
|
llm: LLM,
|
|
14
19
|
source_path: PathLike,
|
|
15
20
|
translated_path: PathLike,
|
|
16
21
|
target_language: Language,
|
|
22
|
+
write_mode: TranslatedWriteMode = TranslatedWriteMode.APPEND,
|
|
17
23
|
user_prompt: str | None = None,
|
|
18
24
|
working_path: PathLike | None = None,
|
|
19
25
|
max_chunk_tokens_count: int = 3000,
|
|
@@ -29,6 +35,7 @@ def translate(
|
|
|
29
35
|
_Translator(
|
|
30
36
|
llm=llm,
|
|
31
37
|
target_language=target_language,
|
|
38
|
+
write_mode=write_mode,
|
|
32
39
|
user_prompt=user_prompt,
|
|
33
40
|
max_chunk_tokens_count=max_chunk_tokens_count,
|
|
34
41
|
max_threads_count=max_threads_count,
|
|
@@ -44,6 +51,7 @@ class _Translator:
|
|
|
44
51
|
self,
|
|
45
52
|
llm: LLM,
|
|
46
53
|
target_language: Language,
|
|
54
|
+
write_mode: TranslatedWriteMode,
|
|
47
55
|
user_prompt: str | None,
|
|
48
56
|
max_chunk_tokens_count: int,
|
|
49
57
|
max_threads_count: int,
|
|
@@ -52,6 +60,7 @@ class _Translator:
|
|
|
52
60
|
|
|
53
61
|
self._llm: LLM = llm
|
|
54
62
|
self._target_language: Language = target_language
|
|
63
|
+
self._write_mode: TranslatedWriteMode = write_mode
|
|
55
64
|
self._user_prompt: str | None = user_prompt
|
|
56
65
|
self._max_chunk_tokens_count: int = max_chunk_tokens_count
|
|
57
66
|
self._max_threads_count: int = max_threads_count
|
|
@@ -110,6 +119,7 @@ class _Translator:
|
|
|
110
119
|
spine: tuple[Path, HTMLFile] | None = None
|
|
111
120
|
translated_texts: list[str] = []
|
|
112
121
|
translated_count: int = 0
|
|
122
|
+
append = (self._write_mode == TranslatedWriteMode.APPEND)
|
|
113
123
|
|
|
114
124
|
for translated_text in _translate(
|
|
115
125
|
llm=self._llm,
|
|
@@ -125,7 +135,7 @@ class _Translator:
|
|
|
125
135
|
|
|
126
136
|
if spine and translated_count >= len(translated_texts):
|
|
127
137
|
spine_path, spine_file = spine
|
|
128
|
-
spine_file.write_texts(translated_texts)
|
|
138
|
+
spine_file.write_texts(translated_texts, append)
|
|
129
139
|
context.write_spine_file(spine_path, spine_file)
|
|
130
140
|
spine = None
|
|
131
141
|
|
|
@@ -152,7 +162,7 @@ class _Translator:
|
|
|
152
162
|
if spine:
|
|
153
163
|
spine_path, spine_file = spine
|
|
154
164
|
if translated_count > 0:
|
|
155
|
-
spine_file.write_texts(translated_texts)
|
|
165
|
+
spine_file.write_texts(translated_texts, append)
|
|
156
166
|
context.write_spine_file(spine_path, spine_file)
|
|
157
167
|
|
|
158
168
|
def _gen_fragments(context: ZipContext):
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "epub-translator"
|
|
3
|
-
version = "0.0.6"
|
|
3
|
+
version = "0.0.7"
|
|
4
4
|
description = "Translate the epub book using LLM. The translated book will retain the original text and list the translated text side by side with the original text."
|
|
5
5
|
keywords=["epub", "llm", "translation", "translator"]
|
|
6
6
|
authors = [
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|