epub-translator 0.0.4__tar.gz → 0.0.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {epub_translator-0.0.4 → epub_translator-0.0.5}/PKG-INFO +1 -1
- {epub_translator-0.0.4 → epub_translator-0.0.5}/epub_translator/translation/translation.py +3 -2
- epub_translator-0.0.5/epub_translator/translation/types.py +40 -0
- {epub_translator-0.0.4 → epub_translator-0.0.5}/pyproject.toml +1 -1
- epub_translator-0.0.4/epub_translator/translation/types.py +0 -49
- {epub_translator-0.0.4 → epub_translator-0.0.5}/LICENSE +0 -0
- {epub_translator-0.0.4 → epub_translator-0.0.5}/README.md +0 -0
- {epub_translator-0.0.4 → epub_translator-0.0.5}/epub_translator/__init__.py +0 -0
- {epub_translator-0.0.4 → epub_translator-0.0.5}/epub_translator/data/format.jinja +0 -0
- {epub_translator-0.0.4 → epub_translator-0.0.5}/epub_translator/data/translate.jinja +0 -0
- {epub_translator-0.0.4 → epub_translator-0.0.5}/epub_translator/epub/__init__.py +0 -0
- {epub_translator-0.0.4 → epub_translator-0.0.5}/epub_translator/epub/content_parser.py +0 -0
- {epub_translator-0.0.4 → epub_translator-0.0.5}/epub_translator/epub/html/__init__.py +0 -0
- {epub_translator-0.0.4 → epub_translator-0.0.5}/epub_translator/epub/html/dom_operator.py +0 -0
- {epub_translator-0.0.4 → epub_translator-0.0.5}/epub_translator/epub/html/empty_tags.py +0 -0
- {epub_translator-0.0.4 → epub_translator-0.0.5}/epub_translator/epub/html/file.py +0 -0
- {epub_translator-0.0.4 → epub_translator-0.0.5}/epub_translator/epub/html/texts_searcher.py +0 -0
- {epub_translator-0.0.4 → epub_translator-0.0.5}/epub_translator/llm/__init__.py +0 -0
- {epub_translator-0.0.4 → epub_translator-0.0.5}/epub_translator/llm/error.py +0 -0
- {epub_translator-0.0.4 → epub_translator-0.0.5}/epub_translator/llm/executor.py +0 -0
- {epub_translator-0.0.4 → epub_translator-0.0.5}/epub_translator/llm/increasable.py +0 -0
- {epub_translator-0.0.4 → epub_translator-0.0.5}/epub_translator/llm/node.py +0 -0
- {epub_translator-0.0.4 → epub_translator-0.0.5}/epub_translator/template.py +0 -0
- {epub_translator-0.0.4 → epub_translator-0.0.5}/epub_translator/translation/__init__.py +0 -0
- {epub_translator-0.0.4 → epub_translator-0.0.5}/epub_translator/translation/chunk.py +0 -0
- {epub_translator-0.0.4 → epub_translator-0.0.5}/epub_translator/translation/splitter.py +0 -0
- {epub_translator-0.0.4 → epub_translator-0.0.5}/epub_translator/translation/store.py +0 -0
- {epub_translator-0.0.4 → epub_translator-0.0.5}/epub_translator/translation/utils.py +0 -0
- {epub_translator-0.0.4 → epub_translator-0.0.5}/epub_translator/translator.py +0 -0
- {epub_translator-0.0.4 → epub_translator-0.0.5}/epub_translator/xml/__init__.py +0 -0
- {epub_translator-0.0.4 → epub_translator-0.0.5}/epub_translator/xml/decoder.py +0 -0
- {epub_translator-0.0.4 → epub_translator-0.0.5}/epub_translator/xml/encoder.py +0 -0
- {epub_translator-0.0.4 → epub_translator-0.0.5}/epub_translator/xml/parser.py +0 -0
- {epub_translator-0.0.4 → epub_translator-0.0.5}/epub_translator/xml/tag.py +0 -0
- {epub_translator-0.0.4 → epub_translator-0.0.5}/epub_translator/xml/transform.py +0 -0
- {epub_translator-0.0.4 → epub_translator-0.0.5}/epub_translator/xml/utils.py +0 -0
- {epub_translator-0.0.4 → epub_translator-0.0.5}/epub_translator/zip_context.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: epub-translator
|
|
3
|
-
Version: 0.0.4
|
|
3
|
+
Version: 0.0.5
|
|
4
4
|
Summary: Translate the epub book using LLM. The translated book will retain the original text and list the translated text side by side with the original text.
|
|
5
5
|
License: MIT
|
|
6
6
|
Keywords: epub,llm,translation,translator
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
from math import ceil
|
|
1
2
|
from typing import Callable, Iterator, Generator
|
|
2
3
|
from pathlib import Path
|
|
3
4
|
from concurrent.futures import as_completed, ThreadPoolExecutor
|
|
@@ -158,7 +159,7 @@ def _translate_texts(
|
|
|
158
159
|
text_tag="TXT",
|
|
159
160
|
user_data=user_data,
|
|
160
161
|
parser=lambda r: r,
|
|
161
|
-
max_tokens=texts_tokens * _PLAIN_TEXT_SCALE,
|
|
162
|
+
max_tokens=ceil(texts_tokens * _PLAIN_TEXT_SCALE),
|
|
162
163
|
params={
|
|
163
164
|
"target_language": language_chinese_name(target_language),
|
|
164
165
|
"user_prompt": user_prompt,
|
|
@@ -179,7 +180,7 @@ def _translate_texts(
|
|
|
179
180
|
return llm.request_xml(
|
|
180
181
|
template_name="format",
|
|
181
182
|
user_data=request_text,
|
|
182
|
-
max_tokens=texts_tokens * _XML_TEXT_SCALE,
|
|
183
|
+
max_tokens=ceil(texts_tokens * _XML_TEXT_SCALE),
|
|
183
184
|
parser=lambda r: _parse_translated_response(r, len(texts)),
|
|
184
185
|
params={
|
|
185
186
|
"target_language": language_chinese_name(target_language),
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
from enum import Enum
|
|
2
|
+
from dataclasses import dataclass
|
|
3
|
+
from resource_segmentation import Incision
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
@dataclass
|
|
7
|
+
class Fragment:
|
|
8
|
+
text: str
|
|
9
|
+
start_incision: Incision
|
|
10
|
+
end_incision: Incision
|
|
11
|
+
|
|
12
|
+
class Language(Enum):
|
|
13
|
+
SIMPLIFIED_CHINESE = "zh-Hans"
|
|
14
|
+
TRADITIONAL_CHINESE = "zh-Hant"
|
|
15
|
+
ENGLISH = "en"
|
|
16
|
+
FRENCH = "fr"
|
|
17
|
+
GERMAN = "de"
|
|
18
|
+
SPANISH = "es"
|
|
19
|
+
RUSSIAN = "ru"
|
|
20
|
+
ITALIAN = "it"
|
|
21
|
+
PORTUGUESE = "pt"
|
|
22
|
+
JAPANESE = "ja"
|
|
23
|
+
KOREAN = "ko"
|
|
24
|
+
|
|
25
|
+
_LANGUAGE_NAMES = {
|
|
26
|
+
Language.SIMPLIFIED_CHINESE: "简体中文",
|
|
27
|
+
Language.TRADITIONAL_CHINESE: "繁体中文",
|
|
28
|
+
Language.ENGLISH: "英语",
|
|
29
|
+
Language.FRENCH: "法语",
|
|
30
|
+
Language.GERMAN: "德语",
|
|
31
|
+
Language.SPANISH: "西班牙语",
|
|
32
|
+
Language.RUSSIAN: "俄语",
|
|
33
|
+
Language.ITALIAN: "意大利语",
|
|
34
|
+
Language.PORTUGUESE: "葡萄牙语",
|
|
35
|
+
Language.JAPANESE: "日语",
|
|
36
|
+
Language.KOREAN: "韩语",
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
def language_chinese_name(language: Language) -> str:
|
|
40
|
+
return _LANGUAGE_NAMES[language]
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "epub-translator"
|
|
3
|
-
version = "0.0.4"
|
|
3
|
+
version = "0.0.5"
|
|
4
4
|
description = "Translate the epub book using LLM. The translated book will retain the original text and list the translated text side by side with the original text."
|
|
5
5
|
keywords=["epub", "llm", "translation", "translator"]
|
|
6
6
|
authors = [
|
|
@@ -1,49 +0,0 @@
|
|
|
1
|
-
from enum import Enum
|
|
2
|
-
from dataclasses import dataclass
|
|
3
|
-
from resource_segmentation import Incision
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
@dataclass
|
|
7
|
-
class Fragment:
|
|
8
|
-
text: str
|
|
9
|
-
start_incision: Incision
|
|
10
|
-
end_incision: Incision
|
|
11
|
-
|
|
12
|
-
class Language(Enum):
|
|
13
|
-
SIMPLIFIED_CHINESE = "zh-Hans"
|
|
14
|
-
TRADITIONAL_CHINESE = "zh-Hant"
|
|
15
|
-
ENGLISH = "en"
|
|
16
|
-
FRENCH = "fr"
|
|
17
|
-
GERMAN = "de"
|
|
18
|
-
SPANISH = "es"
|
|
19
|
-
RUSSIAN = "ru"
|
|
20
|
-
ITALIAN = "it"
|
|
21
|
-
PORTUGUESE = "pt"
|
|
22
|
-
JAPANESE = "ja"
|
|
23
|
-
KOREAN = "ko"
|
|
24
|
-
|
|
25
|
-
def language_chinese_name(language: Language) -> str:
|
|
26
|
-
if language == Language.SIMPLIFIED_CHINESE:
|
|
27
|
-
return "简体中文"
|
|
28
|
-
elif language == Language.TRADITIONAL_CHINESE:
|
|
29
|
-
return "繁体中文"
|
|
30
|
-
elif language == Language.ENGLISH:
|
|
31
|
-
return "英语"
|
|
32
|
-
elif language == Language.FRENCH:
|
|
33
|
-
return "法语"
|
|
34
|
-
elif language == Language.GERMAN:
|
|
35
|
-
return "德语"
|
|
36
|
-
elif language == Language.SPANISH:
|
|
37
|
-
return "西班牙语"
|
|
38
|
-
elif language == Language.RUSSIAN:
|
|
39
|
-
return "俄语"
|
|
40
|
-
elif language == Language.ITALIAN:
|
|
41
|
-
return "意大利语"
|
|
42
|
-
elif language == Language.PORTUGUESE:
|
|
43
|
-
return "葡萄牙语"
|
|
44
|
-
elif language == Language.JAPANESE:
|
|
45
|
-
return "日语"
|
|
46
|
-
elif language == Language.KOREAN:
|
|
47
|
-
return "韩语"
|
|
48
|
-
else:
|
|
49
|
-
raise ValueError(f"Unknown language: {language}")
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|