epub-translator 0.0.7__tar.gz → 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- epub_translator-0.1.0/PKG-INFO +283 -0
- epub_translator-0.1.0/README.md +249 -0
- epub_translator-0.1.0/epub_translator/__init__.py +5 -0
- epub_translator-0.1.0/epub_translator/data/fill.jinja +66 -0
- epub_translator-0.1.0/epub_translator/data/mmltex/README.md +67 -0
- epub_translator-0.1.0/epub_translator/data/mmltex/cmarkup.xsl +1106 -0
- epub_translator-0.1.0/epub_translator/data/mmltex/entities.xsl +459 -0
- epub_translator-0.1.0/epub_translator/data/mmltex/glayout.xsl +222 -0
- epub_translator-0.1.0/epub_translator/data/mmltex/mmltex.xsl +36 -0
- epub_translator-0.1.0/epub_translator/data/mmltex/scripts.xsl +375 -0
- epub_translator-0.1.0/epub_translator/data/mmltex/tables.xsl +130 -0
- epub_translator-0.1.0/epub_translator/data/mmltex/tokens.xsl +328 -0
- epub_translator-0.1.0/epub_translator/data/translate.jinja +18 -0
- epub_translator-0.1.0/epub_translator/epub/__init__.py +4 -0
- epub_translator-0.1.0/epub_translator/epub/common.py +43 -0
- epub_translator-0.1.0/epub_translator/epub/math.py +193 -0
- epub_translator-0.1.0/epub_translator/epub/placeholder.py +53 -0
- epub_translator-0.1.0/epub_translator/epub/spines.py +42 -0
- epub_translator-0.1.0/epub_translator/epub/toc.py +505 -0
- epub_translator-0.1.0/epub_translator/epub/zip.py +67 -0
- epub_translator-0.1.0/epub_translator/iter_sync.py +24 -0
- epub_translator-0.1.0/epub_translator/language.py +23 -0
- epub_translator-0.1.0/epub_translator/llm/__init__.py +2 -0
- epub_translator-0.1.0/epub_translator/llm/core.py +175 -0
- epub_translator-0.1.0/epub_translator/llm/error.py +52 -0
- epub_translator-0.1.0/epub_translator/llm/executor.py +173 -0
- epub_translator-0.1.0/epub_translator/llm/increasable.py +35 -0
- epub_translator-0.1.0/epub_translator/llm/types.py +17 -0
- epub_translator-0.1.0/epub_translator/serial/__init__.py +2 -0
- epub_translator-0.1.0/epub_translator/serial/chunk.py +52 -0
- epub_translator-0.1.0/epub_translator/serial/segment.py +17 -0
- epub_translator-0.1.0/epub_translator/serial/splitter.py +50 -0
- epub_translator-0.1.0/epub_translator/template.py +52 -0
- epub_translator-0.1.0/epub_translator/translator.py +211 -0
- epub_translator-0.1.0/epub_translator/utils.py +7 -0
- epub_translator-0.1.0/epub_translator/xml/__init__.py +4 -0
- epub_translator-0.1.0/epub_translator/xml/deduplication.py +38 -0
- epub_translator-0.1.0/epub_translator/xml/firendly/__init__.py +2 -0
- epub_translator-0.1.0/epub_translator/xml/firendly/decoder.py +75 -0
- epub_translator-0.1.0/epub_translator/xml/firendly/encoder.py +84 -0
- epub_translator-0.1.0/epub_translator/xml/firendly/parser.py +177 -0
- epub_translator-0.1.0/epub_translator/xml/firendly/tag.py +118 -0
- epub_translator-0.1.0/epub_translator/xml/firendly/transform.py +36 -0
- epub_translator-0.1.0/epub_translator/xml/xml.py +52 -0
- epub_translator-0.1.0/epub_translator/xml/xml_like.py +176 -0
- epub_translator-0.1.0/epub_translator/xml_translator/__init__.py +3 -0
- epub_translator-0.1.0/epub_translator/xml_translator/const.py +2 -0
- epub_translator-0.1.0/epub_translator/xml_translator/fill.py +128 -0
- epub_translator-0.1.0/epub_translator/xml_translator/format.py +282 -0
- epub_translator-0.1.0/epub_translator/xml_translator/fragmented.py +125 -0
- epub_translator-0.1.0/epub_translator/xml_translator/group.py +183 -0
- epub_translator-0.1.0/epub_translator/xml_translator/progressive_locking.py +256 -0
- epub_translator-0.1.0/epub_translator/xml_translator/submitter.py +102 -0
- epub_translator-0.1.0/epub_translator/xml_translator/text_segment.py +263 -0
- epub_translator-0.1.0/epub_translator/xml_translator/translator.py +178 -0
- epub_translator-0.1.0/epub_translator/xml_translator/utils.py +29 -0
- epub_translator-0.1.0/pyproject.toml +74 -0
- epub_translator-0.0.7/PKG-INFO +0 -170
- epub_translator-0.0.7/README.md +0 -145
- epub_translator-0.0.7/epub_translator/__init__.py +0 -3
- epub_translator-0.0.7/epub_translator/data/format.jinja +0 -33
- epub_translator-0.0.7/epub_translator/data/translate.jinja +0 -15
- epub_translator-0.0.7/epub_translator/epub/__init__.py +0 -2
- epub_translator-0.0.7/epub_translator/epub/content_parser.py +0 -162
- epub_translator-0.0.7/epub_translator/epub/html/__init__.py +0 -1
- epub_translator-0.0.7/epub_translator/epub/html/dom_operator.py +0 -68
- epub_translator-0.0.7/epub_translator/epub/html/empty_tags.py +0 -23
- epub_translator-0.0.7/epub_translator/epub/html/file.py +0 -80
- epub_translator-0.0.7/epub_translator/epub/html/texts_searcher.py +0 -46
- epub_translator-0.0.7/epub_translator/llm/__init__.py +0 -1
- epub_translator-0.0.7/epub_translator/llm/error.py +0 -49
- epub_translator-0.0.7/epub_translator/llm/executor.py +0 -150
- epub_translator-0.0.7/epub_translator/llm/increasable.py +0 -35
- epub_translator-0.0.7/epub_translator/llm/node.py +0 -201
- epub_translator-0.0.7/epub_translator/template.py +0 -50
- epub_translator-0.0.7/epub_translator/translation/__init__.py +0 -2
- epub_translator-0.0.7/epub_translator/translation/chunk.py +0 -118
- epub_translator-0.0.7/epub_translator/translation/splitter.py +0 -78
- epub_translator-0.0.7/epub_translator/translation/store.py +0 -36
- epub_translator-0.0.7/epub_translator/translation/translation.py +0 -231
- epub_translator-0.0.7/epub_translator/translation/types.py +0 -45
- epub_translator-0.0.7/epub_translator/translation/utils.py +0 -11
- epub_translator-0.0.7/epub_translator/translator.py +0 -184
- epub_translator-0.0.7/epub_translator/xml/__init__.py +0 -3
- epub_translator-0.0.7/epub_translator/xml/decoder.py +0 -71
- epub_translator-0.0.7/epub_translator/xml/encoder.py +0 -95
- epub_translator-0.0.7/epub_translator/xml/parser.py +0 -172
- epub_translator-0.0.7/epub_translator/xml/tag.py +0 -93
- epub_translator-0.0.7/epub_translator/xml/transform.py +0 -34
- epub_translator-0.0.7/epub_translator/xml/utils.py +0 -12
- epub_translator-0.0.7/epub_translator/zip_context.py +0 -74
- epub_translator-0.0.7/pyproject.toml +0 -38
- {epub_translator-0.0.7 → epub_translator-0.1.0}/LICENSE +0 -0
|
@@ -0,0 +1,283 @@
|
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
|
+
Name: epub-translator
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Translate EPUB books using an LLM. The translated book retains the original text and lists the translated text side by side with it.
|
|
5
|
+
License: MIT
|
|
6
|
+
Keywords: epub,llm,translation,translator
|
|
7
|
+
Author: Tao Zeyu
|
|
8
|
+
Author-email: i@taozeyu.com
|
|
9
|
+
Maintainer: Tao Zeyu
|
|
10
|
+
Maintainer-email: i@taozeyu.com
|
|
11
|
+
Requires-Python: >=3.11,<3.14
|
|
12
|
+
Classifier: Development Status :: 4 - Beta
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: Intended Audience :: Education
|
|
15
|
+
Classifier: Intended Audience :: Science/Research
|
|
16
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
17
|
+
Classifier: Operating System :: OS Independent
|
|
18
|
+
Classifier: Programming Language :: Python :: 3
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
22
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
23
|
+
Classifier: Topic :: Software Development :: Localization
|
|
24
|
+
Classifier: Topic :: Text Processing :: Markup
|
|
25
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
26
|
+
Requires-Dist: jinja2 (>=3.1.6,<4.0.0)
|
|
27
|
+
Requires-Dist: openai (>=2.14.0,<3.0.0)
|
|
28
|
+
Requires-Dist: resource-segmentation (>=0.0.7,<0.1.0)
|
|
29
|
+
Requires-Dist: tiktoken (>=0.12.0,<1.0.0)
|
|
30
|
+
Project-URL: Homepage, https://hub.oomol.com/package/books-translator
|
|
31
|
+
Project-URL: Repository, https://github.com/oomol-lab/epub-translator
|
|
32
|
+
Description-Content-Type: text/markdown
|
|
33
|
+
|
|
34
|
+
<div align=center>
|
|
35
|
+
<h1>EPUB Translator</h1>
|
|
36
|
+
<p>
|
|
37
|
+
<a href="https://github.com/oomol-lab/epub-translator/actions/workflows/merge-build.yml" target="_blank"><img src="https://img.shields.io/github/actions/workflow/status/oomol-lab/epub-translator/merge-build.yml" alt="ci" /></a>
|
|
38
|
+
<a href="https://pypi.org/project/epub-translator/" target="_blank"><img src="https://img.shields.io/badge/pip_install-epub--translator-blue" alt="pip install epub-translator" /></a>
|
|
39
|
+
<a href="https://pypi.org/project/epub-translator/" target="_blank"><img src="https://img.shields.io/pypi/v/epub-translator.svg" alt="pypi epub-translator" /></a>
|
|
40
|
+
<a href="https://pypi.org/project/epub-translator/" target="_blank"><img src="https://img.shields.io/pypi/pyversions/epub-translator.svg" alt="python versions" /></a>
|
|
41
|
+
<a href="https://github.com/oomol-lab/epub-translator/blob/main/LICENSE" target="_blank"><img src="https://img.shields.io/github/license/oomol-lab/epub-translator" alt="license" /></a>
|
|
42
|
+
</p>
|
|
43
|
+
<p><a href="https://hub.oomol.com/package/books-translator?open=true" target="_blank"><img src="https://static.oomol.com/assets/button.svg" alt="Open in OOMOL Studio" /></a></p>
|
|
44
|
+
<p>English | <a href="./README_zh-CN.md">中文</a></p>
|
|
45
|
+
</div>
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
Translate EPUB books using Large Language Models while preserving the original text. The translated content is displayed side-by-side with the original, creating bilingual books perfect for language learning and cross-reference reading.
|
|
49
|
+
|
|
50
|
+

|
|
51
|
+
|
|
52
|
+
## Features
|
|
53
|
+
|
|
54
|
+
- **Bilingual Output**: Preserves original text alongside translations for easy comparison
|
|
55
|
+
- **LLM-Powered**: Leverages large language models for high-quality, context-aware translations
|
|
56
|
+
- **Format Preservation**: Maintains EPUB structure, styles, images, and formatting
|
|
57
|
+
- **Complete Translation**: Translates chapter content, table of contents, and metadata
|
|
58
|
+
- **Progress Tracking**: Monitor translation progress with built-in callbacks
|
|
59
|
+
- **Flexible LLM Support**: Works with any OpenAI-compatible API endpoint
|
|
60
|
+
- **Caching**: Built-in caching for progress recovery when translation fails
|
|
61
|
+
|
|
62
|
+
## Installation
|
|
63
|
+
|
|
64
|
+
```bash
|
|
65
|
+
pip install epub-translator
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
**Requirements**: Python 3.11, 3.12, or 3.13
|
|
69
|
+
|
|
70
|
+
## Quick Start
|
|
71
|
+
|
|
72
|
+
### Using OOMOL Studio (Recommended)
|
|
73
|
+
|
|
74
|
+
The easiest way to use EPUB Translator is through OOMOL Studio with a visual interface:
|
|
75
|
+
|
|
76
|
+
[](https://www.youtube.com/watch?v=QsAdiskxfXI)
|
|
77
|
+
|
|
78
|
+
### Using Python API
|
|
79
|
+
|
|
80
|
+
```python
|
|
81
|
+
from pathlib import Path
|
|
82
|
+
from epub_translator import LLM, translate, language
|
|
83
|
+
|
|
84
|
+
# Initialize LLM with your API credentials
|
|
85
|
+
llm = LLM(
|
|
86
|
+
key="your-api-key",
|
|
87
|
+
url="https://api.openai.com/v1",
|
|
88
|
+
model="gpt-4",
|
|
89
|
+
token_encoding="o200k_base",
|
|
90
|
+
)
|
|
91
|
+
|
|
92
|
+
# Translate EPUB file using language constants
|
|
93
|
+
translate(
|
|
94
|
+
llm=llm,
|
|
95
|
+
source_path=Path("source.epub"),
|
|
96
|
+
target_path=Path("translated.epub"),
|
|
97
|
+
target_language=language.ENGLISH,
|
|
98
|
+
)
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
### With Progress Tracking
|
|
102
|
+
|
|
103
|
+
```python
|
|
104
|
+
from tqdm import tqdm
|
|
105
|
+
|
|
106
|
+
with tqdm(total=100, desc="Translating", unit="%") as pbar:
|
|
107
|
+
last_progress = 0.0
|
|
108
|
+
|
|
109
|
+
def on_progress(progress: float):
|
|
110
|
+
global last_progress
|
|
111
|
+
increment = (progress - last_progress) * 100
|
|
112
|
+
pbar.update(increment)
|
|
113
|
+
last_progress = progress
|
|
114
|
+
|
|
115
|
+
translate(
|
|
116
|
+
llm=llm,
|
|
117
|
+
source_path=Path("source.epub"),
|
|
118
|
+
target_path=Path("translated.epub"),
|
|
119
|
+
target_language="English",
|
|
120
|
+
on_progress=on_progress,
|
|
121
|
+
)
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
## API Reference
|
|
125
|
+
|
|
126
|
+
### `LLM` Class
|
|
127
|
+
|
|
128
|
+
Initialize the LLM client for translation:
|
|
129
|
+
|
|
130
|
+
```python
|
|
131
|
+
LLM(
|
|
132
|
+
key: str, # API key
|
|
133
|
+
url: str, # API endpoint URL
|
|
134
|
+
model: str, # Model name (e.g., "gpt-4")
|
|
135
|
+
token_encoding: str, # Token encoding (e.g., "o200k_base")
|
|
136
|
+
cache_path: PathLike | None = None, # Cache directory path
|
|
137
|
+
timeout: float | None = None, # Request timeout in seconds
|
|
138
|
+
top_p: float | tuple[float, float] | None = None,
|
|
139
|
+
temperature: float | tuple[float, float] | None = None,
|
|
140
|
+
retry_times: int = 5, # Number of retries on failure
|
|
141
|
+
retry_interval_seconds: float = 6.0, # Interval between retries
|
|
142
|
+
log_dir_path: PathLike | None = None, # Log directory path
|
|
143
|
+
)
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
### `translate` Function
|
|
147
|
+
|
|
148
|
+
Translate an EPUB file:
|
|
149
|
+
|
|
150
|
+
```python
|
|
151
|
+
translate(
|
|
152
|
+
llm: LLM, # LLM instance
|
|
153
|
+
source_path: Path, # Source EPUB file path
|
|
154
|
+
target_path: Path, # Output EPUB file path
|
|
155
|
+
target_language: str, # Target language (e.g., "English", "Chinese")
|
|
156
|
+
user_prompt: str | None = None, # Custom translation instructions
|
|
157
|
+
max_retries: int = 5, # Maximum retries for failed translations
|
|
158
|
+
max_group_tokens: int = 1200, # Maximum tokens per translation group
|
|
159
|
+
on_progress: Callable[[float], None] | None = None, # Progress callback (0.0-1.0)
|
|
160
|
+
)
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
#### Language Constants
|
|
164
|
+
|
|
165
|
+
EPUB Translator provides predefined language constants for convenience. You can use these constants instead of writing language names as strings:
|
|
166
|
+
|
|
167
|
+
```python
|
|
168
|
+
from epub_translator import language
|
|
169
|
+
|
|
170
|
+
# Usage example:
|
|
171
|
+
translate(
|
|
172
|
+
llm=llm,
|
|
173
|
+
source_path=Path("source.epub"),
|
|
174
|
+
target_path=Path("translated.epub"),
|
|
175
|
+
target_language=language.ENGLISH,
|
|
176
|
+
)
|
|
177
|
+
|
|
178
|
+
# You can also use custom language strings:
|
|
179
|
+
translate(
|
|
180
|
+
llm=llm,
|
|
181
|
+
source_path=Path("source.epub"),
|
|
182
|
+
target_path=Path("translated.epub"),
|
|
183
|
+
target_language="Icelandic", # For languages not in the constants
|
|
184
|
+
)
|
|
185
|
+
```
|
|
186
|
+
|
|
187
|
+
## Configuration Examples
|
|
188
|
+
|
|
189
|
+
### OpenAI
|
|
190
|
+
|
|
191
|
+
```python
|
|
192
|
+
llm = LLM(
|
|
193
|
+
key="sk-...",
|
|
194
|
+
url="https://api.openai.com/v1",
|
|
195
|
+
model="gpt-4",
|
|
196
|
+
token_encoding="o200k_base",
|
|
197
|
+
)
|
|
198
|
+
```
|
|
199
|
+
|
|
200
|
+
### Azure OpenAI
|
|
201
|
+
|
|
202
|
+
```python
|
|
203
|
+
llm = LLM(
|
|
204
|
+
key="your-azure-key",
|
|
205
|
+
url="https://your-resource.openai.azure.com/openai/deployments/your-deployment",
|
|
206
|
+
model="gpt-4",
|
|
207
|
+
token_encoding="o200k_base",
|
|
208
|
+
)
|
|
209
|
+
```
|
|
210
|
+
|
|
211
|
+
### Other OpenAI-Compatible Services
|
|
212
|
+
|
|
213
|
+
Any service with an OpenAI-compatible API can be used:
|
|
214
|
+
|
|
215
|
+
```python
|
|
216
|
+
llm = LLM(
|
|
217
|
+
key="your-api-key",
|
|
218
|
+
url="https://your-service.com/v1",
|
|
219
|
+
model="your-model",
|
|
220
|
+
token_encoding="o200k_base", # Match your model's encoding
|
|
221
|
+
)
|
|
222
|
+
```
|
|
223
|
+
|
|
224
|
+
## Use Cases
|
|
225
|
+
|
|
226
|
+
- **Language Learning**: Read books in their original language with side-by-side translations
|
|
227
|
+
- **Academic Research**: Access foreign literature with bilingual references
|
|
228
|
+
- **Content Localization**: Prepare books for international audiences
|
|
229
|
+
- **Cross-Cultural Reading**: Enjoy literature while understanding cultural nuances
|
|
230
|
+
|
|
231
|
+
## Advanced Features
|
|
232
|
+
|
|
233
|
+
### Custom Translation Prompts
|
|
234
|
+
|
|
235
|
+
Provide specific translation instructions:
|
|
236
|
+
|
|
237
|
+
```python
|
|
238
|
+
translate(
|
|
239
|
+
llm=llm,
|
|
240
|
+
source_path=Path("source.epub"),
|
|
241
|
+
target_path=Path("translated.epub"),
|
|
242
|
+
target_language="English",
|
|
243
|
+
user_prompt="Use formal language and preserve technical terminology",
|
|
244
|
+
)
|
|
245
|
+
```
|
|
246
|
+
|
|
247
|
+
### Caching for Progress Recovery
|
|
248
|
+
|
|
249
|
+
Enable caching to resume translation progress after failures:
|
|
250
|
+
|
|
251
|
+
```python
|
|
252
|
+
llm = LLM(
|
|
253
|
+
key="your-api-key",
|
|
254
|
+
url="https://api.openai.com/v1",
|
|
255
|
+
model="gpt-4",
|
|
256
|
+
token_encoding="o200k_base",
|
|
257
|
+
cache_path="./translation_cache", # Translations are cached here
|
|
258
|
+
)
|
|
259
|
+
```
|
|
260
|
+
|
|
261
|
+
## Related Projects
|
|
262
|
+
|
|
263
|
+
### PDF Craft
|
|
264
|
+
|
|
265
|
+
[PDF Craft](https://github.com/oomol-lab/pdf-craft) converts PDF files into EPUB and other formats, with a focus on scanned books. Combine PDF Craft with EPUB Translator to convert and translate scanned PDF books into bilingual EPUB format.
|
|
266
|
+
|
|
267
|
+
**Workflow**: Scanned PDF → [PDF Craft] → EPUB → [EPUB Translator] → Bilingual EPUB
|
|
268
|
+
|
|
269
|
+
For a complete tutorial, watch: [Convert scanned PDF books to EPUB format and translate them into bilingual books](https://www.bilibili.com/video/BV1tMQZY5EYY/)
|
|
270
|
+
|
|
271
|
+
## Contributing
|
|
272
|
+
|
|
273
|
+
Contributions are welcome! Please feel free to submit a Pull Request.
|
|
274
|
+
|
|
275
|
+
## License
|
|
276
|
+
|
|
277
|
+
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
|
|
278
|
+
|
|
279
|
+
## Support
|
|
280
|
+
|
|
281
|
+
- **Issues**: [GitHub Issues](https://github.com/oomol-lab/epub-translator/issues)
|
|
282
|
+
- **OOMOL Studio**: [Open in OOMOL Studio](https://hub.oomol.com/package/books-translator?open=true)
|
|
283
|
+
|
|
@@ -0,0 +1,249 @@
|
|
|
1
|
+
<div align=center>
|
|
2
|
+
<h1>EPUB Translator</h1>
|
|
3
|
+
<p>
|
|
4
|
+
<a href="https://github.com/oomol-lab/epub-translator/actions/workflows/merge-build.yml" target="_blank"><img src="https://img.shields.io/github/actions/workflow/status/oomol-lab/epub-translator/merge-build.yml" alt="ci" /></a>
|
|
5
|
+
<a href="https://pypi.org/project/epub-translator/" target="_blank"><img src="https://img.shields.io/badge/pip_install-epub--translator-blue" alt="pip install epub-translator" /></a>
|
|
6
|
+
<a href="https://pypi.org/project/epub-translator/" target="_blank"><img src="https://img.shields.io/pypi/v/epub-translator.svg" alt="pypi epub-translator" /></a>
|
|
7
|
+
<a href="https://pypi.org/project/epub-translator/" target="_blank"><img src="https://img.shields.io/pypi/pyversions/epub-translator.svg" alt="python versions" /></a>
|
|
8
|
+
<a href="https://github.com/oomol-lab/epub-translator/blob/main/LICENSE" target="_blank"><img src="https://img.shields.io/github/license/oomol-lab/epub-translator" alt="license" /></a>
|
|
9
|
+
</p>
|
|
10
|
+
<p><a href="https://hub.oomol.com/package/books-translator?open=true" target="_blank"><img src="https://static.oomol.com/assets/button.svg" alt="Open in OOMOL Studio" /></a></p>
|
|
11
|
+
<p>English | <a href="./README_zh-CN.md">中文</a></p>
|
|
12
|
+
</div>
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
Translate EPUB books using Large Language Models while preserving the original text. The translated content is displayed side-by-side with the original, creating bilingual books perfect for language learning and cross-reference reading.
|
|
16
|
+
|
|
17
|
+

|
|
18
|
+
|
|
19
|
+
## Features
|
|
20
|
+
|
|
21
|
+
- **Bilingual Output**: Preserves original text alongside translations for easy comparison
|
|
22
|
+
- **LLM-Powered**: Leverages large language models for high-quality, context-aware translations
|
|
23
|
+
- **Format Preservation**: Maintains EPUB structure, styles, images, and formatting
|
|
24
|
+
- **Complete Translation**: Translates chapter content, table of contents, and metadata
|
|
25
|
+
- **Progress Tracking**: Monitor translation progress with built-in callbacks
|
|
26
|
+
- **Flexible LLM Support**: Works with any OpenAI-compatible API endpoint
|
|
27
|
+
- **Caching**: Built-in caching for progress recovery when translation fails
|
|
28
|
+
|
|
29
|
+
## Installation
|
|
30
|
+
|
|
31
|
+
```bash
|
|
32
|
+
pip install epub-translator
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
**Requirements**: Python 3.11, 3.12, or 3.13
|
|
36
|
+
|
|
37
|
+
## Quick Start
|
|
38
|
+
|
|
39
|
+
### Using OOMOL Studio (Recommended)
|
|
40
|
+
|
|
41
|
+
The easiest way to use EPUB Translator is through OOMOL Studio with a visual interface:
|
|
42
|
+
|
|
43
|
+
[](https://www.youtube.com/watch?v=QsAdiskxfXI)
|
|
44
|
+
|
|
45
|
+
### Using Python API
|
|
46
|
+
|
|
47
|
+
```python
|
|
48
|
+
from pathlib import Path
|
|
49
|
+
from epub_translator import LLM, translate, language
|
|
50
|
+
|
|
51
|
+
# Initialize LLM with your API credentials
|
|
52
|
+
llm = LLM(
|
|
53
|
+
key="your-api-key",
|
|
54
|
+
url="https://api.openai.com/v1",
|
|
55
|
+
model="gpt-4",
|
|
56
|
+
token_encoding="o200k_base",
|
|
57
|
+
)
|
|
58
|
+
|
|
59
|
+
# Translate EPUB file using language constants
|
|
60
|
+
translate(
|
|
61
|
+
llm=llm,
|
|
62
|
+
source_path=Path("source.epub"),
|
|
63
|
+
target_path=Path("translated.epub"),
|
|
64
|
+
target_language=language.ENGLISH,
|
|
65
|
+
)
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
### With Progress Tracking
|
|
69
|
+
|
|
70
|
+
```python
|
|
71
|
+
from tqdm import tqdm
|
|
72
|
+
|
|
73
|
+
with tqdm(total=100, desc="Translating", unit="%") as pbar:
|
|
74
|
+
last_progress = 0.0
|
|
75
|
+
|
|
76
|
+
def on_progress(progress: float):
|
|
77
|
+
global last_progress
|
|
78
|
+
increment = (progress - last_progress) * 100
|
|
79
|
+
pbar.update(increment)
|
|
80
|
+
last_progress = progress
|
|
81
|
+
|
|
82
|
+
translate(
|
|
83
|
+
llm=llm,
|
|
84
|
+
source_path=Path("source.epub"),
|
|
85
|
+
target_path=Path("translated.epub"),
|
|
86
|
+
target_language="English",
|
|
87
|
+
on_progress=on_progress,
|
|
88
|
+
)
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
## API Reference
|
|
92
|
+
|
|
93
|
+
### `LLM` Class
|
|
94
|
+
|
|
95
|
+
Initialize the LLM client for translation:
|
|
96
|
+
|
|
97
|
+
```python
|
|
98
|
+
LLM(
|
|
99
|
+
key: str, # API key
|
|
100
|
+
url: str, # API endpoint URL
|
|
101
|
+
model: str, # Model name (e.g., "gpt-4")
|
|
102
|
+
token_encoding: str, # Token encoding (e.g., "o200k_base")
|
|
103
|
+
cache_path: PathLike | None = None, # Cache directory path
|
|
104
|
+
timeout: float | None = None, # Request timeout in seconds
|
|
105
|
+
top_p: float | tuple[float, float] | None = None,
|
|
106
|
+
temperature: float | tuple[float, float] | None = None,
|
|
107
|
+
retry_times: int = 5, # Number of retries on failure
|
|
108
|
+
retry_interval_seconds: float = 6.0, # Interval between retries
|
|
109
|
+
log_dir_path: PathLike | None = None, # Log directory path
|
|
110
|
+
)
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
### `translate` Function
|
|
114
|
+
|
|
115
|
+
Translate an EPUB file:
|
|
116
|
+
|
|
117
|
+
```python
|
|
118
|
+
translate(
|
|
119
|
+
llm: LLM, # LLM instance
|
|
120
|
+
source_path: Path, # Source EPUB file path
|
|
121
|
+
target_path: Path, # Output EPUB file path
|
|
122
|
+
target_language: str, # Target language (e.g., "English", "Chinese")
|
|
123
|
+
user_prompt: str | None = None, # Custom translation instructions
|
|
124
|
+
max_retries: int = 5, # Maximum retries for failed translations
|
|
125
|
+
max_group_tokens: int = 1200, # Maximum tokens per translation group
|
|
126
|
+
on_progress: Callable[[float], None] | None = None, # Progress callback (0.0-1.0)
|
|
127
|
+
)
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
#### Language Constants
|
|
131
|
+
|
|
132
|
+
EPUB Translator provides predefined language constants for convenience. You can use these constants instead of writing language names as strings:
|
|
133
|
+
|
|
134
|
+
```python
|
|
135
|
+
from epub_translator import language
|
|
136
|
+
|
|
137
|
+
# Usage example:
|
|
138
|
+
translate(
|
|
139
|
+
llm=llm,
|
|
140
|
+
source_path=Path("source.epub"),
|
|
141
|
+
target_path=Path("translated.epub"),
|
|
142
|
+
target_language=language.ENGLISH,
|
|
143
|
+
)
|
|
144
|
+
|
|
145
|
+
# You can also use custom language strings:
|
|
146
|
+
translate(
|
|
147
|
+
llm=llm,
|
|
148
|
+
source_path=Path("source.epub"),
|
|
149
|
+
target_path=Path("translated.epub"),
|
|
150
|
+
target_language="Icelandic", # For languages not in the constants
|
|
151
|
+
)
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
## Configuration Examples
|
|
155
|
+
|
|
156
|
+
### OpenAI
|
|
157
|
+
|
|
158
|
+
```python
|
|
159
|
+
llm = LLM(
|
|
160
|
+
key="sk-...",
|
|
161
|
+
url="https://api.openai.com/v1",
|
|
162
|
+
model="gpt-4",
|
|
163
|
+
token_encoding="o200k_base",
|
|
164
|
+
)
|
|
165
|
+
```
|
|
166
|
+
|
|
167
|
+
### Azure OpenAI
|
|
168
|
+
|
|
169
|
+
```python
|
|
170
|
+
llm = LLM(
|
|
171
|
+
key="your-azure-key",
|
|
172
|
+
url="https://your-resource.openai.azure.com/openai/deployments/your-deployment",
|
|
173
|
+
model="gpt-4",
|
|
174
|
+
token_encoding="o200k_base",
|
|
175
|
+
)
|
|
176
|
+
```
|
|
177
|
+
|
|
178
|
+
### Other OpenAI-Compatible Services
|
|
179
|
+
|
|
180
|
+
Any service with an OpenAI-compatible API can be used:
|
|
181
|
+
|
|
182
|
+
```python
|
|
183
|
+
llm = LLM(
|
|
184
|
+
key="your-api-key",
|
|
185
|
+
url="https://your-service.com/v1",
|
|
186
|
+
model="your-model",
|
|
187
|
+
token_encoding="o200k_base", # Match your model's encoding
|
|
188
|
+
)
|
|
189
|
+
```
|
|
190
|
+
|
|
191
|
+
## Use Cases
|
|
192
|
+
|
|
193
|
+
- **Language Learning**: Read books in their original language with side-by-side translations
|
|
194
|
+
- **Academic Research**: Access foreign literature with bilingual references
|
|
195
|
+
- **Content Localization**: Prepare books for international audiences
|
|
196
|
+
- **Cross-Cultural Reading**: Enjoy literature while understanding cultural nuances
|
|
197
|
+
|
|
198
|
+
## Advanced Features
|
|
199
|
+
|
|
200
|
+
### Custom Translation Prompts
|
|
201
|
+
|
|
202
|
+
Provide specific translation instructions:
|
|
203
|
+
|
|
204
|
+
```python
|
|
205
|
+
translate(
|
|
206
|
+
llm=llm,
|
|
207
|
+
source_path=Path("source.epub"),
|
|
208
|
+
target_path=Path("translated.epub"),
|
|
209
|
+
target_language="English",
|
|
210
|
+
user_prompt="Use formal language and preserve technical terminology",
|
|
211
|
+
)
|
|
212
|
+
```
|
|
213
|
+
|
|
214
|
+
### Caching for Progress Recovery
|
|
215
|
+
|
|
216
|
+
Enable caching to resume translation progress after failures:
|
|
217
|
+
|
|
218
|
+
```python
|
|
219
|
+
llm = LLM(
|
|
220
|
+
key="your-api-key",
|
|
221
|
+
url="https://api.openai.com/v1",
|
|
222
|
+
model="gpt-4",
|
|
223
|
+
token_encoding="o200k_base",
|
|
224
|
+
cache_path="./translation_cache", # Translations are cached here
|
|
225
|
+
)
|
|
226
|
+
```
|
|
227
|
+
|
|
228
|
+
## Related Projects
|
|
229
|
+
|
|
230
|
+
### PDF Craft
|
|
231
|
+
|
|
232
|
+
[PDF Craft](https://github.com/oomol-lab/pdf-craft) converts PDF files into EPUB and other formats, with a focus on scanned books. Combine PDF Craft with EPUB Translator to convert and translate scanned PDF books into bilingual EPUB format.
|
|
233
|
+
|
|
234
|
+
**Workflow**: Scanned PDF → [PDF Craft] → EPUB → [EPUB Translator] → Bilingual EPUB
|
|
235
|
+
|
|
236
|
+
For a complete tutorial, watch: [Convert scanned PDF books to EPUB format and translate them into bilingual books](https://www.bilibili.com/video/BV1tMQZY5EYY/)
|
|
237
|
+
|
|
238
|
+
## Contributing
|
|
239
|
+
|
|
240
|
+
Contributions are welcome! Please feel free to submit a Pull Request.
|
|
241
|
+
|
|
242
|
+
## License
|
|
243
|
+
|
|
244
|
+
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
|
|
245
|
+
|
|
246
|
+
## Support
|
|
247
|
+
|
|
248
|
+
- **Issues**: [GitHub Issues](https://github.com/oomol-lab/epub-translator/issues)
|
|
249
|
+
- **OOMOL Studio**: [Open in OOMOL Studio](https://hub.oomol.com/package/books-translator?open=true)
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
You must fill the translated text into the XML template while preserving ALL tag structure.
|
|
2
|
+
|
|
3
|
+
CRITICAL: Every tag with id="X" in XML template MUST appear in your output.
|
|
4
|
+
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
INPUT FORMAT:
|
|
8
|
+
|
|
9
|
+
You will receive:
|
|
10
|
+
1. Source text (original language)
|
|
11
|
+
2. XML template (with id attributes and structure)
|
|
12
|
+
3. Translated text (target language)
|
|
13
|
+
|
|
14
|
+
Your task: Map the translated text back into the XML structure by comparing it with the source text.
|
|
15
|
+
|
|
16
|
+
---
|
|
17
|
+
|
|
18
|
+
CRITICAL RULE - Tags with NO tail:
|
|
19
|
+
|
|
20
|
+
When a tag has NO text after its closing tag in XML template:
|
|
21
|
+
→ Put ALL remaining text INSIDE that tag
|
|
22
|
+
|
|
23
|
+
EXAMPLE (MOST COMMON ERROR):
|
|
24
|
+
|
|
25
|
+
XML template:
|
|
26
|
+
```xml
|
|
27
|
+
<a id="1">
|
|
28
|
+
<span id="2">4</span>
|
|
29
|
+
The methodology of
|
|
30
|
+
<span id="3">Robotics</span>
|
|
31
|
+
</a>
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
Source: "4 The methodology of Robotics"
|
|
35
|
+
Translated: "4 机器人学的方法论"
|
|
36
|
+
|
|
37
|
+
ANALYSIS:
|
|
38
|
+
- <span id="3"> has NO text after </span> (the </a> comes immediately)
|
|
39
|
+
- So ALL remaining text after id="2" must go into id="3"
|
|
40
|
+
|
|
41
|
+
CORRECT output:
|
|
42
|
+
```xml
|
|
43
|
+
<a id="1">
|
|
44
|
+
<span id="2">4</span>
|
|
45
|
+
|
|
46
|
+
<span id="3">机器人学的方法论</span>
|
|
47
|
+
</a>
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
WRONG outputs:
|
|
51
|
+
❌ <a id="1"><span id="2">4</span> 机器人学的方法论</a>
|
|
52
|
+
(Missing id="3" completely!)
|
|
53
|
+
|
|
54
|
+
❌ <a id="1"><span id="2">4</span> <span id="3">机器人学</span>的方法论</a>
|
|
55
|
+
(Added tail "的方法论" to id="3", but the XML template has no tail after id="3"!)
|
|
56
|
+
|
|
57
|
+
---
|
|
58
|
+
|
|
59
|
+
OUTPUT FORMAT:
|
|
60
|
+
```xml
|
|
61
|
+
<xml>
|
|
62
|
+
... your output ...
|
|
63
|
+
</xml>
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
Begin.
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
# XSLT MathML Library
|
|
2
|
+
|
|
3
|
+
This directory contains XSLT stylesheets from the **XSLT MathML Library 2.1.2**, a set of XSLT stylesheets to transform MathML 2.0 to LaTeX.
|
|
4
|
+
|
|
5
|
+
**Note**: These files are included for reference purposes. Our project uses a custom Python implementation to convert MathML to LaTeX, but we keep these XSLT files as a reference for understanding MathML element mappings and conversion rules.
|
|
6
|
+
|
|
7
|
+
## File Manifest
|
|
8
|
+
|
|
9
|
+
- `mmltex.xsl` - Main stylesheet
|
|
10
|
+
- `tokens.xsl` - Token elements (mi, mn, mo, etc.)
|
|
11
|
+
- `glayout.xsl` - Layout elements (mfrac, msqrt, etc.)
|
|
12
|
+
- `scripts.xsl` - Script elements (msub, msup, etc.)
|
|
13
|
+
- `tables.xsl` - Table elements (mtable, mtr, mtd)
|
|
14
|
+
- `entities.xsl` - Entity definitions
|
|
15
|
+
- `cmarkup.xsl` - Content markup elements
|
|
16
|
+
|
|
17
|
+
## Original Project Information
|
|
18
|
+
|
|
19
|
+
**Original Author**: Vasil Yaroshevich
|
|
20
|
+
|
|
21
|
+
**Original Website**: http://www.raleigh.ru/MathML/mmltex/
|
|
22
|
+
|
|
23
|
+
**Archived Links**:
|
|
24
|
+
- Sourceforge Project: https://sourceforge.net/projects/xsltml/files/xsltml/
|
|
25
|
+
- Archived Documentation: https://web.archive.org/web/20160109063934/http://www.raleigh.ru/MathML/mmltex/index.php
|
|
26
|
+
- Google Translated (English): https://translate.google.com/translate?sl=ru&tl=en&u=https%3A%2F%2Fweb.archive.org%2Fweb%2F20160114170851%2Fhttp%3A%2F%2Fwww.raleigh.ru%2FMathML%2Fmmltex%2Findex.php
|
|
27
|
+
|
|
28
|
+
---
|
|
29
|
+
|
|
30
|
+
## Copyright
|
|
31
|
+
|
|
32
|
+
Copyright (C) 2001-2003 Vasil Yaroshevich
|
|
33
|
+
|
|
34
|
+
Permission is hereby granted, free of charge, to any person
|
|
35
|
+
obtaining a copy of this software and associated documentation
|
|
36
|
+
files (the "Software"), to deal in the Software without
|
|
37
|
+
restriction, including without limitation the rights to use,
|
|
38
|
+
copy, modify, merge, publish, distribute, sublicense, and/or
|
|
39
|
+
sell copies of the Software, and to permit persons to whom the
|
|
40
|
+
Software is furnished to do so, subject to the following
|
|
41
|
+
conditions:
|
|
42
|
+
|
|
43
|
+
The above copyright notice and this permission notice shall be
|
|
44
|
+
included in all copies or substantial portions of the Software.
|
|
45
|
+
|
|
46
|
+
Except as contained in this notice, the names of individuals
|
|
47
|
+
credited with contribution to this software shall not be used in
|
|
48
|
+
advertising or otherwise to promote the sale, use or other
|
|
49
|
+
dealings in this Software without prior written authorization
|
|
50
|
+
from the individuals in question.
|
|
51
|
+
|
|
52
|
+
Any stylesheet derived from this Software that is publically
|
|
53
|
+
distributed will be identified with a different name and the
|
|
54
|
+
version strings in any derived Software will be changed so that
|
|
55
|
+
no possibility of confusion between the derived package and this
|
|
56
|
+
Software will exist.
|
|
57
|
+
|
|
58
|
+
## Warranty
|
|
59
|
+
|
|
60
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
61
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
|
62
|
+
OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
63
|
+
NONINFRINGEMENT. IN NO EVENT SHALL NORMAN WALSH OR ANY OTHER
|
|
64
|
+
CONTRIBUTOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
|
65
|
+
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
66
|
+
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
|
67
|
+
OTHER DEALINGS IN THE SOFTWARE.
|