epub-translator 0.0.6__tar.gz → 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93) hide show
  1. epub_translator-0.1.0/PKG-INFO +283 -0
  2. epub_translator-0.1.0/README.md +249 -0
  3. epub_translator-0.1.0/epub_translator/__init__.py +5 -0
  4. epub_translator-0.1.0/epub_translator/data/fill.jinja +66 -0
  5. epub_translator-0.1.0/epub_translator/data/mmltex/README.md +67 -0
  6. epub_translator-0.1.0/epub_translator/data/mmltex/cmarkup.xsl +1106 -0
  7. epub_translator-0.1.0/epub_translator/data/mmltex/entities.xsl +459 -0
  8. epub_translator-0.1.0/epub_translator/data/mmltex/glayout.xsl +222 -0
  9. epub_translator-0.1.0/epub_translator/data/mmltex/mmltex.xsl +36 -0
  10. epub_translator-0.1.0/epub_translator/data/mmltex/scripts.xsl +375 -0
  11. epub_translator-0.1.0/epub_translator/data/mmltex/tables.xsl +130 -0
  12. epub_translator-0.1.0/epub_translator/data/mmltex/tokens.xsl +328 -0
  13. epub_translator-0.1.0/epub_translator/data/translate.jinja +18 -0
  14. epub_translator-0.1.0/epub_translator/epub/__init__.py +4 -0
  15. epub_translator-0.1.0/epub_translator/epub/common.py +43 -0
  16. epub_translator-0.1.0/epub_translator/epub/math.py +193 -0
  17. epub_translator-0.1.0/epub_translator/epub/placeholder.py +53 -0
  18. epub_translator-0.1.0/epub_translator/epub/spines.py +42 -0
  19. epub_translator-0.1.0/epub_translator/epub/toc.py +505 -0
  20. epub_translator-0.1.0/epub_translator/epub/zip.py +67 -0
  21. epub_translator-0.1.0/epub_translator/iter_sync.py +24 -0
  22. epub_translator-0.1.0/epub_translator/language.py +23 -0
  23. epub_translator-0.1.0/epub_translator/llm/__init__.py +2 -0
  24. epub_translator-0.1.0/epub_translator/llm/core.py +175 -0
  25. epub_translator-0.1.0/epub_translator/llm/error.py +52 -0
  26. epub_translator-0.1.0/epub_translator/llm/executor.py +173 -0
  27. epub_translator-0.1.0/epub_translator/llm/increasable.py +35 -0
  28. epub_translator-0.1.0/epub_translator/llm/types.py +17 -0
  29. epub_translator-0.1.0/epub_translator/serial/__init__.py +2 -0
  30. epub_translator-0.1.0/epub_translator/serial/chunk.py +52 -0
  31. epub_translator-0.1.0/epub_translator/serial/segment.py +17 -0
  32. epub_translator-0.1.0/epub_translator/serial/splitter.py +50 -0
  33. epub_translator-0.1.0/epub_translator/template.py +52 -0
  34. epub_translator-0.1.0/epub_translator/translator.py +211 -0
  35. epub_translator-0.1.0/epub_translator/utils.py +7 -0
  36. epub_translator-0.1.0/epub_translator/xml/__init__.py +4 -0
  37. epub_translator-0.1.0/epub_translator/xml/deduplication.py +38 -0
  38. epub_translator-0.1.0/epub_translator/xml/firendly/__init__.py +2 -0
  39. epub_translator-0.1.0/epub_translator/xml/firendly/decoder.py +75 -0
  40. epub_translator-0.1.0/epub_translator/xml/firendly/encoder.py +84 -0
  41. epub_translator-0.1.0/epub_translator/xml/firendly/parser.py +177 -0
  42. epub_translator-0.1.0/epub_translator/xml/firendly/tag.py +118 -0
  43. epub_translator-0.1.0/epub_translator/xml/firendly/transform.py +36 -0
  44. epub_translator-0.1.0/epub_translator/xml/xml.py +52 -0
  45. epub_translator-0.1.0/epub_translator/xml/xml_like.py +176 -0
  46. epub_translator-0.1.0/epub_translator/xml_translator/__init__.py +3 -0
  47. epub_translator-0.1.0/epub_translator/xml_translator/const.py +2 -0
  48. epub_translator-0.1.0/epub_translator/xml_translator/fill.py +128 -0
  49. epub_translator-0.1.0/epub_translator/xml_translator/format.py +282 -0
  50. epub_translator-0.1.0/epub_translator/xml_translator/fragmented.py +125 -0
  51. epub_translator-0.1.0/epub_translator/xml_translator/group.py +183 -0
  52. epub_translator-0.1.0/epub_translator/xml_translator/progressive_locking.py +256 -0
  53. epub_translator-0.1.0/epub_translator/xml_translator/submitter.py +102 -0
  54. epub_translator-0.1.0/epub_translator/xml_translator/text_segment.py +263 -0
  55. epub_translator-0.1.0/epub_translator/xml_translator/translator.py +178 -0
  56. epub_translator-0.1.0/epub_translator/xml_translator/utils.py +29 -0
  57. epub_translator-0.1.0/pyproject.toml +74 -0
  58. epub_translator-0.0.6/PKG-INFO +0 -170
  59. epub_translator-0.0.6/README.md +0 -145
  60. epub_translator-0.0.6/epub_translator/__init__.py +0 -3
  61. epub_translator-0.0.6/epub_translator/data/format.jinja +0 -33
  62. epub_translator-0.0.6/epub_translator/data/translate.jinja +0 -15
  63. epub_translator-0.0.6/epub_translator/epub/__init__.py +0 -2
  64. epub_translator-0.0.6/epub_translator/epub/content_parser.py +0 -162
  65. epub_translator-0.0.6/epub_translator/epub/html/__init__.py +0 -1
  66. epub_translator-0.0.6/epub_translator/epub/html/dom_operator.py +0 -62
  67. epub_translator-0.0.6/epub_translator/epub/html/empty_tags.py +0 -23
  68. epub_translator-0.0.6/epub_translator/epub/html/file.py +0 -80
  69. epub_translator-0.0.6/epub_translator/epub/html/texts_searcher.py +0 -46
  70. epub_translator-0.0.6/epub_translator/llm/__init__.py +0 -1
  71. epub_translator-0.0.6/epub_translator/llm/error.py +0 -49
  72. epub_translator-0.0.6/epub_translator/llm/executor.py +0 -150
  73. epub_translator-0.0.6/epub_translator/llm/increasable.py +0 -35
  74. epub_translator-0.0.6/epub_translator/llm/node.py +0 -201
  75. epub_translator-0.0.6/epub_translator/template.py +0 -50
  76. epub_translator-0.0.6/epub_translator/translation/__init__.py +0 -2
  77. epub_translator-0.0.6/epub_translator/translation/chunk.py +0 -118
  78. epub_translator-0.0.6/epub_translator/translation/splitter.py +0 -78
  79. epub_translator-0.0.6/epub_translator/translation/store.py +0 -36
  80. epub_translator-0.0.6/epub_translator/translation/translation.py +0 -231
  81. epub_translator-0.0.6/epub_translator/translation/types.py +0 -45
  82. epub_translator-0.0.6/epub_translator/translation/utils.py +0 -11
  83. epub_translator-0.0.6/epub_translator/translator.py +0 -174
  84. epub_translator-0.0.6/epub_translator/xml/__init__.py +0 -3
  85. epub_translator-0.0.6/epub_translator/xml/decoder.py +0 -71
  86. epub_translator-0.0.6/epub_translator/xml/encoder.py +0 -95
  87. epub_translator-0.0.6/epub_translator/xml/parser.py +0 -172
  88. epub_translator-0.0.6/epub_translator/xml/tag.py +0 -93
  89. epub_translator-0.0.6/epub_translator/xml/transform.py +0 -34
  90. epub_translator-0.0.6/epub_translator/xml/utils.py +0 -12
  91. epub_translator-0.0.6/epub_translator/zip_context.py +0 -74
  92. epub_translator-0.0.6/pyproject.toml +0 -38
  93. {epub_translator-0.0.6 → epub_translator-0.1.0}/LICENSE +0 -0
@@ -0,0 +1,283 @@
1
+ Metadata-Version: 2.3
2
+ Name: epub-translator
3
+ Version: 0.1.0
4
+ Summary: Translate EPUB books using an LLM. The translated book retains the original text and places the translation side by side with it.
5
+ License: MIT
6
+ Keywords: epub,llm,translation,translator
7
+ Author: Tao Zeyu
8
+ Author-email: i@taozeyu.com
9
+ Maintainer: Tao Zeyu
10
+ Maintainer-email: i@taozeyu.com
11
+ Requires-Python: >=3.11,<3.14
12
+ Classifier: Development Status :: 4 - Beta
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: Intended Audience :: Education
15
+ Classifier: Intended Audience :: Science/Research
16
+ Classifier: License :: OSI Approved :: MIT License
17
+ Classifier: Operating System :: OS Independent
18
+ Classifier: Programming Language :: Python :: 3
19
+ Classifier: Programming Language :: Python :: 3.11
20
+ Classifier: Programming Language :: Python :: 3.12
21
+ Classifier: Programming Language :: Python :: 3.13
22
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
23
+ Classifier: Topic :: Software Development :: Localization
24
+ Classifier: Topic :: Text Processing :: Markup
25
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
26
+ Requires-Dist: jinja2 (>=3.1.6,<4.0.0)
27
+ Requires-Dist: openai (>=2.14.0,<3.0.0)
28
+ Requires-Dist: resource-segmentation (>=0.0.7,<0.1.0)
29
+ Requires-Dist: tiktoken (>=0.12.0,<1.0.0)
30
+ Project-URL: Homepage, https://hub.oomol.com/package/books-translator
31
+ Project-URL: Repository, https://github.com/oomol-lab/epub-translator
32
+ Description-Content-Type: text/markdown
33
+
34
+ <div align=center>
35
+ <h1>EPUB Translator</h1>
36
+ <p>
37
+ <a href="https://github.com/oomol-lab/epub-translator/actions/workflows/merge-build.yml" target="_blank"><img src="https://img.shields.io/github/actions/workflow/status/oomol-lab/epub-translator/merge-build.yml" alt="ci" /></a>
38
+ <a href="https://pypi.org/project/epub-translator/" target="_blank"><img src="https://img.shields.io/badge/pip_install-epub--translator-blue" alt="pip install epub-translator" /></a>
39
+ <a href="https://pypi.org/project/epub-translator/" target="_blank"><img src="https://img.shields.io/pypi/v/epub-translator.svg" alt="pypi epub-translator" /></a>
40
+ <a href="https://pypi.org/project/epub-translator/" target="_blank"><img src="https://img.shields.io/pypi/pyversions/epub-translator.svg" alt="python versions" /></a>
41
+ <a href="https://github.com/oomol-lab/epub-translator/blob/main/LICENSE" target="_blank"><img src="https://img.shields.io/github/license/oomol-lab/epub-translator" alt="license" /></a>
42
+ </p>
43
+ <p><a href="https://hub.oomol.com/package/books-translator?open=true" target="_blank"><img src="https://static.oomol.com/assets/button.svg" alt="Open in OOMOL Studio" /></a></p>
44
+ <p>English | <a href="./README_zh-CN.md">中文</a></p>
45
+ </div>
46
+
47
+
48
+ Translate EPUB books using Large Language Models while preserving the original text. The translated content is displayed side-by-side with the original, creating bilingual books perfect for language learning and cross-reference reading.
49
+
50
+ ![Translation Effect](./docs/images/translation.png)
51
+
52
+ ## Features
53
+
54
+ - **Bilingual Output**: Preserves original text alongside translations for easy comparison
55
+ - **LLM-Powered**: Leverages large language models for high-quality, context-aware translations
56
+ - **Format Preservation**: Maintains EPUB structure, styles, images, and formatting
57
+ - **Complete Translation**: Translates chapter content, table of contents, and metadata
58
+ - **Progress Tracking**: Monitor translation progress with built-in callbacks
59
+ - **Flexible LLM Support**: Works with any OpenAI-compatible API endpoint
60
+ - **Caching**: Built-in caching for progress recovery when translation fails
61
+
62
+ ## Installation
63
+
64
+ ```bash
65
+ pip install epub-translator
66
+ ```
67
+
68
+ **Requirements**: Python 3.11, 3.12, or 3.13
69
+
70
+ ## Quick Start
71
+
72
+ ### Using OOMOL Studio (Recommended)
73
+
74
+ The easiest way to use EPUB Translator is through OOMOL Studio with a visual interface:
75
+
76
+ [![Watch the Tutorial](./docs/images/link2youtube.png)](https://www.youtube.com/watch?v=QsAdiskxfXI)
77
+
78
+ ### Using Python API
79
+
80
+ ```python
81
+ from pathlib import Path
82
+ from epub_translator import LLM, translate, language
83
+
84
+ # Initialize LLM with your API credentials
85
+ llm = LLM(
86
+ key="your-api-key",
87
+ url="https://api.openai.com/v1",
88
+ model="gpt-4",
89
+ token_encoding="o200k_base",
90
+ )
91
+
92
+ # Translate EPUB file using language constants
93
+ translate(
94
+ llm=llm,
95
+ source_path=Path("source.epub"),
96
+ target_path=Path("translated.epub"),
97
+ target_language=language.ENGLISH,
98
+ )
99
+ ```
100
+
101
+ ### With Progress Tracking
102
+
103
+ ```python
104
+ from tqdm import tqdm
105
+
106
+ with tqdm(total=100, desc="Translating", unit="%") as pbar:
107
+ last_progress = 0.0
108
+
109
+ def on_progress(progress: float):
110
+ global last_progress
111
+ increment = (progress - last_progress) * 100
112
+ pbar.update(increment)
113
+ last_progress = progress
114
+
115
+ translate(
116
+ llm=llm,
117
+ source_path=Path("source.epub"),
118
+ target_path=Path("translated.epub"),
119
+ target_language="English",
120
+ on_progress=on_progress,
121
+ )
122
+ ```
123
+
124
+ ## API Reference
125
+
126
+ ### `LLM` Class
127
+
128
+ Initialize the LLM client for translation:
129
+
130
+ ```python
131
+ LLM(
132
+ key: str, # API key
133
+ url: str, # API endpoint URL
134
+ model: str, # Model name (e.g., "gpt-4")
135
+ token_encoding: str, # Token encoding (e.g., "o200k_base")
136
+ cache_path: PathLike | None = None, # Cache directory path
137
+ timeout: float | None = None, # Request timeout in seconds
138
+ top_p: float | tuple[float, float] | None = None,
139
+ temperature: float | tuple[float, float] | None = None,
140
+ retry_times: int = 5, # Number of retries on failure
141
+ retry_interval_seconds: float = 6.0, # Interval between retries
142
+ log_dir_path: PathLike | None = None, # Log directory path
143
+ )
144
+ ```
145
+
146
+ ### `translate` Function
147
+
148
+ Translate an EPUB file:
149
+
150
+ ```python
151
+ translate(
152
+ llm: LLM, # LLM instance
153
+ source_path: Path, # Source EPUB file path
154
+ target_path: Path, # Output EPUB file path
155
+ target_language: str, # Target language (e.g., "English", "Chinese")
156
+ user_prompt: str | None = None, # Custom translation instructions
157
+ max_retries: int = 5, # Maximum retries for failed translations
158
+ max_group_tokens: int = 1200, # Maximum tokens per translation group
159
+ on_progress: Callable[[float], None] | None = None, # Progress callback (0.0-1.0)
160
+ )
161
+ ```
162
+
163
+ #### Language Constants
164
+
165
+ EPUB Translator provides predefined language constants for convenience. You can use these constants instead of writing language names as strings:
166
+
167
+ ```python
168
+ from epub_translator import language
169
+
170
+ # Usage example:
171
+ translate(
172
+ llm=llm,
173
+ source_path=Path("source.epub"),
174
+ target_path=Path("translated.epub"),
175
+ target_language=language.ENGLISH,
176
+ )
177
+
178
+ # You can also use custom language strings:
179
+ translate(
180
+ llm=llm,
181
+ source_path=Path("source.epub"),
182
+ target_path=Path("translated.epub"),
183
+ target_language="Icelandic", # For languages not in the constants
184
+ )
185
+ ```
186
+
187
+ ## Configuration Examples
188
+
189
+ ### OpenAI
190
+
191
+ ```python
192
+ llm = LLM(
193
+ key="sk-...",
194
+ url="https://api.openai.com/v1",
195
+ model="gpt-4",
196
+ token_encoding="o200k_base",
197
+ )
198
+ ```
199
+
200
+ ### Azure OpenAI
201
+
202
+ ```python
203
+ llm = LLM(
204
+ key="your-azure-key",
205
+ url="https://your-resource.openai.azure.com/openai/deployments/your-deployment",
206
+ model="gpt-4",
207
+ token_encoding="o200k_base",
208
+ )
209
+ ```
210
+
211
+ ### Other OpenAI-Compatible Services
212
+
213
+ Any service with an OpenAI-compatible API can be used:
214
+
215
+ ```python
216
+ llm = LLM(
217
+ key="your-api-key",
218
+ url="https://your-service.com/v1",
219
+ model="your-model",
220
+ token_encoding="o200k_base", # Match your model's encoding
221
+ )
222
+ ```
223
+
224
+ ## Use Cases
225
+
226
+ - **Language Learning**: Read books in their original language with side-by-side translations
227
+ - **Academic Research**: Access foreign literature with bilingual references
228
+ - **Content Localization**: Prepare books for international audiences
229
+ - **Cross-Cultural Reading**: Enjoy literature while understanding cultural nuances
230
+
231
+ ## Advanced Features
232
+
233
+ ### Custom Translation Prompts
234
+
235
+ Provide specific translation instructions:
236
+
237
+ ```python
238
+ translate(
239
+ llm=llm,
240
+ source_path=Path("source.epub"),
241
+ target_path=Path("translated.epub"),
242
+ target_language="English",
243
+ user_prompt="Use formal language and preserve technical terminology",
244
+ )
245
+ ```
246
+
247
+ ### Caching for Progress Recovery
248
+
249
+ Enable caching to resume translation progress after failures:
250
+
251
+ ```python
252
+ llm = LLM(
253
+ key="your-api-key",
254
+ url="https://api.openai.com/v1",
255
+ model="gpt-4",
256
+ token_encoding="o200k_base",
257
+ cache_path="./translation_cache", # Translations are cached here
258
+ )
259
+ ```
260
+
261
+ ## Related Projects
262
+
263
+ ### PDF Craft
264
+
265
+ [PDF Craft](https://github.com/oomol-lab/pdf-craft) converts PDF files into EPUB and other formats, with a focus on scanned books. Combine PDF Craft with EPUB Translator to convert and translate scanned PDF books into bilingual EPUB format.
266
+
267
+ **Workflow**: Scanned PDF → [PDF Craft] → EPUB → [EPUB Translator] → Bilingual EPUB
268
+
269
+ For a complete tutorial, watch: [Convert scanned PDF books to EPUB format and translate them into bilingual books](https://www.bilibili.com/video/BV1tMQZY5EYY/)
270
+
271
+ ## Contributing
272
+
273
+ Contributions are welcome! Please feel free to submit a Pull Request.
274
+
275
+ ## License
276
+
277
+ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
278
+
279
+ ## Support
280
+
281
+ - **Issues**: [GitHub Issues](https://github.com/oomol-lab/epub-translator/issues)
282
+ - **OOMOL Studio**: [Open in OOMOL Studio](https://hub.oomol.com/package/books-translator?open=true)
283
+
@@ -0,0 +1,249 @@
1
+ <div align=center>
2
+ <h1>EPUB Translator</h1>
3
+ <p>
4
+ <a href="https://github.com/oomol-lab/epub-translator/actions/workflows/merge-build.yml" target="_blank"><img src="https://img.shields.io/github/actions/workflow/status/oomol-lab/epub-translator/merge-build.yml" alt="ci" /></a>
5
+ <a href="https://pypi.org/project/epub-translator/" target="_blank"><img src="https://img.shields.io/badge/pip_install-epub--translator-blue" alt="pip install epub-translator" /></a>
6
+ <a href="https://pypi.org/project/epub-translator/" target="_blank"><img src="https://img.shields.io/pypi/v/epub-translator.svg" alt="pypi epub-translator" /></a>
7
+ <a href="https://pypi.org/project/epub-translator/" target="_blank"><img src="https://img.shields.io/pypi/pyversions/epub-translator.svg" alt="python versions" /></a>
8
+ <a href="https://github.com/oomol-lab/epub-translator/blob/main/LICENSE" target="_blank"><img src="https://img.shields.io/github/license/oomol-lab/epub-translator" alt="license" /></a>
9
+ </p>
10
+ <p><a href="https://hub.oomol.com/package/books-translator?open=true" target="_blank"><img src="https://static.oomol.com/assets/button.svg" alt="Open in OOMOL Studio" /></a></p>
11
+ <p>English | <a href="./README_zh-CN.md">中文</a></p>
12
+ </div>
13
+
14
+
15
+ Translate EPUB books using Large Language Models while preserving the original text. The translated content is displayed side-by-side with the original, creating bilingual books perfect for language learning and cross-reference reading.
16
+
17
+ ![Translation Effect](./docs/images/translation.png)
18
+
19
+ ## Features
20
+
21
+ - **Bilingual Output**: Preserves original text alongside translations for easy comparison
22
+ - **LLM-Powered**: Leverages large language models for high-quality, context-aware translations
23
+ - **Format Preservation**: Maintains EPUB structure, styles, images, and formatting
24
+ - **Complete Translation**: Translates chapter content, table of contents, and metadata
25
+ - **Progress Tracking**: Monitor translation progress with built-in callbacks
26
+ - **Flexible LLM Support**: Works with any OpenAI-compatible API endpoint
27
+ - **Caching**: Built-in caching for progress recovery when translation fails
28
+
29
+ ## Installation
30
+
31
+ ```bash
32
+ pip install epub-translator
33
+ ```
34
+
35
+ **Requirements**: Python 3.11, 3.12, or 3.13
36
+
37
+ ## Quick Start
38
+
39
+ ### Using OOMOL Studio (Recommended)
40
+
41
+ The easiest way to use EPUB Translator is through OOMOL Studio with a visual interface:
42
+
43
+ [![Watch the Tutorial](./docs/images/link2youtube.png)](https://www.youtube.com/watch?v=QsAdiskxfXI)
44
+
45
+ ### Using Python API
46
+
47
+ ```python
48
+ from pathlib import Path
49
+ from epub_translator import LLM, translate, language
50
+
51
+ # Initialize LLM with your API credentials
52
+ llm = LLM(
53
+ key="your-api-key",
54
+ url="https://api.openai.com/v1",
55
+ model="gpt-4",
56
+ token_encoding="o200k_base",
57
+ )
58
+
59
+ # Translate EPUB file using language constants
60
+ translate(
61
+ llm=llm,
62
+ source_path=Path("source.epub"),
63
+ target_path=Path("translated.epub"),
64
+ target_language=language.ENGLISH,
65
+ )
66
+ ```
67
+
68
+ ### With Progress Tracking
69
+
70
+ ```python
71
+ from tqdm import tqdm
72
+
73
+ with tqdm(total=100, desc="Translating", unit="%") as pbar:
74
+ last_progress = 0.0
75
+
76
+ def on_progress(progress: float):
77
+ global last_progress
78
+ increment = (progress - last_progress) * 100
79
+ pbar.update(increment)
80
+ last_progress = progress
81
+
82
+ translate(
83
+ llm=llm,
84
+ source_path=Path("source.epub"),
85
+ target_path=Path("translated.epub"),
86
+ target_language="English",
87
+ on_progress=on_progress,
88
+ )
89
+ ```
90
+
91
+ ## API Reference
92
+
93
+ ### `LLM` Class
94
+
95
+ Initialize the LLM client for translation:
96
+
97
+ ```python
98
+ LLM(
99
+ key: str, # API key
100
+ url: str, # API endpoint URL
101
+ model: str, # Model name (e.g., "gpt-4")
102
+ token_encoding: str, # Token encoding (e.g., "o200k_base")
103
+ cache_path: PathLike | None = None, # Cache directory path
104
+ timeout: float | None = None, # Request timeout in seconds
105
+ top_p: float | tuple[float, float] | None = None,
106
+ temperature: float | tuple[float, float] | None = None,
107
+ retry_times: int = 5, # Number of retries on failure
108
+ retry_interval_seconds: float = 6.0, # Interval between retries
109
+ log_dir_path: PathLike | None = None, # Log directory path
110
+ )
111
+ ```
112
+
113
+ ### `translate` Function
114
+
115
+ Translate an EPUB file:
116
+
117
+ ```python
118
+ translate(
119
+ llm: LLM, # LLM instance
120
+ source_path: Path, # Source EPUB file path
121
+ target_path: Path, # Output EPUB file path
122
+ target_language: str, # Target language (e.g., "English", "Chinese")
123
+ user_prompt: str | None = None, # Custom translation instructions
124
+ max_retries: int = 5, # Maximum retries for failed translations
125
+ max_group_tokens: int = 1200, # Maximum tokens per translation group
126
+ on_progress: Callable[[float], None] | None = None, # Progress callback (0.0-1.0)
127
+ )
128
+ ```
129
+
130
+ #### Language Constants
131
+
132
+ EPUB Translator provides predefined language constants for convenience. You can use these constants instead of writing language names as strings:
133
+
134
+ ```python
135
+ from epub_translator import language
136
+
137
+ # Usage example:
138
+ translate(
139
+ llm=llm,
140
+ source_path=Path("source.epub"),
141
+ target_path=Path("translated.epub"),
142
+ target_language=language.ENGLISH,
143
+ )
144
+
145
+ # You can also use custom language strings:
146
+ translate(
147
+ llm=llm,
148
+ source_path=Path("source.epub"),
149
+ target_path=Path("translated.epub"),
150
+ target_language="Icelandic", # For languages not in the constants
151
+ )
152
+ ```
153
+
154
+ ## Configuration Examples
155
+
156
+ ### OpenAI
157
+
158
+ ```python
159
+ llm = LLM(
160
+ key="sk-...",
161
+ url="https://api.openai.com/v1",
162
+ model="gpt-4",
163
+ token_encoding="o200k_base",
164
+ )
165
+ ```
166
+
167
+ ### Azure OpenAI
168
+
169
+ ```python
170
+ llm = LLM(
171
+ key="your-azure-key",
172
+ url="https://your-resource.openai.azure.com/openai/deployments/your-deployment",
173
+ model="gpt-4",
174
+ token_encoding="o200k_base",
175
+ )
176
+ ```
177
+
178
+ ### Other OpenAI-Compatible Services
179
+
180
+ Any service with an OpenAI-compatible API can be used:
181
+
182
+ ```python
183
+ llm = LLM(
184
+ key="your-api-key",
185
+ url="https://your-service.com/v1",
186
+ model="your-model",
187
+ token_encoding="o200k_base", # Match your model's encoding
188
+ )
189
+ ```
190
+
191
+ ## Use Cases
192
+
193
+ - **Language Learning**: Read books in their original language with side-by-side translations
194
+ - **Academic Research**: Access foreign literature with bilingual references
195
+ - **Content Localization**: Prepare books for international audiences
196
+ - **Cross-Cultural Reading**: Enjoy literature while understanding cultural nuances
197
+
198
+ ## Advanced Features
199
+
200
+ ### Custom Translation Prompts
201
+
202
+ Provide specific translation instructions:
203
+
204
+ ```python
205
+ translate(
206
+ llm=llm,
207
+ source_path=Path("source.epub"),
208
+ target_path=Path("translated.epub"),
209
+ target_language="English",
210
+ user_prompt="Use formal language and preserve technical terminology",
211
+ )
212
+ ```
213
+
214
+ ### Caching for Progress Recovery
215
+
216
+ Enable caching to resume translation progress after failures:
217
+
218
+ ```python
219
+ llm = LLM(
220
+ key="your-api-key",
221
+ url="https://api.openai.com/v1",
222
+ model="gpt-4",
223
+ token_encoding="o200k_base",
224
+ cache_path="./translation_cache", # Translations are cached here
225
+ )
226
+ ```
227
+
228
+ ## Related Projects
229
+
230
+ ### PDF Craft
231
+
232
+ [PDF Craft](https://github.com/oomol-lab/pdf-craft) converts PDF files into EPUB and other formats, with a focus on scanned books. Combine PDF Craft with EPUB Translator to convert and translate scanned PDF books into bilingual EPUB format.
233
+
234
+ **Workflow**: Scanned PDF → [PDF Craft] → EPUB → [EPUB Translator] → Bilingual EPUB
235
+
236
+ For a complete tutorial, watch: [Convert scanned PDF books to EPUB format and translate them into bilingual books](https://www.bilibili.com/video/BV1tMQZY5EYY/)
237
+
238
+ ## Contributing
239
+
240
+ Contributions are welcome! Please feel free to submit a Pull Request.
241
+
242
+ ## License
243
+
244
+ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
245
+
246
+ ## Support
247
+
248
+ - **Issues**: [GitHub Issues](https://github.com/oomol-lab/epub-translator/issues)
249
+ - **OOMOL Studio**: [Open in OOMOL Studio](https://hub.oomol.com/package/books-translator?open=true)
@@ -0,0 +1,5 @@
1
+ from . import language
2
+ from .llm import LLM
3
+ from .translator import translate
4
+
5
+ __all__ = ["LLM", "translate", "language"]
@@ -0,0 +1,66 @@
1
+ You must fill translated text into XML template while preserving ALL tag structure.
2
+
3
+ CRITICAL: Every tag with id="X" in XML template MUST appear in your output.
4
+
5
+ ---
6
+
7
+ INPUT FORMAT:
8
+
9
+ You will receive:
10
+ 1. Source text (original language)
11
+ 2. XML template (with id attributes and structure)
12
+ 3. Translated text (target language)
13
+
14
+ Your task: Map the translated text back into the XML structure by comparing with source text.
15
+
16
+ ---
17
+
18
+ CRITICAL RULE - Tags with NO tail:
19
+
20
+ When a tag has NO text after its closing tag in XML template:
21
+ → Put ALL remaining text INSIDE that tag
22
+
23
+ EXAMPLE (MOST COMMON ERROR):
24
+
25
+ XML template:
26
+ ```xml
27
+ <a id="1">
28
+ <span id="2">4</span>
29
+ The methodology of
30
+ <span id="3">Robotics</span>
31
+ </a>
32
+ ```
33
+
34
+ Source: "4 The methodology of Robotics"
35
+ Translated: "4 机器人学的方法论"
36
+
37
+ ANALYSIS:
38
+ - <span id="3"> has NO text after </span> (the </a> comes immediately)
39
+ - So ALL remaining text after id="2" must go into id="3"
40
+
41
+ CORRECT output:
42
+ ```xml
43
+ <a id="1">
44
+ <span id="2">4</span>
45
+
46
+ <span id="3">机器人学的方法论</span>
47
+ </a>
48
+ ```
49
+
50
+ WRONG outputs:
51
+ ❌ <a id="1"><span id="2">4</span> 机器人学的方法论</a>
52
+ (Missing id="3" completely!)
53
+
54
+ ❌ <a id="1"><span id="2">4</span> <span id="3">机器人学</span>的方法论</a>
55
+ (Added tail "的方法论" to id="3", but source has no tail!)
56
+
57
+ ---
58
+
59
+ OUTPUT FORMAT:
60
+ ```xml
61
+ <xml>
62
+ ... your output ...
63
+ </xml>
64
+ ```
65
+
66
+ Begin.
@@ -0,0 +1,67 @@
1
+ # XSLT MathML Library
2
+
3
+ This directory contains XSLT stylesheets from the **XSLT MathML Library 2.1.2**, a set of XSLT stylesheets to transform MathML 2.0 to LaTeX.
4
+
5
+ **Note**: These files are included for reference purposes. Our project uses a custom Python implementation to convert MathML to LaTeX, but we keep these XSLT files as a reference for understanding MathML element mappings and conversion rules.
6
+
7
+ ## File Manifest
8
+
9
+ - `mmltex.xsl` - Main stylesheet
10
+ - `tokens.xsl` - Token elements (mi, mn, mo, etc.)
11
+ - `glayout.xsl` - Layout elements (mfrac, msqrt, etc.)
12
+ - `scripts.xsl` - Script elements (msub, msup, etc.)
13
+ - `tables.xsl` - Table elements (mtable, mtr, mtd)
14
+ - `entities.xsl` - Entity definitions
15
+ - `cmarkup.xsl` - Content markup elements
16
+
17
+ ## Original Project Information
18
+
19
+ **Original Author**: Vasil Yaroshevich
20
+
21
+ **Original Website**: http://www.raleigh.ru/MathML/mmltex/
22
+
23
+ **Archived Links**:
24
+ - SourceForge Project: https://sourceforge.net/projects/xsltml/files/xsltml/
25
+ - Archived Documentation: https://web.archive.org/web/20160109063934/http://www.raleigh.ru/MathML/mmltex/index.php
26
+ - Google Translated (English): https://translate.google.com/translate?sl=ru&tl=en&u=https%3A%2F%2Fweb.archive.org%2Fweb%2F20160114170851%2Fhttp%3A%2F%2Fwww.raleigh.ru%2FMathML%2Fmmltex%2Findex.php
27
+
28
+ ---
29
+
30
+ ## Copyright
31
+
32
+ Copyright (C) 2001-2003 Vasil Yaroshevich
33
+
34
+ Permission is hereby granted, free of charge, to any person
35
+ obtaining a copy of this software and associated documentation
36
+ files (the "Software"), to deal in the Software without
37
+ restriction, including without limitation the rights to use,
38
+ copy, modify, merge, publish, distribute, sublicense, and/or
39
+ sell copies of the Software, and to permit persons to whom the
40
+ Software is furnished to do so, subject to the following
41
+ conditions:
42
+
43
+ The above copyright notice and this permission notice shall be
44
+ included in all copies or substantial portions of the Software.
45
+
46
+ Except as contained in this notice, the names of individuals
47
+ credited with contribution to this software shall not be used in
48
+ advertising or otherwise to promote the sale, use or other
49
+ dealings in this Software without prior written authorization
50
+ from the individuals in question.
51
+
52
+ Any stylesheet derived from this Software that is publically
53
+ distributed will be identified with a different name and the
54
+ version strings in any derived Software will be changed so that
55
+ no possibility of confusion between the derived package and this
56
+ Software will exist.
57
+
58
+ ## Warranty
59
+
60
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
61
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
62
+ OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
63
+ NONINFRINGEMENT. IN NO EVENT SHALL NORMAN WALSH OR ANY OTHER
64
+ CONTRIBUTOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
65
+ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
66
+ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
67
+ OTHER DEALINGS IN THE SOFTWARE.