epub-translator 0.1.3__tar.gz → 0.1.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {epub_translator-0.1.3 → epub_translator-0.1.5}/PKG-INFO +96 -23
- {epub_translator-0.1.3 → epub_translator-0.1.5}/README.md +95 -22
- epub_translator-0.1.5/epub_translator/__init__.py +12 -0
- epub_translator-0.1.5/epub_translator/punctuation.py +34 -0
- {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/segment/text_segment.py +2 -67
- {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/translator.py +33 -29
- {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/xml/__init__.py +1 -0
- epub_translator-0.1.5/epub_translator/xml/inline.py +67 -0
- epub_translator-0.1.5/epub_translator/xml_translator/__init__.py +3 -0
- epub_translator-0.1.5/epub_translator/xml_translator/submitter.py +381 -0
- {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/xml_translator/translator.py +31 -12
- {epub_translator-0.1.3 → epub_translator-0.1.5}/pyproject.toml +1 -1
- epub_translator-0.1.3/epub_translator/__init__.py +0 -5
- epub_translator-0.1.3/epub_translator/xml_translator/__init__.py +0 -2
- epub_translator-0.1.3/epub_translator/xml_translator/submitter.py +0 -56
- {epub_translator-0.1.3 → epub_translator-0.1.5}/LICENSE +0 -0
- {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/data/fill.jinja +0 -0
- {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/data/mmltex/README.md +0 -0
- {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/data/mmltex/cmarkup.xsl +0 -0
- {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/data/mmltex/entities.xsl +0 -0
- {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/data/mmltex/glayout.xsl +0 -0
- {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/data/mmltex/mmltex.xsl +0 -0
- {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/data/mmltex/scripts.xsl +0 -0
- {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/data/mmltex/tables.xsl +0 -0
- {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/data/mmltex/tokens.xsl +0 -0
- {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/data/translate.jinja +0 -0
- {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/epub/__init__.py +0 -0
- {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/epub/common.py +0 -0
- {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/epub/math.py +0 -0
- {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/epub/metadata.py +0 -0
- {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/epub/spines.py +0 -0
- {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/epub/toc.py +0 -0
- {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/epub/zip.py +0 -0
- {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/epub_transcode.py +0 -0
- {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/language.py +0 -0
- {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/llm/__init__.py +0 -0
- {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/llm/context.py +0 -0
- {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/llm/core.py +0 -0
- {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/llm/error.py +0 -0
- {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/llm/executor.py +0 -0
- {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/llm/increasable.py +0 -0
- {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/llm/types.py +0 -0
- {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/segment/__init__.py +0 -0
- {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/segment/block_segment.py +0 -0
- {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/segment/common.py +0 -0
- {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/segment/inline_segment.py +0 -0
- {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/segment/utils.py +0 -0
- {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/serial/__init__.py +0 -0
- {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/serial/chunk.py +0 -0
- {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/serial/segment.py +0 -0
- {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/serial/splitter.py +0 -0
- {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/template.py +0 -0
- {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/utils.py +0 -0
- {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/xml/const.py +0 -0
- {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/xml/deduplication.py +0 -0
- {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/xml/firendly/__init__.py +0 -0
- {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/xml/firendly/decoder.py +0 -0
- {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/xml/firendly/encoder.py +0 -0
- {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/xml/firendly/parser.py +0 -0
- {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/xml/firendly/tag.py +0 -0
- {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/xml/firendly/transform.py +0 -0
- {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/xml/self_closing.py +0 -0
- {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/xml/utils.py +0 -0
- {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/xml/xml.py +0 -0
- {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/xml/xml_like.py +0 -0
- {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/xml_interrupter.py +0 -0
- {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/xml_translator/callbacks.py +0 -0
- {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/xml_translator/common.py +0 -0
- {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/xml_translator/hill_climbing.py +0 -0
- {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/xml_translator/stream_mapper.py +0 -0
- {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/xml_translator/validation.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: epub-translator
|
|
3
|
-
Version: 0.1.3
|
|
3
|
+
Version: 0.1.5
|
|
4
4
|
Summary: Translate the epub book using LLM. The translated book will retain the original text and list the translated text side by side with the original text.
|
|
5
5
|
License: MIT
|
|
6
6
|
Keywords: epub,llm,translation,translator
|
|
@@ -78,8 +78,7 @@ The easiest way to use EPUB Translator is through OOMOL Studio with a visual int
|
|
|
78
78
|
### Using Python API
|
|
79
79
|
|
|
80
80
|
```python
|
|
81
|
-
from
|
|
82
|
-
from epub_translator import LLM, translate, language
|
|
81
|
+
from epub_translator import LLM, translate, language, SubmitKind
|
|
83
82
|
|
|
84
83
|
# Initialize LLM with your API credentials
|
|
85
84
|
llm = LLM(
|
|
@@ -91,9 +90,10 @@ llm = LLM(
|
|
|
91
90
|
|
|
92
91
|
# Translate EPUB file using language constants
|
|
93
92
|
translate(
|
|
94
|
-
source_path=
|
|
95
|
-
target_path=
|
|
93
|
+
source_path="source.epub",
|
|
94
|
+
target_path="translated.epub",
|
|
96
95
|
target_language=language.ENGLISH,
|
|
96
|
+
submit=SubmitKind.APPEND_BLOCK,
|
|
97
97
|
llm=llm,
|
|
98
98
|
)
|
|
99
99
|
```
|
|
@@ -113,9 +113,10 @@ with tqdm(total=100, desc="Translating", unit="%") as pbar:
|
|
|
113
113
|
last_progress = progress
|
|
114
114
|
|
|
115
115
|
translate(
|
|
116
|
-
source_path=
|
|
117
|
-
target_path=
|
|
116
|
+
source_path="source.epub",
|
|
117
|
+
target_path="translated.epub",
|
|
118
118
|
target_language="English",
|
|
119
|
+
submit=SubmitKind.APPEND_BLOCK,
|
|
119
120
|
llm=llm,
|
|
120
121
|
on_progress=on_progress,
|
|
121
122
|
)
|
|
@@ -152,6 +153,7 @@ translate(
|
|
|
152
153
|
source_path: PathLike | str, # Source EPUB file path
|
|
153
154
|
target_path: PathLike | str, # Output EPUB file path
|
|
154
155
|
target_language: str, # Target language (e.g., "English", "Chinese")
|
|
156
|
+
submit: SubmitKind, # How to insert translations (REPLACE, APPEND_TEXT, or APPEND_BLOCK)
|
|
155
157
|
user_prompt: str | None = None, # Custom translation instructions
|
|
156
158
|
max_retries: int = 5, # Maximum retries for failed translations
|
|
157
159
|
max_group_tokens: int = 1200, # Maximum tokens per translation group
|
|
@@ -165,6 +167,49 @@ translate(
|
|
|
165
167
|
|
|
166
168
|
**Note**: Either `llm` or both `translation_llm` and `fill_llm` must be provided. Using separate LLMs allows for task-specific optimization.
|
|
167
169
|
|
|
170
|
+
#### Submit Modes
|
|
171
|
+
|
|
172
|
+
The `submit` parameter controls how translated content is inserted into the document. Use the `SubmitKind` enum to specify the insertion mode:
|
|
173
|
+
|
|
174
|
+
```python
|
|
175
|
+
from epub_translator import SubmitKind
|
|
176
|
+
|
|
177
|
+
# Three available modes:
|
|
178
|
+
# - SubmitKind.REPLACE: Replace original content with translation (single-language output)
|
|
179
|
+
# - SubmitKind.APPEND_TEXT: Append translation as inline text (bilingual output)
|
|
180
|
+
# - SubmitKind.APPEND_BLOCK: Append translation as block elements (bilingual output, recommended)
|
|
181
|
+
```
|
|
182
|
+
|
|
183
|
+
**Mode Comparison:**
|
|
184
|
+
|
|
185
|
+
- **`SubmitKind.REPLACE`**: Creates a single-language translation by replacing original text with translated content. Useful for creating books in the target language only.
|
|
186
|
+
|
|
187
|
+
- **`SubmitKind.APPEND_TEXT`**: Appends translations as inline text immediately after the original content. Both languages appear in the same paragraph, creating a continuous reading flow.
|
|
188
|
+
|
|
189
|
+
- **`SubmitKind.APPEND_BLOCK`** (Recommended): Appends translations as separate block elements (paragraphs) after the original. This creates clear visual separation between languages, making it ideal for side-by-side bilingual reading.
|
|
190
|
+
|
|
191
|
+
**Example:**
|
|
192
|
+
|
|
193
|
+
```python
|
|
194
|
+
# For bilingual books (recommended)
|
|
195
|
+
translate(
|
|
196
|
+
source_path="source.epub",
|
|
197
|
+
target_path="translated.epub",
|
|
198
|
+
target_language=language.ENGLISH,
|
|
199
|
+
submit=SubmitKind.APPEND_BLOCK,
|
|
200
|
+
llm=llm,
|
|
201
|
+
)
|
|
202
|
+
|
|
203
|
+
# For single-language translation
|
|
204
|
+
translate(
|
|
205
|
+
source_path="source.epub",
|
|
206
|
+
target_path="translated.epub",
|
|
207
|
+
target_language=language.ENGLISH,
|
|
208
|
+
submit=SubmitKind.REPLACE,
|
|
209
|
+
llm=llm,
|
|
210
|
+
)
|
|
211
|
+
```
|
|
212
|
+
|
|
168
213
|
#### Language Constants
|
|
169
214
|
|
|
170
215
|
EPUB Translator provides predefined language constants for convenience. You can use these constants instead of writing language names as strings:
|
|
@@ -174,47 +219,73 @@ from epub_translator import language
|
|
|
174
219
|
|
|
175
220
|
# Usage example:
|
|
176
221
|
translate(
|
|
177
|
-
source_path=
|
|
178
|
-
target_path=
|
|
222
|
+
source_path="source.epub",
|
|
223
|
+
target_path="translated.epub",
|
|
179
224
|
target_language=language.ENGLISH,
|
|
225
|
+
submit=SubmitKind.APPEND_BLOCK,
|
|
180
226
|
llm=llm,
|
|
181
227
|
)
|
|
182
228
|
|
|
183
229
|
# You can also use custom language strings:
|
|
184
230
|
translate(
|
|
185
|
-
source_path=
|
|
186
|
-
target_path=
|
|
231
|
+
source_path="source.epub",
|
|
232
|
+
target_path="translated.epub",
|
|
187
233
|
target_language="Icelandic", # For languages not in the constants
|
|
234
|
+
submit=SubmitKind.APPEND_BLOCK,
|
|
188
235
|
llm=llm,
|
|
189
236
|
)
|
|
190
237
|
```
|
|
191
238
|
|
|
192
239
|
### Error Handling with `on_fill_failed`
|
|
193
240
|
|
|
194
|
-
Monitor
|
|
241
|
+
Monitor translation errors using the `on_fill_failed` callback. The system automatically retries failed translations up to `max_retries` times (default: 5). Most errors are recovered during retries and don't affect the final output.
|
|
195
242
|
|
|
196
243
|
```python
|
|
197
244
|
from epub_translator import FillFailedEvent
|
|
198
245
|
|
|
199
246
|
def handle_fill_error(event: FillFailedEvent):
|
|
200
|
-
|
|
201
|
-
print(f" {event.error_message}")
|
|
247
|
+
# Only log critical errors that will affect the final EPUB
|
|
202
248
|
if event.over_maximum_retries:
|
|
203
|
-
print("
|
|
249
|
+
print(f"Critical error after {event.retried_count} attempts:")
|
|
250
|
+
print(f" {event.error_message}")
|
|
251
|
+
print(" This error will be present in the final EPUB file!")
|
|
204
252
|
|
|
205
253
|
translate(
|
|
206
|
-
source_path=
|
|
207
|
-
target_path=
|
|
254
|
+
source_path="source.epub",
|
|
255
|
+
target_path="translated.epub",
|
|
208
256
|
target_language=language.ENGLISH,
|
|
257
|
+
submit=SubmitKind.APPEND_BLOCK,
|
|
209
258
|
llm=llm,
|
|
210
259
|
on_fill_failed=handle_fill_error,
|
|
211
260
|
)
|
|
212
261
|
```
|
|
213
262
|
|
|
263
|
+
**Understanding Error Severity:**
|
|
264
|
+
|
|
214
265
|
The `FillFailedEvent` contains:
|
|
215
266
|
- `error_message: str` - Description of the error
|
|
216
|
-
- `retried_count: int` - Current retry attempt number
|
|
217
|
-
- `over_maximum_retries: bool` - Whether
|
|
267
|
+
- `retried_count: int` - Current retry attempt number (1 to max_retries)
|
|
268
|
+
- `over_maximum_retries: bool` - Whether the error is critical
|
|
269
|
+
|
|
270
|
+
**Error Categories:**
|
|
271
|
+
|
|
272
|
+
- **Recoverable errors** (`over_maximum_retries=False`): Errors during retry attempts. The system will continue retrying and may resolve these automatically. Safe to ignore in most cases.
|
|
273
|
+
|
|
274
|
+
- **Critical errors** (`over_maximum_retries=True`): Errors that persist after all retry attempts. These will appear in the final EPUB file and should be investigated.
|
|
275
|
+
|
|
276
|
+
**Advanced Usage:**
|
|
277
|
+
|
|
278
|
+
For verbose logging during translation debugging:
|
|
279
|
+
|
|
280
|
+
```python
|
|
281
|
+
def handle_fill_error(event: FillFailedEvent):
|
|
282
|
+
if event.over_maximum_retries:
|
|
283
|
+
# Critical: affects final output
|
|
284
|
+
print(f"❌ CRITICAL: {event.error_message}")
|
|
285
|
+
else:
|
|
286
|
+
# Informational: system is retrying
|
|
287
|
+
print(f"⚠️ Retry {event.retried_count}: {event.error_message}")
|
|
288
|
+
```
|
|
218
289
|
|
|
219
290
|
### Dual-LLM Architecture
|
|
220
291
|
|
|
@@ -239,9 +310,10 @@ fill_llm = LLM(
|
|
|
239
310
|
)
|
|
240
311
|
|
|
241
312
|
translate(
|
|
242
|
-
source_path=
|
|
243
|
-
target_path=
|
|
313
|
+
source_path="source.epub",
|
|
314
|
+
target_path="translated.epub",
|
|
244
315
|
target_language=language.ENGLISH,
|
|
316
|
+
submit=SubmitKind.APPEND_BLOCK,
|
|
245
317
|
translation_llm=translation_llm,
|
|
246
318
|
fill_llm=fill_llm,
|
|
247
319
|
)
|
|
@@ -299,9 +371,10 @@ Provide specific translation instructions:
|
|
|
299
371
|
|
|
300
372
|
```python
|
|
301
373
|
translate(
|
|
302
|
-
source_path=
|
|
303
|
-
target_path=
|
|
374
|
+
source_path="source.epub",
|
|
375
|
+
target_path="translated.epub",
|
|
304
376
|
target_language="English",
|
|
377
|
+
submit=SubmitKind.APPEND_BLOCK,
|
|
305
378
|
llm=llm,
|
|
306
379
|
user_prompt="Use formal language and preserve technical terminology",
|
|
307
380
|
)
|
|
@@ -45,8 +45,7 @@ The easiest way to use EPUB Translator is through OOMOL Studio with a visual int
|
|
|
45
45
|
### Using Python API
|
|
46
46
|
|
|
47
47
|
```python
|
|
48
|
-
from
|
|
49
|
-
from epub_translator import LLM, translate, language
|
|
48
|
+
from epub_translator import LLM, translate, language, SubmitKind
|
|
50
49
|
|
|
51
50
|
# Initialize LLM with your API credentials
|
|
52
51
|
llm = LLM(
|
|
@@ -58,9 +57,10 @@ llm = LLM(
|
|
|
58
57
|
|
|
59
58
|
# Translate EPUB file using language constants
|
|
60
59
|
translate(
|
|
61
|
-
source_path=
|
|
62
|
-
target_path=
|
|
60
|
+
source_path="source.epub",
|
|
61
|
+
target_path="translated.epub",
|
|
63
62
|
target_language=language.ENGLISH,
|
|
63
|
+
submit=SubmitKind.APPEND_BLOCK,
|
|
64
64
|
llm=llm,
|
|
65
65
|
)
|
|
66
66
|
```
|
|
@@ -80,9 +80,10 @@ with tqdm(total=100, desc="Translating", unit="%") as pbar:
|
|
|
80
80
|
last_progress = progress
|
|
81
81
|
|
|
82
82
|
translate(
|
|
83
|
-
source_path=
|
|
84
|
-
target_path=
|
|
83
|
+
source_path="source.epub",
|
|
84
|
+
target_path="translated.epub",
|
|
85
85
|
target_language="English",
|
|
86
|
+
submit=SubmitKind.APPEND_BLOCK,
|
|
86
87
|
llm=llm,
|
|
87
88
|
on_progress=on_progress,
|
|
88
89
|
)
|
|
@@ -119,6 +120,7 @@ translate(
|
|
|
119
120
|
source_path: PathLike | str, # Source EPUB file path
|
|
120
121
|
target_path: PathLike | str, # Output EPUB file path
|
|
121
122
|
target_language: str, # Target language (e.g., "English", "Chinese")
|
|
123
|
+
submit: SubmitKind, # How to insert translations (REPLACE, APPEND_TEXT, or APPEND_BLOCK)
|
|
122
124
|
user_prompt: str | None = None, # Custom translation instructions
|
|
123
125
|
max_retries: int = 5, # Maximum retries for failed translations
|
|
124
126
|
max_group_tokens: int = 1200, # Maximum tokens per translation group
|
|
@@ -132,6 +134,49 @@ translate(
|
|
|
132
134
|
|
|
133
135
|
**Note**: Either `llm` or both `translation_llm` and `fill_llm` must be provided. Using separate LLMs allows for task-specific optimization.
|
|
134
136
|
|
|
137
|
+
#### Submit Modes
|
|
138
|
+
|
|
139
|
+
The `submit` parameter controls how translated content is inserted into the document. Use the `SubmitKind` enum to specify the insertion mode:
|
|
140
|
+
|
|
141
|
+
```python
|
|
142
|
+
from epub_translator import SubmitKind
|
|
143
|
+
|
|
144
|
+
# Three available modes:
|
|
145
|
+
# - SubmitKind.REPLACE: Replace original content with translation (single-language output)
|
|
146
|
+
# - SubmitKind.APPEND_TEXT: Append translation as inline text (bilingual output)
|
|
147
|
+
# - SubmitKind.APPEND_BLOCK: Append translation as block elements (bilingual output, recommended)
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
**Mode Comparison:**
|
|
151
|
+
|
|
152
|
+
- **`SubmitKind.REPLACE`**: Creates a single-language translation by replacing original text with translated content. Useful for creating books in the target language only.
|
|
153
|
+
|
|
154
|
+
- **`SubmitKind.APPEND_TEXT`**: Appends translations as inline text immediately after the original content. Both languages appear in the same paragraph, creating a continuous reading flow.
|
|
155
|
+
|
|
156
|
+
- **`SubmitKind.APPEND_BLOCK`** (Recommended): Appends translations as separate block elements (paragraphs) after the original. This creates clear visual separation between languages, making it ideal for side-by-side bilingual reading.
|
|
157
|
+
|
|
158
|
+
**Example:**
|
|
159
|
+
|
|
160
|
+
```python
|
|
161
|
+
# For bilingual books (recommended)
|
|
162
|
+
translate(
|
|
163
|
+
source_path="source.epub",
|
|
164
|
+
target_path="translated.epub",
|
|
165
|
+
target_language=language.ENGLISH,
|
|
166
|
+
submit=SubmitKind.APPEND_BLOCK,
|
|
167
|
+
llm=llm,
|
|
168
|
+
)
|
|
169
|
+
|
|
170
|
+
# For single-language translation
|
|
171
|
+
translate(
|
|
172
|
+
source_path="source.epub",
|
|
173
|
+
target_path="translated.epub",
|
|
174
|
+
target_language=language.ENGLISH,
|
|
175
|
+
submit=SubmitKind.REPLACE,
|
|
176
|
+
llm=llm,
|
|
177
|
+
)
|
|
178
|
+
```
|
|
179
|
+
|
|
135
180
|
#### Language Constants
|
|
136
181
|
|
|
137
182
|
EPUB Translator provides predefined language constants for convenience. You can use these constants instead of writing language names as strings:
|
|
@@ -141,47 +186,73 @@ from epub_translator import language
|
|
|
141
186
|
|
|
142
187
|
# Usage example:
|
|
143
188
|
translate(
|
|
144
|
-
source_path=
|
|
145
|
-
target_path=
|
|
189
|
+
source_path="source.epub",
|
|
190
|
+
target_path="translated.epub",
|
|
146
191
|
target_language=language.ENGLISH,
|
|
192
|
+
submit=SubmitKind.APPEND_BLOCK,
|
|
147
193
|
llm=llm,
|
|
148
194
|
)
|
|
149
195
|
|
|
150
196
|
# You can also use custom language strings:
|
|
151
197
|
translate(
|
|
152
|
-
source_path=
|
|
153
|
-
target_path=
|
|
198
|
+
source_path="source.epub",
|
|
199
|
+
target_path="translated.epub",
|
|
154
200
|
target_language="Icelandic", # For languages not in the constants
|
|
201
|
+
submit=SubmitKind.APPEND_BLOCK,
|
|
155
202
|
llm=llm,
|
|
156
203
|
)
|
|
157
204
|
```
|
|
158
205
|
|
|
159
206
|
### Error Handling with `on_fill_failed`
|
|
160
207
|
|
|
161
|
-
Monitor
|
|
208
|
+
Monitor translation errors using the `on_fill_failed` callback. The system automatically retries failed translations up to `max_retries` times (default: 5). Most errors are recovered during retries and don't affect the final output.
|
|
162
209
|
|
|
163
210
|
```python
|
|
164
211
|
from epub_translator import FillFailedEvent
|
|
165
212
|
|
|
166
213
|
def handle_fill_error(event: FillFailedEvent):
|
|
167
|
-
|
|
168
|
-
print(f" {event.error_message}")
|
|
214
|
+
# Only log critical errors that will affect the final EPUB
|
|
169
215
|
if event.over_maximum_retries:
|
|
170
|
-
print("
|
|
216
|
+
print(f"Critical error after {event.retried_count} attempts:")
|
|
217
|
+
print(f" {event.error_message}")
|
|
218
|
+
print(" This error will be present in the final EPUB file!")
|
|
171
219
|
|
|
172
220
|
translate(
|
|
173
|
-
source_path=
|
|
174
|
-
target_path=
|
|
221
|
+
source_path="source.epub",
|
|
222
|
+
target_path="translated.epub",
|
|
175
223
|
target_language=language.ENGLISH,
|
|
224
|
+
submit=SubmitKind.APPEND_BLOCK,
|
|
176
225
|
llm=llm,
|
|
177
226
|
on_fill_failed=handle_fill_error,
|
|
178
227
|
)
|
|
179
228
|
```
|
|
180
229
|
|
|
230
|
+
**Understanding Error Severity:**
|
|
231
|
+
|
|
181
232
|
The `FillFailedEvent` contains:
|
|
182
233
|
- `error_message: str` - Description of the error
|
|
183
|
-
- `retried_count: int` - Current retry attempt number
|
|
184
|
-
- `over_maximum_retries: bool` - Whether
|
|
234
|
+
- `retried_count: int` - Current retry attempt number (1 to max_retries)
|
|
235
|
+
- `over_maximum_retries: bool` - Whether the error is critical
|
|
236
|
+
|
|
237
|
+
**Error Categories:**
|
|
238
|
+
|
|
239
|
+
- **Recoverable errors** (`over_maximum_retries=False`): Errors during retry attempts. The system will continue retrying and may resolve these automatically. Safe to ignore in most cases.
|
|
240
|
+
|
|
241
|
+
- **Critical errors** (`over_maximum_retries=True`): Errors that persist after all retry attempts. These will appear in the final EPUB file and should be investigated.
|
|
242
|
+
|
|
243
|
+
**Advanced Usage:**
|
|
244
|
+
|
|
245
|
+
For verbose logging during translation debugging:
|
|
246
|
+
|
|
247
|
+
```python
|
|
248
|
+
def handle_fill_error(event: FillFailedEvent):
|
|
249
|
+
if event.over_maximum_retries:
|
|
250
|
+
# Critical: affects final output
|
|
251
|
+
print(f"❌ CRITICAL: {event.error_message}")
|
|
252
|
+
else:
|
|
253
|
+
# Informational: system is retrying
|
|
254
|
+
print(f"⚠️ Retry {event.retried_count}: {event.error_message}")
|
|
255
|
+
```
|
|
185
256
|
|
|
186
257
|
### Dual-LLM Architecture
|
|
187
258
|
|
|
@@ -206,9 +277,10 @@ fill_llm = LLM(
|
|
|
206
277
|
)
|
|
207
278
|
|
|
208
279
|
translate(
|
|
209
|
-
source_path=
|
|
210
|
-
target_path=
|
|
280
|
+
source_path="source.epub",
|
|
281
|
+
target_path="translated.epub",
|
|
211
282
|
target_language=language.ENGLISH,
|
|
283
|
+
submit=SubmitKind.APPEND_BLOCK,
|
|
212
284
|
translation_llm=translation_llm,
|
|
213
285
|
fill_llm=fill_llm,
|
|
214
286
|
)
|
|
@@ -266,9 +338,10 @@ Provide specific translation instructions:
|
|
|
266
338
|
|
|
267
339
|
```python
|
|
268
340
|
translate(
|
|
269
|
-
source_path=
|
|
270
|
-
target_path=
|
|
341
|
+
source_path="source.epub",
|
|
342
|
+
target_path="translated.epub",
|
|
271
343
|
target_language="English",
|
|
344
|
+
submit=SubmitKind.APPEND_BLOCK,
|
|
272
345
|
llm=llm,
|
|
273
346
|
user_prompt="Use formal language and preserve technical terminology",
|
|
274
347
|
)
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
from xml.etree.ElementTree import Element
|
|
2
|
+
|
|
3
|
+
from .xml import iter_with_stack
|
|
4
|
+
|
|
5
|
+
_QUOTE_MAPPING = {
|
|
6
|
+
# 法语引号
|
|
7
|
+
"«": "",
|
|
8
|
+
"»": "",
|
|
9
|
+
"‹": "«",
|
|
10
|
+
"›": "»",
|
|
11
|
+
# 中文书书名号
|
|
12
|
+
"《": "",
|
|
13
|
+
"》": "",
|
|
14
|
+
"〈": "《",
|
|
15
|
+
"〉": "》",
|
|
16
|
+
}
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def _strip_quotes(text: str):
|
|
20
|
+
for char in text:
|
|
21
|
+
mapped = _QUOTE_MAPPING.get(char, None)
|
|
22
|
+
if mapped is None:
|
|
23
|
+
yield char
|
|
24
|
+
elif mapped:
|
|
25
|
+
yield mapped
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def unwrap_french_quotes(element: Element) -> Element:
|
|
29
|
+
for _, child_element in iter_with_stack(element):
|
|
30
|
+
if child_element.text:
|
|
31
|
+
child_element.text = "".join(_strip_quotes(child_element.text))
|
|
32
|
+
if child_element.tail:
|
|
33
|
+
child_element.tail = "".join(_strip_quotes(child_element.tail))
|
|
34
|
+
return element
|
|
@@ -4,71 +4,7 @@ from enum import Enum, auto
|
|
|
4
4
|
from typing import Self
|
|
5
5
|
from xml.etree.ElementTree import Element
|
|
6
6
|
|
|
7
|
-
from ..xml import expand_left_element_texts, expand_right_element_texts, normalize_text_in_element
|
|
8
|
-
|
|
9
|
-
# HTML inline-level elements
|
|
10
|
-
# Reference: https://developer.mozilla.org/en-US/docs/Web/HTML/Inline_elements
|
|
11
|
-
# Reference: https://developer.mozilla.org/en-US/docs/Glossary/Inline-level_content
|
|
12
|
-
_HTML_INLINE_TAGS = frozenset(
|
|
13
|
-
(
|
|
14
|
-
# Inline text semantics
|
|
15
|
-
"a",
|
|
16
|
-
"abbr",
|
|
17
|
-
"b",
|
|
18
|
-
"bdi",
|
|
19
|
-
"bdo",
|
|
20
|
-
"br",
|
|
21
|
-
"cite",
|
|
22
|
-
"code",
|
|
23
|
-
"data",
|
|
24
|
-
"dfn",
|
|
25
|
-
"em",
|
|
26
|
-
"i",
|
|
27
|
-
"kbd",
|
|
28
|
-
"mark",
|
|
29
|
-
"q",
|
|
30
|
-
"rp",
|
|
31
|
-
"rt",
|
|
32
|
-
"ruby",
|
|
33
|
-
"s",
|
|
34
|
-
"samp",
|
|
35
|
-
"small",
|
|
36
|
-
"span",
|
|
37
|
-
"strong",
|
|
38
|
-
"sub",
|
|
39
|
-
"sup",
|
|
40
|
-
"time",
|
|
41
|
-
"u",
|
|
42
|
-
"var",
|
|
43
|
-
"wbr",
|
|
44
|
-
# Image and multimedia
|
|
45
|
-
"img",
|
|
46
|
-
"svg",
|
|
47
|
-
"canvas",
|
|
48
|
-
"audio",
|
|
49
|
-
"video",
|
|
50
|
-
"map",
|
|
51
|
-
"area",
|
|
52
|
-
# Form elements
|
|
53
|
-
"input",
|
|
54
|
-
"button",
|
|
55
|
-
"select",
|
|
56
|
-
"textarea",
|
|
57
|
-
"label",
|
|
58
|
-
"output",
|
|
59
|
-
"progress",
|
|
60
|
-
"meter",
|
|
61
|
-
# Embedded content
|
|
62
|
-
"iframe",
|
|
63
|
-
"embed",
|
|
64
|
-
"object",
|
|
65
|
-
# Other inline elements
|
|
66
|
-
"script",
|
|
67
|
-
"del",
|
|
68
|
-
"ins",
|
|
69
|
-
"slot",
|
|
70
|
-
)
|
|
71
|
-
)
|
|
7
|
+
from ..xml import expand_left_element_texts, expand_right_element_texts, is_inline_tag, normalize_text_in_element
|
|
72
8
|
|
|
73
9
|
|
|
74
10
|
class TextPosition(Enum):
|
|
@@ -196,8 +132,7 @@ def _search_text_segments(stack: list[Element], element: Element) -> Generator[T
|
|
|
196
132
|
def _find_block_depth(parent_stack: list[Element]) -> int:
|
|
197
133
|
index: int = 0
|
|
198
134
|
for i in range(len(parent_stack) - 1, -1, -1):
|
|
199
|
-
|
|
200
|
-
if checked_tag not in _HTML_INLINE_TAGS:
|
|
135
|
+
if not is_inline_tag(parent_stack[i].tag):
|
|
201
136
|
index = i
|
|
202
137
|
break
|
|
203
138
|
return index + 1 # depth is a count not index
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from collections.abc import Callable
|
|
1
|
+
from collections.abc import Callable, Generator
|
|
2
2
|
from dataclasses import dataclass
|
|
3
3
|
from enum import Enum, auto
|
|
4
4
|
from importlib.metadata import version as get_package_version
|
|
@@ -15,9 +15,10 @@ from .epub import (
|
|
|
15
15
|
)
|
|
16
16
|
from .epub_transcode import decode_metadata, decode_toc_list, encode_metadata, encode_toc_list
|
|
17
17
|
from .llm import LLM
|
|
18
|
+
from .punctuation import unwrap_french_quotes
|
|
18
19
|
from .xml import XMLLikeNode, deduplicate_ids_in_element, find_first
|
|
19
20
|
from .xml_interrupter import XMLInterrupter
|
|
20
|
-
from .xml_translator import FillFailedEvent, XMLTranslator
|
|
21
|
+
from .xml_translator import FillFailedEvent, SubmitKind, TranslationTask, XMLTranslator
|
|
21
22
|
|
|
22
23
|
|
|
23
24
|
class _ElementType(Enum):
|
|
@@ -36,6 +37,7 @@ def translate(
|
|
|
36
37
|
source_path: PathLike | str,
|
|
37
38
|
target_path: PathLike | str,
|
|
38
39
|
target_language: str,
|
|
40
|
+
submit: SubmitKind,
|
|
39
41
|
user_prompt: str | None = None,
|
|
40
42
|
max_retries: int = 5,
|
|
41
43
|
max_group_tokens: int = 1200,
|
|
@@ -83,33 +85,26 @@ def translate(
|
|
|
83
85
|
return
|
|
84
86
|
|
|
85
87
|
interrupter = XMLInterrupter()
|
|
86
|
-
element_contexts: dict[int, _ElementContext] = {}
|
|
87
|
-
|
|
88
88
|
toc_weight = 0.05 if toc_has_items else 0
|
|
89
89
|
metadata_weight = 0.05 if metadata_has_items else 0
|
|
90
90
|
chapters_weight = 1.0 - toc_weight - metadata_weight
|
|
91
91
|
progress_per_chapter = chapters_weight / total_chapters if total_chapters > 0 else 0
|
|
92
92
|
current_progress = 0.0
|
|
93
93
|
|
|
94
|
-
for translated_elem in translator.translate_elements(
|
|
94
|
+
for translated_elem, context in translator.translate_elements(
|
|
95
95
|
interrupt_source_text_segments=interrupter.interrupt_source_text_segments,
|
|
96
96
|
interrupt_translated_text_segments=interrupter.interrupt_translated_text_segments,
|
|
97
97
|
interrupt_block_element=interrupter.interrupt_block_element,
|
|
98
98
|
on_fill_failed=on_fill_failed,
|
|
99
|
-
|
|
99
|
+
tasks=_generate_tasks_from_book(
|
|
100
100
|
zip=zip,
|
|
101
101
|
toc_list=toc_list,
|
|
102
102
|
metadata_fields=metadata_fields,
|
|
103
|
-
|
|
103
|
+
submit=submit,
|
|
104
104
|
),
|
|
105
105
|
):
|
|
106
|
-
elem_id = id(translated_elem)
|
|
107
|
-
context = element_contexts.pop(elem_id, None)
|
|
108
|
-
|
|
109
|
-
if context is None:
|
|
110
|
-
continue
|
|
111
|
-
|
|
112
106
|
if context.element_type == _ElementType.TOC:
|
|
107
|
+
translated_elem = unwrap_french_quotes(translated_elem)
|
|
113
108
|
decoded_toc = decode_toc_list(translated_elem)
|
|
114
109
|
write_toc(zip, decoded_toc)
|
|
115
110
|
|
|
@@ -118,6 +113,7 @@ def translate(
|
|
|
118
113
|
on_progress(current_progress)
|
|
119
114
|
|
|
120
115
|
elif context.element_type == _ElementType.METADATA:
|
|
116
|
+
translated_elem = unwrap_french_quotes(translated_elem)
|
|
121
117
|
decoded_metadata = decode_metadata(translated_elem)
|
|
122
118
|
write_metadata(zip, decoded_metadata)
|
|
123
119
|
|
|
@@ -137,23 +133,29 @@ def translate(
|
|
|
137
133
|
on_progress(current_progress)
|
|
138
134
|
|
|
139
135
|
|
|
140
|
-
def _generate_elements_from_book(
|
|
136
|
+
def _generate_tasks_from_book(
|
|
141
137
|
zip: Zip,
|
|
142
138
|
toc_list: list,
|
|
143
139
|
metadata_fields: list,
|
|
144
|
-
|
|
145
|
-
):
|
|
140
|
+
submit: SubmitKind,
|
|
141
|
+
) -> Generator[TranslationTask[_ElementContext], None, None]:
|
|
142
|
+
head_submit = submit
|
|
143
|
+
if head_submit == SubmitKind.APPEND_BLOCK:
|
|
144
|
+
head_submit = SubmitKind.APPEND_TEXT
|
|
145
|
+
|
|
146
146
|
if toc_list:
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
147
|
+
yield TranslationTask(
|
|
148
|
+
element=encode_toc_list(toc_list),
|
|
149
|
+
action=head_submit,
|
|
150
|
+
payload=_ElementContext(element_type=_ElementType.TOC),
|
|
151
|
+
)
|
|
151
152
|
|
|
152
153
|
if metadata_fields:
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
154
|
+
yield TranslationTask(
|
|
155
|
+
element=encode_metadata(metadata_fields),
|
|
156
|
+
action=head_submit,
|
|
157
|
+
payload=_ElementContext(element_type=_ElementType.METADATA),
|
|
158
|
+
)
|
|
157
159
|
|
|
158
160
|
for chapter_path, media_type in search_spine_paths(zip):
|
|
159
161
|
with zip.read(chapter_path) as chapter_file:
|
|
@@ -163,12 +165,14 @@ def _generate_elements_from_book(
|
|
|
163
165
|
)
|
|
164
166
|
body_element = find_first(xml.element, "body")
|
|
165
167
|
if body_element is not None:
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
168
|
+
yield TranslationTask(
|
|
169
|
+
element=body_element,
|
|
170
|
+
action=submit,
|
|
171
|
+
payload=_ElementContext(
|
|
172
|
+
element_type=_ElementType.CHAPTER,
|
|
173
|
+
chapter_data=(chapter_path, xml),
|
|
174
|
+
),
|
|
170
175
|
)
|
|
171
|
-
yield body_element
|
|
172
176
|
|
|
173
177
|
|
|
174
178
|
def _get_version() -> str:
|