epub-translator 0.1.3__tar.gz → 0.1.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71)
  1. {epub_translator-0.1.3 → epub_translator-0.1.5}/PKG-INFO +96 -23
  2. {epub_translator-0.1.3 → epub_translator-0.1.5}/README.md +95 -22
  3. epub_translator-0.1.5/epub_translator/__init__.py +12 -0
  4. epub_translator-0.1.5/epub_translator/punctuation.py +34 -0
  5. {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/segment/text_segment.py +2 -67
  6. {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/translator.py +33 -29
  7. {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/xml/__init__.py +1 -0
  8. epub_translator-0.1.5/epub_translator/xml/inline.py +67 -0
  9. epub_translator-0.1.5/epub_translator/xml_translator/__init__.py +3 -0
  10. epub_translator-0.1.5/epub_translator/xml_translator/submitter.py +381 -0
  11. {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/xml_translator/translator.py +31 -12
  12. {epub_translator-0.1.3 → epub_translator-0.1.5}/pyproject.toml +1 -1
  13. epub_translator-0.1.3/epub_translator/__init__.py +0 -5
  14. epub_translator-0.1.3/epub_translator/xml_translator/__init__.py +0 -2
  15. epub_translator-0.1.3/epub_translator/xml_translator/submitter.py +0 -56
  16. {epub_translator-0.1.3 → epub_translator-0.1.5}/LICENSE +0 -0
  17. {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/data/fill.jinja +0 -0
  18. {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/data/mmltex/README.md +0 -0
  19. {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/data/mmltex/cmarkup.xsl +0 -0
  20. {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/data/mmltex/entities.xsl +0 -0
  21. {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/data/mmltex/glayout.xsl +0 -0
  22. {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/data/mmltex/mmltex.xsl +0 -0
  23. {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/data/mmltex/scripts.xsl +0 -0
  24. {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/data/mmltex/tables.xsl +0 -0
  25. {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/data/mmltex/tokens.xsl +0 -0
  26. {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/data/translate.jinja +0 -0
  27. {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/epub/__init__.py +0 -0
  28. {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/epub/common.py +0 -0
  29. {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/epub/math.py +0 -0
  30. {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/epub/metadata.py +0 -0
  31. {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/epub/spines.py +0 -0
  32. {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/epub/toc.py +0 -0
  33. {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/epub/zip.py +0 -0
  34. {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/epub_transcode.py +0 -0
  35. {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/language.py +0 -0
  36. {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/llm/__init__.py +0 -0
  37. {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/llm/context.py +0 -0
  38. {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/llm/core.py +0 -0
  39. {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/llm/error.py +0 -0
  40. {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/llm/executor.py +0 -0
  41. {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/llm/increasable.py +0 -0
  42. {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/llm/types.py +0 -0
  43. {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/segment/__init__.py +0 -0
  44. {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/segment/block_segment.py +0 -0
  45. {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/segment/common.py +0 -0
  46. {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/segment/inline_segment.py +0 -0
  47. {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/segment/utils.py +0 -0
  48. {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/serial/__init__.py +0 -0
  49. {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/serial/chunk.py +0 -0
  50. {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/serial/segment.py +0 -0
  51. {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/serial/splitter.py +0 -0
  52. {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/template.py +0 -0
  53. {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/utils.py +0 -0
  54. {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/xml/const.py +0 -0
  55. {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/xml/deduplication.py +0 -0
  56. {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/xml/firendly/__init__.py +0 -0
  57. {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/xml/firendly/decoder.py +0 -0
  58. {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/xml/firendly/encoder.py +0 -0
  59. {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/xml/firendly/parser.py +0 -0
  60. {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/xml/firendly/tag.py +0 -0
  61. {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/xml/firendly/transform.py +0 -0
  62. {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/xml/self_closing.py +0 -0
  63. {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/xml/utils.py +0 -0
  64. {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/xml/xml.py +0 -0
  65. {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/xml/xml_like.py +0 -0
  66. {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/xml_interrupter.py +0 -0
  67. {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/xml_translator/callbacks.py +0 -0
  68. {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/xml_translator/common.py +0 -0
  69. {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/xml_translator/hill_climbing.py +0 -0
  70. {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/xml_translator/stream_mapper.py +0 -0
  71. {epub_translator-0.1.3 → epub_translator-0.1.5}/epub_translator/xml_translator/validation.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: epub-translator
3
- Version: 0.1.3
3
+ Version: 0.1.5
4
4
  Summary: Translate the epub book using LLM. The translated book will retain the original text and list the translated text side by side with the original text.
5
5
  License: MIT
6
6
  Keywords: epub,llm,translation,translator
@@ -78,8 +78,7 @@ The easiest way to use EPUB Translator is through OOMOL Studio with a visual int
78
78
  ### Using Python API
79
79
 
80
80
  ```python
81
- from pathlib import Path
82
- from epub_translator import LLM, translate, language
81
+ from epub_translator import LLM, translate, language, SubmitKind
83
82
 
84
83
  # Initialize LLM with your API credentials
85
84
  llm = LLM(
@@ -91,9 +90,10 @@ llm = LLM(
91
90
 
92
91
  # Translate EPUB file using language constants
93
92
  translate(
94
- source_path=Path("source.epub"),
95
- target_path=Path("translated.epub"),
93
+ source_path="source.epub",
94
+ target_path="translated.epub",
96
95
  target_language=language.ENGLISH,
96
+ submit=SubmitKind.APPEND_BLOCK,
97
97
  llm=llm,
98
98
  )
99
99
  ```
@@ -113,9 +113,10 @@ with tqdm(total=100, desc="Translating", unit="%") as pbar:
113
113
  last_progress = progress
114
114
 
115
115
  translate(
116
- source_path=Path("source.epub"),
117
- target_path=Path("translated.epub"),
116
+ source_path="source.epub",
117
+ target_path="translated.epub",
118
118
  target_language="English",
119
+ submit=SubmitKind.APPEND_BLOCK,
119
120
  llm=llm,
120
121
  on_progress=on_progress,
121
122
  )
@@ -152,6 +153,7 @@ translate(
152
153
  source_path: PathLike | str, # Source EPUB file path
153
154
  target_path: PathLike | str, # Output EPUB file path
154
155
  target_language: str, # Target language (e.g., "English", "Chinese")
156
+ submit: SubmitKind, # How to insert translations (REPLACE, APPEND_TEXT, or APPEND_BLOCK)
155
157
  user_prompt: str | None = None, # Custom translation instructions
156
158
  max_retries: int = 5, # Maximum retries for failed translations
157
159
  max_group_tokens: int = 1200, # Maximum tokens per translation group
@@ -165,6 +167,49 @@ translate(
165
167
 
166
168
  **Note**: Either `llm` or both `translation_llm` and `fill_llm` must be provided. Using separate LLMs allows for task-specific optimization.
167
169
 
170
+ #### Submit Modes
171
+
172
+ The `submit` parameter controls how translated content is inserted into the document. Use `SubmitKind` enum to specify the insertion mode:
173
+
174
+ ```python
175
+ from epub_translator import SubmitKind
176
+
177
+ # Three available modes:
178
+ # - SubmitKind.REPLACE: Replace original content with translation (single-language output)
179
+ # - SubmitKind.APPEND_TEXT: Append translation as inline text (bilingual output)
180
+ # - SubmitKind.APPEND_BLOCK: Append translation as block elements (bilingual output, recommended)
181
+ ```
182
+
183
+ **Mode Comparison:**
184
+
185
+ - **`SubmitKind.REPLACE`**: Creates a single-language translation by replacing original text with translated content. Useful for creating books in the target language only.
186
+
187
+ - **`SubmitKind.APPEND_TEXT`**: Appends translations as inline text immediately after the original content. Both languages appear in the same paragraph, creating a continuous reading flow.
188
+
189
+ - **`SubmitKind.APPEND_BLOCK`** (Recommended): Appends translations as separate block elements (paragraphs) after the original. This creates clear visual separation between languages, making it ideal for side-by-side bilingual reading.
190
+
191
+ **Example:**
192
+
193
+ ```python
194
+ # For bilingual books (recommended)
195
+ translate(
196
+ source_path="source.epub",
197
+ target_path="translated.epub",
198
+ target_language=language.ENGLISH,
199
+ submit=SubmitKind.APPEND_BLOCK,
200
+ llm=llm,
201
+ )
202
+
203
+ # For single-language translation
204
+ translate(
205
+ source_path="source.epub",
206
+ target_path="translated.epub",
207
+ target_language=language.ENGLISH,
208
+ submit=SubmitKind.REPLACE,
209
+ llm=llm,
210
+ )
211
+ ```
212
+
168
213
  #### Language Constants
169
214
 
170
215
  EPUB Translator provides predefined language constants for convenience. You can use these constants instead of writing language names as strings:
@@ -174,47 +219,73 @@ from epub_translator import language
174
219
 
175
220
  # Usage example:
176
221
  translate(
177
- source_path=Path("source.epub"),
178
- target_path=Path("translated.epub"),
222
+ source_path="source.epub",
223
+ target_path="translated.epub",
179
224
  target_language=language.ENGLISH,
225
+ submit=SubmitKind.APPEND_BLOCK,
180
226
  llm=llm,
181
227
  )
182
228
 
183
229
  # You can also use custom language strings:
184
230
  translate(
185
- source_path=Path("source.epub"),
186
- target_path=Path("translated.epub"),
231
+ source_path="source.epub",
232
+ target_path="translated.epub",
187
233
  target_language="Icelandic", # For languages not in the constants
234
+ submit=SubmitKind.APPEND_BLOCK,
188
235
  llm=llm,
189
236
  )
190
237
  ```
191
238
 
192
239
  ### Error Handling with `on_fill_failed`
193
240
 
194
- Monitor and handle translation errors using the `on_fill_failed` callback:
241
+ Monitor translation errors using the `on_fill_failed` callback. The system automatically retries failed translations up to `max_retries` times (default: 5). Most errors are recovered during retries and don't affect the final output.
195
242
 
196
243
  ```python
197
244
  from epub_translator import FillFailedEvent
198
245
 
199
246
  def handle_fill_error(event: FillFailedEvent):
200
- print(f"Translation error (attempt {event.retried_count}):")
201
- print(f" {event.error_message}")
247
+ # Only log critical errors that will affect the final EPUB
202
248
  if event.over_maximum_retries:
203
- print(" Maximum retries exceeded!")
249
+ print(f"Critical error after {event.retried_count} attempts:")
250
+ print(f" {event.error_message}")
251
+ print(" This error will be present in the final EPUB file!")
204
252
 
205
253
  translate(
206
- source_path=Path("source.epub"),
207
- target_path=Path("translated.epub"),
254
+ source_path="source.epub",
255
+ target_path="translated.epub",
208
256
  target_language=language.ENGLISH,
257
+ submit=SubmitKind.APPEND_BLOCK,
209
258
  llm=llm,
210
259
  on_fill_failed=handle_fill_error,
211
260
  )
212
261
  ```
213
262
 
263
+ **Understanding Error Severity:**
264
+
214
265
  The `FillFailedEvent` contains:
215
266
  - `error_message: str` - Description of the error
216
- - `retried_count: int` - Current retry attempt number
217
- - `over_maximum_retries: bool` - Whether max retries has been exceeded
267
+ - `retried_count: int` - Current retry attempt number (1 to max_retries)
268
+ - `over_maximum_retries: bool` - Whether the error is critical
269
+
270
+ **Error Categories:**
271
+
272
+ - **Recoverable errors** (`over_maximum_retries=False`): Errors during retry attempts. The system will continue retrying and may resolve these automatically. Safe to ignore in most cases.
273
+
274
+ - **Critical errors** (`over_maximum_retries=True`): Errors that persist after all retry attempts. These will appear in the final EPUB file and should be investigated.
275
+
276
+ **Advanced Usage:**
277
+
278
+ For verbose logging during translation debugging:
279
+
280
+ ```python
281
+ def handle_fill_error(event: FillFailedEvent):
282
+ if event.over_maximum_retries:
283
+ # Critical: affects final output
284
+ print(f"❌ CRITICAL: {event.error_message}")
285
+ else:
286
+ # Informational: system is retrying
287
+ print(f"⚠️ Retry {event.retried_count}: {event.error_message}")
288
+ ```
218
289
 
219
290
  ### Dual-LLM Architecture
220
291
 
@@ -239,9 +310,10 @@ fill_llm = LLM(
239
310
  )
240
311
 
241
312
  translate(
242
- source_path=Path("source.epub"),
243
- target_path=Path("translated.epub"),
313
+ source_path="source.epub",
314
+ target_path="translated.epub",
244
315
  target_language=language.ENGLISH,
316
+ submit=SubmitKind.APPEND_BLOCK,
245
317
  translation_llm=translation_llm,
246
318
  fill_llm=fill_llm,
247
319
  )
@@ -299,9 +371,10 @@ Provide specific translation instructions:
299
371
 
300
372
  ```python
301
373
  translate(
302
- source_path=Path("source.epub"),
303
- target_path=Path("translated.epub"),
374
+ source_path="source.epub",
375
+ target_path="translated.epub",
304
376
  target_language="English",
377
+ submit=SubmitKind.APPEND_BLOCK,
305
378
  llm=llm,
306
379
  user_prompt="Use formal language and preserve technical terminology",
307
380
  )
@@ -45,8 +45,7 @@ The easiest way to use EPUB Translator is through OOMOL Studio with a visual int
45
45
  ### Using Python API
46
46
 
47
47
  ```python
48
- from pathlib import Path
49
- from epub_translator import LLM, translate, language
48
+ from epub_translator import LLM, translate, language, SubmitKind
50
49
 
51
50
  # Initialize LLM with your API credentials
52
51
  llm = LLM(
@@ -58,9 +57,10 @@ llm = LLM(
58
57
 
59
58
  # Translate EPUB file using language constants
60
59
  translate(
61
- source_path=Path("source.epub"),
62
- target_path=Path("translated.epub"),
60
+ source_path="source.epub",
61
+ target_path="translated.epub",
63
62
  target_language=language.ENGLISH,
63
+ submit=SubmitKind.APPEND_BLOCK,
64
64
  llm=llm,
65
65
  )
66
66
  ```
@@ -80,9 +80,10 @@ with tqdm(total=100, desc="Translating", unit="%") as pbar:
80
80
  last_progress = progress
81
81
 
82
82
  translate(
83
- source_path=Path("source.epub"),
84
- target_path=Path("translated.epub"),
83
+ source_path="source.epub",
84
+ target_path="translated.epub",
85
85
  target_language="English",
86
+ submit=SubmitKind.APPEND_BLOCK,
86
87
  llm=llm,
87
88
  on_progress=on_progress,
88
89
  )
@@ -119,6 +120,7 @@ translate(
119
120
  source_path: PathLike | str, # Source EPUB file path
120
121
  target_path: PathLike | str, # Output EPUB file path
121
122
  target_language: str, # Target language (e.g., "English", "Chinese")
123
+ submit: SubmitKind, # How to insert translations (REPLACE, APPEND_TEXT, or APPEND_BLOCK)
122
124
  user_prompt: str | None = None, # Custom translation instructions
123
125
  max_retries: int = 5, # Maximum retries for failed translations
124
126
  max_group_tokens: int = 1200, # Maximum tokens per translation group
@@ -132,6 +134,49 @@ translate(
132
134
 
133
135
  **Note**: Either `llm` or both `translation_llm` and `fill_llm` must be provided. Using separate LLMs allows for task-specific optimization.
134
136
 
137
+ #### Submit Modes
138
+
139
+ The `submit` parameter controls how translated content is inserted into the document. Use `SubmitKind` enum to specify the insertion mode:
140
+
141
+ ```python
142
+ from epub_translator import SubmitKind
143
+
144
+ # Three available modes:
145
+ # - SubmitKind.REPLACE: Replace original content with translation (single-language output)
146
+ # - SubmitKind.APPEND_TEXT: Append translation as inline text (bilingual output)
147
+ # - SubmitKind.APPEND_BLOCK: Append translation as block elements (bilingual output, recommended)
148
+ ```
149
+
150
+ **Mode Comparison:**
151
+
152
+ - **`SubmitKind.REPLACE`**: Creates a single-language translation by replacing original text with translated content. Useful for creating books in the target language only.
153
+
154
+ - **`SubmitKind.APPEND_TEXT`**: Appends translations as inline text immediately after the original content. Both languages appear in the same paragraph, creating a continuous reading flow.
155
+
156
+ - **`SubmitKind.APPEND_BLOCK`** (Recommended): Appends translations as separate block elements (paragraphs) after the original. This creates clear visual separation between languages, making it ideal for side-by-side bilingual reading.
157
+
158
+ **Example:**
159
+
160
+ ```python
161
+ # For bilingual books (recommended)
162
+ translate(
163
+ source_path="source.epub",
164
+ target_path="translated.epub",
165
+ target_language=language.ENGLISH,
166
+ submit=SubmitKind.APPEND_BLOCK,
167
+ llm=llm,
168
+ )
169
+
170
+ # For single-language translation
171
+ translate(
172
+ source_path="source.epub",
173
+ target_path="translated.epub",
174
+ target_language=language.ENGLISH,
175
+ submit=SubmitKind.REPLACE,
176
+ llm=llm,
177
+ )
178
+ ```
179
+
135
180
  #### Language Constants
136
181
 
137
182
  EPUB Translator provides predefined language constants for convenience. You can use these constants instead of writing language names as strings:
@@ -141,47 +186,73 @@ from epub_translator import language
141
186
 
142
187
  # Usage example:
143
188
  translate(
144
- source_path=Path("source.epub"),
145
- target_path=Path("translated.epub"),
189
+ source_path="source.epub",
190
+ target_path="translated.epub",
146
191
  target_language=language.ENGLISH,
192
+ submit=SubmitKind.APPEND_BLOCK,
147
193
  llm=llm,
148
194
  )
149
195
 
150
196
  # You can also use custom language strings:
151
197
  translate(
152
- source_path=Path("source.epub"),
153
- target_path=Path("translated.epub"),
198
+ source_path="source.epub",
199
+ target_path="translated.epub",
154
200
  target_language="Icelandic", # For languages not in the constants
201
+ submit=SubmitKind.APPEND_BLOCK,
155
202
  llm=llm,
156
203
  )
157
204
  ```
158
205
 
159
206
  ### Error Handling with `on_fill_failed`
160
207
 
161
- Monitor and handle translation errors using the `on_fill_failed` callback:
208
+ Monitor translation errors using the `on_fill_failed` callback. The system automatically retries failed translations up to `max_retries` times (default: 5). Most errors are recovered during retries and don't affect the final output.
162
209
 
163
210
  ```python
164
211
  from epub_translator import FillFailedEvent
165
212
 
166
213
  def handle_fill_error(event: FillFailedEvent):
167
- print(f"Translation error (attempt {event.retried_count}):")
168
- print(f" {event.error_message}")
214
+ # Only log critical errors that will affect the final EPUB
169
215
  if event.over_maximum_retries:
170
- print(" Maximum retries exceeded!")
216
+ print(f"Critical error after {event.retried_count} attempts:")
217
+ print(f" {event.error_message}")
218
+ print(" This error will be present in the final EPUB file!")
171
219
 
172
220
  translate(
173
- source_path=Path("source.epub"),
174
- target_path=Path("translated.epub"),
221
+ source_path="source.epub",
222
+ target_path="translated.epub",
175
223
  target_language=language.ENGLISH,
224
+ submit=SubmitKind.APPEND_BLOCK,
176
225
  llm=llm,
177
226
  on_fill_failed=handle_fill_error,
178
227
  )
179
228
  ```
180
229
 
230
+ **Understanding Error Severity:**
231
+
181
232
  The `FillFailedEvent` contains:
182
233
  - `error_message: str` - Description of the error
183
- - `retried_count: int` - Current retry attempt number
184
- - `over_maximum_retries: bool` - Whether max retries has been exceeded
234
+ - `retried_count: int` - Current retry attempt number (1 to max_retries)
235
+ - `over_maximum_retries: bool` - Whether the error is critical
236
+
237
+ **Error Categories:**
238
+
239
+ - **Recoverable errors** (`over_maximum_retries=False`): Errors during retry attempts. The system will continue retrying and may resolve these automatically. Safe to ignore in most cases.
240
+
241
+ - **Critical errors** (`over_maximum_retries=True`): Errors that persist after all retry attempts. These will appear in the final EPUB file and should be investigated.
242
+
243
+ **Advanced Usage:**
244
+
245
+ For verbose logging during translation debugging:
246
+
247
+ ```python
248
+ def handle_fill_error(event: FillFailedEvent):
249
+ if event.over_maximum_retries:
250
+ # Critical: affects final output
251
+ print(f"❌ CRITICAL: {event.error_message}")
252
+ else:
253
+ # Informational: system is retrying
254
+ print(f"⚠️ Retry {event.retried_count}: {event.error_message}")
255
+ ```
185
256
 
186
257
  ### Dual-LLM Architecture
187
258
 
@@ -206,9 +277,10 @@ fill_llm = LLM(
206
277
  )
207
278
 
208
279
  translate(
209
- source_path=Path("source.epub"),
210
- target_path=Path("translated.epub"),
280
+ source_path="source.epub",
281
+ target_path="translated.epub",
211
282
  target_language=language.ENGLISH,
283
+ submit=SubmitKind.APPEND_BLOCK,
212
284
  translation_llm=translation_llm,
213
285
  fill_llm=fill_llm,
214
286
  )
@@ -266,9 +338,10 @@ Provide specific translation instructions:
266
338
 
267
339
  ```python
268
340
  translate(
269
- source_path=Path("source.epub"),
270
- target_path=Path("translated.epub"),
341
+ source_path="source.epub",
342
+ target_path="translated.epub",
271
343
  target_language="English",
344
+ submit=SubmitKind.APPEND_BLOCK,
272
345
  llm=llm,
273
346
  user_prompt="Use formal language and preserve technical terminology",
274
347
  )
@@ -0,0 +1,12 @@
1
+ from . import language
2
+ from .llm import LLM
3
+ from .translator import FillFailedEvent, translate
4
+ from .xml_translator import SubmitKind
5
+
6
+ __all__ = [
7
+ "LLM",
8
+ "translate",
9
+ "language",
10
+ "FillFailedEvent",
11
+ "SubmitKind",
12
+ ]
@@ -0,0 +1,34 @@
1
+ from xml.etree.ElementTree import Element
2
+
3
+ from .xml import iter_with_stack
4
+
5
+ _QUOTE_MAPPING = {
6
+ # 法语引号
7
+ "«": "",
8
+ "»": "",
9
+ "‹": "«",
10
+ "›": "»",
11
+ # 中文书书名号
12
+ "《": "",
13
+ "》": "",
14
+ "〈": "《",
15
+ "〉": "》",
16
+ }
17
+
18
+
19
+ def _strip_quotes(text: str):
20
+ for char in text:
21
+ mapped = _QUOTE_MAPPING.get(char, None)
22
+ if mapped is None:
23
+ yield char
24
+ elif mapped:
25
+ yield mapped
26
+
27
+
28
+ def unwrap_french_quotes(element: Element) -> Element:
29
+ for _, child_element in iter_with_stack(element):
30
+ if child_element.text:
31
+ child_element.text = "".join(_strip_quotes(child_element.text))
32
+ if child_element.tail:
33
+ child_element.tail = "".join(_strip_quotes(child_element.tail))
34
+ return element
@@ -4,71 +4,7 @@ from enum import Enum, auto
4
4
  from typing import Self
5
5
  from xml.etree.ElementTree import Element
6
6
 
7
- from ..xml import expand_left_element_texts, expand_right_element_texts, normalize_text_in_element
8
-
9
- # HTML inline-level elements
10
- # Reference: https://developer.mozilla.org/en-US/docs/Web/HTML/Inline_elements
11
- # Reference: https://developer.mozilla.org/en-US/docs/Glossary/Inline-level_content
12
- _HTML_INLINE_TAGS = frozenset(
13
- (
14
- # Inline text semantics
15
- "a",
16
- "abbr",
17
- "b",
18
- "bdi",
19
- "bdo",
20
- "br",
21
- "cite",
22
- "code",
23
- "data",
24
- "dfn",
25
- "em",
26
- "i",
27
- "kbd",
28
- "mark",
29
- "q",
30
- "rp",
31
- "rt",
32
- "ruby",
33
- "s",
34
- "samp",
35
- "small",
36
- "span",
37
- "strong",
38
- "sub",
39
- "sup",
40
- "time",
41
- "u",
42
- "var",
43
- "wbr",
44
- # Image and multimedia
45
- "img",
46
- "svg",
47
- "canvas",
48
- "audio",
49
- "video",
50
- "map",
51
- "area",
52
- # Form elements
53
- "input",
54
- "button",
55
- "select",
56
- "textarea",
57
- "label",
58
- "output",
59
- "progress",
60
- "meter",
61
- # Embedded content
62
- "iframe",
63
- "embed",
64
- "object",
65
- # Other inline elements
66
- "script",
67
- "del",
68
- "ins",
69
- "slot",
70
- )
71
- )
7
+ from ..xml import expand_left_element_texts, expand_right_element_texts, is_inline_tag, normalize_text_in_element
72
8
 
73
9
 
74
10
  class TextPosition(Enum):
@@ -196,8 +132,7 @@ def _search_text_segments(stack: list[Element], element: Element) -> Generator[T
196
132
  def _find_block_depth(parent_stack: list[Element]) -> int:
197
133
  index: int = 0
198
134
  for i in range(len(parent_stack) - 1, -1, -1):
199
- checked_tag = parent_stack[i].tag.lower()
200
- if checked_tag not in _HTML_INLINE_TAGS:
135
+ if not is_inline_tag(parent_stack[i].tag):
201
136
  index = i
202
137
  break
203
138
  return index + 1 # depth is a count not index
@@ -1,4 +1,4 @@
1
- from collections.abc import Callable
1
+ from collections.abc import Callable, Generator
2
2
  from dataclasses import dataclass
3
3
  from enum import Enum, auto
4
4
  from importlib.metadata import version as get_package_version
@@ -15,9 +15,10 @@ from .epub import (
15
15
  )
16
16
  from .epub_transcode import decode_metadata, decode_toc_list, encode_metadata, encode_toc_list
17
17
  from .llm import LLM
18
+ from .punctuation import unwrap_french_quotes
18
19
  from .xml import XMLLikeNode, deduplicate_ids_in_element, find_first
19
20
  from .xml_interrupter import XMLInterrupter
20
- from .xml_translator import FillFailedEvent, XMLTranslator
21
+ from .xml_translator import FillFailedEvent, SubmitKind, TranslationTask, XMLTranslator
21
22
 
22
23
 
23
24
  class _ElementType(Enum):
@@ -36,6 +37,7 @@ def translate(
36
37
  source_path: PathLike | str,
37
38
  target_path: PathLike | str,
38
39
  target_language: str,
40
+ submit: SubmitKind,
39
41
  user_prompt: str | None = None,
40
42
  max_retries: int = 5,
41
43
  max_group_tokens: int = 1200,
@@ -83,33 +85,26 @@ def translate(
83
85
  return
84
86
 
85
87
  interrupter = XMLInterrupter()
86
- element_contexts: dict[int, _ElementContext] = {}
87
-
88
88
  toc_weight = 0.05 if toc_has_items else 0
89
89
  metadata_weight = 0.05 if metadata_has_items else 0
90
90
  chapters_weight = 1.0 - toc_weight - metadata_weight
91
91
  progress_per_chapter = chapters_weight / total_chapters if total_chapters > 0 else 0
92
92
  current_progress = 0.0
93
93
 
94
- for translated_elem in translator.translate_elements(
94
+ for translated_elem, context in translator.translate_elements(
95
95
  interrupt_source_text_segments=interrupter.interrupt_source_text_segments,
96
96
  interrupt_translated_text_segments=interrupter.interrupt_translated_text_segments,
97
97
  interrupt_block_element=interrupter.interrupt_block_element,
98
98
  on_fill_failed=on_fill_failed,
99
- elements=_generate_elements_from_book(
99
+ tasks=_generate_tasks_from_book(
100
100
  zip=zip,
101
101
  toc_list=toc_list,
102
102
  metadata_fields=metadata_fields,
103
- element_contexts=element_contexts,
103
+ submit=submit,
104
104
  ),
105
105
  ):
106
- elem_id = id(translated_elem)
107
- context = element_contexts.pop(elem_id, None)
108
-
109
- if context is None:
110
- continue
111
-
112
106
  if context.element_type == _ElementType.TOC:
107
+ translated_elem = unwrap_french_quotes(translated_elem)
113
108
  decoded_toc = decode_toc_list(translated_elem)
114
109
  write_toc(zip, decoded_toc)
115
110
 
@@ -118,6 +113,7 @@ def translate(
118
113
  on_progress(current_progress)
119
114
 
120
115
  elif context.element_type == _ElementType.METADATA:
116
+ translated_elem = unwrap_french_quotes(translated_elem)
121
117
  decoded_metadata = decode_metadata(translated_elem)
122
118
  write_metadata(zip, decoded_metadata)
123
119
 
@@ -137,23 +133,29 @@ def translate(
137
133
  on_progress(current_progress)
138
134
 
139
135
 
140
- def _generate_elements_from_book(
136
+ def _generate_tasks_from_book(
141
137
  zip: Zip,
142
138
  toc_list: list,
143
139
  metadata_fields: list,
144
- element_contexts: dict[int, _ElementContext],
145
- ):
140
+ submit: SubmitKind,
141
+ ) -> Generator[TranslationTask[_ElementContext], None, None]:
142
+ head_submit = submit
143
+ if head_submit == SubmitKind.APPEND_BLOCK:
144
+ head_submit = SubmitKind.APPEND_TEXT
145
+
146
146
  if toc_list:
147
- toc_elem = encode_toc_list(toc_list)
148
- elem_id = id(toc_elem)
149
- element_contexts[elem_id] = _ElementContext(element_type=_ElementType.TOC)
150
- yield toc_elem
147
+ yield TranslationTask(
148
+ element=encode_toc_list(toc_list),
149
+ action=head_submit,
150
+ payload=_ElementContext(element_type=_ElementType.TOC),
151
+ )
151
152
 
152
153
  if metadata_fields:
153
- metadata_elem = encode_metadata(metadata_fields)
154
- elem_id = id(metadata_elem)
155
- element_contexts[elem_id] = _ElementContext(element_type=_ElementType.METADATA)
156
- yield metadata_elem
154
+ yield TranslationTask(
155
+ element=encode_metadata(metadata_fields),
156
+ action=head_submit,
157
+ payload=_ElementContext(element_type=_ElementType.METADATA),
158
+ )
157
159
 
158
160
  for chapter_path, media_type in search_spine_paths(zip):
159
161
  with zip.read(chapter_path) as chapter_file:
@@ -163,12 +165,14 @@ def _generate_elements_from_book(
163
165
  )
164
166
  body_element = find_first(xml.element, "body")
165
167
  if body_element is not None:
166
- elem_id = id(body_element)
167
- element_contexts[elem_id] = _ElementContext(
168
- element_type=_ElementType.CHAPTER,
169
- chapter_data=(chapter_path, xml),
168
+ yield TranslationTask(
169
+ element=body_element,
170
+ action=submit,
171
+ payload=_ElementContext(
172
+ element_type=_ElementType.CHAPTER,
173
+ chapter_data=(chapter_path, xml),
174
+ ),
170
175
  )
171
- yield body_element
172
176
 
173
177
 
174
178
  def _get_version() -> str:
@@ -1,6 +1,7 @@
1
1
  from .const import *
2
2
  from .deduplication import *
3
3
  from .firendly import *
4
+ from .inline import *
4
5
  from .utils import *
5
6
  from .xml import *
6
7
  from .xml_like import *