epub-translator 0.1.1__py3-none-any.whl → 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. epub_translator/__init__.py +9 -2
  2. epub_translator/data/fill.jinja +143 -38
  3. epub_translator/epub/__init__.py +1 -1
  4. epub_translator/epub/metadata.py +122 -0
  5. epub_translator/epub/spines.py +3 -2
  6. epub_translator/epub/zip.py +11 -9
  7. epub_translator/epub_transcode.py +108 -0
  8. epub_translator/llm/__init__.py +1 -0
  9. epub_translator/llm/context.py +109 -0
  10. epub_translator/llm/core.py +32 -113
  11. epub_translator/llm/executor.py +25 -31
  12. epub_translator/llm/increasable.py +1 -1
  13. epub_translator/llm/types.py +0 -3
  14. epub_translator/punctuation.py +34 -0
  15. epub_translator/segment/__init__.py +26 -0
  16. epub_translator/segment/block_segment.py +124 -0
  17. epub_translator/segment/common.py +29 -0
  18. epub_translator/segment/inline_segment.py +356 -0
  19. epub_translator/{xml_translator → segment}/text_segment.py +7 -72
  20. epub_translator/segment/utils.py +43 -0
  21. epub_translator/translator.py +152 -184
  22. epub_translator/utils.py +33 -0
  23. epub_translator/xml/__init__.py +3 -0
  24. epub_translator/xml/const.py +1 -0
  25. epub_translator/xml/deduplication.py +3 -3
  26. epub_translator/xml/inline.py +67 -0
  27. epub_translator/xml/self_closing.py +182 -0
  28. epub_translator/xml/utils.py +42 -0
  29. epub_translator/xml/xml.py +7 -0
  30. epub_translator/xml/xml_like.py +8 -33
  31. epub_translator/xml_interrupter.py +165 -0
  32. epub_translator/xml_translator/__init__.py +3 -3
  33. epub_translator/xml_translator/callbacks.py +34 -0
  34. epub_translator/xml_translator/{const.py → common.py} +0 -1
  35. epub_translator/xml_translator/hill_climbing.py +104 -0
  36. epub_translator/xml_translator/stream_mapper.py +253 -0
  37. epub_translator/xml_translator/submitter.py +352 -91
  38. epub_translator/xml_translator/translator.py +182 -114
  39. epub_translator/xml_translator/validation.py +458 -0
  40. {epub_translator-0.1.1.dist-info → epub_translator-0.1.4.dist-info}/METADATA +134 -21
  41. epub_translator-0.1.4.dist-info/RECORD +68 -0
  42. epub_translator/epub/placeholder.py +0 -53
  43. epub_translator/iter_sync.py +0 -24
  44. epub_translator/xml_translator/fill.py +0 -128
  45. epub_translator/xml_translator/format.py +0 -282
  46. epub_translator/xml_translator/fragmented.py +0 -125
  47. epub_translator/xml_translator/group.py +0 -183
  48. epub_translator/xml_translator/progressive_locking.py +0 -256
  49. epub_translator/xml_translator/utils.py +0 -29
  50. epub_translator-0.1.1.dist-info/RECORD +0 -58
  51. {epub_translator-0.1.1.dist-info → epub_translator-0.1.4.dist-info}/LICENSE +0 -0
  52. {epub_translator-0.1.1.dist-info → epub_translator-0.1.4.dist-info}/WHEEL +0 -0
@@ -0,0 +1,458 @@
1
+ from collections.abc import Generator, Iterable
2
+ from dataclasses import dataclass
3
+ from typing import Generic, TypeVar, cast
4
+ from xml.etree.ElementTree import Element
5
+
6
+ from tiktoken import Encoding
7
+
8
+ from ..segment import (
9
+ BlockContentError,
10
+ BlockError,
11
+ BlockExpectedIDsError,
12
+ BlockUnexpectedIDError,
13
+ BlockWrongTagError,
14
+ FoundInvalidIDError,
15
+ InlineError,
16
+ InlineExpectedIDsError,
17
+ InlineLostIDError,
18
+ InlineUnexpectedIDError,
19
+ InlineWrongTagCountError,
20
+ )
21
+ from ..utils import ensure_list
22
+ from ..xml import plain_text
23
+
24
# Base of the exponential weighting: an error at level N weighs _LEVEL_WEIGHT ** N
# (see _calculate_error_weight).
_LEVEL_WEIGHT = 3
# Maximum number of tokens kept when quoting an element's text as a hint
# (see _extract_text_hint).
_MAX_TEXT_HINT_TOKENS_COUNT = 6


# Levels assigned to block-level errors by _get_block_error_level.
# Higher levels weigh more and are kept first when an errors group is truncated.
_BLOCK_EXPECTED_IDS_LEVEL = 6
_BLOCK_WRONG_TAG_LEVEL = 5
_BLOCK_FOUND_INVALID_ID_LEVEL = 4
_BLOCK_UNEXPECTED_ID_LEVEL = 3

# Levels assigned to inline-level errors by _get_inline_error_level.
_INLINE_EXPECTED_IDS_LEVEL = 3
_INLINE_LOST_ID_LEVEL = 2
_INLINE_FOUND_INVALID_ID_LEVEL = 1
_INLINE_WRONG_TAG_COUNT_LEVEL = 0
_INLINE_UNEXPECTED_ID_LEVEL = 0

# Type variable for the error payload carried by ErrorItem.
ERROR = TypeVar("ERROR")
# NOTE(review): not referenced in this module; presumably the number of distinct
# levels (0 through 6) — confirm against the importing code.
LEVEL_DEPTH = 7
41
+
42
+
43
@dataclass
class ErrorItem(Generic[ERROR]):
    """A validation error paired with the metadata used to order and rank it."""

    # The wrapped block-level or inline-level error object.
    error: ERROR
    # Position of the originating block-level error in the input sequence.
    index1: int
    # Position of an inline error inside its parent block error; 0 for block-level errors.
    index2: int
    # Level of the error, as returned by the _get_*_error_level helpers.
    level: int
    # Weight derived from the level by _calculate_error_weight.
    weight: int
50
+
51
+
52
@dataclass
class BlockErrorsGroup:
    """All error items attributed to one block element."""

    block_id: int
    block_element: Element
    errors: list[ErrorItem[BlockError | FoundInvalidIDError] | ErrorItem[InlineError | FoundInvalidIDError]]

    @property
    def weight(self) -> int:
        """Return the combined weight of every error item in this group."""
        total = 0
        for item in self.errors:
            total += item.weight
        return total
61
+
62
+
63
@dataclass
class ErrorsGroup:
    """Errors split into those without an owning block and those grouped per block."""

    upper_errors: list[ErrorItem[BlockError | FoundInvalidIDError]]
    block_groups: list[BlockErrorsGroup]

    @property
    def errors_count(self) -> int:
        """Return the number of error items across the upper level and all block groups."""
        grouped = sum(len(group.errors) for group in self.block_groups)
        return len(self.upper_errors) + grouped
74
+
75
+
76
def nest_as_errors_group(errors: Iterable[BlockError | FoundInvalidIDError]) -> ErrorsGroup | None:
    """Convert raw validation errors into a ranked ErrorsGroup.

    Returns None when the resulting group would contain no errors.
    """
    tagged_items = _transform_errors_to_items(errors)
    return _create_errors_group(error_items=tagged_items)
80
+
81
+
82
def truncate_errors_group(errors_group: ErrorsGroup, max_errors: int) -> ErrorsGroup | None:
    """Limit an errors group to at most ``max_errors`` items.

    The group is returned unchanged when it already fits. Otherwise the items
    are ordered by descending level, then by their original indices, and only
    the leading ``max_errors`` items are regrouped. Returns None when no item
    is left after truncation.
    """
    flat_items = [*_flatten_errors_group(errors_group)]
    if len(flat_items) <= max_errors:
        return errors_group

    def priority(pair):
        item = pair[1]
        return (-item.level, item.index1, item.index2)

    kept_items = sorted(flat_items, key=priority)[:max_errors]
    return _create_errors_group(error_items=kept_items)
93
+
94
+
95
def generate_error_message(encoding: Encoding, errors_group: ErrorsGroup, omitted_count: int = 0) -> None | str:
    """Render an errors group as a correction request.

    Args:
        encoding: Token encoding used to shorten text hints in inline errors.
        errors_group: The errors to report.
        omitted_count: Number of errors left out of ``errors_group``; when
            positive, a note about the omitted errors is appended.

    Returns:
        The message text, or None when the group has neither upper-level
        errors nor block groups.

    Raises:
        RuntimeError: If a block group holds an error of an unsupported type.
    """
    # Each section is a list of lines. Sections are joined with exactly one
    # blank line, so block groups are separated from each other and from the
    # upper-level errors without doubled blank lines.
    sections: list[list[str]] = []

    upper_lines = [_format_block_error(item.error) for item in errors_group.upper_errors]
    if upper_lines:
        sections.append(upper_lines)

    for block_group in errors_group.block_groups:
        block_tag = block_group.block_element.tag
        error_count = len(block_group.errors)
        count_suffix = f" ({error_count} error{'s' if error_count != 1 else ''})"
        group_lines = [f"In {block_tag}#{block_group.block_id}:{count_suffix}"]

        for block_error in block_group.errors:
            error = block_error.error
            message: str
            if isinstance(error, BlockError):
                message = _format_block_error(error)
            elif isinstance(error, InlineError):
                message = _format_inline_error(encoding, error, block_group.block_id)
            elif isinstance(error, FoundInvalidIDError):
                # An invalid-ID error inside a block group comes from the
                # block's inline errors, so use the inline formatter instead
                # of failing.
                message = _format_inline_error(encoding, error, block_group.block_id)
            else:
                raise RuntimeError(f"Unsupported error type: {type(error).__name__}")
            group_lines.append(f" - {message}")

        sections.append(group_lines)

    if not sections:
        return None

    header = (
        f"Found {errors_group.errors_count} error(s). Fix them and return "
        "the COMPLETE corrected XML (not just the changed parts):"
    )
    message_lines: list[str] = [header]
    for section in sections:
        message_lines.append("")
        message_lines.extend(section)

    message_lines.append("")
    if omitted_count > 0:
        message_lines.append(
            f"... and {omitted_count} more error(s) omitted. "
            f"Fix the above errors first, then resubmit for remaining issues."
        )
        message_lines.append("")
        message_lines.append("Remember: Return the entire <xml>...</xml> block with all corrections applied.")
    else:
        message_lines.append("Return the entire <xml>...</xml> block with corrections.")

    return "\n".join(message_lines)
144
+
145
+
146
@dataclass
class _Block:
    """Identifies the block element that an error item belongs to."""

    # ID of the block.
    id: int
    # The block's XML element.
    element: Element
150
+
151
+
152
def _transform_errors_to_items(errors: Iterable[BlockError | FoundInvalidIDError]):
    """Yield ``(block, item)`` pairs for every reportable error.

    A BlockContentError is expanded into one item per inline error, each paired
    with the block that contains it. Any other error yields a single item; it
    is paired with a block only for a BlockWrongTagError that carries one, and
    with None otherwise.
    """
    for outer_index, outer_error in enumerate(errors):
        if not isinstance(outer_error, BlockContentError):
            severity = _get_block_error_level(outer_error)
            item: ErrorItem[BlockError | FoundInvalidIDError] = ErrorItem(
                error=outer_error,
                index1=outer_index,
                index2=0,
                level=severity,
                weight=_calculate_error_weight(outer_error, severity),
            )
            owner: _Block | None = None
            if isinstance(outer_error, BlockWrongTagError) and outer_error.block is not None:
                owner = _Block(
                    id=outer_error.block[0],
                    element=outer_error.block[1],
                )
            yield owner, item
            continue

        content_block = _Block(
            id=outer_error.id,
            element=outer_error.element,
        )
        for inner_index, inner_error in enumerate(outer_error.errors):
            severity = _get_inline_error_level(inner_error)
            inline_item = ErrorItem(
                error=inner_error,
                index1=outer_index,
                index2=inner_index,
                level=severity,
                weight=_calculate_error_weight(inner_error, severity),
            )
            yield content_block, inline_item
189
+
190
+
191
def _flatten_errors_group(
    errors_group: ErrorsGroup,
) -> Generator[
    tuple[
        _Block | None,
        ErrorItem[BlockError | FoundInvalidIDError] | ErrorItem[InlineError | FoundInvalidIDError],
    ],
    None,
    None,
]:
    """Yield every item of the group as a ``(block, item)`` pair.

    Upper-level items come first and are paired with None; the items of each
    block group follow, paired with that group's block.
    """
    yield from ((None, item) for item in errors_group.upper_errors)

    for group in errors_group.block_groups:
        owner = _Block(
            id=group.block_id,
            element=group.block_element,
        )
        yield from ((owner, item) for item in group.errors)
211
+
212
+
213
def _create_errors_group(
    error_items: Iterable[
        tuple[
            _Block | None,
            ErrorItem[BlockError | FoundInvalidIDError] | ErrorItem[InlineError | FoundInvalidIDError],
        ]
    ],
) -> ErrorsGroup | None:
    """Assemble ``(block, item)`` pairs into a sorted ErrorsGroup.

    Items without a block become upper-level errors, sorted by descending
    level. The remaining items are grouped by block ID; items inside a group
    and the groups themselves are sorted by descending weight. Returns None
    when no items were supplied.
    """
    top_level: list[ErrorItem[BlockError | FoundInvalidIDError]] = []
    elements_by_id: dict[int, Element] = {}
    items_by_id: dict[
        int, list[ErrorItem[BlockError | FoundInvalidIDError] | ErrorItem[InlineError | FoundInvalidIDError]]
    ] = {}

    for owner, item in error_items:
        if owner is not None:
            ensure_list(items_by_id, owner.id).append(item)
            elements_by_id[owner.id] = owner.element
            continue
        top_level.append(cast(ErrorItem[BlockError | FoundInvalidIDError], item))

    if not (top_level or items_by_id):
        return None

    groups: list[BlockErrorsGroup] = [
        BlockErrorsGroup(
            block_id=block_id,
            block_element=element,
            errors=sorted(items, key=lambda entry: (-entry.weight, entry.index1, entry.index2)),
        )
        for block_id, items in items_by_id.items()
        if (element := elements_by_id.get(block_id)) is not None
    ]
    groups.sort(key=lambda group: -group.weight)

    return ErrorsGroup(
        upper_errors=sorted(top_level, key=lambda entry: (-entry.level, entry.index1, entry.index2)),
        block_groups=groups,
    )
258
+
259
+
260
def _calculate_error_weight(error: BlockError | InlineError | FoundInvalidIDError, level: int) -> int:
    """Return the weight of an error: ``_LEVEL_WEIGHT`` raised to ``level``.

    For BlockExpectedIDsError and InlineExpectedIDsError the weight is further
    multiplied by the number of entries in ``id2element``.
    """
    base_weight = _LEVEL_WEIGHT**level
    if not isinstance(error, (BlockExpectedIDsError, InlineExpectedIDsError)):
        return base_weight
    return base_weight * len(error.id2element)
266
+
267
+
268
def _get_block_error_level(error: BlockError | FoundInvalidIDError) -> int:
    """Return the level of a block-level error, or 0 for an unrecognized type."""
    # Checked in order; the first matching type decides the level.
    levels_by_type = (
        (BlockWrongTagError, _BLOCK_WRONG_TAG_LEVEL),
        (BlockExpectedIDsError, _BLOCK_EXPECTED_IDS_LEVEL),
        (BlockUnexpectedIDError, _BLOCK_UNEXPECTED_ID_LEVEL),
        (FoundInvalidIDError, _BLOCK_FOUND_INVALID_ID_LEVEL),
    )
    for error_type, level in levels_by_type:
        if isinstance(error, error_type):
            return level
    return 0
279
+
280
+
281
def _get_inline_error_level(error: InlineError | FoundInvalidIDError) -> int:
    """Return the level of an inline-level error, or 0 for an unrecognized type."""
    # Checked in order; the first matching type decides the level.
    levels_by_type = (
        (InlineLostIDError, _INLINE_LOST_ID_LEVEL),
        (InlineExpectedIDsError, _INLINE_EXPECTED_IDS_LEVEL),
        (InlineUnexpectedIDError, _INLINE_UNEXPECTED_ID_LEVEL),
        (InlineWrongTagCountError, _INLINE_WRONG_TAG_COUNT_LEVEL),
        (FoundInvalidIDError, _INLINE_FOUND_INVALID_ID_LEVEL),
    )
    for error_type, level in levels_by_type:
        if isinstance(error, error_type):
            return level
    return 0
294
+
295
+
296
def _format_block_error(error: BlockError | FoundInvalidIDError) -> str:
    """Describe a block-level error and how to fix it.

    Args:
        error: The block-level error to describe.

    Returns:
        A message for the given error type, or a generic message when the
        type is not recognized. The message for missing blocks may span
        several lines.
    """
    if isinstance(error, BlockWrongTagError):
        if error.block is None:
            return (
                f"Root tag mismatch: expected `<{error.expected_tag}>`, but found `<{error.instead_tag}>`. "
                f"Fix: Change the root tag to `<{error.expected_tag}>`."
            )
        return (
            f"Wrong tag for block at `{error.instead_tag}#{error.block[0]}`: "
            f'expected `<{error.expected_tag} id="{error.block[0]}">`, '
            f'but found `<{error.instead_tag} id="{error.block[0]}">`. '
            f"Fix: Change the tag to `<{error.expected_tag}>`."
        )

    if isinstance(error, BlockExpectedIDsError):
        # Pair every missing block with a preview of its original text
        # (truncated to the first 30 characters); the preview is empty when
        # the block has no text.
        entries: list[tuple[int, Element, str]] = []
        for element_id, elem in sorted(error.id2element.items()):
            original_text = plain_text(elem).strip()
            text_preview = original_text[:30] + "..." if len(original_text) > 30 else original_text
            entries.append((element_id, elem, text_preview))

        if any(text_preview for _, _, text_preview in entries):
            # List every missing block, including those without text, so that
            # none of them goes unreported.
            context_hints = [
                f' - `<{elem.tag} id="{element_id}">`: "{text_preview}"'
                if text_preview
                else f' - `<{elem.tag} id="{element_id}">` (no text content)'
                for element_id, elem, text_preview in entries
            ]
            return "Missing block elements (find translation and wrap):\n" + "\n".join(context_hints)

        # Fallback when no text hints are available.
        elements_str = ", ".join(f'<{elem.tag} id="{element_id}">' for element_id, elem, _ in entries)
        return f"Missing expected blocks: {elements_str}. Fix: Add these missing blocks with the correct IDs."

    if isinstance(error, BlockUnexpectedIDError):
        selector = f"{error.element.tag}#{error.id}"
        return f"Unexpected block found at `{selector}`. Fix: Remove this unexpected block."

    if isinstance(error, FoundInvalidIDError):
        if error.invalid_id is None:
            example = f"<{error.element.tag}>"
        else:
            example = f'<{error.element.tag} id="{error.invalid_id}">'
        return f"Invalid or missing ID attribute: {example}. Fix: Ensure all blocks have valid numeric IDs."

    return "Unknown block error. Fix: Review the block structure."
342
+
343
+
344
def _format_inline_error(encoding: Encoding, error: InlineError | FoundInvalidIDError, block_id: int) -> str:
    """Describe an inline-level error and how to fix it.

    Args:
        encoding: Token encoding used to shorten quoted text hints.
        error: The inline-level error to describe.
        block_id: ID of the block that contains the error; used in selectors.

    Returns:
        A message for the given error type, or a generic message when the
        type is not recognized. The message for missing inline elements may
        span several lines.
    """
    if isinstance(error, InlineLostIDError):
        selector = _build_inline_selector(encoding, error.stack, block_id, element=error.element)
        return f"Element at `{selector}` is missing an ID attribute. Fix: Add the required ID attribute."

    if isinstance(error, InlineExpectedIDsError):
        # Pair every missing element with a hint taken from its original text;
        # the hint is empty when the element has no text.
        entries: list[tuple[int, Element, str]] = [
            (element_id, elem, _extract_text_hint(encoding, elem))
            for element_id, elem in sorted(error.id2element.items())
        ]

        if any(text_hint for _, _, text_hint in entries):
            # List every missing element, including those without text, so
            # that none of them goes unreported.
            context_hints = [
                f' - `<{elem.tag} id="{element_id}">`: "{text_hint}"'
                if text_hint
                else f' - `<{elem.tag} id="{element_id}">` (no text content)'
                for element_id, elem, text_hint in entries
            ]
            return "Missing inline elements (find translation and wrap):\n" + "\n".join(context_hints)

        # Fallback when no text hints are available.
        elements_str = ", ".join(f'<{elem.tag} id="{element_id}">' for element_id, elem, _ in entries)
        return f"Missing expected inline elements: {elements_str}. Fix: Add these missing inline elements."

    if isinstance(error, InlineUnexpectedIDError):
        selector = f"{error.element.tag}#{error.id}"
        return f"Unexpected inline element at `{selector}`. Fix: Remove this unexpected element."

    if isinstance(error, InlineWrongTagCountError):
        # The tag name is taken from the first found element; it is reported
        # as "unknown" when no element was found.
        tag = error.found_elements[0].tag if error.found_elements else "unknown"
        selector = _build_inline_selector(encoding, error.stack, block_id, tag=tag)
        expected = error.expected_count
        found = len(error.found_elements)

        if expected == 0 and found > 0:
            # Case 1: none expected, but some were found.
            return (
                f"Found unexpected `<{tag}>` elements at `{selector}`. "
                f"There should be none, but {found} were found. "
                f"Fix: Remove all `<{tag}>` elements from this location."
            )
        if expected > 0 and found == 0:
            # Case 2: some expected, but none were found.
            return (
                f"Missing `<{tag}>` elements at `{selector}`. "
                f"Expected {expected}, but none were found. "
                f"Fix: Add {expected} `<{tag}>` element(s) to this location."
            )
        if found > expected:
            # Case 3: too many.
            extra = found - expected
            return (
                f"Too many `<{tag}>` elements at `{selector}`. "
                f"Expected {expected}, but found {found} ({extra} extra). "
                f"Fix: Remove {extra} `<{tag}>` element(s)."
            )
        # Case 4: too few.
        missing = expected - found
        return (
            f"Too few `<{tag}>` elements at `{selector}`. "
            f"Expected {expected}, but only found {found} ({missing} missing). "
            f"Fix: Add {missing} more `<{tag}>` element(s)."
        )

    if isinstance(error, FoundInvalidIDError):
        if error.invalid_id is None:
            example = f"<{error.element.tag}>"
        else:
            example = f'<{error.element.tag} id="{error.invalid_id}">'
        return f"Invalid inline ID: {example}. Fix: Ensure inline elements have valid numeric IDs."

    return "Unknown inline error. Fix: Review the inline structure."
416
+
417
+
418
def _build_inline_selector(
    encoding: Encoding,
    stack: list[Element],
    block_id: int,
    element: Element | None = None,
    tag: str | None = None,
) -> str:
    """Build a selector-like string that locates an inline element.

    When ``element`` has an ``id`` attribute the result is simply ``tag#id``.
    Otherwise the result is a path of the form ``block#id > parent > ... > tag``;
    when ``element`` is given, its tag is used as the final step and a hint
    quoting its text is appended if it has any.
    """
    leaf_tag = tag
    if element is not None:
        own_id = element.get("id")
        if own_id is not None:
            # An ID locates the element directly, so no path is needed.
            return f"{element.tag}#{own_id}"
        leaf_tag = element.tag

    # Path: block#id > parent > ... > tag
    root_tag = stack[0].tag if stack else "unknown"
    segments = [f"{root_tag}#{block_id}", *(ancestor.tag for ancestor in stack[1:])]
    if leaf_tag:
        segments.append(leaf_tag)
    path = " > ".join(segments)

    if element is None:
        return path

    hint = _extract_text_hint(encoding, element)
    return f'{path} (contains text: "{hint}")' if hint else path
449
+
450
+
451
def _extract_text_hint(encoding: Encoding, element: Element) -> str:
    """Return a short excerpt of the element's text for use in error messages.

    Args:
        encoding: Token encoding used to measure and truncate the text.
        element: Element whose plain text is quoted.

    Returns:
        The stripped plain text when it is at most
        ``_MAX_TEXT_HINT_TOKENS_COUNT`` tokens long; otherwise its leading
        tokens decoded back to text and followed by " ...". An empty string
        when the element has no text.
    """
    text = plain_text(element).strip()
    if not text:
        return text

    # Book text is arbitrary and may contain special-token literals, which
    # encode() rejects by default; treat them as ordinary text instead.
    tokens = encoding.encode(text, disallowed_special=())
    if len(tokens) <= _MAX_TEXT_HINT_TOKENS_COUNT:
        return text

    truncated = encoding.decode(tokens[:_MAX_TEXT_HINT_TOKENS_COUNT])
    # Cutting at a token boundary can split a multi-byte character, which
    # decode() renders as U+FFFD; drop such trailing replacement characters.
    truncated = truncated.rstrip("\ufffd").strip()
    return truncated + " ..."
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: epub-translator
3
- Version: 0.1.1
3
+ Version: 0.1.4
4
4
  Summary: Translate the epub book using LLM. The translated book will retain the original text and list the translated text side by side with the original text.
5
5
  License: MIT
6
6
  Keywords: epub,llm,translation,translator
@@ -78,8 +78,7 @@ The easiest way to use EPUB Translator is through OOMOL Studio with a visual int
78
78
  ### Using Python API
79
79
 
80
80
  ```python
81
- from pathlib import Path
82
- from epub_translator import LLM, translate, language
81
+ from epub_translator import LLM, translate, language, SubmitKind
83
82
 
84
83
  # Initialize LLM with your API credentials
85
84
  llm = LLM(
@@ -91,10 +90,11 @@ llm = LLM(
91
90
 
92
91
  # Translate EPUB file using language constants
93
92
  translate(
94
- llm=llm,
95
- source_path=Path("source.epub"),
96
- target_path=Path("translated.epub"),
93
+ source_path="source.epub",
94
+ target_path="translated.epub",
97
95
  target_language=language.ENGLISH,
96
+ submit=SubmitKind.APPEND_BLOCK,
97
+ llm=llm,
98
98
  )
99
99
  ```
100
100
 
@@ -113,10 +113,11 @@ with tqdm(total=100, desc="Translating", unit="%") as pbar:
113
113
  last_progress = progress
114
114
 
115
115
  translate(
116
- llm=llm,
117
- source_path=Path("source.epub"),
118
- target_path=Path("translated.epub"),
116
+ source_path="source.epub",
117
+ target_path="translated.epub",
119
118
  target_language="English",
119
+ submit=SubmitKind.APPEND_BLOCK,
120
+ llm=llm,
120
121
  on_progress=on_progress,
121
122
  )
122
123
  ```
@@ -149,14 +150,63 @@ Translate an EPUB file:
149
150
 
150
151
  ```python
151
152
  translate(
152
- llm: LLM, # LLM instance
153
- source_path: Path, # Source EPUB file path
154
- target_path: Path, # Output EPUB file path
153
+ source_path: PathLike | str, # Source EPUB file path
154
+ target_path: PathLike | str, # Output EPUB file path
155
155
  target_language: str, # Target language (e.g., "English", "Chinese")
156
+ submit: SubmitKind, # How to insert translations (REPLACE, APPEND_TEXT, or APPEND_BLOCK)
156
157
  user_prompt: str | None = None, # Custom translation instructions
157
158
  max_retries: int = 5, # Maximum retries for failed translations
158
159
  max_group_tokens: int = 1200, # Maximum tokens per translation group
160
+ llm: LLM | None = None, # Single LLM instance for both translation and filling
161
+ translation_llm: LLM | None = None, # LLM instance for translation (overrides llm)
162
+ fill_llm: LLM | None = None, # LLM instance for XML filling (overrides llm)
159
163
  on_progress: Callable[[float], None] | None = None, # Progress callback (0.0-1.0)
164
+ on_fill_failed: Callable[[FillFailedEvent], None] | None = None, # Error callback
165
+ )
166
+ ```
167
+
168
+ **Note**: Either `llm` or both `translation_llm` and `fill_llm` must be provided. Using separate LLMs allows for task-specific optimization.
169
+
170
+ #### Submit Modes
171
+
172
+ The `submit` parameter controls how translated content is inserted into the document. Use `SubmitKind` enum to specify the insertion mode:
173
+
174
+ ```python
175
+ from epub_translator import SubmitKind
176
+
177
+ # Three available modes:
178
+ # - SubmitKind.REPLACE: Replace original content with translation (single-language output)
179
+ # - SubmitKind.APPEND_TEXT: Append translation as inline text (bilingual output)
180
+ # - SubmitKind.APPEND_BLOCK: Append translation as block elements (bilingual output, recommended)
181
+ ```
182
+
183
+ **Mode Comparison:**
184
+
185
+ - **`SubmitKind.REPLACE`**: Creates a single-language translation by replacing original text with translated content. Useful for creating books in the target language only.
186
+
187
+ - **`SubmitKind.APPEND_TEXT`**: Appends translations as inline text immediately after the original content. Both languages appear in the same paragraph, creating a continuous reading flow.
188
+
189
+ - **`SubmitKind.APPEND_BLOCK`** (Recommended): Appends translations as separate block elements (paragraphs) after the original. This creates clear visual separation between languages, making it ideal for side-by-side bilingual reading.
190
+
191
+ **Example:**
192
+
193
+ ```python
194
+ # For bilingual books (recommended)
195
+ translate(
196
+ source_path="source.epub",
197
+ target_path="translated.epub",
198
+ target_language=language.ENGLISH,
199
+ submit=SubmitKind.APPEND_BLOCK,
200
+ llm=llm,
201
+ )
202
+
203
+ # For single-language translation
204
+ translate(
205
+ source_path="source.epub",
206
+ target_path="translated.epub",
207
+ target_language=language.ENGLISH,
208
+ submit=SubmitKind.REPLACE,
209
+ llm=llm,
160
210
  )
161
211
  ```
162
212
 
@@ -169,18 +219,80 @@ from epub_translator import language
169
219
 
170
220
  # Usage example:
171
221
  translate(
172
- llm=llm,
173
- source_path=Path("source.epub"),
174
- target_path=Path("translated.epub"),
222
+ source_path="source.epub",
223
+ target_path="translated.epub",
175
224
  target_language=language.ENGLISH,
225
+ submit=SubmitKind.APPEND_BLOCK,
226
+ llm=llm,
176
227
  )
177
228
 
178
229
  # You can also use custom language strings:
179
230
  translate(
180
- llm=llm,
181
- source_path=Path("source.epub"),
182
- target_path=Path("translated.epub"),
231
+ source_path="source.epub",
232
+ target_path="translated.epub",
183
233
  target_language="Icelandic", # For languages not in the constants
234
+ submit=SubmitKind.APPEND_BLOCK,
235
+ llm=llm,
236
+ )
237
+ ```
238
+
239
+ ### Error Handling with `on_fill_failed`
240
+
241
+ Monitor and handle translation errors using the `on_fill_failed` callback:
242
+
243
+ ```python
244
+ from epub_translator import FillFailedEvent
245
+
246
+ def handle_fill_error(event: FillFailedEvent):
247
+ print(f"Translation error (attempt {event.retried_count}):")
248
+ print(f" {event.error_message}")
249
+ if event.over_maximum_retries:
250
+ print(" Maximum retries exceeded!")
251
+
252
+ translate(
253
+ source_path="source.epub",
254
+ target_path="translated.epub",
255
+ target_language=language.ENGLISH,
256
+ submit=SubmitKind.APPEND_BLOCK,
257
+ llm=llm,
258
+ on_fill_failed=handle_fill_error,
259
+ )
260
+ ```
261
+
262
+ The `FillFailedEvent` contains:
263
+ - `error_message: str` - Description of the error
264
+ - `retried_count: int` - Current retry attempt number
265
+ - `over_maximum_retries: bool` - Whether max retries has been exceeded
266
+
267
+ ### Dual-LLM Architecture
268
+
269
+ Use separate LLM instances for translation and XML structure filling with different optimization parameters:
270
+
271
+ ```python
272
+ # Create two LLM instances with different temperatures
273
+ translation_llm = LLM(
274
+ key="your-api-key",
275
+ url="https://api.openai.com/v1",
276
+ model="gpt-4",
277
+ token_encoding="o200k_base",
278
+ temperature=0.8, # Higher temperature for creative translation
279
+ )
280
+
281
+ fill_llm = LLM(
282
+ key="your-api-key",
283
+ url="https://api.openai.com/v1",
284
+ model="gpt-4",
285
+ token_encoding="o200k_base",
286
+ temperature=0.3, # Lower temperature for structure preservation
287
+ )
288
+
289
+ translate(
290
+ source_path="source.epub",
291
+ target_path="translated.epub",
292
+ target_language=language.ENGLISH,
293
+ submit=SubmitKind.APPEND_BLOCK,
294
+ translation_llm=translation_llm,
295
+ fill_llm=fill_llm,
184
296
  )
185
297
  ```
186
298
 
@@ -236,10 +348,11 @@ Provide specific translation instructions:
236
348
 
237
349
  ```python
238
350
  translate(
239
- llm=llm,
240
- source_path=Path("source.epub"),
241
- target_path=Path("translated.epub"),
351
+ source_path="source.epub",
352
+ target_path="translated.epub",
242
353
  target_language="English",
354
+ submit=SubmitKind.APPEND_BLOCK,
355
+ llm=llm,
243
356
  user_prompt="Use formal language and preserve technical terminology",
244
357
  )
245
358
  ```