epub-translator 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. epub_translator/__init__.py +2 -2
  2. epub_translator/data/fill.jinja +143 -38
  3. epub_translator/epub/__init__.py +1 -1
  4. epub_translator/epub/metadata.py +122 -0
  5. epub_translator/epub/spines.py +3 -2
  6. epub_translator/epub/zip.py +11 -9
  7. epub_translator/epub_transcode.py +108 -0
  8. epub_translator/llm/__init__.py +1 -0
  9. epub_translator/llm/context.py +109 -0
  10. epub_translator/llm/core.py +32 -113
  11. epub_translator/llm/executor.py +25 -31
  12. epub_translator/llm/increasable.py +1 -1
  13. epub_translator/llm/types.py +0 -3
  14. epub_translator/segment/__init__.py +26 -0
  15. epub_translator/segment/block_segment.py +124 -0
  16. epub_translator/segment/common.py +29 -0
  17. epub_translator/segment/inline_segment.py +356 -0
  18. epub_translator/{xml_translator → segment}/text_segment.py +8 -8
  19. epub_translator/segment/utils.py +43 -0
  20. epub_translator/translator.py +147 -183
  21. epub_translator/utils.py +33 -0
  22. epub_translator/xml/__init__.py +2 -0
  23. epub_translator/xml/const.py +1 -0
  24. epub_translator/xml/deduplication.py +3 -3
  25. epub_translator/xml/self_closing.py +182 -0
  26. epub_translator/xml/utils.py +42 -0
  27. epub_translator/xml/xml.py +7 -0
  28. epub_translator/xml/xml_like.py +8 -33
  29. epub_translator/xml_interrupter.py +165 -0
  30. epub_translator/xml_translator/__init__.py +1 -2
  31. epub_translator/xml_translator/callbacks.py +34 -0
  32. epub_translator/xml_translator/{const.py → common.py} +0 -1
  33. epub_translator/xml_translator/hill_climbing.py +104 -0
  34. epub_translator/xml_translator/stream_mapper.py +253 -0
  35. epub_translator/xml_translator/submitter.py +26 -72
  36. epub_translator/xml_translator/translator.py +162 -113
  37. epub_translator/xml_translator/validation.py +458 -0
  38. {epub_translator-0.1.1.dist-info → epub_translator-0.1.3.dist-info}/METADATA +72 -9
  39. epub_translator-0.1.3.dist-info/RECORD +66 -0
  40. epub_translator/epub/placeholder.py +0 -53
  41. epub_translator/iter_sync.py +0 -24
  42. epub_translator/xml_translator/fill.py +0 -128
  43. epub_translator/xml_translator/format.py +0 -282
  44. epub_translator/xml_translator/fragmented.py +0 -125
  45. epub_translator/xml_translator/group.py +0 -183
  46. epub_translator/xml_translator/progressive_locking.py +0 -256
  47. epub_translator/xml_translator/utils.py +0 -29
  48. epub_translator-0.1.1.dist-info/RECORD +0 -58
  49. {epub_translator-0.1.1.dist-info → epub_translator-0.1.3.dist-info}/LICENSE +0 -0
  50. {epub_translator-0.1.1.dist-info → epub_translator-0.1.3.dist-info}/WHEEL +0 -0
@@ -0,0 +1,458 @@
1
+ from collections.abc import Generator, Iterable
2
+ from dataclasses import dataclass
3
+ from typing import Generic, TypeVar, cast
4
+ from xml.etree.ElementTree import Element
5
+
6
+ from tiktoken import Encoding
7
+
8
+ from ..segment import (
9
+ BlockContentError,
10
+ BlockError,
11
+ BlockExpectedIDsError,
12
+ BlockUnexpectedIDError,
13
+ BlockWrongTagError,
14
+ FoundInvalidIDError,
15
+ InlineError,
16
+ InlineExpectedIDsError,
17
+ InlineLostIDError,
18
+ InlineUnexpectedIDError,
19
+ InlineWrongTagCountError,
20
+ )
21
+ from ..utils import ensure_list
22
+ from ..xml import plain_text
23
+
24
# Base of the exponential error weight: _calculate_error_weight returns
# _LEVEL_WEIGHT ** level (optionally multiplied by a missing-ID count).
_LEVEL_WEIGHT = 3
# Maximum number of tokens _extract_text_hint keeps before appending " ...".
_MAX_TEXT_HINT_TOKENS_COUNT = 6


# Severity levels returned by _get_block_error_level for block-level errors.
# Higher levels sort first and produce larger weights.
_BLOCK_EXPECTED_IDS_LEVEL = 6
_BLOCK_WRONG_TAG_LEVEL = 5
_BLOCK_FOUND_INVALID_ID_LEVEL = 4
_BLOCK_UNEXPECTED_ID_LEVEL = 3

# Severity levels returned by _get_inline_error_level for inline errors.
_INLINE_EXPECTED_IDS_LEVEL = 3
_INLINE_LOST_ID_LEVEL = 2
_INLINE_FOUND_INVALID_ID_LEVEL = 1
_INLINE_WRONG_TAG_COUNT_LEVEL = 0
_INLINE_UNEXPECTED_ID_LEVEL = 0

# Type parameter for the error object wrapped by ErrorItem.
ERROR = TypeVar("ERROR")
# NOTE(review): not referenced inside this module; presumably the number of
# distinct severity levels (0 through 6) - confirm against callers.
LEVEL_DEPTH = 7
41
+
42
+
43
@dataclass
class ErrorItem(Generic[ERROR]):
    """A validation error together with the metadata used to order and rank it."""

    # The wrapped error object.
    error: ERROR
    # Position of the originating block-level error in the input iterable.
    index1: int
    # Position of an inline error inside its block's error list; 0 for block-level errors.
    index2: int
    # Severity level; sort keys negate it so higher levels come first.
    level: int
    # Ranking weight as computed by _calculate_error_weight.
    weight: int
50
+
51
+
52
@dataclass
class BlockErrorsGroup:
    """All errors attributed to a single block element, identified by its ID."""

    block_id: int
    block_element: Element
    errors: list[ErrorItem[BlockError | FoundInvalidIDError] | ErrorItem[InlineError | FoundInvalidIDError]]

    @property
    def weight(self) -> int:
        """Return the combined weight of every error in this block (0 when empty)."""
        total = 0
        for item in self.errors:
            total += item.weight
        return total
61
+
62
+
63
@dataclass
class ErrorsGroup:
    """Errors split into those without an owning block and per-block groups."""

    upper_errors: list[ErrorItem[BlockError | FoundInvalidIDError]]
    block_groups: list[BlockErrorsGroup]

    @property
    def errors_count(self) -> int:
        """Return the number of upper errors plus the errors of every block group."""
        grouped_total = sum(len(group.errors) for group in self.block_groups)
        return len(self.upper_errors) + grouped_total
74
+
75
+
76
def nest_as_errors_group(errors: Iterable[BlockError | FoundInvalidIDError]) -> ErrorsGroup | None:
    """Convert a flat stream of errors into a nested ``ErrorsGroup``.

    Returns:
        The grouped errors, or ``None`` when the input produces no error items.
    """
    items = _transform_errors_to_items(errors)
    return _create_errors_group(error_items=items)
80
+
81
+
82
def truncate_errors_group(errors_group: ErrorsGroup, max_errors: int) -> ErrorsGroup | None:
    """Keep at most ``max_errors`` errors of ``errors_group``, preferring higher levels.

    Args:
        errors_group: The group to truncate.
        max_errors: Maximum number of errors to keep. Negative values are
            treated as zero.

    Returns:
        ``errors_group`` itself when it already fits within ``max_errors``;
        otherwise a rebuilt group holding the retained errors, or ``None``
        when nothing is retained.
    """
    errors_items = list(_flatten_errors_group(errors_group))
    if len(errors_items) <= max_errors:
        return errors_group

    # A negative limit would make the slice below count from the end and keep
    # items instead of dropping all of them, so clamp it to zero.
    limit = max(max_errors, 0)

    # Highest level first; ties fall back to the original input order.
    errors_items.sort(key=lambda item: (-item[1].level, item[1].index1, item[1].index2))

    return _create_errors_group(
        error_items=errors_items[:limit],
    )
93
+
94
+
95
def generate_error_message(encoding: Encoding, errors_group: ErrorsGroup, omitted_count: int = 0) -> None | str:
    """Render ``errors_group`` as a plain-text correction request.

    Args:
        encoding: Tokenizer forwarded to the inline formatter for text hints.
        errors_group: The errors to report.
        omitted_count: Number of additional errors not contained in
            ``errors_group``; when positive, a trailing note mentions them.

    Returns:
        The complete message, or ``None`` when there is nothing to report.

    Raises:
        RuntimeError: If a block group contains an error that is neither a
            block error, an inline error nor a ``FoundInvalidIDError``.
    """
    message_lines: list[str] = [_format_block_error(upper.error) for upper in errors_group.upper_errors]
    if message_lines:
        # Separate the upper errors from the per-block sections.
        message_lines.append("")

    for i, block_group in enumerate(errors_group.block_groups):
        if i > 0:
            # Blank line between consecutive block sections. The first section
            # is already preceded by a blank line (after the header or after
            # the upper errors), so none is added before it.
            message_lines.append("")

        block_tag = block_group.block_element.tag
        error_count = len(block_group.errors)
        count_suffix = f" ({error_count} error{'s' if error_count != 1 else ''})"
        message_lines.append(f"In {block_tag}#{block_group.block_id}:{count_suffix}")

        for block_error in block_group.errors:
            error = block_error.error
            message: str
            if isinstance(error, BlockError):
                message = _format_block_error(error)
            elif isinstance(error, InlineError) or isinstance(error, FoundInvalidIDError):
                # Invalid-ID errors inside a block group come from inline
                # validation; _format_inline_error knows how to render them.
                message = _format_inline_error(encoding, error, block_group.block_id)
            else:
                raise RuntimeError(f"Unsupported error type: {type(error).__name__}")
            message_lines.append(f" - {message}")

    if not message_lines:
        return None

    header = (
        f"Found {errors_group.errors_count} error(s). Fix them and return "
        "the COMPLETE corrected XML (not just the changed parts):"
    )
    message_lines.insert(0, "")
    message_lines.insert(0, header)

    if omitted_count > 0:
        message_lines.append("")
        message_lines.append(
            f"... and {omitted_count} more error(s) omitted. "
            f"Fix the above errors first, then resubmit for remaining issues."
        )
        message_lines.append("")
        message_lines.append("Remember: Return the entire <xml>...</xml> block with all corrections applied.")
    else:
        message_lines.append("")
        message_lines.append("Return the entire <xml>...</xml> block with corrections.")

    return "\n".join(message_lines)
144
+
145
+
146
@dataclass
class _Block:
    """Identity of the block an error belongs to, used while (re)grouping error items."""

    # ID under which the block's errors are grouped.
    id: int
    # Element of the block; its tag is shown in generated messages.
    element: Element
150
+
151
+
152
def _transform_errors_to_items(errors: Iterable[BlockError | FoundInvalidIDError]):
    """Lazily yield ``(block, ErrorItem)`` pairs for every reportable error.

    A ``BlockContentError`` is expanded into one item per inline error it
    carries, all attached to that block. Any other error becomes a single
    item; it is attached to a block only when it is a ``BlockWrongTagError``
    that names one, otherwise the block slot is ``None``.
    """
    for outer_index, source_error in enumerate(errors):
        if not isinstance(source_error, BlockContentError):
            block_level = _get_block_error_level(source_error)
            block_weight = _calculate_error_weight(source_error, block_level)
            owner: _Block | None = None
            if isinstance(source_error, BlockWrongTagError) and source_error.block is not None:
                owner = _Block(
                    id=source_error.block[0],
                    element=source_error.block[1],
                )
            yield owner, ErrorItem(
                error=source_error,
                index1=outer_index,
                index2=0,
                level=block_level,
                weight=block_weight,
            )
            continue

        owner = _Block(
            id=source_error.id,
            element=source_error.element,
        )
        for inner_index, inline_error in enumerate(source_error.errors):
            inline_level = _get_inline_error_level(inline_error)
            inline_weight = _calculate_error_weight(inline_error, inline_level)
            yield (
                owner,
                ErrorItem(
                    error=inline_error,
                    index1=outer_index,
                    index2=inner_index,
                    level=inline_level,
                    weight=inline_weight,
                ),
            )
189
+
190
+
191
def _flatten_errors_group(
    errors_group: ErrorsGroup,
) -> Generator[
    tuple[
        _Block | None,
        ErrorItem[BlockError | FoundInvalidIDError] | ErrorItem[InlineError | FoundInvalidIDError],
    ],
    None,
    None,
]:
    """Yield every error of ``errors_group`` as a ``(block, item)`` pair.

    Upper errors come first with ``None`` as their block; the errors of each
    block group follow, paired with a ``_Block`` built from that group.
    """
    yield from ((None, item) for item in errors_group.upper_errors)

    for group in errors_group.block_groups:
        owner = _Block(
            id=group.block_id,
            element=group.block_element,
        )
        yield from ((owner, item) for item in group.errors)
211
+
212
+
213
def _create_errors_group(
    error_items: Iterable[
        tuple[
            _Block | None,
            ErrorItem[BlockError | FoundInvalidIDError] | ErrorItem[InlineError | FoundInvalidIDError],
        ]
    ],
) -> ErrorsGroup | None:
    """Assemble ``(block, item)`` pairs into a sorted ``ErrorsGroup``.

    Items without a block become upper errors (sorted by descending level);
    the rest are grouped by block ID, sorted by descending weight inside each
    group, and the groups themselves are ordered by descending total weight.
    Ties keep input order via ``index1``/``index2`` and stable sorting.

    Returns:
        The assembled group, or ``None`` when ``error_items`` is empty.
    """
    top_level: list[ErrorItem[BlockError | FoundInvalidIDError]] = []
    elements_by_id: dict[int, Element] = {}
    items_by_id: dict[
        int, list[ErrorItem[BlockError | FoundInvalidIDError] | ErrorItem[InlineError | FoundInvalidIDError]]
    ] = {}

    for owner, item in error_items:
        if owner is not None:
            ensure_list(items_by_id, owner.id).append(item)
            elements_by_id[owner.id] = owner.element
            continue
        top_level.append(cast(ErrorItem[BlockError | FoundInvalidIDError], item))

    if not (top_level or items_by_id):
        return None

    def by_weight(item):
        return (-item.weight, item.index1, item.index2)

    groups: list[BlockErrorsGroup] = []
    for block_id, items in items_by_id.items():
        element = elements_by_id.get(block_id)
        # Compare against None explicitly: an Element without children is falsy.
        if element is not None:
            groups.append(
                BlockErrorsGroup(
                    block_id=block_id,
                    block_element=element,
                    errors=sorted(items, key=by_weight),
                )
            )

    top_level.sort(key=lambda item: (-item.level, item.index1, item.index2))
    groups.sort(key=lambda group: -group.weight)

    return ErrorsGroup(
        upper_errors=top_level,
        block_groups=groups,
    )
258
+
259
+
260
def _calculate_error_weight(error: BlockError | InlineError | FoundInvalidIDError, level: int) -> int:
    """Return the ranking weight of ``error`` at the given ``level``.

    The base weight is ``_LEVEL_WEIGHT ** level``. For ``BlockExpectedIDsError``
    and ``InlineExpectedIDsError`` it is multiplied by the number of entries
    in ``error.id2element``.
    """
    base_weight = _LEVEL_WEIGHT**level
    if not isinstance(error, (BlockExpectedIDsError, InlineExpectedIDsError)):
        return base_weight
    return base_weight * len(error.id2element)
266
+
267
+
268
def _get_block_error_level(error: BlockError | FoundInvalidIDError) -> int:
    """Return the severity level of a block-level error, or 0 for unknown types."""
    # Checked in order; the first matching type wins.
    ordered_levels = (
        (BlockWrongTagError, _BLOCK_WRONG_TAG_LEVEL),
        (BlockExpectedIDsError, _BLOCK_EXPECTED_IDS_LEVEL),
        (BlockUnexpectedIDError, _BLOCK_UNEXPECTED_ID_LEVEL),
        (FoundInvalidIDError, _BLOCK_FOUND_INVALID_ID_LEVEL),
    )
    for error_type, level in ordered_levels:
        if isinstance(error, error_type):
            return level
    return 0
279
+
280
+
281
def _get_inline_error_level(error: InlineError | FoundInvalidIDError) -> int:
    """Return the severity level of an inline error, or 0 for unknown types."""
    # Checked in order; the first matching type wins.
    ordered_levels = (
        (InlineLostIDError, _INLINE_LOST_ID_LEVEL),
        (InlineExpectedIDsError, _INLINE_EXPECTED_IDS_LEVEL),
        (InlineUnexpectedIDError, _INLINE_UNEXPECTED_ID_LEVEL),
        (InlineWrongTagCountError, _INLINE_WRONG_TAG_COUNT_LEVEL),
        (FoundInvalidIDError, _INLINE_FOUND_INVALID_ID_LEVEL),
    )
    for error_type, level in ordered_levels:
        if isinstance(error, error_type):
            return level
    return 0
294
+
295
+
296
def _format_block_error(error: BlockError | FoundInvalidIDError) -> str:
    """Describe a block-level error and how to fix it.

    Error types without a dedicated description get a generic
    "Unknown block error" message.
    """
    if isinstance(error, BlockWrongTagError):
        if error.block is None:
            return (
                f"Root tag mismatch: expected `<{error.expected_tag}>`, but found `<{error.instead_tag}>`. "
                f"Fix: Change the root tag to `<{error.expected_tag}>`."
            )
        return (
            f"Wrong tag for block at `{error.instead_tag}#{error.block[0]}`: "
            f'expected `<{error.expected_tag} id="{error.block[0]}">`, '
            f'but found `<{error.instead_tag} id="{error.block[0]}">`. '
            f"Fix: Change the tag to `<{error.expected_tag}>`."
        )

    if isinstance(error, BlockExpectedIDsError):
        missing = sorted(error.id2element.items())

        # Prefer hints that quote the original text of each missing block.
        hint_lines: list[str] = []
        for element_id, element in missing:
            source_text = plain_text(element).strip()
            if not source_text:
                continue
            # Block-level hints show at most the first 30 characters.
            if len(source_text) > 30:
                preview = source_text[:30] + "..."
            else:
                preview = source_text
            hint_lines.append(f' - `<{element.tag} id="{element_id}">`: "{preview}"')

        if hint_lines:
            return "Missing block elements (find translation and wrap):\n" + "\n".join(hint_lines)

        # No text available for any missing block: list the bare tags instead.
        listing = ", ".join(f'<{element.tag} id="{element_id}">' for element_id, element in missing)
        return f"Missing expected blocks: {listing}. Fix: Add these missing blocks with the correct IDs."

    if isinstance(error, BlockUnexpectedIDError):
        location = f"{error.element.tag}#{error.id}"
        return f"Unexpected block found at `{location}`. Fix: Remove this unexpected block."

    if isinstance(error, FoundInvalidIDError):
        if error.invalid_id is None:
            sample = f"<{error.element.tag}>"
        else:
            sample = f'<{error.element.tag} id="{error.invalid_id}">'
        return f"Invalid or missing ID attribute: {sample}. Fix: Ensure all blocks have valid numeric IDs."

    return "Unknown block error. Fix: Review the block structure."
342
+
343
+
344
def _format_inline_error(encoding: Encoding, error: InlineError | FoundInvalidIDError, block_id: int) -> str:
    """Describe an inline error inside block ``block_id`` and how to fix it.

    ``encoding`` is used to build token-limited text hints. Error types
    without a dedicated description get a generic "Unknown inline error"
    message.
    """
    if isinstance(error, InlineLostIDError):
        location = _build_inline_selector(encoding, error.stack, block_id, element=error.element)
        return f"Element at `{location}` is missing an ID attribute. Fix: Add the required ID attribute."

    if isinstance(error, InlineExpectedIDsError):
        missing = sorted(error.id2element.items())

        # Prefer hints that quote the original text of each missing element.
        hint_lines: list[str] = []
        for element_id, element in missing:
            if plain_text(element).strip():
                hint = _extract_text_hint(encoding, element)
                hint_lines.append(f' - `<{element.tag} id="{element_id}">`: "{hint}"')

        if hint_lines:
            return "Missing inline elements (find translation and wrap):\n" + "\n".join(hint_lines)

        # No text available for any missing element: list the bare tags instead.
        listing = ", ".join(f'<{element.tag} id="{element_id}">' for element_id, element in missing)
        return f"Missing expected inline elements: {listing}. Fix: Add these missing inline elements."

    if isinstance(error, InlineUnexpectedIDError):
        location = f"{error.element.tag}#{error.id}"
        return f"Unexpected inline element at `{location}`. Fix: Remove this unexpected element."

    if isinstance(error, InlineWrongTagCountError):
        tag = error.found_elements[0].tag if error.found_elements else "unknown"
        location = _build_inline_selector(encoding, error.stack, block_id, tag=tag)
        expected_count = error.expected_count
        found_count = len(error.found_elements)

        # Case 1: none should exist, but some were found.
        if expected_count == 0 and found_count > 0:
            return (
                f"Found unexpected `<{tag}>` elements at `{location}`. "
                f"There should be none, but {found_count} were found. "
                f"Fix: Remove all `<{tag}>` elements from this location."
            )

        # Case 2: some should exist, but none were found.
        if expected_count > 0 and found_count == 0:
            return (
                f"Missing `<{tag}>` elements at `{location}`. "
                f"Expected {expected_count}, but none were found. "
                f"Fix: Add {expected_count} `<{tag}>` element(s) to this location."
            )

        # Case 3: too many.
        if found_count > expected_count:
            surplus = found_count - expected_count
            return (
                f"Too many `<{tag}>` elements at `{location}`. "
                f"Expected {expected_count}, but found {found_count} ({surplus} extra). "
                f"Fix: Remove {surplus} `<{tag}>` element(s)."
            )

        # Case 4: too few.
        shortfall = expected_count - found_count
        return (
            f"Too few `<{tag}>` elements at `{location}`. "
            f"Expected {expected_count}, but only found {found_count} ({shortfall} missing). "
            f"Fix: Add {shortfall} more `<{tag}>` element(s)."
        )

    if isinstance(error, FoundInvalidIDError):
        if error.invalid_id is None:
            sample = f"<{error.element.tag}>"
        else:
            sample = f'<{error.element.tag} id="{error.invalid_id}">'
        return f"Invalid inline ID: {sample}. Fix: Ensure inline elements have valid numeric IDs."

    return "Unknown inline error. Fix: Review the inline structure."
416
+
417
+
418
def _build_inline_selector(
    encoding: Encoding,
    stack: list[Element],
    block_id: int,
    element: Element | None = None,
    tag: str | None = None,
) -> str:
    """Build a CSS-like selector that locates an inline element in a block.

    When ``element`` carries an ``id`` attribute the selector is simply
    ``tag#id``. Otherwise it is a path of the form
    ``block#block_id > parent > ... > tag``, followed by a text hint when
    ``element`` is given and contains text. ``element.tag`` overrides ``tag``.
    """
    if element is not None:
        own_id = element.get("id")
        if own_id is not None:
            # An ID locates the element directly; no path is needed.
            return f"{element.tag}#{own_id}"
        tag = element.tag

    # Path: block#id > parent > ... > tag
    root_tag = stack[0].tag if stack else "unknown"
    segments = [f"{root_tag}#{block_id}", *(ancestor.tag for ancestor in stack[1:])]
    if tag:
        segments.append(tag)

    selector = " > ".join(segments)
    if element is None:
        return selector

    hint = _extract_text_hint(encoding, element)
    if not hint:
        return selector
    return selector + f' (contains text: "{hint}")'
449
+
450
+
451
def _extract_text_hint(encoding: Encoding, element: Element) -> str:
    """Return the stripped plain text of ``element``, shortened to a few tokens.

    Text longer than ``_MAX_TEXT_HINT_TOKENS_COUNT`` tokens is cut to that
    many tokens, decoded, stripped and suffixed with " ...". Empty text is
    returned unchanged.
    """
    text = plain_text(element).strip()
    if not text:
        return text

    tokens = encoding.encode(text)
    if len(tokens) <= _MAX_TEXT_HINT_TOKENS_COUNT:
        return text

    kept_tokens = tokens[:_MAX_TEXT_HINT_TOKENS_COUNT]
    return encoding.decode(kept_tokens).strip() + " ..."
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: epub-translator
3
- Version: 0.1.1
3
+ Version: 0.1.3
4
4
  Summary: Translate the epub book using LLM. The translated book will retain the original text and list the translated text side by side with the original text.
5
5
  License: MIT
6
6
  Keywords: epub,llm,translation,translator
@@ -91,10 +91,10 @@ llm = LLM(
91
91
 
92
92
  # Translate EPUB file using language constants
93
93
  translate(
94
- llm=llm,
95
94
  source_path=Path("source.epub"),
96
95
  target_path=Path("translated.epub"),
97
96
  target_language=language.ENGLISH,
97
+ llm=llm,
98
98
  )
99
99
  ```
100
100
 
@@ -113,10 +113,10 @@ with tqdm(total=100, desc="Translating", unit="%") as pbar:
113
113
  last_progress = progress
114
114
 
115
115
  translate(
116
- llm=llm,
117
116
  source_path=Path("source.epub"),
118
117
  target_path=Path("translated.epub"),
119
118
  target_language="English",
119
+ llm=llm,
120
120
  on_progress=on_progress,
121
121
  )
122
122
  ```
@@ -149,17 +149,22 @@ Translate an EPUB file:
149
149
 
150
150
  ```python
151
151
  translate(
152
- llm: LLM, # LLM instance
153
- source_path: Path, # Source EPUB file path
154
- target_path: Path, # Output EPUB file path
152
+ source_path: PathLike | str, # Source EPUB file path
153
+ target_path: PathLike | str, # Output EPUB file path
155
154
  target_language: str, # Target language (e.g., "English", "Chinese")
156
155
  user_prompt: str | None = None, # Custom translation instructions
157
156
  max_retries: int = 5, # Maximum retries for failed translations
158
157
  max_group_tokens: int = 1200, # Maximum tokens per translation group
158
+ llm: LLM | None = None, # Single LLM instance for both translation and filling
159
+ translation_llm: LLM | None = None, # LLM instance for translation (overrides llm)
160
+ fill_llm: LLM | None = None, # LLM instance for XML filling (overrides llm)
159
161
  on_progress: Callable[[float], None] | None = None, # Progress callback (0.0-1.0)
162
+ on_fill_failed: Callable[[FillFailedEvent], None] | None = None, # Error callback
160
163
  )
161
164
  ```
162
165
 
166
+ **Note**: Either `llm` or both `translation_llm` and `fill_llm` must be provided. Using separate LLMs allows for task-specific optimization.
167
+
163
168
  #### Language Constants
164
169
 
165
170
  EPUB Translator provides predefined language constants for convenience. You can use these constants instead of writing language names as strings:
@@ -169,18 +174,76 @@ from epub_translator import language
169
174
 
170
175
  # Usage example:
171
176
  translate(
172
- llm=llm,
173
177
  source_path=Path("source.epub"),
174
178
  target_path=Path("translated.epub"),
175
179
  target_language=language.ENGLISH,
180
+ llm=llm,
176
181
  )
177
182
 
178
183
  # You can also use custom language strings:
179
184
  translate(
180
- llm=llm,
181
185
  source_path=Path("source.epub"),
182
186
  target_path=Path("translated.epub"),
183
187
  target_language="Icelandic", # For languages not in the constants
188
+ llm=llm,
189
+ )
190
+ ```
191
+
192
+ ### Error Handling with `on_fill_failed`
193
+
194
+ Monitor and handle translation errors using the `on_fill_failed` callback:
195
+
196
+ ```python
197
+ from epub_translator import FillFailedEvent
198
+
199
+ def handle_fill_error(event: FillFailedEvent):
200
+ print(f"Translation error (attempt {event.retried_count}):")
201
+ print(f" {event.error_message}")
202
+ if event.over_maximum_retries:
203
+ print(" Maximum retries exceeded!")
204
+
205
+ translate(
206
+ source_path=Path("source.epub"),
207
+ target_path=Path("translated.epub"),
208
+ target_language=language.ENGLISH,
209
+ llm=llm,
210
+ on_fill_failed=handle_fill_error,
211
+ )
212
+ ```
213
+
214
+ The `FillFailedEvent` contains:
215
+ - `error_message: str` - Description of the error
216
+ - `retried_count: int` - Current retry attempt number
217
+ - `over_maximum_retries: bool` - Whether max retries has been exceeded
218
+
219
+ ### Dual-LLM Architecture
220
+
221
+ Use separate LLM instances for translation and XML structure filling with different optimization parameters:
222
+
223
+ ```python
224
+ # Create two LLM instances with different temperatures
225
+ translation_llm = LLM(
226
+ key="your-api-key",
227
+ url="https://api.openai.com/v1",
228
+ model="gpt-4",
229
+ token_encoding="o200k_base",
230
+ temperature=0.8, # Higher temperature for creative translation
231
+ )
232
+
233
+ fill_llm = LLM(
234
+ key="your-api-key",
235
+ url="https://api.openai.com/v1",
236
+ model="gpt-4",
237
+ token_encoding="o200k_base",
238
+ temperature=0.3, # Lower temperature for structure preservation
239
+ )
240
+
241
+ translate(
242
+ source_path=Path("source.epub"),
243
+ target_path=Path("translated.epub"),
244
+ target_language=language.ENGLISH,
245
+ translation_llm=translation_llm,
246
+ fill_llm=fill_llm,
184
247
  )
185
248
  ```
186
249
 
@@ -236,10 +299,10 @@ Provide specific translation instructions:
236
299
 
237
300
  ```python
238
301
  translate(
239
- llm=llm,
240
302
  source_path=Path("source.epub"),
241
303
  target_path=Path("translated.epub"),
242
304
  target_language="English",
305
+ llm=llm,
243
306
  user_prompt="Use formal language and preserve technical terminology",
244
307
  )
245
308
  ```
@@ -0,0 +1,66 @@
1
+ epub_translator/__init__.py,sha256=2FZPJyQdTgy_X7oOCxDpSqCZCGEcFqlulmhztLWuCIk,158
2
+ epub_translator/data/fill.jinja,sha256=zSytA8Vhp2i6YBZ09F1z9iPJq1-jUaiphoXqTNZwnvo,6964
3
+ epub_translator/data/mmltex/README.md,sha256=wwhe5yW1U_7_YZIFKnQVnCOmUl7Mu3gsr3lNnDSJ5Qs,2953
4
+ epub_translator/data/mmltex/cmarkup.xsl,sha256=DkhimAATM0XSCfVOfY41-qTPoddqzOHjZ00Pynr4zQE,37707
5
+ epub_translator/data/mmltex/entities.xsl,sha256=TYZ5iGg0u9XlDDBBGuZiHL7MsxKc-3OsTIBAVM1GDek,107742
6
+ epub_translator/data/mmltex/glayout.xsl,sha256=Ztc7N1wiHaYZlo9u9iuROrIl3uIIIoo1VFIuojXq7TM,6879
7
+ epub_translator/data/mmltex/mmltex.xsl,sha256=BVXFbApz-9W2qRKKtBTxptK5vxG2bfB8tv9W1MP5iBI,1384
8
+ epub_translator/data/mmltex/scripts.xsl,sha256=f4ei0cDCW3cV-Ra7rC3kC5tRcKdjJxbSpCeQLoohtgo,13697
9
+ epub_translator/data/mmltex/tables.xsl,sha256=RxtNo8qDtVAg8_6BuYsafraB_0z7YDAB9D__fT9gmWs,4327
10
+ epub_translator/data/mmltex/tokens.xsl,sha256=j3JZRcBhAiiY8o5K3640phfLwxO8JVspCFlSttwBzJk,12373
11
+ epub_translator/data/translate.jinja,sha256=93d8kschm5HV-EfXd1kFSIVMObDqTMdoUrwDfce2bhU,820
12
+ epub_translator/epub/__init__.py,sha256=ZddRHrLNVzgaSVrYflGnrq8tffmlKPhBbz9ok7sp8PY,149
13
+ epub_translator/epub/common.py,sha256=4-SpTe8iot9hMfyXILmlUFvYVNYqPAHL5hn1fr2wgis,1180
14
+ epub_translator/epub/math.py,sha256=-Q2LJQxxjgQZQUe_WlJA9tjzLqgqtw2ZmbGbHsPRp2U,5422
15
+ epub_translator/epub/metadata.py,sha256=DXSimY2iZNBA2juIaKtB-4CHHSYJiDK7PPhfenV4dto,3511
16
+ epub_translator/epub/spines.py,sha256=bP2IsobZm7zs4z10iXGc9SmgAFSIq9pJc8HE-V0aW9Y,1331
17
+ epub_translator/epub/toc.py,sha256=TKJfyDT4svFkXd6JCNZk2ZEYc9q-5DXnV3zY2UKo8nE,14891
18
+ epub_translator/epub/zip.py,sha256=-3LI8f-ksgU8xCy28NjBOKyQPE8PhPEUPqIKZE1p8dw,2364
19
+ epub_translator/epub_transcode.py,sha256=NzuvXXEZfAhIoMOSrgQRF0DPtaSpz4OY-NMSdC0Y2RM,2749
20
+ epub_translator/language.py,sha256=88osG0JNYxOkxBjg5Pm-P0Mhiyxf6GqdxoPW12HW0PE,493
21
+ epub_translator/llm/__init__.py,sha256=YcFYYnXmXyX0RUyC-PDbj5k7Woygp_XOpTI3vDiNSPM,75
22
+ epub_translator/llm/context.py,sha256=73paN3V66LQ6muKUSMCKEHEmMYBylK-dXOF8LmaQo5M,3885
23
+ epub_translator/llm/core.py,sha256=AorV4ss4Hr-IbAk8FmGhV2hgI2tKxQmW2Vz2WwUd0Ms,5110
24
+ epub_translator/llm/error.py,sha256=4efAIQL14DFSvAnSTUfgdAbZRqaWBqOfUGsSfvxa5zM,1503
25
+ epub_translator/llm/executor.py,sha256=A0IjQ-s9wBJuhAZAAydneb9zBXWnu2J9inR2Q8F-GDE,5533
26
+ epub_translator/llm/increasable.py,sha256=8XkKeI1hiHlpMHj8dQ4fW0BkViSx4hH8QfbQsy-5SDw,1297
27
+ epub_translator/llm/types.py,sha256=c-dMAIvlG4R3la3mUTWEw5xei-sIYKmQeBja7mirxcI,219
28
+ epub_translator/segment/__init__.py,sha256=UYTv_IKQbEB0DzhFeiuqCvjoJLvB-7XRwlaFS90KmIw,573
29
+ epub_translator/segment/block_segment.py,sha256=psNKA_HMIcwZtoug8AtnAcV9_mQ2WXLnXqFsekHzt2g,4570
30
+ epub_translator/segment/common.py,sha256=gGWYQaJ0tGnWCuF1me9TOo-Q_DrZVakCu2patyFIOs0,714
31
+ epub_translator/segment/inline_segment.py,sha256=_ZgSlZmGxzIvaPs01hreoUfnaXz8Yq7naksT34dGfds,14221
32
+ epub_translator/segment/text_segment.py,sha256=qKp646lAqsrI7CP7KYyXgRD2bY0dCR78i6TMBCzklrM,7614
33
+ epub_translator/segment/utils.py,sha256=qMqUt33pDRN5Tnuydkodzu2gaQrwTzAnQmXpDuHen1o,1036
34
+ epub_translator/serial/__init__.py,sha256=b3IMVmWcUwEqHKcGmey88b057pyz5ct946CaUZi4LB4,67
35
+ epub_translator/serial/chunk.py,sha256=FrTaHikVOd6bLYumnEriTaAQ_DIDLjHm16gh-wBVR9k,1495
36
+ epub_translator/serial/segment.py,sha256=uEz-ke1KcYrON-68FaUEzMG2CzHlMjvbC11F3ZT4yH0,446
37
+ epub_translator/serial/splitter.py,sha256=Nq0sxPXos8ez7QBG01sOKjnYKbeBWUBHflZGtqenVm8,1726
38
+ epub_translator/template.py,sha256=0CqRmj3nTtPshw0NmTr2ECqelops2MMyX94fMrE-HKs,1587
39
+ epub_translator/translator.py,sha256=Uy1dRBPA9hoNh3BE40M2XukK2VvcWRCvMvGwvhQtIaA,6212
40
+ epub_translator/utils.py,sha256=BfZWrYjzDNQ4cFrgvRNzd4i1CKLtPxS8Z4LBHhqEV78,914
41
+ epub_translator/xml/__init__.py,sha256=pxSRKPBQ7D8BCIzXceSad_1MFgN1Dou0BZz9trK47wU,138
42
+ epub_translator/xml/const.py,sha256=Re2TYmpwG7-jVVgSq3R_K-uYhvAYzcXcRmLFkwCPD9Y,19
43
+ epub_translator/xml/deduplication.py,sha256=TaMbzeA70VvUQV0X1wcQFVbuMEPJUtj9Hq6iWlUmtAQ,1152
44
+ epub_translator/xml/firendly/__init__.py,sha256=I5jhnhFWoHvojLsYXH4jfR4Gi8lKFZ3yQ56ze5hEe1M,74
45
+ epub_translator/xml/firendly/decoder.py,sha256=xRQ5LnSunmYbba_0oT39oUr86-sLYAHYMUGmlseIu2U,2467
46
+ epub_translator/xml/firendly/encoder.py,sha256=evjvw6oE-oCud44IsJ-YZVHn6dtUzjNYX25ljaZP6vY,2417
47
+ epub_translator/xml/firendly/parser.py,sha256=QlMHA0nfPJbNyx6IwRFrYVw7okuvzDB42NXCauIFV-o,6560
48
+ epub_translator/xml/firendly/tag.py,sha256=ahaGoYttuAlnFxLFFgTV51KUZSpUiHho-COZX14nxN8,3308
49
+ epub_translator/xml/firendly/transform.py,sha256=5tG1MJmzrXIR_Z5gmRxwcoKvXBzJBVH0ELeaRsG-8w0,1201
50
+ epub_translator/xml/self_closing.py,sha256=41ofGUdss9yU51IVwI4It6hKfzh8YcxIR_j-ohD19LE,5240
51
+ epub_translator/xml/utils.py,sha256=7tQ6L5P0_JXhxONeG64hEeeL5mKjA6NKS1H1Q9B1Cac,1062
52
+ epub_translator/xml/xml.py,sha256=qQ5Wk1-KVVHE4TX25zGOR7fINsGkXnoq-qyKKNl5no4,1675
53
+ epub_translator/xml/xml_like.py,sha256=jBK4UUgXXWRYnfYlCH1MUAjGHWBQAbUj8HsYqvTTWvA,8890
54
+ epub_translator/xml_interrupter.py,sha256=IGLATr7zTIdhE54Gnroab4Xu_vLJ7kzPiQgk7WMXKTc,7403
55
+ epub_translator/xml_translator/__init__.py,sha256=7aswnFGtuj97l7RQd4ka976WCKC7OPs2gFnJFdS74Ug,77
56
+ epub_translator/xml_translator/callbacks.py,sha256=IoZrsaivd2W76cHFupwv6auVxgEWHcBN2MHQJYcWoJ8,1324
57
+ epub_translator/xml_translator/common.py,sha256=hSPptgPp7j6dm47imELB5DgmEbzTEyJD6WEeELOOc50,38
58
+ epub_translator/xml_translator/hill_climbing.py,sha256=1jvilOkTLzwljJA4Nrel8yU2XGvOXpueUJTK7RAp-XY,4272
59
+ epub_translator/xml_translator/stream_mapper.py,sha256=tbMc2vyPUn9zEkJZ7-OVYuKaYyn2pPPwjcAdQ8HLzNs,10179
60
+ epub_translator/xml_translator/submitter.py,sha256=Ihp6DvvVMLNZLJkRccYppt_2I2CM7wvkkSAam9B2o2s,2268
61
+ epub_translator/xml_translator/translator.py,sha256=3Lu56vRkAbbnsWK5fOYkVoO-7b6TXCrFYSVYLOjqhw0,9169
62
+ epub_translator/xml_translator/validation.py,sha256=-OKlSZuD__sjAiEpGAO93YQme4ZDSPmoPjRsAMOCEjc,16668
63
+ epub_translator-0.1.3.dist-info/LICENSE,sha256=5RF32sL3LtMOJIErdDKp1ZEYPGXS8WPpsiSz_jMBnGI,1066
64
+ epub_translator-0.1.3.dist-info/METADATA,sha256=ruyJKZI669xCDIYL6YKoc8ojBsqbO_7Ebe15KkTjLS0,11699
65
+ epub_translator-0.1.3.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
66
+ epub_translator-0.1.3.dist-info/RECORD,,