epub-translator 0.1.6__py3-none-any.whl → 0.1.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -13,6 +13,9 @@ Translation rules:
13
13
  {% if user_prompt -%}
14
14
  User may provide additional requirements in <rules> tags before the source text. Follow them, but prioritize the rules above if conflicts arise.
15
15
 
16
+ <rules>
17
+ {{ user_prompt }}
18
+ </rules>
16
19
  {% endif -%}
17
20
 
18
21
  Output only the translated text, nothing else.
@@ -13,6 +13,7 @@ from ..template import create_env
13
13
  from .context import LLMContext
14
14
  from .executor import LLMExecutor
15
15
  from .increasable import Increasable
16
+ from .statistics import Statistics
16
17
  from .types import Message
17
18
 
18
19
  # Global state for logger filename generation
@@ -44,7 +45,7 @@ class LLM:
44
45
  self._temperature: Increasable = Increasable(temperature)
45
46
  self._cache_path: Path | None = self._ensure_dir_path(cache_path)
46
47
  self._logger_save_path: Path | None = self._ensure_dir_path(log_dir_path)
47
-
48
+ self._statistics = Statistics()
48
49
  self._executor = LLMExecutor(
49
50
  url=url,
50
51
  model=model,
@@ -53,12 +54,29 @@ class LLM:
53
54
  retry_times=retry_times,
54
55
  retry_interval_seconds=retry_interval_seconds,
55
56
  create_logger=self._create_logger,
57
+ statistics=self._statistics,
56
58
  )
57
59
 
58
60
  @property
59
61
  def encoding(self) -> Encoding:
60
62
  return self._encoding
61
63
 
64
+ @property
65
+ def total_tokens(self) -> int:
66
+ return self._statistics.total_tokens
67
+
68
+ @property
69
+ def input_tokens(self) -> int:
70
+ return self._statistics.input_tokens
71
+
72
+ @property
73
+ def input_cache_tokens(self) -> int:
74
+ return self._statistics.input_cache_tokens
75
+
76
+ @property
77
+ def output_tokens(self) -> int:
78
+ return self._statistics.output_tokens
79
+
62
80
  def context(self, cache_seed_content: str | None = None) -> LLMContext:
63
81
  return LLMContext(
64
82
  executor=self._executor,
@@ -7,6 +7,7 @@ from openai import OpenAI
7
7
  from openai.types.chat import ChatCompletionMessageParam
8
8
 
9
9
  from .error import is_retry_error
10
+ from .statistics import Statistics
10
11
  from .types import Message, MessageRole
11
12
 
12
13
 
@@ -20,12 +21,14 @@ class LLMExecutor:
20
21
  retry_times: int,
21
22
  retry_interval_seconds: float,
22
23
  create_logger: Callable[[], Logger | None],
24
+ statistics: Statistics,
23
25
  ) -> None:
24
26
  self._model_name: str = model
25
27
  self._timeout: float | None = timeout
26
28
  self._retry_times: int = retry_times
27
29
  self._retry_interval_seconds: float = retry_interval_seconds
28
30
  self._create_logger: Callable[[], Logger | None] = create_logger
31
+ self._statistics = statistics
29
32
  self._client = OpenAI(
30
33
  api_key=api_key,
31
34
  base_url=url,
@@ -156,6 +159,7 @@ class LLMExecutor:
156
159
  model=self._model_name,
157
160
  messages=messages,
158
161
  stream=True,
162
+ stream_options={"include_usage": True},
159
163
  top_p=top_p,
160
164
  temperature=temperature,
161
165
  max_tokens=max_tokens,
@@ -164,4 +168,5 @@ class LLMExecutor:
164
168
  for chunk in stream:
165
169
  if chunk.choices and chunk.choices[0].delta.content:
166
170
  buffer.write(chunk.choices[0].delta.content)
171
+ self._statistics.submit_usage(chunk.usage)
167
172
  return buffer.getvalue()
@@ -0,0 +1,25 @@
1
+ from threading import Lock
2
+
3
+ from openai.types import CompletionUsage
4
+
5
+
6
+ class Statistics:
7
+ def __init__(self) -> None:
8
+ self._lock = Lock()
9
+ self.total_tokens = 0
10
+ self.input_tokens = 0
11
+ self.input_cache_tokens = 0
12
+ self.output_tokens = 0
13
+
14
+ def submit_usage(self, usage: CompletionUsage | None) -> None:
15
+ if usage is None:
16
+ return
17
+ with self._lock:
18
+ if usage.total_tokens:
19
+ self.total_tokens += usage.total_tokens
20
+ if usage.prompt_tokens:
21
+ self.input_tokens += usage.prompt_tokens
22
+ if usage.prompt_tokens_details and usage.prompt_tokens_details.cached_tokens:
23
+ self.input_cache_tokens += usage.prompt_tokens_details.cached_tokens
24
+ if usage.completion_tokens:
25
+ self.output_tokens += usage.completion_tokens
@@ -21,6 +21,7 @@ from .text_segment import (
21
21
  TextPosition,
22
22
  TextSegment,
23
23
  combine_text_segments,
24
+ find_block_depth,
24
25
  incision_between,
25
26
  search_text_segments,
26
27
  )
@@ -4,7 +4,12 @@ from enum import Enum, auto
4
4
  from typing import Self
5
5
  from xml.etree.ElementTree import Element
6
6
 
7
- from ..xml import expand_left_element_texts, expand_right_element_texts, is_inline_tag, normalize_text_in_element
7
+ from ..xml import (
8
+ expand_left_element_texts,
9
+ expand_right_element_texts,
10
+ is_inline_element,
11
+ normalize_text_in_element,
12
+ )
8
13
 
9
14
 
10
15
  class TextPosition(Enum):
@@ -100,7 +105,7 @@ def search_text_segments(root: Element) -> Generator[TextSegment, None, None]:
100
105
  def _search_text_segments(stack: list[Element], element: Element) -> Generator[TextSegment, None, None]:
101
106
  text = normalize_text_in_element(element.text)
102
107
  next_stack = stack + [element]
103
- next_block_depth = _find_block_depth(next_stack)
108
+ next_block_depth = find_block_depth(next_stack)
104
109
 
105
110
  if text is not None:
106
111
  yield TextSegment(
@@ -125,12 +130,11 @@ def _search_text_segments(stack: list[Element], element: Element) -> Generator[T
125
130
  )
126
131
 
127
132
 
128
- def _find_block_depth(parent_stack: list[Element]) -> int:
133
+ def find_block_depth(parent_stack: list[Element]) -> int:
129
134
  index: int = 0
130
- for i in range(len(parent_stack) - 1, -1, -1):
131
- if not is_inline_tag(parent_stack[i].tag):
135
+ for i in range(len(parent_stack)):
136
+ if not is_inline_element(parent_stack[i]):
132
137
  index = i
133
- break
134
138
  return index + 1 # depth is a count not index
135
139
 
136
140
 
@@ -8,22 +8,6 @@ def element_fingerprint(element: Element) -> str:
8
8
  return f"<{element.tag} {' '.join(attrs)}/>"
9
9
 
10
10
 
11
- def unwrap_parents(element: Element) -> tuple[Element, list[Element]]:
12
- parents: list[Element] = []
13
- while True:
14
- if len(element) != 1:
15
- break
16
- child = element[0]
17
- if not element.text:
18
- break
19
- if not child.tail:
20
- break
21
- parents.append(element)
22
- element = child
23
- element.tail = None
24
- return element, parents
25
-
26
-
27
11
  def id_in_element(element: Element) -> int | None:
28
12
  id_str = element.get(ID_KEY, None)
29
13
  if id_str is None:
@@ -1,9 +1,13 @@
1
1
  from collections.abc import Generator, Iterable
2
2
  from typing import cast
3
- from xml.etree.ElementTree import Element
3
+ from xml.etree.ElementTree import Element, tostring
4
4
 
5
- from ..segment import TextSegment
5
+ from bs4 import BeautifulSoup
6
+ from mathml2latex.mathml import process_mathml
7
+
8
+ from ..segment import TextSegment, combine_text_segments, find_block_depth
6
9
  from ..utils import ensure_list, normalize_whitespace
10
+ from ..xml import DISPLAY_ATTRIBUTE, clone_element, is_inline_element
7
11
 
8
12
  _ID_KEY = "__XML_INTERRUPTER_ID"
9
13
  _MATH_TAG = "math"
@@ -37,8 +41,10 @@ class XMLInterrupter:
37
41
  def interrupt_block_element(self, element: Element) -> Element:
38
42
  interrupted_element = self._placeholder2interrupted.pop(id(element), None)
39
43
  if interrupted_element is None:
44
+ element.attrib.pop(_ID_KEY, None)
40
45
  return element
41
46
  else:
47
+ interrupted_element.attrib.pop(_ID_KEY, None)
42
48
  return interrupted_element
43
49
 
44
50
  def _expand_source_text_segment(self, text_segment: TextSegment):
@@ -81,14 +87,18 @@ class XMLInterrupter:
81
87
  _ID_KEY: cast(str, interrupted_element.get(_ID_KEY)),
82
88
  },
83
89
  )
90
+ interrupted_display = interrupted_element.get(DISPLAY_ATTRIBUTE, None)
91
+ if interrupted_display is not None:
92
+ placeholder_element.set(DISPLAY_ATTRIBUTE, interrupted_display)
93
+
84
94
  raw_parent_stack = text_segment.parent_stack[:interrupted_index]
85
95
  parent_stack = raw_parent_stack + [placeholder_element]
86
96
  merged_text_segment = TextSegment(
87
- text="".join(t.text for t in text_segments),
97
+ text=self._render_latex(text_segments),
88
98
  parent_stack=parent_stack,
89
99
  left_common_depth=text_segments[0].left_common_depth,
90
100
  right_common_depth=text_segments[-1].right_common_depth,
91
- block_depth=len(parent_stack),
101
+ block_depth=find_block_depth(parent_stack),
92
102
  position=text_segments[0].position,
93
103
  )
94
104
  self._placeholder2interrupted[id(placeholder_element)] = interrupted_element
@@ -116,8 +126,8 @@ class XMLInterrupter:
116
126
  # Pop the original stack entirely, keeping only the stack relative to the interrupted element; this format matches what translated segments require
117
127
  text_segment.left_common_depth = max(0, text_segment.left_common_depth - interrupted_index)
118
128
  text_segment.right_common_depth = max(0, text_segment.right_common_depth - interrupted_index)
119
- text_segment.block_depth = 1
120
129
  text_segment.parent_stack = text_segment.parent_stack[interrupted_index:]
130
+ text_segment.block_depth = find_block_depth(text_segment.parent_stack)
121
131
 
122
132
  return merged_text_segment
123
133
 
@@ -129,37 +139,54 @@ class XMLInterrupter:
129
139
  break
130
140
  return interrupted_index
131
141
 
142
+ def _render_latex(self, text_segments: list[TextSegment]) -> str:
143
+ math_element, _ = next(combine_text_segments(text_segments))
144
+ while math_element.tag != _MATH_TAG:
145
+ if len(math_element) == 0:
146
+ return ""
147
+ math_element = math_element[0]
148
+
149
+ math_element = clone_element(math_element)
150
+ math_element.attrib.pop(_ID_KEY, None)
151
+ math_element.tail = None
152
+ latex: str | None = None
153
+ try:
154
+ mathml_str = tostring(math_element, encoding="unicode")
155
+ soup = BeautifulSoup(mathml_str, "html.parser")
156
+ latex = process_mathml(soup)
157
+ except Exception:
158
+ pass
159
+
160
+ if latex is None:
161
+ latex = "".join(t.text for t in text_segments)
162
+ latex = normalize_whitespace(latex).strip()
163
+ else:
164
+ latex = normalize_whitespace(latex).strip()
165
+ if is_inline_element(math_element):
166
+ latex = f"${latex}$"
167
+ else:
168
+ latex = f"$${latex}$$"
169
+
170
+ return f" {latex} "
171
+
132
172
  def _expand_translated_text_segment(self, text_segment: TextSegment):
133
- interrupted_id = text_segment.block_parent.attrib.pop(_ID_KEY, None)
173
+ parent_element = text_segment.parent_stack[-1]
174
+ interrupted_id = parent_element.attrib.pop(_ID_KEY, None)
134
175
  if interrupted_id is None:
135
176
  yield text_segment
136
177
  return
137
178
 
138
- raw_text_segments = self._raw_text_segments.pop(interrupted_id, None)
139
- if not raw_text_segments:
179
+ if parent_element is text_segment.block_parent:
180
+ # Block-level math: it must stay hidden, so nothing is yielded for it
140
181
  return
141
182
 
142
- raw_block = raw_text_segments[0].parent_stack[0]
143
- if not self._is_inline_math(raw_block):
183
+ raw_text_segments = self._raw_text_segments.pop(interrupted_id, None)
184
+ if not raw_text_segments:
185
+ yield text_segment
144
186
  return
145
187
 
146
188
  for raw_text_segment in raw_text_segments:
189
+ text_basic_parent_stack = text_segment.parent_stack[:-1]
147
190
  raw_text_segment.block_parent.attrib.pop(_ID_KEY, None)
191
+ raw_text_segment.parent_stack = text_basic_parent_stack + raw_text_segment.parent_stack
148
192
  yield raw_text_segment
149
-
150
- def _has_no_math_texts(self, element: Element):
151
- if element.tag == _MATH_TAG:
152
- return True
153
- if element.text and normalize_whitespace(element.text).strip():
154
- return False
155
- for child_element in element:
156
- if not self._has_no_math_texts(child_element):
157
- return False
158
- if child_element.tail and normalize_whitespace(child_element.tail).strip():
159
- return False
160
- return True
161
-
162
- def _is_inline_math(self, element: Element) -> bool:
163
- if element.tag != _MATH_TAG:
164
- return False
165
- return element.get("display", "").lower() != "block"
@@ -1 +1,2 @@
1
1
  ID_KEY: str = "id"
2
+ DISPLAY_ATTRIBUTE: str = "display"
@@ -1,6 +1,11 @@
1
+ from xml.etree.ElementTree import Element
2
+
3
+ from .const import DISPLAY_ATTRIBUTE
4
+
1
5
  # HTML inline-level elements
2
6
  # Reference: https://developer.mozilla.org/en-US/docs/Web/HTML/Inline_elements
3
7
  # Reference: https://developer.mozilla.org/en-US/docs/Glossary/Inline-level_content
8
+ # Reference: https://developer.mozilla.org/en-US/docs/MathML/Element
4
9
  _HTML_INLINE_TAGS = frozenset(
5
10
  (
6
11
  # Inline text semantics
@@ -59,9 +64,57 @@ _HTML_INLINE_TAGS = frozenset(
59
64
  "del",
60
65
  "ins",
61
66
  "slot",
67
+ # MathML elements
68
+ # Token elements
69
+ "mi", # identifier
70
+ "mn", # number
71
+ "mo", # operator
72
+ "ms", # string literal
73
+ "mspace", # space
74
+ "mtext", # text
75
+ # General layout
76
+ "menclose", # enclosed content
77
+ "merror", # syntax error message
78
+ "mfenced", # parentheses (deprecated)
79
+ "mfrac", # fraction
80
+ "mpadded", # space around content
81
+ "mphantom", # invisible content
82
+ "mroot", # radical with index
83
+ "mrow", # grouped sub-expressions
84
+ "msqrt", # square root
85
+ "mstyle", # style change
86
+ # Scripts and limits
87
+ "mmultiscripts", # prescripts and tensor indices
88
+ "mover", # overscript
89
+ "mprescripts", # prescripts separator
90
+ "msub", # subscript
91
+ "msubsup", # subscript-superscript pair
92
+ "msup", # superscript
93
+ "munder", # underscript
94
+ "munderover", # underscript-overscript pair
95
+ # Table math
96
+ "mtable", # table or matrix
97
+ "mtr", # row in table or matrix
98
+ "mtd", # cell in table or matrix
99
+ # Semantic annotations
100
+ "annotation", # data annotation
101
+ "annotation-xml", # XML annotation
102
+ "semantics", # semantic annotation container
103
+ # Other
104
+ "maction", # bind actions to sub-expressions (deprecated)
62
105
  )
63
106
  )
64
107
 
65
108
 
66
- def is_inline_tag(tag: str) -> bool:
67
- return tag.lower() in _HTML_INLINE_TAGS
109
+ def is_inline_element(element: Element) -> bool:
110
+ tag = element.tag.lower()
111
+ if tag in _HTML_INLINE_TAGS:
112
+ return True
113
+ display = element.get(DISPLAY_ATTRIBUTE, None)
114
+ if display is not None:
115
+ display = display.lower()
116
+ if display == "inline":
117
+ return True
118
+ if tag == "math" and display != "block":
119
+ return True
120
+ return False
@@ -4,7 +4,7 @@ from enum import Enum, auto
4
4
  from xml.etree.ElementTree import Element
5
5
 
6
6
  from ..segment import TextSegment, combine_text_segments
7
- from ..xml import index_of_parent, is_inline_tag, iter_with_stack
7
+ from ..xml import index_of_parent, is_inline_element, iter_with_stack
8
8
  from .stream_mapper import InlineSegmentMapping
9
9
 
10
10
 
@@ -78,7 +78,7 @@ class _Submitter:
78
78
  preserved_elements: list[Element] = []
79
79
  if self._action == SubmitKind.REPLACE:
80
80
  for child in list(node.raw_element):
81
- if not is_inline_tag(child.tag):
81
+ if not is_inline_element(child):
82
82
  child.tail = None
83
83
  preserved_elements.append(child)
84
84
 
@@ -87,7 +87,7 @@ class _Submitter:
87
87
 
88
88
  if combined is not None:
89
89
  # In APPEND_BLOCK mode, if the combined element is an inline tag, prepend a space to its text
90
- if self._action == SubmitKind.APPEND_BLOCK and is_inline_tag(combined.tag) and combined.text:
90
+ if self._action == SubmitKind.APPEND_BLOCK and is_inline_element(combined) and combined.text:
91
91
  combined.text = " " + combined.text
92
92
  parent.insert(index + 1, combined)
93
93
  index += 1
@@ -200,7 +200,7 @@ class _Submitter:
200
200
  preserved_elements: list[Element] = []
201
201
  for i in range(start_index, end_index):
202
202
  elem = node_element[i]
203
- if not is_inline_tag(elem.tag):
203
+ if not is_inline_element(elem):
204
204
  elem.tail = None
205
205
  preserved_elements.append(elem)
206
206
 
@@ -223,7 +223,7 @@ class _Submitter:
223
223
 
224
224
  if combined.text:
225
225
  will_inject_space = self._action == SubmitKind.APPEND_TEXT or (
226
- is_inline_tag(combined.tag) and self._action == SubmitKind.APPEND_BLOCK
226
+ is_inline_element(combined) and self._action == SubmitKind.APPEND_BLOCK
227
227
  )
228
228
  if tail_element is not None:
229
229
  tail_element.tail = self._append_text_in_element(
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: epub-translator
3
- Version: 0.1.6
3
+ Version: 0.1.8
4
4
  Summary: Translate the epub book using LLM. The translated book will retain the original text and list the translated text side by side with the original text.
5
5
  License: MIT
6
6
  Keywords: epub,llm,translation,translator
@@ -24,6 +24,7 @@ Classifier: Topic :: Software Development :: Localization
24
24
  Classifier: Topic :: Text Processing :: Markup
25
25
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
26
26
  Requires-Dist: jinja2 (>=3.1.6,<4.0.0)
27
+ Requires-Dist: mathml2latex (>=0.2.12,<0.3.0)
27
28
  Requires-Dist: openai (>=2.14.0,<3.0.0)
28
29
  Requires-Dist: resource-segmentation (>=0.0.7,<0.1.0)
29
30
  Requires-Dist: tiktoken (>=0.12.0,<1.0.0)
@@ -59,6 +60,13 @@ Translate EPUB books using Large Language Models while preserving the original t
59
60
  - **Flexible LLM Support**: Works with any OpenAI-compatible API endpoint
60
61
  - **Caching**: Built-in caching for progress recovery when translation fails
61
62
 
63
+ ## Use Cases
64
+
65
+ - **Language Learning**: Read books in their original language with side-by-side translations
66
+ - **Academic Research**: Access foreign literature with bilingual references
67
+ - **Content Localization**: Prepare books for international audiences
68
+ - **Cross-Cultural Reading**: Enjoy literature while understanding cultural nuances
69
+
62
70
  ## Installation
63
71
 
64
72
  ```bash
@@ -357,13 +365,6 @@ llm = LLM(
357
365
  )
358
366
  ```
359
367
 
360
- ## Use Cases
361
-
362
- - **Language Learning**: Read books in their original language with side-by-side translations
363
- - **Academic Research**: Access foreign literature with bilingual references
364
- - **Content Localization**: Prepare books for international audiences
365
- - **Cross-Cultural Reading**: Enjoy literature while understanding cultural nuances
366
-
367
368
  ## Advanced Features
368
369
 
369
370
  ### Custom Translation Prompts
@@ -421,6 +422,105 @@ translate(
421
422
 
422
423
  When using `concurrency > 1`, ensure that any custom callback functions (`on_progress`, `on_fill_failed`) are thread-safe. Built-in callbacks are thread-safe by default.
423
424
 
425
+ ### Token Usage Monitoring
426
+
427
+ Track token consumption during translation to monitor API costs and usage:
428
+
429
+ ```python
430
+ from epub_translator import LLM, translate, language, SubmitKind
431
+
432
+ llm = LLM(
433
+ key="your-api-key",
434
+ url="https://api.openai.com/v1",
435
+ model="gpt-4",
436
+ token_encoding="o200k_base",
437
+ )
438
+
439
+ translate(
440
+ source_path="source.epub",
441
+ target_path="translated.epub",
442
+ target_language=language.ENGLISH,
443
+ submit=SubmitKind.APPEND_BLOCK,
444
+ llm=llm,
445
+ )
446
+
447
+ # Access token statistics after translation
448
+ print(f"Total tokens: {llm.total_tokens}")
449
+ print(f"Input tokens: {llm.input_tokens}")
450
+ print(f"Input cache tokens: {llm.input_cache_tokens}")
451
+ print(f"Output tokens: {llm.output_tokens}")
452
+ ```
453
+
454
+ **Available Statistics:**
455
+
456
+ - `total_tokens` - Total number of tokens used (input + output)
457
+ - `input_tokens` - Number of prompt/input tokens
458
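+ - `input_cache_tokens` - Number of input tokens served from the provider's prompt cache, as reported in the usage field `prompt_tokens_details.cached_tokens` (stays 0 if the endpoint does not report it)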
459
+ - `output_tokens` - Number of generated/completion tokens
460
+
461
+ **Real-time Monitoring:**
462
+
463
+ You can also monitor token usage in real-time during translation:
464
+
465
+ ```python
466
+ from tqdm import tqdm
467
+ import time
468
+
469
+ with tqdm(total=100, desc="Translating", unit="%") as pbar:
470
+ last_progress = 0.0
471
+ start_time = time.time()
472
+
473
+ def on_progress(progress: float):
474
+ global last_progress  # module-level variable here; use `nonlocal` only if this snippet is placed inside a function
475
+ increment = (progress - last_progress) * 100
476
+ pbar.update(increment)
477
+ last_progress = progress
478
+
479
+ # Update token stats in progress bar
480
+ pbar.set_postfix({
481
+ 'tokens': llm.total_tokens,
482
+ 'cost_est': f'${llm.total_tokens * 0.00001:.4f}' # Estimate based on your pricing
483
+ })
484
+
485
+ translate(
486
+ source_path="source.epub",
487
+ target_path="translated.epub",
488
+ target_language=language.ENGLISH,
489
+ submit=SubmitKind.APPEND_BLOCK,
490
+ llm=llm,
491
+ on_progress=on_progress,
492
+ )
493
+
494
+ elapsed = time.time() - start_time
495
+ print(f"\nTranslation completed in {elapsed:.1f}s")
496
+ print(f"Total tokens used: {llm.total_tokens:,}")
497
+ print(f"Average tokens/second: {llm.total_tokens/elapsed:.1f}")
498
+ ```
499
+
500
+ **Dual-LLM Token Tracking:**
501
+
502
+ When using separate LLMs for translation and filling, each LLM tracks its own statistics:
503
+
504
+ ```python
505
+ translation_llm = LLM(key="...", url="...", model="gpt-4", token_encoding="o200k_base")
506
+ fill_llm = LLM(key="...", url="...", model="gpt-4", token_encoding="o200k_base")
507
+
508
+ translate(
509
+ source_path="source.epub",
510
+ target_path="translated.epub",
511
+ target_language=language.ENGLISH,
512
+ submit=SubmitKind.APPEND_BLOCK,
513
+ translation_llm=translation_llm,
514
+ fill_llm=fill_llm,
515
+ )
516
+
517
+ print(f"Translation tokens: {translation_llm.total_tokens}")
518
+ print(f"Fill tokens: {fill_llm.total_tokens}")
519
+ print(f"Combined total: {translation_llm.total_tokens + fill_llm.total_tokens}")
520
+ ```
521
+
522
+ **Note:** Token statistics are cumulative across all API calls made by an `LLM` instance: the counters only ever increase and are never reset. Updates are guarded by a lock, so the totals remain accurate when using concurrent translation.
523
+
424
524
  ## Related Projects
425
525
 
426
526
  ### PDF Craft
@@ -1,14 +1,6 @@
1
1
  epub_translator/__init__.py,sha256=JsiOUPpk5k7q8mXIgnRQWdVVnkJww_KDTg7jXsP7_C4,222
2
2
  epub_translator/data/fill.jinja,sha256=zSytA8Vhp2i6YBZ09F1z9iPJq1-jUaiphoXqTNZwnvo,6964
3
- epub_translator/data/mmltex/README.md,sha256=wwhe5yW1U_7_YZIFKnQVnCOmUl7Mu3gsr3lNnDSJ5Qs,2953
4
- epub_translator/data/mmltex/cmarkup.xsl,sha256=DkhimAATM0XSCfVOfY41-qTPoddqzOHjZ00Pynr4zQE,37707
5
- epub_translator/data/mmltex/entities.xsl,sha256=TYZ5iGg0u9XlDDBBGuZiHL7MsxKc-3OsTIBAVM1GDek,107742
6
- epub_translator/data/mmltex/glayout.xsl,sha256=Ztc7N1wiHaYZlo9u9iuROrIl3uIIIoo1VFIuojXq7TM,6879
7
- epub_translator/data/mmltex/mmltex.xsl,sha256=BVXFbApz-9W2qRKKtBTxptK5vxG2bfB8tv9W1MP5iBI,1384
8
- epub_translator/data/mmltex/scripts.xsl,sha256=f4ei0cDCW3cV-Ra7rC3kC5tRcKdjJxbSpCeQLoohtgo,13697
9
- epub_translator/data/mmltex/tables.xsl,sha256=RxtNo8qDtVAg8_6BuYsafraB_0z7YDAB9D__fT9gmWs,4327
10
- epub_translator/data/mmltex/tokens.xsl,sha256=j3JZRcBhAiiY8o5K3640phfLwxO8JVspCFlSttwBzJk,12373
11
- epub_translator/data/translate.jinja,sha256=93d8kschm5HV-EfXd1kFSIVMObDqTMdoUrwDfce2bhU,820
3
+ epub_translator/data/translate.jinja,sha256=MVAWvgO9kybEFi0zMiZLEWwuRUL3l8PrwJdsoueQeCs,855
12
4
  epub_translator/epub/__init__.py,sha256=aZawPakdkEquL4kRRpyCTdoSQ82l7FGqY4Uw6-ndoGA,154
13
5
  epub_translator/epub/common.py,sha256=4-SpTe8iot9hMfyXILmlUFvYVNYqPAHL5hn1fr2wgis,1180
14
6
  epub_translator/epub/math.py,sha256=-Q2LJQxxjgQZQUe_WlJA9tjzLqgqtw2ZmbGbHsPRp2U,5422
@@ -18,17 +10,18 @@ epub_translator/epub/toc.py,sha256=TKJfyDT4svFkXd6JCNZk2ZEYc9q-5DXnV3zY2UKo8nE,1
18
10
  epub_translator/epub/zip.py,sha256=-3LI8f-ksgU8xCy28NjBOKyQPE8PhPEUPqIKZE1p8dw,2364
19
11
  epub_translator/llm/__init__.py,sha256=YcFYYnXmXyX0RUyC-PDbj5k7Woygp_XOpTI3vDiNSPM,75
20
12
  epub_translator/llm/context.py,sha256=8-0UnrZIaNshR_imy_ed_UpOK7H1a6dOsG-boaYOX8k,4186
21
- epub_translator/llm/core.py,sha256=wQwt6oG68ZN_iQOaytBiPXOC7sI62XII_A4dOHdAt_s,5979
13
+ epub_translator/llm/core.py,sha256=MnToX8Zhr_r4sj9B3s54bclesojQEFarzl0VqHGDKlo,6488
22
14
  epub_translator/llm/error.py,sha256=4efAIQL14DFSvAnSTUfgdAbZRqaWBqOfUGsSfvxa5zM,1503
23
- epub_translator/llm/executor.py,sha256=A0IjQ-s9wBJuhAZAAydneb9zBXWnu2J9inR2Q8F-GDE,5533
15
+ epub_translator/llm/executor.py,sha256=wxgFwWaLmuqAvctO3lcQX4U52aiw7EdaFw9Ut0v-ZzU,5745
24
16
  epub_translator/llm/increasable.py,sha256=8XkKeI1hiHlpMHj8dQ4fW0BkViSx4hH8QfbQsy-5SDw,1297
17
+ epub_translator/llm/statistics.py,sha256=BX75qVWJ9aWbMoFtaQzoE8oVCLh7wiHoR06dX-AAl3E,875
25
18
  epub_translator/llm/types.py,sha256=c-dMAIvlG4R3la3mUTWEw5xei-sIYKmQeBja7mirxcI,219
26
- epub_translator/segment/__init__.py,sha256=UYTv_IKQbEB0DzhFeiuqCvjoJLvB-7XRwlaFS90KmIw,573
19
+ epub_translator/segment/__init__.py,sha256=nCHNaHASElKTbC8HEAQkI1Y12m6kEdX5uJVvVvHKtFg,595
27
20
  epub_translator/segment/block_segment.py,sha256=psNKA_HMIcwZtoug8AtnAcV9_mQ2WXLnXqFsekHzt2g,4570
28
21
  epub_translator/segment/common.py,sha256=gGWYQaJ0tGnWCuF1me9TOo-Q_DrZVakCu2patyFIOs0,714
29
22
  epub_translator/segment/inline_segment.py,sha256=nrRKoJ-vblsNITJeixrCgIOkVQyUXrchMg0XYU_8pLo,14563
30
- epub_translator/segment/text_segment.py,sha256=LhGlugp6MeAB3tk2jxd1kBb2EA8G2ruN49mP_IZehA0,6295
31
- epub_translator/segment/utils.py,sha256=qMqUt33pDRN5Tnuydkodzu2gaQrwTzAnQmXpDuHen1o,1036
23
+ epub_translator/segment/text_segment.py,sha256=E_qgPI09sCV_-PsJtgwcloTa0tpOP3wl0pw5gV9dDNY,6288
24
+ epub_translator/segment/utils.py,sha256=_tlIA1I7rYz9_q-oQ5cPZWPmhTObCXjksQzRtX3beXY,636
32
25
  epub_translator/serial/__init__.py,sha256=b3IMVmWcUwEqHKcGmey88b057pyz5ct946CaUZi4LB4,67
33
26
  epub_translator/serial/chunk.py,sha256=FrTaHikVOd6bLYumnEriTaAQ_DIDLjHm16gh-wBVR9k,1495
34
27
  epub_translator/serial/segment.py,sha256=uEz-ke1KcYrON-68FaUEzMG2CzHlMjvbC11F3ZT4yH0,446
@@ -39,10 +32,10 @@ epub_translator/translation/epub_transcode.py,sha256=_pRzmQgDrlfsibalkUogVi0F0Qy
39
32
  epub_translator/translation/language.py,sha256=88osG0JNYxOkxBjg5Pm-P0Mhiyxf6GqdxoPW12HW0PE,493
40
33
  epub_translator/translation/punctuation.py,sha256=TPCGjEmlAyN3G11VuXdHn-pvUkuWDwWqbTNzw-ij60E,813
41
34
  epub_translator/translation/translator.py,sha256=WC4Yqx-ffhxBhqzMAujE_NQG7BsDwgn95UMNG7OkUSo,6487
42
- epub_translator/translation/xml_interrupter.py,sha256=QxrNpBoR4ZIAvWsa20jz1z_bE_5-G5-nBGjE6IKCTjw,7405
35
+ epub_translator/translation/xml_interrupter.py,sha256=7TRGskn_OxRZT5mvKfjL0VMtU2VCgl1d9ElmfhFG0pM,8628
43
36
  epub_translator/utils.py,sha256=BfZWrYjzDNQ4cFrgvRNzd4i1CKLtPxS8Z4LBHhqEV78,914
44
37
  epub_translator/xml/__init__.py,sha256=qluFTfZYlPmOie8nR2C5O0tZ3UbCQEoEoR-Fq-__79c,160
45
- epub_translator/xml/const.py,sha256=Re2TYmpwG7-jVVgSq3R_K-uYhvAYzcXcRmLFkwCPD9Y,19
38
+ epub_translator/xml/const.py,sha256=tCdeJfGwH5xgS4uOmR-pXSfyWXGxOHMJyZKE46BVkJU,54
46
39
  epub_translator/xml/deduplication.py,sha256=TaMbzeA70VvUQV0X1wcQFVbuMEPJUtj9Hq6iWlUmtAQ,1152
47
40
  epub_translator/xml/friendly/__init__.py,sha256=I5jhnhFWoHvojLsYXH4jfR4Gi8lKFZ3yQ56ze5hEe1M,74
48
41
  epub_translator/xml/friendly/decoder.py,sha256=xRQ5LnSunmYbba_0oT39oUr86-sLYAHYMUGmlseIu2U,2467
@@ -50,7 +43,7 @@ epub_translator/xml/friendly/encoder.py,sha256=evjvw6oE-oCud44IsJ-YZVHn6dtUzjNYX
50
43
  epub_translator/xml/friendly/parser.py,sha256=QlMHA0nfPJbNyx6IwRFrYVw7okuvzDB42NXCauIFV-o,6560
51
44
  epub_translator/xml/friendly/tag.py,sha256=ahaGoYttuAlnFxLFFgTV51KUZSpUiHho-COZX14nxN8,3308
52
45
  epub_translator/xml/friendly/transform.py,sha256=5tG1MJmzrXIR_Z5gmRxwcoKvXBzJBVH0ELeaRsG-8w0,1201
53
- epub_translator/xml/inline.py,sha256=mwFho6wq2gYWmWcg5Cw6OQeteV-a-i6X9OE63fzblpE,1274
46
+ epub_translator/xml/inline.py,sha256=VcaNEF2ebVl2fogVk2yV3f4vOP4rePsPTV_qU3fJCE0,3108
54
47
  epub_translator/xml/self_closing.py,sha256=41ofGUdss9yU51IVwI4It6hKfzh8YcxIR_j-ohD19LE,5240
55
48
  epub_translator/xml/utils.py,sha256=7tQ6L5P0_JXhxONeG64hEeeL5mKjA6NKS1H1Q9B1Cac,1062
56
49
  epub_translator/xml/xml.py,sha256=qQ5Wk1-KVVHE4TX25zGOR7fINsGkXnoq-qyKKNl5no4,1675
@@ -62,10 +55,10 @@ epub_translator/xml_translator/concurrency.py,sha256=ACwoDHNX3xChL0On5yvUSFT8By7
62
55
  epub_translator/xml_translator/hill_climbing.py,sha256=1jvilOkTLzwljJA4Nrel8yU2XGvOXpueUJTK7RAp-XY,4272
63
56
  epub_translator/xml_translator/score.py,sha256=TkXDmr-29p8SzuAp68u_vFDE69y1TyId9S20HT1T_xs,5311
64
57
  epub_translator/xml_translator/stream_mapper.py,sha256=nk8iRUHAUQA2B35_y-JOCo6il8MSxXikWvyl-WA8WAA,10662
65
- epub_translator/xml_translator/submitter.py,sha256=6PGQTnEcOgL3zseDpSzDmU5d9Eg3eO5OfPIGmQp2DVY,14155
58
+ epub_translator/xml_translator/submitter.py,sha256=_ic2_JBPdEd2nMSu2mtQ5OzqpGv0zGrvYaicVUXAiUQ,14159
66
59
  epub_translator/xml_translator/translator.py,sha256=7Ja1jFbmjIgHcmI9V6gg_K0t7qb6in9mhRn54a7qhZ8,9497
67
60
  epub_translator/xml_translator/validation.py,sha256=-OKlSZuD__sjAiEpGAO93YQme4ZDSPmoPjRsAMOCEjc,16668
68
- epub_translator-0.1.6.dist-info/LICENSE,sha256=5RF32sL3LtMOJIErdDKp1ZEYPGXS8WPpsiSz_jMBnGI,1066
69
- epub_translator-0.1.6.dist-info/METADATA,sha256=AcjUb1wmz6cN8PnbwgWJeGlOO9sH445B-qPugLW705M,15638
70
- epub_translator-0.1.6.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
71
- epub_translator-0.1.6.dist-info/RECORD,,
61
+ epub_translator-0.1.8.dist-info/LICENSE,sha256=5RF32sL3LtMOJIErdDKp1ZEYPGXS8WPpsiSz_jMBnGI,1066
62
+ epub_translator-0.1.8.dist-info/METADATA,sha256=DTipkbLL2pnijg7XIXSHogZXJzI009K7ZTkGUMy06d8,18663
63
+ epub_translator-0.1.8.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
64
+ epub_translator-0.1.8.dist-info/RECORD,,