epub-translator 0.1.7__py3-none-any.whl → 0.1.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -13,6 +13,7 @@ from ..template import create_env
13
13
  from .context import LLMContext
14
14
  from .executor import LLMExecutor
15
15
  from .increasable import Increasable
16
+ from .statistics import Statistics
16
17
  from .types import Message
17
18
 
18
19
  # Global state for logger filename generation
@@ -44,7 +45,7 @@ class LLM:
44
45
  self._temperature: Increasable = Increasable(temperature)
45
46
  self._cache_path: Path | None = self._ensure_dir_path(cache_path)
46
47
  self._logger_save_path: Path | None = self._ensure_dir_path(log_dir_path)
47
-
48
+ self._statistics = Statistics()
48
49
  self._executor = LLMExecutor(
49
50
  url=url,
50
51
  model=model,
@@ -53,12 +54,29 @@ class LLM:
53
54
  retry_times=retry_times,
54
55
  retry_interval_seconds=retry_interval_seconds,
55
56
  create_logger=self._create_logger,
57
+ statistics=self._statistics,
56
58
  )
57
59
 
58
60
  @property
59
61
  def encoding(self) -> Encoding:
60
62
  return self._encoding
61
63
 
64
+ @property
65
+ def total_tokens(self) -> int:
66
+ return self._statistics.total_tokens
67
+
68
+ @property
69
+ def input_tokens(self) -> int:
70
+ return self._statistics.input_tokens
71
+
72
+ @property
73
+ def input_cache_tokens(self) -> int:
74
+ return self._statistics.input_cache_tokens
75
+
76
+ @property
77
+ def output_tokens(self) -> int:
78
+ return self._statistics.output_tokens
79
+
62
80
  def context(self, cache_seed_content: str | None = None) -> LLMContext:
63
81
  return LLMContext(
64
82
  executor=self._executor,
@@ -7,6 +7,7 @@ from openai import OpenAI
7
7
  from openai.types.chat import ChatCompletionMessageParam
8
8
 
9
9
  from .error import is_retry_error
10
+ from .statistics import Statistics
10
11
  from .types import Message, MessageRole
11
12
 
12
13
 
@@ -20,12 +21,14 @@ class LLMExecutor:
20
21
  retry_times: int,
21
22
  retry_interval_seconds: float,
22
23
  create_logger: Callable[[], Logger | None],
24
+ statistics: Statistics,
23
25
  ) -> None:
24
26
  self._model_name: str = model
25
27
  self._timeout: float | None = timeout
26
28
  self._retry_times: int = retry_times
27
29
  self._retry_interval_seconds: float = retry_interval_seconds
28
30
  self._create_logger: Callable[[], Logger | None] = create_logger
31
+ self._statistics = statistics
29
32
  self._client = OpenAI(
30
33
  api_key=api_key,
31
34
  base_url=url,
@@ -156,6 +159,7 @@ class LLMExecutor:
156
159
  model=self._model_name,
157
160
  messages=messages,
158
161
  stream=True,
162
+ stream_options={"include_usage": True},
159
163
  top_p=top_p,
160
164
  temperature=temperature,
161
165
  max_tokens=max_tokens,
@@ -164,4 +168,5 @@ class LLMExecutor:
164
168
  for chunk in stream:
165
169
  if chunk.choices and chunk.choices[0].delta.content:
166
170
  buffer.write(chunk.choices[0].delta.content)
171
+ self._statistics.submit_usage(chunk.usage)
167
172
  return buffer.getvalue()
@@ -0,0 +1,25 @@
1
+ from threading import Lock
2
+
3
+ from openai.types import CompletionUsage
4
+
5
+
6
+ class Statistics:
7
+ def __init__(self) -> None:
8
+ self._lock = Lock()
9
+ self.total_tokens = 0
10
+ self.input_tokens = 0
11
+ self.input_cache_tokens = 0
12
+ self.output_tokens = 0
13
+
14
+ def submit_usage(self, usage: CompletionUsage | None) -> None:
15
+ if usage is None:
16
+ return
17
+ with self._lock:
18
+ if usage.total_tokens:
19
+ self.total_tokens += usage.total_tokens
20
+ if usage.prompt_tokens:
21
+ self.input_tokens += usage.prompt_tokens
22
+ if usage.prompt_tokens_details and usage.prompt_tokens_details.cached_tokens:
23
+ self.input_cache_tokens += usage.prompt_tokens_details.cached_tokens
24
+ if usage.completion_tokens:
25
+ self.output_tokens += usage.completion_tokens
@@ -4,7 +4,12 @@ from enum import Enum, auto
4
4
  from typing import Self
5
5
  from xml.etree.ElementTree import Element
6
6
 
7
- from ..xml import expand_left_element_texts, expand_right_element_texts, is_inline_element, normalize_text_in_element
7
+ from ..xml import (
8
+ expand_left_element_texts,
9
+ expand_right_element_texts,
10
+ is_inline_element,
11
+ normalize_text_in_element,
12
+ )
8
13
 
9
14
 
10
15
  class TextPosition(Enum):
@@ -6,8 +6,8 @@ from bs4 import BeautifulSoup
6
6
  from mathml2latex.mathml import process_mathml
7
7
 
8
8
  from ..segment import TextSegment, combine_text_segments, find_block_depth
9
- from ..utils import ensure_list
10
- from ..xml import clone_element
9
+ from ..utils import ensure_list, normalize_whitespace
10
+ from ..xml import DISPLAY_ATTRIBUTE, clone_element, is_inline_element
11
11
 
12
12
  _ID_KEY = "__XML_INTERRUPTER_ID"
13
13
  _MATH_TAG = "math"
@@ -87,9 +87,9 @@ class XMLInterrupter:
87
87
  _ID_KEY: cast(str, interrupted_element.get(_ID_KEY)),
88
88
  },
89
89
  )
90
- interrupted_display = interrupted_element.get("display", None)
90
+ interrupted_display = interrupted_element.get(DISPLAY_ATTRIBUTE, None)
91
91
  if interrupted_display is not None:
92
- placeholder_element.set("display", interrupted_display)
92
+ placeholder_element.set(DISPLAY_ATTRIBUTE, interrupted_display)
93
93
 
94
94
  raw_parent_stack = text_segment.parent_stack[:interrupted_index]
95
95
  parent_stack = raw_parent_stack + [placeholder_element]
@@ -159,10 +159,13 @@ class XMLInterrupter:
159
159
 
160
160
  if latex is None:
161
161
  latex = "".join(t.text for t in text_segments)
162
- elif math_element.get("display", None) == "inline":
163
- latex = f"${latex}$"
162
+ latex = normalize_whitespace(latex).strip()
164
163
  else:
165
- latex = f"$${latex}$$"
164
+ latex = normalize_whitespace(latex).strip()
165
+ if is_inline_element(math_element):
166
+ latex = f"${latex}$"
167
+ else:
168
+ latex = f"$${latex}$$"
166
169
 
167
170
  return f" {latex} "
168
171
 
@@ -1 +1,2 @@
1
1
  ID_KEY: str = "id"
2
+ DISPLAY_ATTRIBUTE: str = "display"
@@ -1,5 +1,7 @@
1
1
  from xml.etree.ElementTree import Element
2
2
 
3
+ from .const import DISPLAY_ATTRIBUTE
4
+
3
5
  # HTML inline-level elements
4
6
  # Reference: https://developer.mozilla.org/en-US/docs/Web/HTML/Inline_elements
5
7
  # Reference: https://developer.mozilla.org/en-US/docs/Glossary/Inline-level_content
@@ -105,9 +107,14 @@ _HTML_INLINE_TAGS = frozenset(
105
107
 
106
108
 
107
109
  def is_inline_element(element: Element) -> bool:
108
- if element.tag.lower() in _HTML_INLINE_TAGS:
110
+ tag = element.tag.lower()
111
+ if tag in _HTML_INLINE_TAGS:
109
112
  return True
110
- display = element.get("display", None)
111
- if display is not None and display.lower() == "inline":
113
+ display = element.get(DISPLAY_ATTRIBUTE, None)
114
+ if display is not None:
115
+ display = display.lower()
116
+ if display == "inline":
117
+ return True
118
+ if tag == "math" and display != "block":
112
119
  return True
113
120
  return False
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: epub-translator
3
- Version: 0.1.7
3
+ Version: 0.1.8
4
4
  Summary: Translate the epub book using LLM. The translated book will retain the original text and list the translated text side by side with the original text.
5
5
  License: MIT
6
6
  Keywords: epub,llm,translation,translator
@@ -422,6 +422,105 @@ translate(
422
422
 
423
423
  When using `concurrency > 1`, ensure that any custom callback functions (`on_progress`, `on_fill_failed`) are thread-safe. Built-in callbacks are thread-safe by default.
424
424
 
425
+ ### Token Usage Monitoring
426
+
427
+ Track token consumption during translation to monitor API costs and usage:
428
+
429
+ ```python
430
+ from epub_translator import LLM, translate, language, SubmitKind
431
+
432
+ llm = LLM(
433
+ key="your-api-key",
434
+ url="https://api.openai.com/v1",
435
+ model="gpt-4",
436
+ token_encoding="o200k_base",
437
+ )
438
+
439
+ translate(
440
+ source_path="source.epub",
441
+ target_path="translated.epub",
442
+ target_language=language.ENGLISH,
443
+ submit=SubmitKind.APPEND_BLOCK,
444
+ llm=llm,
445
+ )
446
+
447
+ # Access token statistics after translation
448
+ print(f"Total tokens: {llm.total_tokens}")
449
+ print(f"Input tokens: {llm.input_tokens}")
450
+ print(f"Input cache tokens: {llm.input_cache_tokens}")
451
+ print(f"Output tokens: {llm.output_tokens}")
452
+ ```
453
+
454
+ **Available Statistics:**
455
+
456
+ - `total_tokens` - Total number of tokens used (input + output)
457
+ - `input_tokens` - Number of prompt/input tokens
458
+ - `input_cache_tokens` - Number of cached input tokens (when using prompt caching)
459
+ - `output_tokens` - Number of generated/completion tokens
460
+
461
+ **Real-time Monitoring:**
462
+
463
+ You can also monitor token usage in real-time during translation:
464
+
465
+ ```python
466
+ from tqdm import tqdm
467
+ import time
468
+
469
+ with tqdm(total=100, desc="Translating", unit="%") as pbar:
470
+ last_progress = 0.0
471
+ start_time = time.time()
472
+
473
+ def on_progress(progress: float):
474
+ global last_progress
475
+ increment = (progress - last_progress) * 100
476
+ pbar.update(increment)
477
+ last_progress = progress
478
+
479
+ # Update token stats in progress bar
480
+ pbar.set_postfix({
481
+ 'tokens': llm.total_tokens,
482
+ 'cost_est': f'${llm.total_tokens * 0.00001:.4f}' # Estimate based on your pricing
483
+ })
484
+
485
+ translate(
486
+ source_path="source.epub",
487
+ target_path="translated.epub",
488
+ target_language=language.ENGLISH,
489
+ submit=SubmitKind.APPEND_BLOCK,
490
+ llm=llm,
491
+ on_progress=on_progress,
492
+ )
493
+
494
+ elapsed = time.time() - start_time
495
+ print(f"\nTranslation completed in {elapsed:.1f}s")
496
+ print(f"Total tokens used: {llm.total_tokens:,}")
497
+ print(f"Average tokens/second: {llm.total_tokens/elapsed:.1f}")
498
+ ```
499
+
500
+ **Dual-LLM Token Tracking:**
501
+
502
+ When using separate LLMs for translation and filling, each LLM tracks its own statistics:
503
+
504
+ ```python
505
+ translation_llm = LLM(key="...", url="...", model="gpt-4", token_encoding="o200k_base")
506
+ fill_llm = LLM(key="...", url="...", model="gpt-4", token_encoding="o200k_base")
507
+
508
+ translate(
509
+ source_path="source.epub",
510
+ target_path="translated.epub",
511
+ target_language=language.ENGLISH,
512
+ submit=SubmitKind.APPEND_BLOCK,
513
+ translation_llm=translation_llm,
514
+ fill_llm=fill_llm,
515
+ )
516
+
517
+ print(f"Translation tokens: {translation_llm.total_tokens}")
518
+ print(f"Fill tokens: {fill_llm.total_tokens}")
519
+ print(f"Combined total: {translation_llm.total_tokens + fill_llm.total_tokens}")
520
+ ```
521
+
522
+ **Note:** Token statistics are cumulative across all API calls made by the LLM instance. The counts only increase and are thread-safe when using concurrent translation.
523
+
425
524
  ## Related Projects
426
525
 
427
526
  ### PDF Craft
@@ -10,16 +10,17 @@ epub_translator/epub/toc.py,sha256=TKJfyDT4svFkXd6JCNZk2ZEYc9q-5DXnV3zY2UKo8nE,1
10
10
  epub_translator/epub/zip.py,sha256=-3LI8f-ksgU8xCy28NjBOKyQPE8PhPEUPqIKZE1p8dw,2364
11
11
  epub_translator/llm/__init__.py,sha256=YcFYYnXmXyX0RUyC-PDbj5k7Woygp_XOpTI3vDiNSPM,75
12
12
  epub_translator/llm/context.py,sha256=8-0UnrZIaNshR_imy_ed_UpOK7H1a6dOsG-boaYOX8k,4186
13
- epub_translator/llm/core.py,sha256=wQwt6oG68ZN_iQOaytBiPXOC7sI62XII_A4dOHdAt_s,5979
13
+ epub_translator/llm/core.py,sha256=MnToX8Zhr_r4sj9B3s54bclesojQEFarzl0VqHGDKlo,6488
14
14
  epub_translator/llm/error.py,sha256=4efAIQL14DFSvAnSTUfgdAbZRqaWBqOfUGsSfvxa5zM,1503
15
- epub_translator/llm/executor.py,sha256=A0IjQ-s9wBJuhAZAAydneb9zBXWnu2J9inR2Q8F-GDE,5533
15
+ epub_translator/llm/executor.py,sha256=wxgFwWaLmuqAvctO3lcQX4U52aiw7EdaFw9Ut0v-ZzU,5745
16
16
  epub_translator/llm/increasable.py,sha256=8XkKeI1hiHlpMHj8dQ4fW0BkViSx4hH8QfbQsy-5SDw,1297
17
+ epub_translator/llm/statistics.py,sha256=BX75qVWJ9aWbMoFtaQzoE8oVCLh7wiHoR06dX-AAl3E,875
17
18
  epub_translator/llm/types.py,sha256=c-dMAIvlG4R3la3mUTWEw5xei-sIYKmQeBja7mirxcI,219
18
19
  epub_translator/segment/__init__.py,sha256=nCHNaHASElKTbC8HEAQkI1Y12m6kEdX5uJVvVvHKtFg,595
19
20
  epub_translator/segment/block_segment.py,sha256=psNKA_HMIcwZtoug8AtnAcV9_mQ2WXLnXqFsekHzt2g,4570
20
21
  epub_translator/segment/common.py,sha256=gGWYQaJ0tGnWCuF1me9TOo-Q_DrZVakCu2patyFIOs0,714
21
22
  epub_translator/segment/inline_segment.py,sha256=nrRKoJ-vblsNITJeixrCgIOkVQyUXrchMg0XYU_8pLo,14563
22
- epub_translator/segment/text_segment.py,sha256=DHv8bkBpVYVUI01hOIf9Jdc7D3v7SdbLD1MeL4MUh4U,6267
23
+ epub_translator/segment/text_segment.py,sha256=E_qgPI09sCV_-PsJtgwcloTa0tpOP3wl0pw5gV9dDNY,6288
23
24
  epub_translator/segment/utils.py,sha256=_tlIA1I7rYz9_q-oQ5cPZWPmhTObCXjksQzRtX3beXY,636
24
25
  epub_translator/serial/__init__.py,sha256=b3IMVmWcUwEqHKcGmey88b057pyz5ct946CaUZi4LB4,67
25
26
  epub_translator/serial/chunk.py,sha256=FrTaHikVOd6bLYumnEriTaAQ_DIDLjHm16gh-wBVR9k,1495
@@ -31,10 +32,10 @@ epub_translator/translation/epub_transcode.py,sha256=_pRzmQgDrlfsibalkUogVi0F0Qy
31
32
  epub_translator/translation/language.py,sha256=88osG0JNYxOkxBjg5Pm-P0Mhiyxf6GqdxoPW12HW0PE,493
32
33
  epub_translator/translation/punctuation.py,sha256=TPCGjEmlAyN3G11VuXdHn-pvUkuWDwWqbTNzw-ij60E,813
33
34
  epub_translator/translation/translator.py,sha256=WC4Yqx-ffhxBhqzMAujE_NQG7BsDwgn95UMNG7OkUSo,6487
34
- epub_translator/translation/xml_interrupter.py,sha256=c0wwfZzGpOkfKJ5v5p-lNgYlcqD0B6i2e6cQq-Tl0lI,8426
35
+ epub_translator/translation/xml_interrupter.py,sha256=7TRGskn_OxRZT5mvKfjL0VMtU2VCgl1d9ElmfhFG0pM,8628
35
36
  epub_translator/utils.py,sha256=BfZWrYjzDNQ4cFrgvRNzd4i1CKLtPxS8Z4LBHhqEV78,914
36
37
  epub_translator/xml/__init__.py,sha256=qluFTfZYlPmOie8nR2C5O0tZ3UbCQEoEoR-Fq-__79c,160
37
- epub_translator/xml/const.py,sha256=Re2TYmpwG7-jVVgSq3R_K-uYhvAYzcXcRmLFkwCPD9Y,19
38
+ epub_translator/xml/const.py,sha256=tCdeJfGwH5xgS4uOmR-pXSfyWXGxOHMJyZKE46BVkJU,54
38
39
  epub_translator/xml/deduplication.py,sha256=TaMbzeA70VvUQV0X1wcQFVbuMEPJUtj9Hq6iWlUmtAQ,1152
39
40
  epub_translator/xml/friendly/__init__.py,sha256=I5jhnhFWoHvojLsYXH4jfR4Gi8lKFZ3yQ56ze5hEe1M,74
40
41
  epub_translator/xml/friendly/decoder.py,sha256=xRQ5LnSunmYbba_0oT39oUr86-sLYAHYMUGmlseIu2U,2467
@@ -42,7 +43,7 @@ epub_translator/xml/friendly/encoder.py,sha256=evjvw6oE-oCud44IsJ-YZVHn6dtUzjNYX
42
43
  epub_translator/xml/friendly/parser.py,sha256=QlMHA0nfPJbNyx6IwRFrYVw7okuvzDB42NXCauIFV-o,6560
43
44
  epub_translator/xml/friendly/tag.py,sha256=ahaGoYttuAlnFxLFFgTV51KUZSpUiHho-COZX14nxN8,3308
44
45
  epub_translator/xml/friendly/transform.py,sha256=5tG1MJmzrXIR_Z5gmRxwcoKvXBzJBVH0ELeaRsG-8w0,1201
45
- epub_translator/xml/inline.py,sha256=fT_zm2NqHraJEwYXBHyqo9XjBFQUWRJO7YHB1rerkAc,2945
46
+ epub_translator/xml/inline.py,sha256=VcaNEF2ebVl2fogVk2yV3f4vOP4rePsPTV_qU3fJCE0,3108
46
47
  epub_translator/xml/self_closing.py,sha256=41ofGUdss9yU51IVwI4It6hKfzh8YcxIR_j-ohD19LE,5240
47
48
  epub_translator/xml/utils.py,sha256=7tQ6L5P0_JXhxONeG64hEeeL5mKjA6NKS1H1Q9B1Cac,1062
48
49
  epub_translator/xml/xml.py,sha256=qQ5Wk1-KVVHE4TX25zGOR7fINsGkXnoq-qyKKNl5no4,1675
@@ -57,7 +58,7 @@ epub_translator/xml_translator/stream_mapper.py,sha256=nk8iRUHAUQA2B35_y-JOCo6il
57
58
  epub_translator/xml_translator/submitter.py,sha256=_ic2_JBPdEd2nMSu2mtQ5OzqpGv0zGrvYaicVUXAiUQ,14159
58
59
  epub_translator/xml_translator/translator.py,sha256=7Ja1jFbmjIgHcmI9V6gg_K0t7qb6in9mhRn54a7qhZ8,9497
59
60
  epub_translator/xml_translator/validation.py,sha256=-OKlSZuD__sjAiEpGAO93YQme4ZDSPmoPjRsAMOCEjc,16668
60
- epub_translator-0.1.7.dist-info/LICENSE,sha256=5RF32sL3LtMOJIErdDKp1ZEYPGXS8WPpsiSz_jMBnGI,1066
61
- epub_translator-0.1.7.dist-info/METADATA,sha256=-ySr_REjXY7qG5dZslElcQeIQ_U8r-ggkVtKytZBYMI,15684
62
- epub_translator-0.1.7.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
63
- epub_translator-0.1.7.dist-info/RECORD,,
61
+ epub_translator-0.1.8.dist-info/LICENSE,sha256=5RF32sL3LtMOJIErdDKp1ZEYPGXS8WPpsiSz_jMBnGI,1066
62
+ epub_translator-0.1.8.dist-info/METADATA,sha256=DTipkbLL2pnijg7XIXSHogZXJzI009K7ZTkGUMy06d8,18663
63
+ epub_translator-0.1.8.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
64
+ epub_translator-0.1.8.dist-info/RECORD,,