epub-translator 0.1.7__tar.gz → 0.1.8__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {epub_translator-0.1.7 → epub_translator-0.1.8}/PKG-INFO +100 -1
- {epub_translator-0.1.7 → epub_translator-0.1.8}/README.md +99 -0
- {epub_translator-0.1.7 → epub_translator-0.1.8}/epub_translator/llm/core.py +19 -1
- {epub_translator-0.1.7 → epub_translator-0.1.8}/epub_translator/llm/executor.py +5 -0
- epub_translator-0.1.8/epub_translator/llm/statistics.py +25 -0
- {epub_translator-0.1.7 → epub_translator-0.1.8}/epub_translator/segment/text_segment.py +6 -1
- {epub_translator-0.1.7 → epub_translator-0.1.8}/epub_translator/translation/xml_interrupter.py +10 -7
- epub_translator-0.1.8/epub_translator/xml/const.py +2 -0
- {epub_translator-0.1.7 → epub_translator-0.1.8}/epub_translator/xml/inline.py +10 -3
- {epub_translator-0.1.7 → epub_translator-0.1.8}/pyproject.toml +1 -1
- epub_translator-0.1.7/epub_translator/xml/const.py +0 -1
- {epub_translator-0.1.7 → epub_translator-0.1.8}/LICENSE +0 -0
- {epub_translator-0.1.7 → epub_translator-0.1.8}/epub_translator/__init__.py +0 -0
- {epub_translator-0.1.7 → epub_translator-0.1.8}/epub_translator/data/fill.jinja +0 -0
- {epub_translator-0.1.7 → epub_translator-0.1.8}/epub_translator/data/translate.jinja +0 -0
- {epub_translator-0.1.7 → epub_translator-0.1.8}/epub_translator/epub/__init__.py +0 -0
- {epub_translator-0.1.7 → epub_translator-0.1.8}/epub_translator/epub/common.py +0 -0
- {epub_translator-0.1.7 → epub_translator-0.1.8}/epub_translator/epub/math.py +0 -0
- {epub_translator-0.1.7 → epub_translator-0.1.8}/epub_translator/epub/metadata.py +0 -0
- {epub_translator-0.1.7 → epub_translator-0.1.8}/epub_translator/epub/spines.py +0 -0
- {epub_translator-0.1.7 → epub_translator-0.1.8}/epub_translator/epub/toc.py +0 -0
- {epub_translator-0.1.7 → epub_translator-0.1.8}/epub_translator/epub/zip.py +0 -0
- {epub_translator-0.1.7 → epub_translator-0.1.8}/epub_translator/llm/__init__.py +0 -0
- {epub_translator-0.1.7 → epub_translator-0.1.8}/epub_translator/llm/context.py +0 -0
- {epub_translator-0.1.7 → epub_translator-0.1.8}/epub_translator/llm/error.py +0 -0
- {epub_translator-0.1.7 → epub_translator-0.1.8}/epub_translator/llm/increasable.py +0 -0
- {epub_translator-0.1.7 → epub_translator-0.1.8}/epub_translator/llm/types.py +0 -0
- {epub_translator-0.1.7 → epub_translator-0.1.8}/epub_translator/segment/__init__.py +0 -0
- {epub_translator-0.1.7 → epub_translator-0.1.8}/epub_translator/segment/block_segment.py +0 -0
- {epub_translator-0.1.7 → epub_translator-0.1.8}/epub_translator/segment/common.py +0 -0
- {epub_translator-0.1.7 → epub_translator-0.1.8}/epub_translator/segment/inline_segment.py +0 -0
- {epub_translator-0.1.7 → epub_translator-0.1.8}/epub_translator/segment/utils.py +0 -0
- {epub_translator-0.1.7 → epub_translator-0.1.8}/epub_translator/serial/__init__.py +0 -0
- {epub_translator-0.1.7 → epub_translator-0.1.8}/epub_translator/serial/chunk.py +0 -0
- {epub_translator-0.1.7 → epub_translator-0.1.8}/epub_translator/serial/segment.py +0 -0
- {epub_translator-0.1.7 → epub_translator-0.1.8}/epub_translator/serial/splitter.py +0 -0
- {epub_translator-0.1.7 → epub_translator-0.1.8}/epub_translator/template.py +0 -0
- {epub_translator-0.1.7 → epub_translator-0.1.8}/epub_translator/translation/__init__.py +0 -0
- {epub_translator-0.1.7 → epub_translator-0.1.8}/epub_translator/translation/epub_transcode.py +0 -0
- {epub_translator-0.1.7 → epub_translator-0.1.8}/epub_translator/translation/language.py +0 -0
- {epub_translator-0.1.7 → epub_translator-0.1.8}/epub_translator/translation/punctuation.py +0 -0
- {epub_translator-0.1.7 → epub_translator-0.1.8}/epub_translator/translation/translator.py +0 -0
- {epub_translator-0.1.7 → epub_translator-0.1.8}/epub_translator/utils.py +0 -0
- {epub_translator-0.1.7 → epub_translator-0.1.8}/epub_translator/xml/__init__.py +0 -0
- {epub_translator-0.1.7 → epub_translator-0.1.8}/epub_translator/xml/deduplication.py +0 -0
- {epub_translator-0.1.7 → epub_translator-0.1.8}/epub_translator/xml/friendly/__init__.py +0 -0
- {epub_translator-0.1.7 → epub_translator-0.1.8}/epub_translator/xml/friendly/decoder.py +0 -0
- {epub_translator-0.1.7 → epub_translator-0.1.8}/epub_translator/xml/friendly/encoder.py +0 -0
- {epub_translator-0.1.7 → epub_translator-0.1.8}/epub_translator/xml/friendly/parser.py +0 -0
- {epub_translator-0.1.7 → epub_translator-0.1.8}/epub_translator/xml/friendly/tag.py +0 -0
- {epub_translator-0.1.7 → epub_translator-0.1.8}/epub_translator/xml/friendly/transform.py +0 -0
- {epub_translator-0.1.7 → epub_translator-0.1.8}/epub_translator/xml/self_closing.py +0 -0
- {epub_translator-0.1.7 → epub_translator-0.1.8}/epub_translator/xml/utils.py +0 -0
- {epub_translator-0.1.7 → epub_translator-0.1.8}/epub_translator/xml/xml.py +0 -0
- {epub_translator-0.1.7 → epub_translator-0.1.8}/epub_translator/xml/xml_like.py +0 -0
- {epub_translator-0.1.7 → epub_translator-0.1.8}/epub_translator/xml_translator/__init__.py +0 -0
- {epub_translator-0.1.7 → epub_translator-0.1.8}/epub_translator/xml_translator/callbacks.py +0 -0
- {epub_translator-0.1.7 → epub_translator-0.1.8}/epub_translator/xml_translator/common.py +0 -0
- {epub_translator-0.1.7 → epub_translator-0.1.8}/epub_translator/xml_translator/concurrency.py +0 -0
- {epub_translator-0.1.7 → epub_translator-0.1.8}/epub_translator/xml_translator/hill_climbing.py +0 -0
- {epub_translator-0.1.7 → epub_translator-0.1.8}/epub_translator/xml_translator/score.py +0 -0
- {epub_translator-0.1.7 → epub_translator-0.1.8}/epub_translator/xml_translator/stream_mapper.py +0 -0
- {epub_translator-0.1.7 → epub_translator-0.1.8}/epub_translator/xml_translator/submitter.py +0 -0
- {epub_translator-0.1.7 → epub_translator-0.1.8}/epub_translator/xml_translator/translator.py +0 -0
- {epub_translator-0.1.7 → epub_translator-0.1.8}/epub_translator/xml_translator/validation.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: epub-translator
|
|
3
|
-
Version: 0.1.7
|
|
3
|
+
Version: 0.1.8
|
|
4
4
|
Summary: Translate the epub book using LLM. The translated book will retain the original text and list the translated text side by side with the original text.
|
|
5
5
|
License: MIT
|
|
6
6
|
Keywords: epub,llm,translation,translator
|
|
@@ -422,6 +422,105 @@ translate(
|
|
|
422
422
|
|
|
423
423
|
When using `concurrency > 1`, ensure that any custom callback functions (`on_progress`, `on_fill_failed`) are thread-safe. Built-in callbacks are thread-safe by default.
|
|
424
424
|
|
|
425
|
+
### Token Usage Monitoring
|
|
426
|
+
|
|
427
|
+
Track token consumption during translation to monitor API costs and usage:
|
|
428
|
+
|
|
429
|
+
```python
|
|
430
|
+
from epub_translator import LLM, translate, language, SubmitKind
|
|
431
|
+
|
|
432
|
+
llm = LLM(
|
|
433
|
+
key="your-api-key",
|
|
434
|
+
url="https://api.openai.com/v1",
|
|
435
|
+
model="gpt-4",
|
|
436
|
+
token_encoding="o200k_base",
|
|
437
|
+
)
|
|
438
|
+
|
|
439
|
+
translate(
|
|
440
|
+
source_path="source.epub",
|
|
441
|
+
target_path="translated.epub",
|
|
442
|
+
target_language=language.ENGLISH,
|
|
443
|
+
submit=SubmitKind.APPEND_BLOCK,
|
|
444
|
+
llm=llm,
|
|
445
|
+
)
|
|
446
|
+
|
|
447
|
+
# Access token statistics after translation
|
|
448
|
+
print(f"Total tokens: {llm.total_tokens}")
|
|
449
|
+
print(f"Input tokens: {llm.input_tokens}")
|
|
450
|
+
print(f"Input cache tokens: {llm.input_cache_tokens}")
|
|
451
|
+
print(f"Output tokens: {llm.output_tokens}")
|
|
452
|
+
```
|
|
453
|
+
|
|
454
|
+
**Available Statistics:**
|
|
455
|
+
|
|
456
|
+
- `total_tokens` - Total number of tokens used (input + output)
|
|
457
|
+
- `input_tokens` - Number of prompt/input tokens
|
|
458
|
+
- `input_cache_tokens` - Number of cached input tokens (when using prompt caching)
|
|
459
|
+
- `output_tokens` - Number of generated/completion tokens
|
|
460
|
+
|
|
461
|
+
**Real-time Monitoring:**
|
|
462
|
+
|
|
463
|
+
You can also monitor token usage in real-time during translation:
|
|
464
|
+
|
|
465
|
+
```python
|
|
466
|
+
from tqdm import tqdm
|
|
467
|
+
import time
|
|
468
|
+
|
|
469
|
+
with tqdm(total=100, desc="Translating", unit="%") as pbar:
|
|
470
|
+
last_progress = 0.0
|
|
471
|
+
start_time = time.time()
|
|
472
|
+
|
|
473
|
+
def on_progress(progress: float):
|
|
474
|
+
nonlocal last_progress
|
|
475
|
+
increment = (progress - last_progress) * 100
|
|
476
|
+
pbar.update(increment)
|
|
477
|
+
last_progress = progress
|
|
478
|
+
|
|
479
|
+
# Update token stats in progress bar
|
|
480
|
+
pbar.set_postfix({
|
|
481
|
+
'tokens': llm.total_tokens,
|
|
482
|
+
'cost_est': f'${llm.total_tokens * 0.00001:.4f}' # Estimate based on your pricing
|
|
483
|
+
})
|
|
484
|
+
|
|
485
|
+
translate(
|
|
486
|
+
source_path="source.epub",
|
|
487
|
+
target_path="translated.epub",
|
|
488
|
+
target_language=language.ENGLISH,
|
|
489
|
+
submit=SubmitKind.APPEND_BLOCK,
|
|
490
|
+
llm=llm,
|
|
491
|
+
on_progress=on_progress,
|
|
492
|
+
)
|
|
493
|
+
|
|
494
|
+
elapsed = time.time() - start_time
|
|
495
|
+
print(f"\nTranslation completed in {elapsed:.1f}s")
|
|
496
|
+
print(f"Total tokens used: {llm.total_tokens:,}")
|
|
497
|
+
print(f"Average tokens/second: {llm.total_tokens/elapsed:.1f}")
|
|
498
|
+
```
|
|
499
|
+
|
|
500
|
+
**Dual-LLM Token Tracking:**
|
|
501
|
+
|
|
502
|
+
When using separate LLMs for translation and filling, each LLM tracks its own statistics:
|
|
503
|
+
|
|
504
|
+
```python
|
|
505
|
+
translation_llm = LLM(key="...", url="...", model="gpt-4", token_encoding="o200k_base")
|
|
506
|
+
fill_llm = LLM(key="...", url="...", model="gpt-4", token_encoding="o200k_base")
|
|
507
|
+
|
|
508
|
+
translate(
|
|
509
|
+
source_path="source.epub",
|
|
510
|
+
target_path="translated.epub",
|
|
511
|
+
target_language=language.ENGLISH,
|
|
512
|
+
submit=SubmitKind.APPEND_BLOCK,
|
|
513
|
+
translation_llm=translation_llm,
|
|
514
|
+
fill_llm=fill_llm,
|
|
515
|
+
)
|
|
516
|
+
|
|
517
|
+
print(f"Translation tokens: {translation_llm.total_tokens}")
|
|
518
|
+
print(f"Fill tokens: {fill_llm.total_tokens}")
|
|
519
|
+
print(f"Combined total: {translation_llm.total_tokens + fill_llm.total_tokens}")
|
|
520
|
+
```
|
|
521
|
+
|
|
522
|
+
**Note:** Token statistics are cumulative across all API calls made by the LLM instance. The counts only increase and are thread-safe when using concurrent translation.
|
|
523
|
+
|
|
425
524
|
## Related Projects
|
|
426
525
|
|
|
427
526
|
### PDF Craft
|
|
@@ -388,6 +388,105 @@ translate(
|
|
|
388
388
|
|
|
389
389
|
When using `concurrency > 1`, ensure that any custom callback functions (`on_progress`, `on_fill_failed`) are thread-safe. Built-in callbacks are thread-safe by default.
|
|
390
390
|
|
|
391
|
+
### Token Usage Monitoring
|
|
392
|
+
|
|
393
|
+
Track token consumption during translation to monitor API costs and usage:
|
|
394
|
+
|
|
395
|
+
```python
|
|
396
|
+
from epub_translator import LLM, translate, language, SubmitKind
|
|
397
|
+
|
|
398
|
+
llm = LLM(
|
|
399
|
+
key="your-api-key",
|
|
400
|
+
url="https://api.openai.com/v1",
|
|
401
|
+
model="gpt-4",
|
|
402
|
+
token_encoding="o200k_base",
|
|
403
|
+
)
|
|
404
|
+
|
|
405
|
+
translate(
|
|
406
|
+
source_path="source.epub",
|
|
407
|
+
target_path="translated.epub",
|
|
408
|
+
target_language=language.ENGLISH,
|
|
409
|
+
submit=SubmitKind.APPEND_BLOCK,
|
|
410
|
+
llm=llm,
|
|
411
|
+
)
|
|
412
|
+
|
|
413
|
+
# Access token statistics after translation
|
|
414
|
+
print(f"Total tokens: {llm.total_tokens}")
|
|
415
|
+
print(f"Input tokens: {llm.input_tokens}")
|
|
416
|
+
print(f"Input cache tokens: {llm.input_cache_tokens}")
|
|
417
|
+
print(f"Output tokens: {llm.output_tokens}")
|
|
418
|
+
```
|
|
419
|
+
|
|
420
|
+
**Available Statistics:**
|
|
421
|
+
|
|
422
|
+
- `total_tokens` - Total number of tokens used (input + output)
|
|
423
|
+
- `input_tokens` - Number of prompt/input tokens
|
|
424
|
+
- `input_cache_tokens` - Number of cached input tokens (when using prompt caching)
|
|
425
|
+
- `output_tokens` - Number of generated/completion tokens
|
|
426
|
+
|
|
427
|
+
**Real-time Monitoring:**
|
|
428
|
+
|
|
429
|
+
You can also monitor token usage in real-time during translation:
|
|
430
|
+
|
|
431
|
+
```python
|
|
432
|
+
from tqdm import tqdm
|
|
433
|
+
import time
|
|
434
|
+
|
|
435
|
+
with tqdm(total=100, desc="Translating", unit="%") as pbar:
|
|
436
|
+
last_progress = 0.0
|
|
437
|
+
start_time = time.time()
|
|
438
|
+
|
|
439
|
+
def on_progress(progress: float):
|
|
440
|
+
nonlocal last_progress
|
|
441
|
+
increment = (progress - last_progress) * 100
|
|
442
|
+
pbar.update(increment)
|
|
443
|
+
last_progress = progress
|
|
444
|
+
|
|
445
|
+
# Update token stats in progress bar
|
|
446
|
+
pbar.set_postfix({
|
|
447
|
+
'tokens': llm.total_tokens,
|
|
448
|
+
'cost_est': f'${llm.total_tokens * 0.00001:.4f}' # Estimate based on your pricing
|
|
449
|
+
})
|
|
450
|
+
|
|
451
|
+
translate(
|
|
452
|
+
source_path="source.epub",
|
|
453
|
+
target_path="translated.epub",
|
|
454
|
+
target_language=language.ENGLISH,
|
|
455
|
+
submit=SubmitKind.APPEND_BLOCK,
|
|
456
|
+
llm=llm,
|
|
457
|
+
on_progress=on_progress,
|
|
458
|
+
)
|
|
459
|
+
|
|
460
|
+
elapsed = time.time() - start_time
|
|
461
|
+
print(f"\nTranslation completed in {elapsed:.1f}s")
|
|
462
|
+
print(f"Total tokens used: {llm.total_tokens:,}")
|
|
463
|
+
print(f"Average tokens/second: {llm.total_tokens/elapsed:.1f}")
|
|
464
|
+
```
|
|
465
|
+
|
|
466
|
+
**Dual-LLM Token Tracking:**
|
|
467
|
+
|
|
468
|
+
When using separate LLMs for translation and filling, each LLM tracks its own statistics:
|
|
469
|
+
|
|
470
|
+
```python
|
|
471
|
+
translation_llm = LLM(key="...", url="...", model="gpt-4", token_encoding="o200k_base")
|
|
472
|
+
fill_llm = LLM(key="...", url="...", model="gpt-4", token_encoding="o200k_base")
|
|
473
|
+
|
|
474
|
+
translate(
|
|
475
|
+
source_path="source.epub",
|
|
476
|
+
target_path="translated.epub",
|
|
477
|
+
target_language=language.ENGLISH,
|
|
478
|
+
submit=SubmitKind.APPEND_BLOCK,
|
|
479
|
+
translation_llm=translation_llm,
|
|
480
|
+
fill_llm=fill_llm,
|
|
481
|
+
)
|
|
482
|
+
|
|
483
|
+
print(f"Translation tokens: {translation_llm.total_tokens}")
|
|
484
|
+
print(f"Fill tokens: {fill_llm.total_tokens}")
|
|
485
|
+
print(f"Combined total: {translation_llm.total_tokens + fill_llm.total_tokens}")
|
|
486
|
+
```
|
|
487
|
+
|
|
488
|
+
**Note:** Token statistics are cumulative across all API calls made by the LLM instance. The counts only increase and are thread-safe when using concurrent translation.
|
|
489
|
+
|
|
391
490
|
## Related Projects
|
|
392
491
|
|
|
393
492
|
### PDF Craft
|
|
@@ -13,6 +13,7 @@ from ..template import create_env
|
|
|
13
13
|
from .context import LLMContext
|
|
14
14
|
from .executor import LLMExecutor
|
|
15
15
|
from .increasable import Increasable
|
|
16
|
+
from .statistics import Statistics
|
|
16
17
|
from .types import Message
|
|
17
18
|
|
|
18
19
|
# Global state for logger filename generation
|
|
@@ -44,7 +45,7 @@ class LLM:
|
|
|
44
45
|
self._temperature: Increasable = Increasable(temperature)
|
|
45
46
|
self._cache_path: Path | None = self._ensure_dir_path(cache_path)
|
|
46
47
|
self._logger_save_path: Path | None = self._ensure_dir_path(log_dir_path)
|
|
47
|
-
|
|
48
|
+
self._statistics = Statistics()
|
|
48
49
|
self._executor = LLMExecutor(
|
|
49
50
|
url=url,
|
|
50
51
|
model=model,
|
|
@@ -53,12 +54,29 @@ class LLM:
|
|
|
53
54
|
retry_times=retry_times,
|
|
54
55
|
retry_interval_seconds=retry_interval_seconds,
|
|
55
56
|
create_logger=self._create_logger,
|
|
57
|
+
statistics=self._statistics,
|
|
56
58
|
)
|
|
57
59
|
|
|
58
60
|
@property
|
|
59
61
|
def encoding(self) -> Encoding:
|
|
60
62
|
return self._encoding
|
|
61
63
|
|
|
64
|
+
@property
|
|
65
|
+
def total_tokens(self) -> int:
|
|
66
|
+
return self._statistics.total_tokens
|
|
67
|
+
|
|
68
|
+
@property
|
|
69
|
+
def input_tokens(self) -> int:
|
|
70
|
+
return self._statistics.input_tokens
|
|
71
|
+
|
|
72
|
+
@property
|
|
73
|
+
def input_cache_tokens(self) -> int:
|
|
74
|
+
return self._statistics.input_cache_tokens
|
|
75
|
+
|
|
76
|
+
@property
|
|
77
|
+
def output_tokens(self) -> int:
|
|
78
|
+
return self._statistics.output_tokens
|
|
79
|
+
|
|
62
80
|
def context(self, cache_seed_content: str | None = None) -> LLMContext:
|
|
63
81
|
return LLMContext(
|
|
64
82
|
executor=self._executor,
|
|
@@ -7,6 +7,7 @@ from openai import OpenAI
|
|
|
7
7
|
from openai.types.chat import ChatCompletionMessageParam
|
|
8
8
|
|
|
9
9
|
from .error import is_retry_error
|
|
10
|
+
from .statistics import Statistics
|
|
10
11
|
from .types import Message, MessageRole
|
|
11
12
|
|
|
12
13
|
|
|
@@ -20,12 +21,14 @@ class LLMExecutor:
|
|
|
20
21
|
retry_times: int,
|
|
21
22
|
retry_interval_seconds: float,
|
|
22
23
|
create_logger: Callable[[], Logger | None],
|
|
24
|
+
statistics: Statistics,
|
|
23
25
|
) -> None:
|
|
24
26
|
self._model_name: str = model
|
|
25
27
|
self._timeout: float | None = timeout
|
|
26
28
|
self._retry_times: int = retry_times
|
|
27
29
|
self._retry_interval_seconds: float = retry_interval_seconds
|
|
28
30
|
self._create_logger: Callable[[], Logger | None] = create_logger
|
|
31
|
+
self._statistics = statistics
|
|
29
32
|
self._client = OpenAI(
|
|
30
33
|
api_key=api_key,
|
|
31
34
|
base_url=url,
|
|
@@ -156,6 +159,7 @@ class LLMExecutor:
|
|
|
156
159
|
model=self._model_name,
|
|
157
160
|
messages=messages,
|
|
158
161
|
stream=True,
|
|
162
|
+
stream_options={"include_usage": True},
|
|
159
163
|
top_p=top_p,
|
|
160
164
|
temperature=temperature,
|
|
161
165
|
max_tokens=max_tokens,
|
|
@@ -164,4 +168,5 @@ class LLMExecutor:
|
|
|
164
168
|
for chunk in stream:
|
|
165
169
|
if chunk.choices and chunk.choices[0].delta.content:
|
|
166
170
|
buffer.write(chunk.choices[0].delta.content)
|
|
171
|
+
self._statistics.submit_usage(chunk.usage)
|
|
167
172
|
return buffer.getvalue()
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
from threading import Lock
|
|
2
|
+
|
|
3
|
+
from openai.types import CompletionUsage
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class Statistics:
|
|
7
|
+
def __init__(self) -> None:
|
|
8
|
+
self._lock = Lock()
|
|
9
|
+
self.total_tokens = 0
|
|
10
|
+
self.input_tokens = 0
|
|
11
|
+
self.input_cache_tokens = 0
|
|
12
|
+
self.output_tokens = 0
|
|
13
|
+
|
|
14
|
+
def submit_usage(self, usage: CompletionUsage | None) -> None:
|
|
15
|
+
if usage is None:
|
|
16
|
+
return
|
|
17
|
+
with self._lock:
|
|
18
|
+
if usage.total_tokens:
|
|
19
|
+
self.total_tokens += usage.total_tokens
|
|
20
|
+
if usage.prompt_tokens:
|
|
21
|
+
self.input_tokens += usage.prompt_tokens
|
|
22
|
+
if usage.prompt_tokens_details and usage.prompt_tokens_details.cached_tokens:
|
|
23
|
+
self.input_cache_tokens += usage.prompt_tokens_details.cached_tokens
|
|
24
|
+
if usage.completion_tokens:
|
|
25
|
+
self.output_tokens += usage.completion_tokens
|
|
@@ -4,7 +4,12 @@ from enum import Enum, auto
|
|
|
4
4
|
from typing import Self
|
|
5
5
|
from xml.etree.ElementTree import Element
|
|
6
6
|
|
|
7
|
-
from ..xml import
|
|
7
|
+
from ..xml import (
|
|
8
|
+
expand_left_element_texts,
|
|
9
|
+
expand_right_element_texts,
|
|
10
|
+
is_inline_element,
|
|
11
|
+
normalize_text_in_element,
|
|
12
|
+
)
|
|
8
13
|
|
|
9
14
|
|
|
10
15
|
class TextPosition(Enum):
|
{epub_translator-0.1.7 → epub_translator-0.1.8}/epub_translator/translation/xml_interrupter.py
RENAMED
|
@@ -6,8 +6,8 @@ from bs4 import BeautifulSoup
|
|
|
6
6
|
from mathml2latex.mathml import process_mathml
|
|
7
7
|
|
|
8
8
|
from ..segment import TextSegment, combine_text_segments, find_block_depth
|
|
9
|
-
from ..utils import ensure_list
|
|
10
|
-
from ..xml import clone_element
|
|
9
|
+
from ..utils import ensure_list, normalize_whitespace
|
|
10
|
+
from ..xml import DISPLAY_ATTRIBUTE, clone_element, is_inline_element
|
|
11
11
|
|
|
12
12
|
_ID_KEY = "__XML_INTERRUPTER_ID"
|
|
13
13
|
_MATH_TAG = "math"
|
|
@@ -87,9 +87,9 @@ class XMLInterrupter:
|
|
|
87
87
|
_ID_KEY: cast(str, interrupted_element.get(_ID_KEY)),
|
|
88
88
|
},
|
|
89
89
|
)
|
|
90
|
-
interrupted_display = interrupted_element.get(
|
|
90
|
+
interrupted_display = interrupted_element.get(DISPLAY_ATTRIBUTE, None)
|
|
91
91
|
if interrupted_display is not None:
|
|
92
|
-
placeholder_element.set(
|
|
92
|
+
placeholder_element.set(DISPLAY_ATTRIBUTE, interrupted_display)
|
|
93
93
|
|
|
94
94
|
raw_parent_stack = text_segment.parent_stack[:interrupted_index]
|
|
95
95
|
parent_stack = raw_parent_stack + [placeholder_element]
|
|
@@ -159,10 +159,13 @@ class XMLInterrupter:
|
|
|
159
159
|
|
|
160
160
|
if latex is None:
|
|
161
161
|
latex = "".join(t.text for t in text_segments)
|
|
162
|
-
|
|
163
|
-
latex = f"${latex}$"
|
|
162
|
+
latex = normalize_whitespace(latex).strip()
|
|
164
163
|
else:
|
|
165
|
-
latex =
|
|
164
|
+
latex = normalize_whitespace(latex).strip()
|
|
165
|
+
if is_inline_element(math_element):
|
|
166
|
+
latex = f"${latex}$"
|
|
167
|
+
else:
|
|
168
|
+
latex = f"$${latex}$$"
|
|
166
169
|
|
|
167
170
|
return f" {latex} "
|
|
168
171
|
|
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
from xml.etree.ElementTree import Element
|
|
2
2
|
|
|
3
|
+
from .const import DISPLAY_ATTRIBUTE
|
|
4
|
+
|
|
3
5
|
# HTML inline-level elements
|
|
4
6
|
# Reference: https://developer.mozilla.org/en-US/docs/Web/HTML/Inline_elements
|
|
5
7
|
# Reference: https://developer.mozilla.org/en-US/docs/Glossary/Inline-level_content
|
|
@@ -105,9 +107,14 @@ _HTML_INLINE_TAGS = frozenset(
|
|
|
105
107
|
|
|
106
108
|
|
|
107
109
|
def is_inline_element(element: Element) -> bool:
|
|
108
|
-
|
|
110
|
+
tag = element.tag.lower()
|
|
111
|
+
if tag in _HTML_INLINE_TAGS:
|
|
109
112
|
return True
|
|
110
|
-
display = element.get(
|
|
111
|
-
if display is not None
|
|
113
|
+
display = element.get(DISPLAY_ATTRIBUTE, None)
|
|
114
|
+
if display is not None:
|
|
115
|
+
display = display.lower()
|
|
116
|
+
if display == "inline":
|
|
117
|
+
return True
|
|
118
|
+
if tag == "math" and display != "block":
|
|
112
119
|
return True
|
|
113
120
|
return False
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "epub-translator"
|
|
3
|
-
version = "0.1.7"
|
|
3
|
+
version = "0.1.8"
|
|
4
4
|
description = "Translate the epub book using LLM. The translated book will retain the original text and list the translated text side by side with the original text."
|
|
5
5
|
keywords = ["epub", "llm", "translation", "translator"]
|
|
6
6
|
authors = [
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
ID_KEY: str = "id"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{epub_translator-0.1.7 → epub_translator-0.1.8}/epub_translator/translation/epub_transcode.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{epub_translator-0.1.7 → epub_translator-0.1.8}/epub_translator/xml_translator/concurrency.py
RENAMED
|
File without changes
|
{epub_translator-0.1.7 → epub_translator-0.1.8}/epub_translator/xml_translator/hill_climbing.py
RENAMED
|
File without changes
|
|
File without changes
|
{epub_translator-0.1.7 → epub_translator-0.1.8}/epub_translator/xml_translator/stream_mapper.py
RENAMED
|
File without changes
|
|
File without changes
|
{epub_translator-0.1.7 → epub_translator-0.1.8}/epub_translator/xml_translator/translator.py
RENAMED
|
File without changes
|
{epub_translator-0.1.7 → epub_translator-0.1.8}/epub_translator/xml_translator/validation.py
RENAMED
|
File without changes
|