epub-translator 0.1.7__tar.gz → 0.1.9__tar.gz

This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (67)
  1. {epub_translator-0.1.7 → epub_translator-0.1.9}/PKG-INFO +106 -16
  2. {epub_translator-0.1.7 → epub_translator-0.1.9}/README.md +105 -15
  3. epub_translator-0.1.9/epub_translator/epub/__init__.py +4 -0
  4. epub_translator-0.1.9/epub_translator/epub/metadata.py +85 -0
  5. {epub_translator-0.1.7 → epub_translator-0.1.9}/epub_translator/epub/toc.py +76 -94
  6. {epub_translator-0.1.7 → epub_translator-0.1.9}/epub_translator/llm/core.py +19 -1
  7. {epub_translator-0.1.7 → epub_translator-0.1.9}/epub_translator/llm/executor.py +5 -0
  8. epub_translator-0.1.9/epub_translator/llm/statistics.py +25 -0
  9. {epub_translator-0.1.7 → epub_translator-0.1.9}/epub_translator/segment/text_segment.py +6 -1
  10. {epub_translator-0.1.7 → epub_translator-0.1.9}/epub_translator/translation/translator.py +16 -6
  11. {epub_translator-0.1.7 → epub_translator-0.1.9}/epub_translator/translation/xml_interrupter.py +10 -7
  12. epub_translator-0.1.9/epub_translator/xml/const.py +2 -0
  13. {epub_translator-0.1.7 → epub_translator-0.1.9}/epub_translator/xml/inline.py +10 -3
  14. {epub_translator-0.1.7 → epub_translator-0.1.9}/epub_translator/xml/self_closing.py +5 -4
  15. {epub_translator-0.1.7 → epub_translator-0.1.9}/epub_translator/xml/xml_like.py +23 -1
  16. {epub_translator-0.1.7 → epub_translator-0.1.9}/pyproject.toml +1 -1
  17. epub_translator-0.1.7/epub_translator/epub/__init__.py +0 -4
  18. epub_translator-0.1.7/epub_translator/epub/metadata.py +0 -122
  19. epub_translator-0.1.7/epub_translator/xml/const.py +0 -1
  20. {epub_translator-0.1.7 → epub_translator-0.1.9}/LICENSE +0 -0
  21. {epub_translator-0.1.7 → epub_translator-0.1.9}/epub_translator/__init__.py +0 -0
  22. {epub_translator-0.1.7 → epub_translator-0.1.9}/epub_translator/data/fill.jinja +0 -0
  23. {epub_translator-0.1.7 → epub_translator-0.1.9}/epub_translator/data/translate.jinja +0 -0
  24. {epub_translator-0.1.7 → epub_translator-0.1.9}/epub_translator/epub/common.py +0 -0
  25. {epub_translator-0.1.7 → epub_translator-0.1.9}/epub_translator/epub/math.py +0 -0
  26. {epub_translator-0.1.7 → epub_translator-0.1.9}/epub_translator/epub/spines.py +0 -0
  27. {epub_translator-0.1.7 → epub_translator-0.1.9}/epub_translator/epub/zip.py +0 -0
  28. {epub_translator-0.1.7 → epub_translator-0.1.9}/epub_translator/llm/__init__.py +0 -0
  29. {epub_translator-0.1.7 → epub_translator-0.1.9}/epub_translator/llm/context.py +0 -0
  30. {epub_translator-0.1.7 → epub_translator-0.1.9}/epub_translator/llm/error.py +0 -0
  31. {epub_translator-0.1.7 → epub_translator-0.1.9}/epub_translator/llm/increasable.py +0 -0
  32. {epub_translator-0.1.7 → epub_translator-0.1.9}/epub_translator/llm/types.py +0 -0
  33. {epub_translator-0.1.7 → epub_translator-0.1.9}/epub_translator/segment/__init__.py +0 -0
  34. {epub_translator-0.1.7 → epub_translator-0.1.9}/epub_translator/segment/block_segment.py +0 -0
  35. {epub_translator-0.1.7 → epub_translator-0.1.9}/epub_translator/segment/common.py +0 -0
  36. {epub_translator-0.1.7 → epub_translator-0.1.9}/epub_translator/segment/inline_segment.py +0 -0
  37. {epub_translator-0.1.7 → epub_translator-0.1.9}/epub_translator/segment/utils.py +0 -0
  38. {epub_translator-0.1.7 → epub_translator-0.1.9}/epub_translator/serial/__init__.py +0 -0
  39. {epub_translator-0.1.7 → epub_translator-0.1.9}/epub_translator/serial/chunk.py +0 -0
  40. {epub_translator-0.1.7 → epub_translator-0.1.9}/epub_translator/serial/segment.py +0 -0
  41. {epub_translator-0.1.7 → epub_translator-0.1.9}/epub_translator/serial/splitter.py +0 -0
  42. {epub_translator-0.1.7 → epub_translator-0.1.9}/epub_translator/template.py +0 -0
  43. {epub_translator-0.1.7 → epub_translator-0.1.9}/epub_translator/translation/__init__.py +0 -0
  44. {epub_translator-0.1.7 → epub_translator-0.1.9}/epub_translator/translation/epub_transcode.py +0 -0
  45. {epub_translator-0.1.7 → epub_translator-0.1.9}/epub_translator/translation/language.py +0 -0
  46. {epub_translator-0.1.7 → epub_translator-0.1.9}/epub_translator/translation/punctuation.py +0 -0
  47. {epub_translator-0.1.7 → epub_translator-0.1.9}/epub_translator/utils.py +0 -0
  48. {epub_translator-0.1.7 → epub_translator-0.1.9}/epub_translator/xml/__init__.py +0 -0
  49. {epub_translator-0.1.7 → epub_translator-0.1.9}/epub_translator/xml/deduplication.py +0 -0
  50. {epub_translator-0.1.7 → epub_translator-0.1.9}/epub_translator/xml/friendly/__init__.py +0 -0
  51. {epub_translator-0.1.7 → epub_translator-0.1.9}/epub_translator/xml/friendly/decoder.py +0 -0
  52. {epub_translator-0.1.7 → epub_translator-0.1.9}/epub_translator/xml/friendly/encoder.py +0 -0
  53. {epub_translator-0.1.7 → epub_translator-0.1.9}/epub_translator/xml/friendly/parser.py +0 -0
  54. {epub_translator-0.1.7 → epub_translator-0.1.9}/epub_translator/xml/friendly/tag.py +0 -0
  55. {epub_translator-0.1.7 → epub_translator-0.1.9}/epub_translator/xml/friendly/transform.py +0 -0
  56. {epub_translator-0.1.7 → epub_translator-0.1.9}/epub_translator/xml/utils.py +0 -0
  57. {epub_translator-0.1.7 → epub_translator-0.1.9}/epub_translator/xml/xml.py +0 -0
  58. {epub_translator-0.1.7 → epub_translator-0.1.9}/epub_translator/xml_translator/__init__.py +0 -0
  59. {epub_translator-0.1.7 → epub_translator-0.1.9}/epub_translator/xml_translator/callbacks.py +0 -0
  60. {epub_translator-0.1.7 → epub_translator-0.1.9}/epub_translator/xml_translator/common.py +0 -0
  61. {epub_translator-0.1.7 → epub_translator-0.1.9}/epub_translator/xml_translator/concurrency.py +0 -0
  62. {epub_translator-0.1.7 → epub_translator-0.1.9}/epub_translator/xml_translator/hill_climbing.py +0 -0
  63. {epub_translator-0.1.7 → epub_translator-0.1.9}/epub_translator/xml_translator/score.py +0 -0
  64. {epub_translator-0.1.7 → epub_translator-0.1.9}/epub_translator/xml_translator/stream_mapper.py +0 -0
  65. {epub_translator-0.1.7 → epub_translator-0.1.9}/epub_translator/xml_translator/submitter.py +0 -0
  66. {epub_translator-0.1.7 → epub_translator-0.1.9}/epub_translator/xml_translator/translator.py +0 -0
  67. {epub_translator-0.1.7 → epub_translator-0.1.9}/epub_translator/xml_translator/validation.py +0 -0
@@ -1,6 +1,6 @@
  Metadata-Version: 2.3
  Name: epub-translator
- Version: 0.1.7
+ Version: 0.1.9
  Summary: Translate the epub book using LLM. The translated book will retain the original text and list the translated text side by side with the original text.
  License: MIT
  Keywords: epub,llm,translation,translator
@@ -46,26 +46,17 @@ Description-Content-Type: text/markdown
  </div>


- Translate EPUB books using Large Language Models while preserving the original text. The translated content is displayed side-by-side with the original, creating bilingual books perfect for language learning and cross-reference reading.
+ Want to read a book in a foreign language without losing the original context? EPUB Translator transforms any EPUB into a bilingual edition with AI-powered translations displayed side-by-side with the original text.

- ![Translation Effect](./docs/images/translation.png)
+ Whether you're learning a new language, conducting academic research, or simply enjoying foreign literature, you get both versions in one book - preserving all formatting, images, and structure.

- ## Features
+ ![Translation Effect](./docs/images/translation.png)

- - **Bilingual Output**: Preserves original text alongside translations for easy comparison
- - **LLM-Powered**: Leverages large language models for high-quality, context-aware translations
- - **Format Preservation**: Maintains EPUB structure, styles, images, and formatting
- - **Complete Translation**: Translates chapter content, table of contents, and metadata
- - **Progress Tracking**: Monitor translation progress with built-in callbacks
- - **Flexible LLM Support**: Works with any OpenAI-compatible API endpoint
- - **Caching**: Built-in caching for progress recovery when translation fails
+ ### Online Demo

- ## Use Cases
+ We provide an [online demo platform](https://hub.oomol.com/package/books-translator) where you can try EPUB Translator's bilingual translation capabilities without any installation. Simply upload your EPUB file and get a translated bilingual edition.

- - **Language Learning**: Read books in their original language with side-by-side translations
- - **Academic Research**: Access foreign literature with bilingual references
- - **Content Localization**: Prepare books for international audiences
- - **Cross-Cultural Reading**: Enjoy literature while understanding cultural nuances
+ [![EPUB Translator Online Demo](docs/images/online-en.png)](https://hub.oomol.com/package/books-translator)

  ## Installation

@@ -422,6 +413,105 @@ translate(

  When using `concurrency > 1`, ensure that any custom callback functions (`on_progress`, `on_fill_failed`) are thread-safe. Built-in callbacks are thread-safe by default.

+ ### Token Usage Monitoring
+
+ Track token consumption during translation to monitor API costs and usage:
+
+ ```python
+ from epub_translator import LLM, translate, language, SubmitKind
+
+ llm = LLM(
+     key="your-api-key",
+     url="https://api.openai.com/v1",
+     model="gpt-4",
+     token_encoding="o200k_base",
+ )
+
+ translate(
+     source_path="source.epub",
+     target_path="translated.epub",
+     target_language=language.ENGLISH,
+     submit=SubmitKind.APPEND_BLOCK,
+     llm=llm,
+ )
+
+ # Access token statistics after translation
+ print(f"Total tokens: {llm.total_tokens}")
+ print(f"Input tokens: {llm.input_tokens}")
+ print(f"Input cache tokens: {llm.input_cache_tokens}")
+ print(f"Output tokens: {llm.output_tokens}")
+ ```
+
+ **Available Statistics:**
+
+ - `total_tokens` - Total number of tokens used (input + output)
+ - `input_tokens` - Number of prompt/input tokens
+ - `input_cache_tokens` - Number of cached input tokens (when using prompt caching)
+ - `output_tokens` - Number of generated/completion tokens
+
+ **Real-time Monitoring:**
+
+ You can also monitor token usage in real-time during translation:
+
+ ```python
+ from tqdm import tqdm
+ import time
+
+ with tqdm(total=100, desc="Translating", unit="%") as pbar:
+     last_progress = [0.0]  # mutable holder so the nested callback can update it
+     start_time = time.time()
+
+     def on_progress(progress: float):
+         increment = (progress - last_progress[0]) * 100
+         pbar.update(increment)
+         last_progress[0] = progress
+
+         # Update token stats in progress bar
+         pbar.set_postfix({
+             'tokens': llm.total_tokens,
+             'cost_est': f'${llm.total_tokens * 0.00001:.4f}'  # Estimate based on your pricing
+         })
+
+     translate(
+         source_path="source.epub",
+         target_path="translated.epub",
+         target_language=language.ENGLISH,
+         submit=SubmitKind.APPEND_BLOCK,
+         llm=llm,
+         on_progress=on_progress,
+     )
+
+ elapsed = time.time() - start_time
+ print(f"\nTranslation completed in {elapsed:.1f}s")
+ print(f"Total tokens used: {llm.total_tokens:,}")
+ print(f"Average tokens/second: {llm.total_tokens/elapsed:.1f}")
+ ```
+
+ **Dual-LLM Token Tracking:**
+
+ When using separate LLMs for translation and filling, each LLM tracks its own statistics:
+
+ ```python
+ translation_llm = LLM(key="...", url="...", model="gpt-4", token_encoding="o200k_base")
+ fill_llm = LLM(key="...", url="...", model="gpt-4", token_encoding="o200k_base")
+
+ translate(
+     source_path="source.epub",
+     target_path="translated.epub",
+     target_language=language.ENGLISH,
+     submit=SubmitKind.APPEND_BLOCK,
+     translation_llm=translation_llm,
+     fill_llm=fill_llm,
+ )
+
+ print(f"Translation tokens: {translation_llm.total_tokens}")
+ print(f"Fill tokens: {fill_llm.total_tokens}")
+ print(f"Combined total: {translation_llm.total_tokens + fill_llm.total_tokens}")
+ ```
+
+ **Note:** Token statistics are cumulative across all API calls made by the LLM instance. The counts only increase and are thread-safe when using concurrent translation.
+
  ## Related Projects

  ### PDF Craft
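
Editor's note: the four counters defined in the section above compose into simple derived metrics. A small sketch, not part of the package, assuming (as with OpenAI-style usage reports) that cached tokens are counted within `input_tokens`:

```python
# Derived metrics from the cumulative counters on an `llm` instance.
uncached_input = llm.input_tokens - llm.input_cache_tokens  # tokens billed at the full input rate
hit_rate = llm.input_cache_tokens / llm.input_tokens if llm.input_tokens else 0.0
print(f"Prompt cache hit rate: {hit_rate:.1%} ({uncached_input:,} uncached input tokens)")
```
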
@@ -12,26 +12,17 @@
  </div>


- Translate EPUB books using Large Language Models while preserving the original text. The translated content is displayed side-by-side with the original, creating bilingual books perfect for language learning and cross-reference reading.
+ Want to read a book in a foreign language without losing the original context? EPUB Translator transforms any EPUB into a bilingual edition with AI-powered translations displayed side-by-side with the original text.

- ![Translation Effect](./docs/images/translation.png)
+ Whether you're learning a new language, conducting academic research, or simply enjoying foreign literature, you get both versions in one book - preserving all formatting, images, and structure.

- ## Features
+ ![Translation Effect](./docs/images/translation.png)

- - **Bilingual Output**: Preserves original text alongside translations for easy comparison
- - **LLM-Powered**: Leverages large language models for high-quality, context-aware translations
- - **Format Preservation**: Maintains EPUB structure, styles, images, and formatting
- - **Complete Translation**: Translates chapter content, table of contents, and metadata
- - **Progress Tracking**: Monitor translation progress with built-in callbacks
- - **Flexible LLM Support**: Works with any OpenAI-compatible API endpoint
- - **Caching**: Built-in caching for progress recovery when translation fails
+ ### Online Demo

- ## Use Cases
+ We provide an [online demo platform](https://hub.oomol.com/package/books-translator) where you can try EPUB Translator's bilingual translation capabilities without any installation. Simply upload your EPUB file and get a translated bilingual edition.

- - **Language Learning**: Read books in their original language with side-by-side translations
- - **Academic Research**: Access foreign literature with bilingual references
- - **Content Localization**: Prepare books for international audiences
- - **Cross-Cultural Reading**: Enjoy literature while understanding cultural nuances
+ [![EPUB Translator Online Demo](docs/images/online-en.png)](https://hub.oomol.com/package/books-translator)

  ## Installation

@@ -388,6 +379,105 @@ translate(

  When using `concurrency > 1`, ensure that any custom callback functions (`on_progress`, `on_fill_failed`) are thread-safe. Built-in callbacks are thread-safe by default.

+ ### Token Usage Monitoring
+
+ Track token consumption during translation to monitor API costs and usage:
+
+ ```python
+ from epub_translator import LLM, translate, language, SubmitKind
+
+ llm = LLM(
+     key="your-api-key",
+     url="https://api.openai.com/v1",
+     model="gpt-4",
+     token_encoding="o200k_base",
+ )
+
+ translate(
+     source_path="source.epub",
+     target_path="translated.epub",
+     target_language=language.ENGLISH,
+     submit=SubmitKind.APPEND_BLOCK,
+     llm=llm,
+ )
+
+ # Access token statistics after translation
+ print(f"Total tokens: {llm.total_tokens}")
+ print(f"Input tokens: {llm.input_tokens}")
+ print(f"Input cache tokens: {llm.input_cache_tokens}")
+ print(f"Output tokens: {llm.output_tokens}")
+ ```
+
+ **Available Statistics:**
+
+ - `total_tokens` - Total number of tokens used (input + output)
+ - `input_tokens` - Number of prompt/input tokens
+ - `input_cache_tokens` - Number of cached input tokens (when using prompt caching)
+ - `output_tokens` - Number of generated/completion tokens
+
+ **Real-time Monitoring:**
+
+ You can also monitor token usage in real-time during translation:
+
+ ```python
+ from tqdm import tqdm
+ import time
+
+ with tqdm(total=100, desc="Translating", unit="%") as pbar:
+     last_progress = [0.0]  # mutable holder so the nested callback can update it
+     start_time = time.time()
+
+     def on_progress(progress: float):
+         increment = (progress - last_progress[0]) * 100
+         pbar.update(increment)
+         last_progress[0] = progress
+
+         # Update token stats in progress bar
+         pbar.set_postfix({
+             'tokens': llm.total_tokens,
+             'cost_est': f'${llm.total_tokens * 0.00001:.4f}'  # Estimate based on your pricing
+         })
+
+     translate(
+         source_path="source.epub",
+         target_path="translated.epub",
+         target_language=language.ENGLISH,
+         submit=SubmitKind.APPEND_BLOCK,
+         llm=llm,
+         on_progress=on_progress,
+     )
+
+ elapsed = time.time() - start_time
+ print(f"\nTranslation completed in {elapsed:.1f}s")
+ print(f"Total tokens used: {llm.total_tokens:,}")
+ print(f"Average tokens/second: {llm.total_tokens/elapsed:.1f}")
+ ```
+
+ **Dual-LLM Token Tracking:**
+
+ When using separate LLMs for translation and filling, each LLM tracks its own statistics:
+
+ ```python
+ translation_llm = LLM(key="...", url="...", model="gpt-4", token_encoding="o200k_base")
+ fill_llm = LLM(key="...", url="...", model="gpt-4", token_encoding="o200k_base")
+
+ translate(
+     source_path="source.epub",
+     target_path="translated.epub",
+     target_language=language.ENGLISH,
+     submit=SubmitKind.APPEND_BLOCK,
+     translation_llm=translation_llm,
+     fill_llm=fill_llm,
+ )
+
+ print(f"Translation tokens: {translation_llm.total_tokens}")
+ print(f"Fill tokens: {fill_llm.total_tokens}")
+ print(f"Combined total: {translation_llm.total_tokens + fill_llm.total_tokens}")
+ ```
+
+ **Note:** Token statistics are cumulative across all API calls made by the LLM instance. The counts only increase and are thread-safe when using concurrent translation.
+
  ## Related Projects

  ### PDF Craft
@@ -0,0 +1,4 @@
+ from .metadata import MetadataContext, read_metadata, write_metadata
+ from .spines import search_spine_paths
+ from .toc import Toc, TocContext, read_toc, write_toc
+ from .zip import Zip
@@ -0,0 +1,85 @@
+ from dataclasses import dataclass
+ from pathlib import Path
+
+ from ..xml import XMLLikeNode
+ from .common import find_opf_path
+ from .zip import Zip
+
+
+ @dataclass
+ class MetadataField:
+     tag_name: str
+     text: str
+
+
+ @dataclass
+ class MetadataContext:
+     opf_path: Path  # path to the OPF file
+     xml_node: XMLLikeNode  # XMLLikeNode object, preserving the original file information
+
+
+ SKIP_FIELDS = frozenset(
+     (
+         "language",
+         "identifier",
+         "date",
+         "meta",
+         "contributor",  # Usually technical information
+     )
+ )
+
+
+ def read_metadata(zip: Zip) -> tuple[list[MetadataField], MetadataContext]:
+     opf_path = find_opf_path(zip)
+
+     with zip.read(opf_path) as f:
+         xml_node = XMLLikeNode(f, is_html_like=False)
+
+     metadata_elem = None
+     for child in xml_node.element:
+         if child.tag.endswith("metadata"):
+             metadata_elem = child
+             break
+
+     if metadata_elem is None:
+         context = MetadataContext(opf_path=opf_path, xml_node=xml_node)
+         return [], context
+
+     fields: list[MetadataField] = []
+     for elem in metadata_elem:
+         tag_name = elem.tag
+         if elem.text and elem.text.strip() and tag_name not in SKIP_FIELDS:
+             fields.append(MetadataField(tag_name=tag_name, text=elem.text.strip()))
+
+     context = MetadataContext(opf_path=opf_path, xml_node=xml_node)
+     return fields, context
+
+
+ def write_metadata(zip: Zip, fields: list[MetadataField], context: MetadataContext) -> None:
+     metadata_elem = None
+     for child in context.xml_node.element:
+         if child.tag.endswith("metadata"):
+             metadata_elem = child
+             break
+
+     if metadata_elem is None:
+         return
+
+     fields_by_tag: dict[str, list[str]] = {}
+     for field in fields:
+         if field.tag_name not in fields_by_tag:
+             fields_by_tag[field.tag_name] = []
+         fields_by_tag[field.tag_name].append(field.text)
+
+     tag_counters: dict[str, int] = {tag: 0 for tag in fields_by_tag}
+
+     for elem in metadata_elem:
+         tag_name = elem.tag
+         if tag_name in fields_by_tag and elem.text and elem.text.strip():
+             counter = tag_counters[tag_name]
+             if counter < len(fields_by_tag[tag_name]):
+                 elem.text = fields_by_tag[tag_name][counter]
+             tag_counters[tag_name] += 1
+
+     with zip.replace(context.opf_path) as f:
+         context.xml_node.save(f)
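
Editor's note: taken together, `read_metadata` and `write_metadata` form a read-translate-write round trip over the OPF metadata. A minimal usage sketch; the `translate_text` callable and the way the `Zip` is opened are assumptions, only the two functions and their types come from the diff above:

```python
from epub_translator.epub import read_metadata, write_metadata

def translate_metadata(zip, translate_text) -> None:
    # fields excludes language/identifier/date/meta/contributor via SKIP_FIELDS
    fields, context = read_metadata(zip)
    for field in fields:
        field.text = translate_text(field.text)  # e.g. title, creator, description
    # rewrites the matching OPF elements in document order, then saves the cached tree
    write_metadata(zip, fields, context)
```
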
@@ -3,8 +3,8 @@ from pathlib import Path
  from xml.etree import ElementTree as ET
  from xml.etree.ElementTree import Element

- from ..xml.xml import plain_text
- from .common import extract_namespace, find_opf_path, strip_namespace
+ from ..xml import XMLLikeNode, plain_text
+ from .common import find_opf_path, strip_namespace
  from .zip import Zip


@@ -41,30 +41,40 @@ class Toc:
          return self.href


- def read_toc(zip: Zip) -> list[Toc]:
+ @dataclass
+ class TocContext:
+     version: int
+     toc_path: Path
+     xml_node: XMLLikeNode
+
+
+ def read_toc(zip: Zip) -> tuple[list[Toc], TocContext]:
      version = _detect_epub_version(zip)
      toc_path = _find_toc_path(zip, version)

      if toc_path is None:
-         return []
+         raise ValueError("Cannot find TOC file in EPUB")

-     if version == 2:
-         return _read_ncx_toc(zip, toc_path)
-     else:
-         return _read_nav_toc(zip, toc_path)
+     with zip.read(toc_path) as f:
+         xml_node = XMLLikeNode(f, is_html_like=False)

+     if version == 3:
+         toc_list = _read_nav_toc(xml_node.element)
+     else:
+         toc_list = _read_ncx_toc(xml_node.element)

- def write_toc(zip: Zip, toc: list[Toc]) -> None:
-     version = _detect_epub_version(zip)
-     toc_path = _find_toc_path(zip, version)
+     context = TocContext(version=version, toc_path=toc_path, xml_node=xml_node)
+     return toc_list, context

-     if toc_path is None:
-         raise ValueError("Cannot find TOC file in EPUB")

-     if version == 2:
-         _write_ncx_toc(zip, toc_path, toc)
+ def write_toc(zip: Zip, toc: list[Toc], context: TocContext) -> None:
+     if context.version == 2:
+         _update_ncx_toc(context.xml_node.element, toc)
      else:
-         _write_nav_toc(zip, toc_path, toc)
+         _update_nav_toc(context.xml_node.element, toc)
+
+     with zip.replace(context.toc_path) as f:
+         context.xml_node.save(f)


  def _detect_epub_version(zip: Zip) -> int:
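
Editor's note: this refactor changes the TOC API from two independent parse-and-write passes to a single parse whose result is threaded through a `TocContext`. A sketch of the new call pattern; the mutation step is illustrative, only the `read_toc`/`write_toc` signatures come from the diff:

```python
toc_list, context = read_toc(zip)   # parses the TOC once; raises ValueError if no TOC file exists
# ... mutate the Toc entries here, e.g. append translated titles ...
write_toc(zip, toc_list, context)   # updates the cached XML tree and writes it back, no re-parse
```
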
@@ -72,8 +82,6 @@ def _detect_epub_version(zip: Zip) -> int:
      with zip.read(opf_path) as f:
          content = f.read()
      root = ET.fromstring(content)
-
-     # Check the version attribute of the package element
      version_str = root.get("version", "2.0")

      if version_str.startswith("3"):
@@ -89,7 +97,7 @@ def _find_toc_path(zip: Zip, version: int) -> Path | None:
      with zip.read(opf_path) as f:
          content = f.read()
      root = ET.fromstring(content)
-     strip_namespace(root)  # strip namespace prefixes to simplify XPath
+     strip_namespace(root)

      manifest = root.find(".//manifest")
      if manifest is None:
@@ -115,23 +123,18 @@ def _find_toc_path(zip: Zip, version: int) -> Path | None:
      return None


- def _read_ncx_toc(zip: Zip, ncx_path: Path) -> list[Toc]:
-     with zip.read(ncx_path) as f:
-         content = f.read()
-     root = ET.fromstring(content)
-     strip_namespace(root)  # strip namespace prefixes to simplify XPath
-
-     nav_map = root.find(".//navMap")
-     if nav_map is None:
-         return []
+ def _read_ncx_toc(root: Element) -> list[Toc]:
+     nav_map = root.find(".//navMap")
+     if nav_map is None:
+         return []

-     result = []
-     for nav_point in nav_map.findall("navPoint"):
-         toc_item = _parse_nav_point(nav_point)
-         if toc_item:
-             result.append(toc_item)
+     result = []
+     for nav_point in nav_map.findall("navPoint"):
+         toc_item = _parse_nav_point(nav_point)
+         if toc_item:
+             result.append(toc_item)

-     return result
+     return result


  def _parse_nav_point(nav_point: Element) -> Toc | None:
@@ -172,18 +175,11 @@ def _parse_nav_point(nav_point: Element) -> Toc | None:
      )


- def _write_ncx_toc(zip: Zip, ncx_path: Path, toc_list: list[Toc]) -> None:
-     with zip.read(ncx_path) as f:
-         content = f.read()
-     root = ET.fromstring(content)
-     ns = extract_namespace(root.tag)
-     nav_map = root.find(f".//{{{ns}}}navMap" if ns else ".//navMap")
-     if nav_map is None:
-         raise ValueError("Cannot find navMap in NCX file")
-     _update_nav_points(nav_map, toc_list, ns)
-     tree = ET.ElementTree(root)
-     with zip.replace(ncx_path) as out:
-         tree.write(out, encoding="utf-8", xml_declaration=True)
+ def _update_ncx_toc(root: Element, toc_list: list[Toc]) -> None:
+     nav_map = root.find(".//navMap")
+     if nav_map is None:
+         raise ValueError("Cannot find navMap in NCX file")
+     _update_nav_points(nav_map, toc_list, None)


  def _update_nav_points(parent: Element, toc_list: list[Toc], ns: str | None, start_play_order: int = 1) -> int:
@@ -255,34 +251,28 @@ def _create_nav_point(toc: Toc, ns: str | None, play_order: int) -> Element:
      return nav_point


- def _read_nav_toc(zip: Zip, nav_path: Path) -> list[Toc]:
-     with zip.read(nav_path) as f:
-         content = f.read()
-     root = ET.fromstring(content)
-
-     strip_namespace(root)
-
-     nav_elem = None
-     for nav in root.findall(".//nav"):
-         epub_type = nav.get("{http://www.idpf.org/2007/ops}type") or nav.get("type")
-         if epub_type == "toc":
-             nav_elem = nav
-             break
+ def _read_nav_toc(root: Element) -> list[Toc]:
+     nav_elem = None
+     for nav in root.findall(".//nav"):
+         epub_type = nav.get("type")
+         if epub_type == "toc":
+             nav_elem = nav
+             break

-     if nav_elem is None:
-         return []
+     if nav_elem is None:
+         return []

-     ol = nav_elem.find(".//ol")
-     if ol is None:
-         return []
+     ol = nav_elem.find(".//ol")
+     if ol is None:
+         return []

-     result = []
-     for li in ol.findall("li"):
-         toc_item = _parse_nav_li(li)
-         if toc_item:
-             result.append(toc_item)
+     result = []
+     for li in ol.findall("li"):
+         toc_item = _parse_nav_li(li)
+         if toc_item:
+             result.append(toc_item)

-     return result
+     return result


  def _parse_nav_li(li: Element) -> Toc | None:
@@ -331,30 +321,22 @@ def _parse_nav_li(li: Element) -> Toc | None:
      )


- def _write_nav_toc(zip: Zip, nav_path: Path, toc_list: list[Toc]) -> None:
-     with zip.read(nav_path) as f:
-         content = f.read()
-     root = ET.fromstring(content)
-     ns = extract_namespace(root.tag)
-     nav_elem = None
-     for nav in root.findall(f".//{{{ns}}}nav" if ns else ".//nav"):
-         epub_type = nav.get("{http://www.idpf.org/2007/ops}type") or nav.get("type") or nav.get(f"{{{ns}}}type")
-         if epub_type == "toc":
-             nav_elem = nav
-             break
-
-     if nav_elem is None:
-         raise ValueError("Cannot find nav element with type='toc'")
-
-     ol = nav_elem.find(f".//{{{ns}}}ol" if ns else ".//ol")
-     if ol is None:
-         raise ValueError("Cannot find ol in nav element")
-
-     _update_nav_lis(ol, toc_list, ns)
-
-     tree = ET.ElementTree(root)
-     with zip.replace(nav_path) as out:
-         tree.write(out, encoding="utf-8", xml_declaration=True)
+ def _update_nav_toc(root: Element, toc_list: list[Toc]) -> None:
+     nav_elem = None
+     for nav in root.findall(".//nav"):
+         epub_type = nav.get("type")
+         if epub_type == "toc":
+             nav_elem = nav
+             break
+
+     if nav_elem is None:
+         raise ValueError("Cannot find nav element with type='toc'")
+
+     ol = nav_elem.find(".//ol")
+     if ol is None:
+         raise ValueError("Cannot find ol in nav element")
+
+     _update_nav_lis(ol, toc_list, None)


  def _update_nav_lis(ol: Element, toc_list: list[Toc], ns: str | None) -> None:
@@ -13,6 +13,7 @@ from ..template import create_env
  from .context import LLMContext
  from .executor import LLMExecutor
  from .increasable import Increasable
+ from .statistics import Statistics
  from .types import Message

  # Global state for logger filename generation
@@ -44,7 +45,7 @@ class LLM:
          self._temperature: Increasable = Increasable(temperature)
          self._cache_path: Path | None = self._ensure_dir_path(cache_path)
          self._logger_save_path: Path | None = self._ensure_dir_path(log_dir_path)
-
+         self._statistics = Statistics()
          self._executor = LLMExecutor(
              url=url,
              model=model,
@@ -53,12 +54,29 @@ class LLM:
              retry_times=retry_times,
              retry_interval_seconds=retry_interval_seconds,
              create_logger=self._create_logger,
+             statistics=self._statistics,
          )

      @property
      def encoding(self) -> Encoding:
          return self._encoding

+     @property
+     def total_tokens(self) -> int:
+         return self._statistics.total_tokens
+
+     @property
+     def input_tokens(self) -> int:
+         return self._statistics.input_tokens
+
+     @property
+     def input_cache_tokens(self) -> int:
+         return self._statistics.input_cache_tokens
+
+     @property
+     def output_tokens(self) -> int:
+         return self._statistics.output_tokens
+
      def context(self, cache_seed_content: str | None = None) -> LLMContext:
          return LLMContext(
              executor=self._executor,
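
Editor's note: `epub_translator/llm/statistics.py` itself (+25 lines) is not shown in this diff. Based on the four read-only properties above and the README's note that the counters are cumulative, monotonic, and thread-safe, a plausible sketch; the `record` method name and signature are assumptions:

```python
import threading

class Statistics:
    """Thread-safe, monotonically increasing token counters shared with LLMExecutor."""

    def __init__(self) -> None:
        self._lock = threading.Lock()
        self.input_tokens = 0
        self.input_cache_tokens = 0
        self.output_tokens = 0

    @property
    def total_tokens(self) -> int:
        return self.input_tokens + self.output_tokens

    def record(self, input_tokens: int, input_cache_tokens: int, output_tokens: int) -> None:
        # Called once per API response; counters only ever increase.
        with self._lock:
            self.input_tokens += input_tokens
            self.input_cache_tokens += input_cache_tokens
            self.output_tokens += output_tokens
```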