epub-translator 0.1.5__py3-none-any.whl → 0.1.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27)
  1. epub_translator/__init__.py +1 -2
  2. epub_translator/epub/__init__.py +1 -1
  3. epub_translator/llm/context.py +10 -1
  4. epub_translator/llm/core.py +30 -3
  5. epub_translator/segment/inline_segment.py +11 -1
  6. epub_translator/segment/text_segment.py +0 -4
  7. epub_translator/translation/__init__.py +2 -0
  8. epub_translator/{epub_transcode.py → translation/epub_transcode.py} +2 -2
  9. epub_translator/{punctuation.py → translation/punctuation.py} +1 -1
  10. epub_translator/{translator.py → translation/translator.py} +8 -6
  11. epub_translator/{xml_interrupter.py → translation/xml_interrupter.py} +2 -2
  12. epub_translator/xml/__init__.py +1 -1
  13. epub_translator/xml_translator/concurrency.py +52 -0
  14. epub_translator/xml_translator/score.py +164 -0
  15. epub_translator/xml_translator/stream_mapper.py +145 -114
  16. epub_translator/xml_translator/translator.py +12 -18
  17. {epub_translator-0.1.5.dist-info → epub_translator-0.1.6.dist-info}/METADATA +29 -2
  18. {epub_translator-0.1.5.dist-info → epub_translator-0.1.6.dist-info}/RECORD +27 -24
  19. /epub_translator/{language.py → translation/language.py} +0 -0
  20. /epub_translator/xml/{firendly → friendly}/__init__.py +0 -0
  21. /epub_translator/xml/{firendly → friendly}/decoder.py +0 -0
  22. /epub_translator/xml/{firendly → friendly}/encoder.py +0 -0
  23. /epub_translator/xml/{firendly → friendly}/parser.py +0 -0
  24. /epub_translator/xml/{firendly → friendly}/tag.py +0 -0
  25. /epub_translator/xml/{firendly → friendly}/transform.py +0 -0
  26. {epub_translator-0.1.5.dist-info → epub_translator-0.1.6.dist-info}/LICENSE +0 -0
  27. {epub_translator-0.1.5.dist-info → epub_translator-0.1.6.dist-info}/WHEEL +0 -0
epub_translator/__init__.py
@@ -1,6 +1,5 @@
- from . import language
  from .llm import LLM
- from .translator import FillFailedEvent, translate
+ from .translation import FillFailedEvent, language, translate
  from .xml_translator import SubmitKind

  __all__ = [
epub_translator/epub/__init__.py
@@ -1,4 +1,4 @@
  from .metadata import read_metadata, write_metadata
  from .spines import search_spine_paths
- from .toc import read_toc, write_toc
+ from .toc import Toc, read_toc, write_toc
  from .zip import Zip
epub_translator/llm/context.py
@@ -1,5 +1,6 @@
  import hashlib
  import json
+ import threading
  import uuid
  from pathlib import Path
  from typing import Self
@@ -8,6 +9,9 @@ from .executor import LLMExecutor
  from .increasable import Increasable, Increaser
  from .types import Message, MessageRole

+ # Global lock for cache file commit operations
+ _CACHE_COMMIT_LOCK = threading.Lock()
+

  class LLMContext:
      def __init__(
@@ -101,7 +105,12 @@ class LLMContext:
  # Remove the .[context-id].txt suffix to get permanent name
  permanent_name = temp_file.name.rsplit(".", 2)[0] + ".txt"
  permanent_file = temp_file.parent / permanent_name
- temp_file.rename(permanent_file)
+
+ with _CACHE_COMMIT_LOCK:  # thread safety when running multi-threaded
+     if permanent_file.exists():
+         temp_file.unlink()
+     else:
+         temp_file.rename(permanent_file)

  def _rollback(self) -> None:
      for temp_file in self._temp_files:
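The commit step above implements a rename-or-discard pattern: whichever thread promotes its temp file first wins, and later threads simply drop their duplicates. A minimal standalone sketch of the same idea (names are illustrative, not the package's API):

```python
import threading
from pathlib import Path

_COMMIT_LOCK = threading.Lock()

def commit_cache_file(temp_file: Path, permanent_file: Path) -> None:
    # Serialize the exists/rename pair so two threads cannot both
    # observe "missing" and then race on the rename.
    with _COMMIT_LOCK:
        if permanent_file.exists():
            temp_file.unlink()  # another thread already committed this entry
        else:
            temp_file.rename(permanent_file)
```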
epub_translator/llm/core.py
@@ -1,4 +1,5 @@
  import datetime
+ import threading
  from collections.abc import Generator
  from importlib.resources import files
  from logging import DEBUG, FileHandler, Formatter, Logger, getLogger
@@ -14,6 +15,11 @@ from .executor import LLMExecutor
  from .increasable import Increasable
  from .types import Message

+ # Global state for logger filename generation
+ _LOGGER_LOCK = threading.Lock()
+ _LAST_TIMESTAMP: str | None = None
+ _LOGGER_SUFFIX_ID: int = 1
+

  class LLM:
      def __init__(
@@ -95,13 +101,34 @@ class LLM:
          return dir_path.resolve()

      def _create_logger(self) -> Logger | None:
+         # pylint: disable=global-statement
+         global _LAST_TIMESTAMP, _LOGGER_SUFFIX_ID
+
          if self._logger_save_path is None:
              return None

          now = datetime.datetime.now(datetime.UTC)
-         timestamp = now.strftime("%Y-%m-%d %H-%M-%S %f")
-         file_path = self._logger_save_path / f"request {timestamp}.log"
-         logger = getLogger(f"LLM Request {timestamp}")
+         # Use second-level precision for collision detection
+         timestamp_key = now.strftime("%Y-%m-%d %H-%M-%S")
+
+         with _LOGGER_LOCK:
+             if _LAST_TIMESTAMP == timestamp_key:
+                 _LOGGER_SUFFIX_ID += 1
+                 suffix_id = _LOGGER_SUFFIX_ID
+             else:
+                 _LAST_TIMESTAMP = timestamp_key
+                 _LOGGER_SUFFIX_ID = 1
+                 suffix_id = 1
+
+         if suffix_id == 1:
+             file_name = f"request {timestamp_key}.log"
+             logger_name = f"LLM Request {timestamp_key}"
+         else:
+             file_name = f"request {timestamp_key}_{suffix_id}.log"
+             logger_name = f"LLM Request {timestamp_key}_{suffix_id}"
+
+         file_path = self._logger_save_path / file_name
+         logger = getLogger(logger_name)
          logger.setLevel(DEBUG)
          handler = FileHandler(file_path, encoding="utf-8")
          handler.setLevel(DEBUG)
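The logger change trades microsecond timestamps for second-level names plus a collision counter, so two requests in the same second get `request <ts>.log` and `request <ts>_2.log`. A small sketch of that naming logic in isolation (mirroring, not importing, the module-level state above):

```python
import threading

_LOCK = threading.Lock()
_last_key: str | None = None
_suffix_id = 1

def log_file_name(timestamp_key: str) -> str:
    # Same second as the previous call -> bump the suffix; otherwise reset.
    global _last_key, _suffix_id
    with _LOCK:
        if _last_key == timestamp_key:
            _suffix_id += 1
        else:
            _last_key, _suffix_id = timestamp_key, 1
        suffix_id = _suffix_id
    if suffix_id == 1:
        return f"request {timestamp_key}.log"
    return f"request {timestamp_key}_{suffix_id}.log"

assert log_file_name("2024-01-01 12-00-00") == "request 2024-01-01 12-00-00.log"
assert log_file_name("2024-01-01 12-00-00") == "request 2024-01-01 12-00-00_2.log"
```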
epub_translator/segment/inline_segment.py
@@ -47,6 +47,7 @@ def search_inline_segments(text_segments: Iterable[TextSegment]) -> Generator["I
      inline_segment = _pop_stack_data(stack_data)
      stack_data = None
      if inline_segment:
+         inline_segment.id = 0
          yield inline_segment

      if stack_data is None:
@@ -73,6 +74,7 @@ def search_inline_segments(text_segments: Iterable[TextSegment]) -> Generator["I
      if stack_data is not None:
          inline_segment = _pop_stack_data(stack_data)
          if inline_segment:
+             inline_segment.id = 0
              yield inline_segment


@@ -115,7 +117,7 @@ class InlineSegment:
      self._child_tag2ids: dict[str, list[int]] = {}
      self._child_tag2count: dict[str, int] = {}

-     next_temp_id: int = 0
+     next_temp_id: int = 1
      terms = nest((child.parent.tag, child) for child in children if isinstance(child, InlineSegment))

      for tag, child_terms in terms.items():
@@ -162,6 +164,14 @@ class InlineSegment:
      elif isinstance(child, InlineSegment):
          yield from child

+ def clone(self) -> "InlineSegment":
+     cloned_segment = InlineSegment(
+         depth=len(self._parent_stack),
+         children=[child.clone() for child in self._children],
+     )
+     cloned_segment.id = self.id
+     return cloned_segment
+
  def recreate_ids(self, id_generator: IDGenerator) -> None:
      self._child_tag2count.clear()
      self._child_tag2ids.clear()
epub_translator/segment/text_segment.py
@@ -33,10 +33,6 @@ class TextSegment:
      def block_parent(self) -> Element:
          return self.parent_stack[self.block_depth - 1]

-     @property
-     def xml_text(self) -> str:
-         return "".join(_expand_xml_texts(self))
-
      def strip_block_parents(self) -> Self:
          self.parent_stack = self.parent_stack[self.block_depth - 1 :]
          self.block_depth = 1
epub_translator/translation/__init__.py (new file)
@@ -0,0 +1,2 @@
+ from . import language
+ from .translator import FillFailedEvent, translate
epub_translator/epub_transcode.py → epub_translator/translation/epub_transcode.py
@@ -6,8 +6,8 @@ Encoding/decoding conversion between the EPUB data structures and XML

  from xml.etree.ElementTree import Element

- from .epub.metadata import MetadataField
- from .epub.toc import Toc
+ from ..epub import Toc
+ from ..epub.metadata import MetadataField


  def encode_toc(toc: Toc) -> Element:
epub_translator/punctuation.py → epub_translator/translation/punctuation.py
@@ -1,6 +1,6 @@
  from xml.etree.ElementTree import Element

- from .xml import iter_with_stack
+ from ..xml import iter_with_stack

  _QUOTE_MAPPING = {
      # French quotation marks
epub_translator/translator.py → epub_translator/translation/translator.py
@@ -5,7 +5,7 @@ from importlib.metadata import version as get_package_version
  from os import PathLike
  from pathlib import Path

- from .epub import (
+ from ..epub import (
      Zip,
      read_metadata,
      read_toc,
@@ -13,12 +13,12 @@ from .epub import (
      write_metadata,
      write_toc,
  )
+ from ..llm import LLM
+ from ..xml import XMLLikeNode, deduplicate_ids_in_element, find_first
+ from ..xml_translator import FillFailedEvent, SubmitKind, TranslationTask, XMLTranslator
  from .epub_transcode import decode_metadata, decode_toc_list, encode_metadata, encode_toc_list
- from .llm import LLM
  from .punctuation import unwrap_french_quotes
- from .xml import XMLLikeNode, deduplicate_ids_in_element, find_first
  from .xml_interrupter import XMLInterrupter
- from .xml_translator import FillFailedEvent, SubmitKind, TranslationTask, XMLTranslator


  class _ElementType(Enum):
@@ -40,7 +40,8 @@ def translate(
      submit: SubmitKind,
      user_prompt: str | None = None,
      max_retries: int = 5,
-     max_group_tokens: int = 1200,
+     max_group_tokens: int = 2600,
+     concurrency: int = 1,
      llm: LLM | None = None,
      translation_llm: LLM | None = None,
      fill_llm: LLM | None = None,
@@ -62,7 +63,7 @@ def translate(
      ignore_translated_error=False,
      max_retries=max_retries,
      max_fill_displaying_errors=10,
-     max_group_tokens=max_group_tokens,
+     max_group_score=max_group_tokens,
      cache_seed_content=f"{_get_version()}:{target_language}",
  )
  with Zip(
@@ -92,6 +93,7 @@ def translate(
      current_progress = 0.0

      for translated_elem, context in translator.translate_elements(
+         concurrency=concurrency,
          interrupt_source_text_segments=interrupter.interrupt_source_text_segments,
          interrupt_translated_text_segments=interrupter.interrupt_translated_text_segments,
          interrupt_block_element=interrupter.interrupt_block_element,
epub_translator/xml_interrupter.py → epub_translator/translation/xml_interrupter.py
@@ -2,8 +2,8 @@ from collections.abc import Generator, Iterable
  from typing import cast
  from xml.etree.ElementTree import Element

- from .segment import TextSegment
- from .utils import ensure_list, normalize_whitespace
+ from ..segment import TextSegment
+ from ..utils import ensure_list, normalize_whitespace

  _ID_KEY = "__XML_INTERRUPTER_ID"
  _MATH_TAG = "math"
epub_translator/xml/__init__.py
@@ -1,6 +1,6 @@
  from .const import *
  from .deduplication import *
- from .firendly import *
+ from .friendly import *
  from .inline import *
  from .utils import *
  from .xml import *
epub_translator/xml_translator/concurrency.py (new file)
@@ -0,0 +1,52 @@
+ from collections import deque
+ from collections.abc import Callable, Iterable
+ from concurrent.futures import Future, ThreadPoolExecutor
+ from typing import TypeVar
+
+ P = TypeVar("P")
+ R = TypeVar("R")
+
+
+ def run_concurrency(
+     parameters: Iterable[P],
+     execute: Callable[[P], R],
+     concurrency: int,
+ ) -> Iterable[R]:
+     assert concurrency >= 1, "the concurrency must be at least 1"
+     # Fast path: concurrency == 1, no thread overhead
+     if concurrency == 1:
+         for param in parameters:
+             yield execute(param)
+         return
+
+     executor = ThreadPoolExecutor(max_workers=concurrency)
+     did_shutdown = False
+     try:
+         futures: deque[Future[R]] = deque()
+         params_iter = iter(parameters)
+         for _ in range(concurrency):
+             try:
+                 param = next(params_iter)
+                 future = executor.submit(execute, param)
+                 futures.append(future)
+             except StopIteration:
+                 break
+
+         while futures:
+             future = futures.popleft()
+             yield future.result()
+             try:
+                 param = next(params_iter)
+                 new_future = executor.submit(execute, param)
+                 futures.append(new_future)
+             except StopIteration:
+                 pass
+
+     except KeyboardInterrupt:
+         executor.shutdown(wait=False, cancel_futures=True)
+         did_shutdown = True
+         raise
+
+     finally:
+         if not did_shutdown:
+             executor.shutdown(wait=True)
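`run_concurrency` keeps a sliding window of at most `concurrency` in-flight futures and yields results strictly in submission order, so consumers see the same ordering as the sequential fast path. A usage sketch (the workload is hypothetical; the import targets the internal module added above, whose path may change):

```python
from time import sleep

from epub_translator.xml_translator.concurrency import run_concurrency

def fake_translate(chunk: str) -> str:
    sleep(0.1)  # stand-in for a blocking LLM request
    return chunk.upper()

chunks = ["alpha", "beta", "gamma", "delta"]

# Up to 3 calls overlap, but results still arrive in input order.
for result in run_concurrency(parameters=chunks, execute=fake_translate, concurrency=3):
    print(result)  # ALPHA, BETA, GAMMA, DELTA
```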
epub_translator/xml_translator/score.py (new file)
@@ -0,0 +1,164 @@
+ from collections.abc import Generator
+ from dataclasses import dataclass
+ from enum import Enum, auto
+
+ from tiktoken import Encoding
+
+ from ..segment import InlineSegment, TextSegment
+ from .common import DATA_ORIGIN_LEN_KEY
+
+ _ID_WEIGHT = 80
+ _ELLIPSIS = "..."
+
+
+ @dataclass
+ class ScoreSegment:
+     text_segment: TextSegment
+     left_parents: list[InlineSegment]
+     right_parents: list[InlineSegment]
+     text_tokens: list[int]
+     score: int
+
+
+ def expand_to_score_segments(encoding: Encoding, inline_segment: InlineSegment) -> Generator[ScoreSegment, None, None]:
+     for i, score_segment in enumerate(_do_expand_inline_segment(inline_segment)):
+         xml_text = "".join(
+             _render_score_segment(
+                 score_segment=score_segment,
+                 is_first=(i == 0),
+             )
+         )
+         score_segment.text_tokens = encoding.encode(score_segment.text_segment.text)
+         score_segment.score = len(encoding.encode(xml_text)) + sum(
+             _ID_WEIGHT for parent in score_segment.left_parents if parent.id is not None
+         )
+         yield score_segment
+
+
+ def truncate_score_segment(
+     encoding: Encoding,
+     score_segment: ScoreSegment,
+     remain_head: bool,
+     remain_score: int,
+ ):
+     fixed_score = score_segment.score - len(score_segment.text_tokens)
+     if remain_score <= fixed_score:
+         # Truncation can only reduce the token count of the text part.
+         # The tokens taken by the XML head/tail markup, plus the weighted ID cost, belong to fixed_score and cannot be trimmed.
+         # If the budget could only be met by deleting all of the text, drop the whole segment instead.
+         return None
+
+     remain_text_tokens_count = remain_score - fixed_score
+
+     # remain_text_tokens_count cannot be 0 here
+     if remain_head:
+         remain_text = encoding.decode(score_segment.text_tokens[:remain_text_tokens_count])
+     else:
+         remain_text = encoding.decode(score_segment.text_tokens[-remain_text_tokens_count:])
+
+     if not remain_text.strip():
+         return None
+
+     if remain_head:
+         remain_text = f"{remain_text} {_ELLIPSIS}"
+     else:
+         remain_text = f"{_ELLIPSIS} {remain_text}"
+
+     text_segment = score_segment.text_segment.clone()
+     text_segment.text = remain_text
+
+     return ScoreSegment(
+         text_segment=text_segment,
+         left_parents=score_segment.left_parents,
+         right_parents=score_segment.right_parents,
+         text_tokens=encoding.encode(remain_text),
+         score=remain_text_tokens_count + fixed_score,
+     )
+
+
+ def _render_score_segment(score_segment: ScoreSegment, is_first: bool):
+     for i, parent in enumerate(score_segment.left_parents):
+         yield "<"
+         yield parent.parent.tag
+         if parent.id is not None:
+             yield ' id="99"'
+         if is_first and i == 0:
+             yield " "
+             yield DATA_ORIGIN_LEN_KEY
+             yield '="9999"'
+         yield ">"
+
+     yield score_segment.text_segment.text
+
+     for parent in reversed(score_segment.right_parents):
+         yield "</"
+         yield parent.parent.tag
+         yield ">"
+
+
+ def _do_expand_inline_segment(inline_segment: InlineSegment):
+     text_segment: TextSegment | None = None
+     left_parents: list[InlineSegment] = []
+     right_parents: list[InlineSegment] = []
+
+     for item in _expand_as_wrapped(inline_segment):
+         if isinstance(item, TextSegment):
+             if text_segment is None:
+                 text_segment = item
+             else:
+                 yield ScoreSegment(
+                     text_segment=text_segment,
+                     left_parents=left_parents,
+                     right_parents=right_parents,
+                     text_tokens=[],
+                     score=0,
+                 )
+                 text_segment = item
+                 left_parents = []
+                 right_parents = []
+
+         elif isinstance(item, tuple):
+             child_inline_segment, orientation = item
+             if orientation == _Orientation.UP:
+                 if text_segment is not None:
+                     yield ScoreSegment(
+                         text_segment=text_segment,
+                         left_parents=left_parents,
+                         right_parents=right_parents,
+                         text_tokens=[],
+                         score=0,
+                     )
+                     text_segment = None
+                     left_parents = []
+                     right_parents = []
+                 left_parents.append(child_inline_segment)
+
+             elif orientation == _Orientation.DOWN:
+                 if text_segment is None:
+                     left_parents.clear()
+                 else:
+                     right_parents.append(child_inline_segment)
+
+     if text_segment is not None:
+         yield ScoreSegment(
+             text_segment=text_segment,
+             left_parents=left_parents,
+             right_parents=right_parents,
+             text_tokens=[],
+             score=0,
+         )
+
+
+ class _Orientation(Enum):
+     DOWN = auto()
+     UP = auto()
+
+
+ def _expand_as_wrapped(inline_segment: InlineSegment):
+     yield (inline_segment, _Orientation.UP)
+     for child in inline_segment.children:
+         if isinstance(child, InlineSegment):
+             yield from _expand_as_wrapped(child)
+         elif isinstance(child, TextSegment):
+             yield child
+     yield (inline_segment, _Orientation.DOWN)
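A segment's `score` approximates its prompt cost: the token count of its rendered XML form plus a flat `_ID_WEIGHT` of 80 for every ID-bearing left parent, which makes ID-carrying markup expensive to keep in a group. A worked sketch with hypothetical token counts:

```python
# Suppose a segment renders as:
#   <em id="99" data-origin-len="9999">Hello world</em>
# and that string encodes to 14 tokens, with one ID-bearing left parent.
xml_tokens = 14  # len(encoding.encode(xml_text)), hypothetical
id_weight = 80   # _ID_WEIGHT above
score = xml_tokens + 1 * id_weight  # == 94

# If the text "Hello world" itself is 2 tokens, then
#   fixed_score = score - len(text_tokens) == 92
# is the untrimmable part: truncate_score_segment returns None whenever
# the remaining budget cannot even cover fixed_score.
```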
epub_translator/xml_translator/stream_mapper.py
@@ -1,4 +1,5 @@
  from collections.abc import Callable, Generator, Iterable, Iterator
+ from typing import TypeVar
  from xml.etree.ElementTree import Element

  from resource_segmentation import Group, Resource, Segment, split
@@ -6,11 +7,14 @@ from tiktoken import Encoding

  from ..segment import InlineSegment, TextSegment, search_inline_segments, search_text_segments
  from .callbacks import Callbacks
+ from .concurrency import run_concurrency
+ from .score import ScoreSegment, expand_to_score_segments, truncate_score_segment

  _PAGE_INCISION = 0
  _BLOCK_INCISION = 1
+ _T = TypeVar("_T")

- _ELLIPSIS = "..."
+ _ResourcePayload = tuple[InlineSegment, list[ScoreSegment]]


  InlineSegmentMapping = tuple[Element, list[TextSegment]]
@@ -18,23 +22,33 @@ InlineSegmentGroupMap = Callable[[list[InlineSegment]], list[InlineSegmentMappin


  class XMLStreamMapper:
-     def __init__(self, encoding: Encoding, max_group_tokens: int) -> None:
+     def __init__(self, encoding: Encoding, max_group_score: int) -> None:
          self._encoding: Encoding = encoding
-         self._max_group_tokens: int = max_group_tokens
+         self._max_group_score: int = max_group_score

      def map_stream(
          self,
          elements: Iterator[Element],
          callbacks: Callbacks,
          map: InlineSegmentGroupMap,
+         concurrency: int,
      ) -> Generator[tuple[Element, list[InlineSegmentMapping]], None, None]:
          current_element: Element | None = None
          mapping_buffer: list[InlineSegmentMapping] = []

-         for group in self._split_into_serial_groups(elements, callbacks):
+         def execute(group: Group[_ResourcePayload]):
              head, body, tail = self._truncate_and_transform_group(group)
+             head = [segment.clone() for segment in head]
+             tail = [segment.clone() for segment in tail]
              target_body = map(head + body + tail)[len(head) : len(head) + len(body)]
-             for origin, target in zip(body, target_body, strict=False):
+             return zip(body, target_body, strict=False)
+
+         for mapping_pairs in run_concurrency(
+             parameters=self._split_into_serial_groups(elements, callbacks),
+             execute=execute,
+             concurrency=concurrency,
+         ):
+             for origin, target in mapping_pairs:
                  origin_element = origin.head.root
                  if current_element is None:
                      current_element = origin_element
@@ -58,7 +72,7 @@ class XMLStreamMapper:
          def generate():
              for element in elements:
                  yield from split(
-                     max_segment_count=self._max_group_tokens,
+                     max_segment_count=self._max_group_score,
                      border_incision=_PAGE_INCISION,
                      resources=self._expand_to_resources(element, callbacks),
                  )
@@ -79,7 +93,7 @@ class XMLStreamMapper:
          next_sum_body_count = sum(x.count for x in self._expand_resource_segments(next_group.body))
          next_sum_count = sum_count + next_sum_body_count

-         if next_sum_count + next_group.tail_remain_count > self._max_group_tokens:
+         if next_sum_count + next_group.tail_remain_count > self._max_group_score:
              yield group
              group = next_group
              sum_count = group.head_remain_count + next_sum_body_count
@@ -91,23 +105,25 @@ class XMLStreamMapper:

          yield group

-     def _truncate_and_transform_group(self, group: Group[InlineSegment]):
-         head = list(
-             self._truncate_inline_segments(
-                 inline_segments=self._expand_inline_segments(group.head),
-                 remain_head=False,
-                 remain_count=group.head_remain_count,
-             )
-         )
-         body = list(self._expand_inline_segments(group.body))
-         tail = list(
-             self._truncate_inline_segments(
-                 inline_segments=self._expand_inline_segments(group.tail),
-                 remain_head=True,
-                 remain_count=group.tail_remain_count,
-             )
-         )
-         return head, body, tail
+     def _truncate_and_transform_group(
+         self, group: Group[_ResourcePayload]
+     ) -> tuple[list[InlineSegment], list[InlineSegment], list[InlineSegment]]:
+         head = self._truncate_group_gap(
+             gap=group.head,
+             remain_head=False,
+             remain_score=group.head_remain_count,
+         )
+         body = self._expand_inline_segments(group.body)
+         tail = self._truncate_group_gap(
+             gap=group.tail,
+             remain_head=True,
+             remain_score=group.tail_remain_count,
+         )
+         return (
+             [r.payload[0] for r in head],
+             [p[0] for p in body],
+             [r.payload[0] for r in tail],
+         )

      def _expand_to_resources(self, element: Element, callbacks: Callbacks):
          def expand(element: Element):
@@ -131,123 +147,138 @@ class XMLStreamMapper:
              else:
                  end_incision = _PAGE_INCISION

-             yield Resource(
-                 count=sum(len(self._encoding.encode(t.xml_text)) for t in inline_segment),
+             yield self._transform_to_resource(
+                 inline_segment=inline_segment,
                  start_incision=start_incision,
                  end_incision=end_incision,
-                 payload=inline_segment,
              )
              inline_segment = next_inline_segment
              start_incision = end_incision

-         yield Resource(
-             count=sum(len(self._encoding.encode(t.xml_text)) for t in inline_segment),
+         yield self._transform_to_resource(
+             inline_segment=inline_segment,
              start_incision=start_incision,
              end_incision=_PAGE_INCISION,
-             payload=inline_segment,
          )

-     def _truncate_inline_segments(self, inline_segments: Iterable[InlineSegment], remain_head: bool, remain_count: int):
-         def clone_and_expand(segments: Iterable[InlineSegment]):
-             for segment in segments:
-                 for child_segment in segment:
-                     yield child_segment.clone()  # the head and tail produced by splitting overlap with other groups; clone to avoid mutual interference
-
-         truncated_text_segments = self._truncate_text_segments(
-             text_segments=clone_and_expand(inline_segments),
-             remain_head=remain_head,
-             remain_count=remain_count,
-         )
-         yield from search_inline_segments(truncated_text_segments)
+     def _transform_to_resource(
+         self,
+         inline_segment: InlineSegment,
+         start_incision: int,
+         end_incision: int,
+     ) -> Resource[_ResourcePayload]:
+         source_segments = list(
+             expand_to_score_segments(
+                 encoding=self._encoding,
+                 inline_segment=inline_segment,
+             )
+         )
+         return Resource(
+             count=sum(segment.score for segment in source_segments),
+             start_incision=start_incision,
+             end_incision=end_incision,
+             payload=(inline_segment, source_segments),
         )

-     def _expand_inline_segments(self, items: list[Resource[InlineSegment] | Segment[InlineSegment]]):
+     def _expand_inline_segments(self, items: list[Resource[_ResourcePayload] | Segment[_ResourcePayload]]):
          for resource in self._expand_resource_segments(items):
              yield resource.payload

-     def _expand_resource_segments(self, items: list[Resource[InlineSegment] | Segment[InlineSegment]]):
+     def _expand_resource_segments(self, items: list[Resource[_ResourcePayload] | Segment[_ResourcePayload]]):
          for item in items:
              if isinstance(item, Resource):
                  yield item
              elif isinstance(item, Segment):
                  yield from item.resources

-     def _truncate_text_segments(self, text_segments: Iterable[TextSegment], remain_head: bool, remain_count: int):
-         if remain_head:
-             yield from self._filter_and_remain_segments(
-                 segments=text_segments,
-                 remain_head=remain_head,
-                 remain_count=remain_count,
-             )
-         else:
-             yield from reversed(
-                 list(
-                     self._filter_and_remain_segments(
-                         segments=reversed(list(text_segments)),
-                         remain_head=remain_head,
-                         remain_count=remain_count,
-                     )
-                 )
-             )
+     def _truncate_group_gap(
+         self,
+         gap: list[Resource[_ResourcePayload] | Segment[_ResourcePayload]],
+         remain_head: bool,
+         remain_score: int,
+     ):
+         def expand_resource_segments(items: list[Resource[_ResourcePayload] | Segment[_ResourcePayload]]):
+             for item in items:
+                 if isinstance(item, Resource):
+                     yield item
+                 elif isinstance(item, Segment):
+                     yield from item.resources
+
+         resources, remain_score = _truncate_items(
+             items=expand_resource_segments(gap),
+             score=lambda resource: resource.count,
+             remain_head=remain_head,
+             remain_score=remain_score,
+         )
+         if remain_score > 0:
+             resource = resources.pop() if remain_head else resources.pop(0)
+             inline_segment, score_segments = resource.payload
+             score_segments, remain_score = _truncate_items(
+                 items=score_segments,
+                 score=lambda score_segment: score_segment.score,
+                 remain_head=remain_head,
+                 remain_score=remain_score,
+             )
+             if remain_score > 0:
+                 score_segment = score_segments.pop() if remain_head else score_segments.pop(0)
+                 score_segment = truncate_score_segment(
+                     score_segment=score_segment,
+                     encoding=self._encoding,
+                     remain_head=remain_head,
+                     remain_score=remain_score,
+                 )
+                 if score_segment is not None:
+                     if remain_head:
+                         score_segments.append(score_segment)
+                     else:
+                         score_segments.insert(0, score_segment)
+
+             inline_segment = next(
+                 search_inline_segments(s.text_segment for s in score_segments),
+                 None,
+             )

-     def _filter_and_remain_segments(self, segments: Iterable[TextSegment], remain_head: bool, remain_count: int):
-         for segment in segments:
-             if remain_count <= 0:
-                 break
-             raw_xml_text = segment.xml_text
-             tokens = self._encoding.encode(raw_xml_text)
-             tokens_count = len(tokens)
-
-             if tokens_count > remain_count:
-                 truncated_segment = self._truncate_text_segment(
-                     segment=segment,
-                     tokens=tokens,
-                     raw_xml_text=raw_xml_text,
-                     remain_head=remain_head,
-                     remain_count=remain_count,
-                 )
-                 if truncated_segment is not None:
-                     yield truncated_segment
-                 break
-
-             yield segment
-             remain_count -= tokens_count
+             if inline_segment is not None:
+                 resource = Resource(
+                     count=sum(s.score for s in score_segments),
+                     start_incision=resource.start_incision,
+                     end_incision=resource.end_incision,
+                     payload=(inline_segment, score_segments),
+                 )
+                 if remain_head:
+                     resources.append(resource)
+                 else:
+                     resources.insert(0, resource)
+
+         return resources

-     def _truncate_text_segment(
-         self,
-         segment: TextSegment,
-         tokens: list[int],
-         raw_xml_text: str,
-         remain_head: bool,
-         remain_count: int,
-     ) -> TextSegment | None:
-         # Typical xml_text: <tag id="99" data-origin-len="999">Some text</tag>
-         # If the cut point falls in the leading XML region, discard the segment entirely.
-         # If the cut point falls in the trailing XML region, keep the segment entirely.
-         # Only when the cut lands exactly in the text body is the text actually truncated.
-         remain_text: str
-         xml_text_head_length = raw_xml_text.find(segment.text)
-
-         if remain_head:
-             remain_xml_text = self._encoding.decode(tokens[:remain_count])  # remain_count cannot be 0 here
-             if len(remain_xml_text) <= xml_text_head_length:
-                 return None
-             if len(remain_xml_text) >= xml_text_head_length + len(segment.text):
-                 return segment
-             remain_text = remain_xml_text[xml_text_head_length:]
-         else:
-             xml_text_tail_length = len(raw_xml_text) - (xml_text_head_length + len(segment.text))
-             remain_xml_text = self._encoding.decode(tokens[-remain_count:])
-             if len(remain_xml_text) <= xml_text_tail_length:
-                 return None
-             if len(remain_xml_text) >= xml_text_tail_length + len(segment.text):
-                 return segment
-             remain_text = remain_xml_text[: len(remain_xml_text) - xml_text_tail_length]
-
-         if not remain_text.strip():
-             return None
-
-         if remain_head:
-             segment.text = f"{remain_text} {_ELLIPSIS}"
-         else:
-             segment.text = f"{_ELLIPSIS} {remain_text}"
-         return segment
+
+ def _truncate_items(items: Iterable[_T], score: Callable[[_T], int], remain_head: bool, remain_score: int):
+     truncated_items = list(items)
+     if not truncated_items:
+         return truncated_items, 0
+
+     if not remain_head:
+         truncated_items.reverse()
+
+     truncated_index: int | None = None
+     for i, item in enumerate(truncated_items):
+         item_score = score(item)
+         remain_score -= item_score
+         if remain_score <= 0:
+             truncated_index = i
+             break
+
+     if truncated_index is not None:
+         while len(truncated_items) > truncated_index + 1:
+             truncated_items.pop()
+
+         if truncated_items and remain_score < 0:
+             remain_score = score(truncated_items[-1]) + remain_score
+         else:
+             remain_score = 0
+
+     if not remain_head:
+         truncated_items.reverse()
+
+     return truncated_items, remain_score
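`_truncate_items` walks items from the kept end, subtracting each item's score from the budget; it keeps everything up to and including the first item that overruns, and returns the leftover budget that still applies inside that last item. A hypothetical trace (using plain integers as items, each scoring its own value, and assuming `_truncate_items` from the module above is importable):

```python
kept, leftover = _truncate_items(
    items=[5, 7, 4],    # per-item scores
    score=lambda s: s,
    remain_head=True,
    remain_score=9,     # budget: 9 - 5 = 4, then 4 - 7 = -3 -> stop at index 1
)
assert kept == [5, 7]   # the trailing 4 is dropped entirely
assert leftover == 4    # 7 + (-3): the budget left inside the last kept item
# The caller then recurses inward with that leftover: resource ->
# score segments -> finally truncate_score_segment trims one text.
```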
epub_translator/xml_translator/translator.py
@@ -31,7 +31,7 @@ class XMLTranslator:
      ignore_translated_error: bool,
      max_retries: int,
      max_fill_displaying_errors: int,
-     max_group_tokens: int,
+     max_group_score: int,
      cache_seed_content: str | None = None,
  ) -> None:
      self._translation_llm: LLM = translation_llm
@@ -44,12 +44,13 @@ class XMLTranslator:
      self._cache_seed_content: str | None = cache_seed_content
      self._stream_mapper: XMLStreamMapper = XMLStreamMapper(
          encoding=translation_llm.encoding,
-         max_group_tokens=max_group_tokens,
+         max_group_score=max_group_score,
      )

  def translate_element(
      self,
      task: TranslationTask[T],
+     concurrency: int = 1,
      interrupt_source_text_segments: Callable[[Iterable[TextSegment]], Iterable[TextSegment]] | None = None,
      interrupt_translated_text_segments: Callable[[Iterable[TextSegment]], Iterable[TextSegment]] | None = None,
      interrupt_block_element: Callable[[Element], Element] | None = None,
@@ -57,6 +58,7 @@ class XMLTranslator:
  ) -> tuple[Element, T]:
      for translated in self.translate_elements(
          tasks=((task),),
+         concurrency=concurrency,
          interrupt_source_text_segments=interrupt_source_text_segments,
          interrupt_translated_text_segments=interrupt_translated_text_segments,
          interrupt_block_element=interrupt_block_element,
@@ -69,6 +71,7 @@ class XMLTranslator:
  def translate_elements(
      self,
      tasks: Iterable[TranslationTask[T]],
+     concurrency: int = 1,
      interrupt_source_text_segments: Callable[[Iterable[TextSegment]], Iterable[TextSegment]] | None = None,
      interrupt_translated_text_segments: Callable[[Iterable[TextSegment]], Iterable[TextSegment]] | None = None,
      interrupt_block_element: Callable[[Element], Element] | None = None,
@@ -90,6 +93,7 @@ class XMLTranslator:
      for element, mappings in self._stream_mapper.map_stream(
          elements=generate_elements(),
          callbacks=callbacks,
+         concurrency=concurrency,
          map=lambda inline_segments: self._translate_inline_segments(
              inline_segments=inline_segments,
              callbacks=callbacks,
@@ -117,8 +121,7 @@ class XMLTranslator:
          inline_segments=inline_segments,
      ),
  )
- text_segments = (text for inline in inline_segments for text in inline)
- source_text = "".join(self._render_text_segments(text_segments))
+ source_text = "".join(self._render_source_text_parts(inline_segments))
  translated_text = self._translate_text(source_text)

  self._request_and_submit(
@@ -137,21 +140,12 @@ class XMLTranslator:

      return mappings

-     def _render_text_segments(self, segments: Iterable[TextSegment]):
-         # TODO: unnecessary; just organize by the new inline segments directly
-         iterator = iter(segments)
-         segment = next(iterator, None)
-         if segment is None:
-             return
-         while True:
-             next_segment = next(iterator, None)
-             if next_segment is None:
-                 break
-             yield segment.text
-             if id(segment.block_parent) != id(next_segment.block_parent):
-                 yield "\n\n"
-             segment = next_segment
-         yield segment.text
+     def _render_source_text_parts(self, inline_segments: list[InlineSegment]):
+         for i, inline_segment in enumerate(inline_segments):
+             if i > 0:
+                 yield "\n\n"
+             for text_segment in inline_segment:
+                 yield text_segment.text

      def _translate_text(self, text: str) -> str:
          with self._translation_llm.context(cache_seed_content=self._cache_seed_content) as ctx:
epub_translator-0.1.5.dist-info/METADATA → epub_translator-0.1.6.dist-info/METADATA
@@ -1,6 +1,6 @@
  Metadata-Version: 2.3
  Name: epub-translator
- Version: 0.1.5
+ Version: 0.1.6
  Summary: Translate the epub book using LLM. The translated book will retain the original text and list the translated text side by side with the original text.
  License: MIT
  Keywords: epub,llm,translation,translator
@@ -156,7 +156,8 @@ translate(
      submit: SubmitKind,  # How to insert translations (REPLACE, APPEND_TEXT, or APPEND_BLOCK)
      user_prompt: str | None = None,  # Custom translation instructions
      max_retries: int = 5,  # Maximum retries for failed translations
-     max_group_tokens: int = 1200,  # Maximum tokens per translation group
+     max_group_tokens: int = 2600,  # Maximum tokens per translation group
+     concurrency: int = 1,  # Number of concurrent translation tasks (default: 1)
      llm: LLM | None = None,  # Single LLM instance for both translation and filling
      translation_llm: LLM | None = None,  # LLM instance for translation (overrides llm)
      fill_llm: LLM | None = None,  # LLM instance for XML filling (overrides llm)
@@ -394,6 +395,32 @@ llm = LLM(
  )
  ```

+ ### Concurrent Translation
+
+ Speed up translation by processing multiple text segments concurrently. Use the `concurrency` parameter to control how many translation tasks run in parallel:
+
+ ```python
+ translate(
+     source_path="source.epub",
+     target_path="translated.epub",
+     target_language="English",
+     submit=SubmitKind.APPEND_BLOCK,
+     llm=llm,
+     concurrency=4,  # Process 4 segments concurrently
+ )
+ ```
+
+ **Performance Tips:**
+
+ - Start with `concurrency=4` and adjust based on your API rate limits and system resources
+ - Higher concurrency values can significantly reduce translation time for large books
+ - The translation order is preserved regardless of concurrency settings
+ - Monitor your API provider's rate limits to avoid throttling
+
+ **Thread Safety:**
+
+ When using `concurrency > 1`, ensure that any custom callback functions (`on_progress`, `on_fill_failed`) are thread-safe. Built-in callbacks are thread-safe by default.
+
  ## Related Projects

  ### PDF Craft
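Since translation groups can now complete on worker threads, a custom `on_progress` callback should guard any shared state. A sketch of a thread-safe callback, assuming `on_progress` receives a progress fraction as in the README's examples (the exact signature shown here is an assumption):

```python
import threading

from epub_translator import LLM, SubmitKind, translate

class ProgressPrinter:
    """Thread-safe progress callback: a lock guards the shared counter."""

    def __init__(self) -> None:
        self._lock = threading.Lock()
        self._last_percent = -1

    def __call__(self, progress: float) -> None:  # assumed: fraction in [0, 1]
        with self._lock:
            percent = int(progress * 100)
            if percent != self._last_percent:  # suppress duplicate lines
                self._last_percent = percent
                print(f"{percent}%")

translate(
    source_path="source.epub",
    target_path="translated.epub",
    target_language="English",
    submit=SubmitKind.APPEND_BLOCK,
    llm=LLM(...),  # configure as in the Quick Start section
    concurrency=4,
    on_progress=ProgressPrinter(),
)
```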
epub_translator-0.1.5.dist-info/RECORD → epub_translator-0.1.6.dist-info/RECORD
@@ -1,4 +1,4 @@
- epub_translator/__init__.py,sha256=m2uTGNmBmZhRWQjjYQ1TVrjOuFXJhzQnuuTOq5-t29U,234
+ epub_translator/__init__.py,sha256=JsiOUPpk5k7q8mXIgnRQWdVVnkJww_KDTg7jXsP7_C4,222
  epub_translator/data/fill.jinja,sha256=zSytA8Vhp2i6YBZ09F1z9iPJq1-jUaiphoXqTNZwnvo,6964
  epub_translator/data/mmltex/README.md,sha256=wwhe5yW1U_7_YZIFKnQVnCOmUl7Mu3gsr3lNnDSJ5Qs,2953
  epub_translator/data/mmltex/cmarkup.xsl,sha256=DkhimAATM0XSCfVOfY41-qTPoddqzOHjZ00Pynr4zQE,37707
@@ -9,60 +9,63 @@ epub_translator/data/mmltex/scripts.xsl,sha256=f4ei0cDCW3cV-Ra7rC3kC5tRcKdjJxbSp
  epub_translator/data/mmltex/tables.xsl,sha256=RxtNo8qDtVAg8_6BuYsafraB_0z7YDAB9D__fT9gmWs,4327
  epub_translator/data/mmltex/tokens.xsl,sha256=j3JZRcBhAiiY8o5K3640phfLwxO8JVspCFlSttwBzJk,12373
  epub_translator/data/translate.jinja,sha256=93d8kschm5HV-EfXd1kFSIVMObDqTMdoUrwDfce2bhU,820
- epub_translator/epub/__init__.py,sha256=ZddRHrLNVzgaSVrYflGnrq8tffmlKPhBbz9ok7sp8PY,149
+ epub_translator/epub/__init__.py,sha256=aZawPakdkEquL4kRRpyCTdoSQ82l7FGqY4Uw6-ndoGA,154
  epub_translator/epub/common.py,sha256=4-SpTe8iot9hMfyXILmlUFvYVNYqPAHL5hn1fr2wgis,1180
  epub_translator/epub/math.py,sha256=-Q2LJQxxjgQZQUe_WlJA9tjzLqgqtw2ZmbGbHsPRp2U,5422
  epub_translator/epub/metadata.py,sha256=DXSimY2iZNBA2juIaKtB-4CHHSYJiDK7PPhfenV4dto,3511
  epub_translator/epub/spines.py,sha256=bP2IsobZm7zs4z10iXGc9SmgAFSIq9pJc8HE-V0aW9Y,1331
  epub_translator/epub/toc.py,sha256=TKJfyDT4svFkXd6JCNZk2ZEYc9q-5DXnV3zY2UKo8nE,14891
  epub_translator/epub/zip.py,sha256=-3LI8f-ksgU8xCy28NjBOKyQPE8PhPEUPqIKZE1p8dw,2364
- epub_translator/epub_transcode.py,sha256=NzuvXXEZfAhIoMOSrgQRF0DPtaSpz4OY-NMSdC0Y2RM,2749
- epub_translator/language.py,sha256=88osG0JNYxOkxBjg5Pm-P0Mhiyxf6GqdxoPW12HW0PE,493
  epub_translator/llm/__init__.py,sha256=YcFYYnXmXyX0RUyC-PDbj5k7Woygp_XOpTI3vDiNSPM,75
- epub_translator/llm/context.py,sha256=73paN3V66LQ6muKUSMCKEHEmMYBylK-dXOF8LmaQo5M,3885
- epub_translator/llm/core.py,sha256=AorV4ss4Hr-IbAk8FmGhV2hgI2tKxQmW2Vz2WwUd0Ms,5110
+ epub_translator/llm/context.py,sha256=8-0UnrZIaNshR_imy_ed_UpOK7H1a6dOsG-boaYOX8k,4186
+ epub_translator/llm/core.py,sha256=wQwt6oG68ZN_iQOaytBiPXOC7sI62XII_A4dOHdAt_s,5979
  epub_translator/llm/error.py,sha256=4efAIQL14DFSvAnSTUfgdAbZRqaWBqOfUGsSfvxa5zM,1503
  epub_translator/llm/executor.py,sha256=A0IjQ-s9wBJuhAZAAydneb9zBXWnu2J9inR2Q8F-GDE,5533
  epub_translator/llm/increasable.py,sha256=8XkKeI1hiHlpMHj8dQ4fW0BkViSx4hH8QfbQsy-5SDw,1297
  epub_translator/llm/types.py,sha256=c-dMAIvlG4R3la3mUTWEw5xei-sIYKmQeBja7mirxcI,219
- epub_translator/punctuation.py,sha256=Yrf3b_Pl36FPBaK96LR-EBjnztlcZZTWLSNaYoWIUSc,812
  epub_translator/segment/__init__.py,sha256=UYTv_IKQbEB0DzhFeiuqCvjoJLvB-7XRwlaFS90KmIw,573
  epub_translator/segment/block_segment.py,sha256=psNKA_HMIcwZtoug8AtnAcV9_mQ2WXLnXqFsekHzt2g,4570
  epub_translator/segment/common.py,sha256=gGWYQaJ0tGnWCuF1me9TOo-Q_DrZVakCu2patyFIOs0,714
- epub_translator/segment/inline_segment.py,sha256=_ZgSlZmGxzIvaPs01hreoUfnaXz8Yq7naksT34dGfds,14221
- epub_translator/segment/text_segment.py,sha256=Fos3tTuTcpnm-NmqPftzqov1_Rwr57PBv8AIgjKNYcg,6389
+ epub_translator/segment/inline_segment.py,sha256=nrRKoJ-vblsNITJeixrCgIOkVQyUXrchMg0XYU_8pLo,14563
+ epub_translator/segment/text_segment.py,sha256=LhGlugp6MeAB3tk2jxd1kBb2EA8G2ruN49mP_IZehA0,6295
  epub_translator/segment/utils.py,sha256=qMqUt33pDRN5Tnuydkodzu2gaQrwTzAnQmXpDuHen1o,1036
  epub_translator/serial/__init__.py,sha256=b3IMVmWcUwEqHKcGmey88b057pyz5ct946CaUZi4LB4,67
  epub_translator/serial/chunk.py,sha256=FrTaHikVOd6bLYumnEriTaAQ_DIDLjHm16gh-wBVR9k,1495
  epub_translator/serial/segment.py,sha256=uEz-ke1KcYrON-68FaUEzMG2CzHlMjvbC11F3ZT4yH0,446
  epub_translator/serial/splitter.py,sha256=Nq0sxPXos8ez7QBG01sOKjnYKbeBWUBHflZGtqenVm8,1726
  epub_translator/template.py,sha256=0CqRmj3nTtPshw0NmTr2ECqelops2MMyX94fMrE-HKs,1587
- epub_translator/translator.py,sha256=SL0Qh49QaZD3bKKkf5xM0hF2MkPqzxKO8uyo8rn9wTQ,6421
+ epub_translator/translation/__init__.py,sha256=R0c0ZngocOC-Qczs0a8JYAdAcCu2gv3FLcSrUyhwDMo,74
+ epub_translator/translation/epub_transcode.py,sha256=_pRzmQgDrlfsibalkUogVi0F0Qy_uuYfKhZk3nP5pkA,2747
+ epub_translator/translation/language.py,sha256=88osG0JNYxOkxBjg5Pm-P0Mhiyxf6GqdxoPW12HW0PE,493
+ epub_translator/translation/punctuation.py,sha256=TPCGjEmlAyN3G11VuXdHn-pvUkuWDwWqbTNzw-ij60E,813
+ epub_translator/translation/translator.py,sha256=WC4Yqx-ffhxBhqzMAujE_NQG7BsDwgn95UMNG7OkUSo,6487
+ epub_translator/translation/xml_interrupter.py,sha256=QxrNpBoR4ZIAvWsa20jz1z_bE_5-G5-nBGjE6IKCTjw,7405
  epub_translator/utils.py,sha256=BfZWrYjzDNQ4cFrgvRNzd4i1CKLtPxS8Z4LBHhqEV78,914
- epub_translator/xml/__init__.py,sha256=1sBLICHtNNw0UNMOXCZzrZ7uGfOwnPf_m4MmmMNzakY,160
+ epub_translator/xml/__init__.py,sha256=qluFTfZYlPmOie8nR2C5O0tZ3UbCQEoEoR-Fq-__79c,160
  epub_translator/xml/const.py,sha256=Re2TYmpwG7-jVVgSq3R_K-uYhvAYzcXcRmLFkwCPD9Y,19
  epub_translator/xml/deduplication.py,sha256=TaMbzeA70VvUQV0X1wcQFVbuMEPJUtj9Hq6iWlUmtAQ,1152
- epub_translator/xml/firendly/__init__.py,sha256=I5jhnhFWoHvojLsYXH4jfR4Gi8lKFZ3yQ56ze5hEe1M,74
- epub_translator/xml/firendly/decoder.py,sha256=xRQ5LnSunmYbba_0oT39oUr86-sLYAHYMUGmlseIu2U,2467
- epub_translator/xml/firendly/encoder.py,sha256=evjvw6oE-oCud44IsJ-YZVHn6dtUzjNYX25ljaZP6vY,2417
- epub_translator/xml/firendly/parser.py,sha256=QlMHA0nfPJbNyx6IwRFrYVw7okuvzDB42NXCauIFV-o,6560
- epub_translator/xml/firendly/tag.py,sha256=ahaGoYttuAlnFxLFFgTV51KUZSpUiHho-COZX14nxN8,3308
- epub_translator/xml/firendly/transform.py,sha256=5tG1MJmzrXIR_Z5gmRxwcoKvXBzJBVH0ELeaRsG-8w0,1201
+ epub_translator/xml/friendly/__init__.py,sha256=I5jhnhFWoHvojLsYXH4jfR4Gi8lKFZ3yQ56ze5hEe1M,74
+ epub_translator/xml/friendly/decoder.py,sha256=xRQ5LnSunmYbba_0oT39oUr86-sLYAHYMUGmlseIu2U,2467
+ epub_translator/xml/friendly/encoder.py,sha256=evjvw6oE-oCud44IsJ-YZVHn6dtUzjNYX25ljaZP6vY,2417
+ epub_translator/xml/friendly/parser.py,sha256=QlMHA0nfPJbNyx6IwRFrYVw7okuvzDB42NXCauIFV-o,6560
+ epub_translator/xml/friendly/tag.py,sha256=ahaGoYttuAlnFxLFFgTV51KUZSpUiHho-COZX14nxN8,3308
+ epub_translator/xml/friendly/transform.py,sha256=5tG1MJmzrXIR_Z5gmRxwcoKvXBzJBVH0ELeaRsG-8w0,1201
  epub_translator/xml/inline.py,sha256=mwFho6wq2gYWmWcg5Cw6OQeteV-a-i6X9OE63fzblpE,1274
  epub_translator/xml/self_closing.py,sha256=41ofGUdss9yU51IVwI4It6hKfzh8YcxIR_j-ohD19LE,5240
  epub_translator/xml/utils.py,sha256=7tQ6L5P0_JXhxONeG64hEeeL5mKjA6NKS1H1Q9B1Cac,1062
  epub_translator/xml/xml.py,sha256=qQ5Wk1-KVVHE4TX25zGOR7fINsGkXnoq-qyKKNl5no4,1675
  epub_translator/xml/xml_like.py,sha256=jBK4UUgXXWRYnfYlCH1MUAjGHWBQAbUj8HsYqvTTWvA,8890
- epub_translator/xml_interrupter.py,sha256=IGLATr7zTIdhE54Gnroab4Xu_vLJ7kzPiQgk7WMXKTc,7403
  epub_translator/xml_translator/__init__.py,sha256=lqts1mJL_WfojDnMAQ5OM7TbT6u9X3H-X4C_avHzvXM,128
  epub_translator/xml_translator/callbacks.py,sha256=IoZrsaivd2W76cHFupwv6auVxgEWHcBN2MHQJYcWoJ8,1324
  epub_translator/xml_translator/common.py,sha256=hSPptgPp7j6dm47imELB5DgmEbzTEyJD6WEeELOOc50,38
+ epub_translator/xml_translator/concurrency.py,sha256=ACwoDHNX3xChL0On5yvUSFT8By7aoHoKor94k6A8nuY,1502
  epub_translator/xml_translator/hill_climbing.py,sha256=1jvilOkTLzwljJA4Nrel8yU2XGvOXpueUJTK7RAp-XY,4272
- epub_translator/xml_translator/stream_mapper.py,sha256=tbMc2vyPUn9zEkJZ7-OVYuKaYyn2pPPwjcAdQ8HLzNs,10179
+ epub_translator/xml_translator/score.py,sha256=TkXDmr-29p8SzuAp68u_vFDE69y1TyId9S20HT1T_xs,5311
+ epub_translator/xml_translator/stream_mapper.py,sha256=nk8iRUHAUQA2B35_y-JOCo6il8MSxXikWvyl-WA8WAA,10662
  epub_translator/xml_translator/submitter.py,sha256=6PGQTnEcOgL3zseDpSzDmU5d9Eg3eO5OfPIGmQp2DVY,14155
- epub_translator/xml_translator/translator.py,sha256=eIvniqKtNoqFFvfvxK4oA-W02y5ZTpmPQ8wFAJlvOUU,9752
+ epub_translator/xml_translator/translator.py,sha256=7Ja1jFbmjIgHcmI9V6gg_K0t7qb6in9mhRn54a7qhZ8,9497
  epub_translator/xml_translator/validation.py,sha256=-OKlSZuD__sjAiEpGAO93YQme4ZDSPmoPjRsAMOCEjc,16668
- epub_translator-0.1.5.dist-info/LICENSE,sha256=5RF32sL3LtMOJIErdDKp1ZEYPGXS8WPpsiSz_jMBnGI,1066
- epub_translator-0.1.5.dist-info/METADATA,sha256=IT5MBdl68pICDYmk5tn3CwvdnZ5QxlVoaSzw-VhKf3c,14603
- epub_translator-0.1.5.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
- epub_translator-0.1.5.dist-info/RECORD,,
+ epub_translator-0.1.6.dist-info/LICENSE,sha256=5RF32sL3LtMOJIErdDKp1ZEYPGXS8WPpsiSz_jMBnGI,1066
+ epub_translator-0.1.6.dist-info/METADATA,sha256=AcjUb1wmz6cN8PnbwgWJeGlOO9sH445B-qPugLW705M,15638
+ epub_translator-0.1.6.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
+ epub_translator-0.1.6.dist-info/RECORD,,
The remaining files changed only by rename or path move; their contents are identical in both versions.