epub-translator 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- epub_translator/__init__.py +2 -2
- epub_translator/data/fill.jinja +143 -38
- epub_translator/epub/__init__.py +1 -1
- epub_translator/epub/metadata.py +122 -0
- epub_translator/epub/spines.py +3 -2
- epub_translator/epub/zip.py +11 -9
- epub_translator/epub_transcode.py +108 -0
- epub_translator/llm/__init__.py +1 -0
- epub_translator/llm/context.py +109 -0
- epub_translator/llm/core.py +32 -113
- epub_translator/llm/executor.py +25 -31
- epub_translator/llm/increasable.py +1 -1
- epub_translator/llm/types.py +0 -3
- epub_translator/segment/__init__.py +26 -0
- epub_translator/segment/block_segment.py +124 -0
- epub_translator/segment/common.py +29 -0
- epub_translator/segment/inline_segment.py +356 -0
- epub_translator/{xml_translator → segment}/text_segment.py +8 -8
- epub_translator/segment/utils.py +43 -0
- epub_translator/translator.py +147 -183
- epub_translator/utils.py +33 -0
- epub_translator/xml/__init__.py +2 -0
- epub_translator/xml/const.py +1 -0
- epub_translator/xml/deduplication.py +3 -3
- epub_translator/xml/self_closing.py +182 -0
- epub_translator/xml/utils.py +42 -0
- epub_translator/xml/xml.py +7 -0
- epub_translator/xml/xml_like.py +8 -33
- epub_translator/xml_interrupter.py +165 -0
- epub_translator/xml_translator/__init__.py +1 -2
- epub_translator/xml_translator/callbacks.py +34 -0
- epub_translator/xml_translator/{const.py → common.py} +0 -1
- epub_translator/xml_translator/hill_climbing.py +104 -0
- epub_translator/xml_translator/stream_mapper.py +253 -0
- epub_translator/xml_translator/submitter.py +26 -72
- epub_translator/xml_translator/translator.py +162 -113
- epub_translator/xml_translator/validation.py +458 -0
- {epub_translator-0.1.1.dist-info → epub_translator-0.1.3.dist-info}/METADATA +72 -9
- epub_translator-0.1.3.dist-info/RECORD +66 -0
- epub_translator/epub/placeholder.py +0 -53
- epub_translator/iter_sync.py +0 -24
- epub_translator/xml_translator/fill.py +0 -128
- epub_translator/xml_translator/format.py +0 -282
- epub_translator/xml_translator/fragmented.py +0 -125
- epub_translator/xml_translator/group.py +0 -183
- epub_translator/xml_translator/progressive_locking.py +0 -256
- epub_translator/xml_translator/utils.py +0 -29
- epub_translator-0.1.1.dist-info/RECORD +0 -58
- {epub_translator-0.1.1.dist-info → epub_translator-0.1.3.dist-info}/LICENSE +0 -0
- {epub_translator-0.1.1.dist-info → epub_translator-0.1.3.dist-info}/WHEEL +0 -0
|
@@ -1,69 +1,37 @@
|
|
|
1
|
-
from collections.abc import Iterable
|
|
2
1
|
from xml.etree.ElementTree import Element
|
|
3
2
|
|
|
4
|
-
from ..
|
|
5
|
-
from
|
|
3
|
+
from ..segment import TextSegment, combine_text_segments
|
|
4
|
+
from ..xml import index_of_parent, iter_with_stack
|
|
5
|
+
from .stream_mapper import InlineSegmentMapping
|
|
6
6
|
|
|
7
7
|
|
|
8
|
-
def submit_text_segments(element: Element,
|
|
9
|
-
grouped_map = _group_text_segments(
|
|
10
|
-
flatten_text_segments = dict(_extract_flatten_text_segments(element, grouped_map))
|
|
8
|
+
def submit_text_segments(element: Element, mappings: list[InlineSegmentMapping]) -> Element:
|
|
9
|
+
grouped_map = _group_text_segments(mappings)
|
|
11
10
|
_append_text_segments(element, grouped_map)
|
|
12
|
-
|
|
11
|
+
return element
|
|
13
12
|
|
|
14
13
|
|
|
15
|
-
def _group_text_segments(
|
|
14
|
+
def _group_text_segments(mappings: list[InlineSegmentMapping]):
|
|
16
15
|
grouped_map: dict[int, list[TextSegment]] = {}
|
|
17
|
-
for
|
|
18
|
-
parent_id = id(
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
#
|
|
27
|
-
#
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
for
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
parent_id = id(parent)
|
|
35
|
-
if parent_id in grouped_map:
|
|
36
|
-
override_parent_ids.add(parent_id)
|
|
37
|
-
|
|
38
|
-
if id(element) in grouped_map:
|
|
39
|
-
override_parent_ids.add(id(element)) # root 不会出现在 parents 中需单独添加
|
|
40
|
-
|
|
41
|
-
for parent_id in override_parent_ids:
|
|
42
|
-
yield parent_id, grouped_map.pop(parent_id)
|
|
16
|
+
for block_element, text_segments in mappings:
|
|
17
|
+
parent_id = id(block_element)
|
|
18
|
+
grouped_map[parent_id] = text_segments
|
|
19
|
+
|
|
20
|
+
# TODO: 如下是为了清除嵌入文字的 Block,当前版本忽略了嵌入文字的 Block 概念。
|
|
21
|
+
# 这是书籍中可能出现的一种情况,虽然不多见。
|
|
22
|
+
# 例如,作为非叶子的块元素,它的子块元素之间会夹杂文本,当前 collect_next_inline_segment 会忽略这些文字:
|
|
23
|
+
# <div>
|
|
24
|
+
# Some text before.
|
|
25
|
+
# <!-- 只有下一行作为叶子节点的块元素内的文字会被处理 -->
|
|
26
|
+
# <div>Paragraph 1.</div>
|
|
27
|
+
# Some text in between.
|
|
28
|
+
# </div>
|
|
29
|
+
for _, text_segments in mappings:
|
|
30
|
+
for text_segment in text_segments:
|
|
31
|
+
for parent_block in text_segment.parent_stack[: text_segment.block_depth - 1]:
|
|
32
|
+
grouped_map.pop(id(parent_block), None)
|
|
43
33
|
|
|
44
|
-
|
|
45
|
-
def _replace_text_segments(element: Element, text_segments: dict[int, list[TextSegment]]):
|
|
46
|
-
for _, child_element in iter_with_stack(element):
|
|
47
|
-
tail_text_segments: list[TextSegment] = []
|
|
48
|
-
for text_segment in text_segments.get(id(child_element), ()):
|
|
49
|
-
if text_segment.position == TextPosition.TEXT:
|
|
50
|
-
child_element.text = _append_text(
|
|
51
|
-
origin_text=child_element.text,
|
|
52
|
-
append_text=text_segment.text,
|
|
53
|
-
)
|
|
54
|
-
elif text_segment.position == TextPosition.TAIL:
|
|
55
|
-
tail_text_segments.append(text_segment)
|
|
56
|
-
|
|
57
|
-
tail_text_segments.sort(key=lambda t: t.index)
|
|
58
|
-
tail_text_segments.reverse()
|
|
59
|
-
for cc_element in child_element:
|
|
60
|
-
if not tail_text_segments:
|
|
61
|
-
break
|
|
62
|
-
if cc_element.tail is not None:
|
|
63
|
-
cc_element.tail = _append_text(
|
|
64
|
-
origin_text=cc_element.tail,
|
|
65
|
-
append_text=tail_text_segments.pop().text,
|
|
66
|
-
)
|
|
34
|
+
return grouped_map
|
|
67
35
|
|
|
68
36
|
|
|
69
37
|
def _append_text_segments(element: Element, grouped_map: dict[int, list[TextSegment]]):
|
|
@@ -74,7 +42,7 @@ def _append_text_segments(element: Element, grouped_map: dict[int, list[TextSegm
|
|
|
74
42
|
if not grouped:
|
|
75
43
|
continue
|
|
76
44
|
parent = parents[-1]
|
|
77
|
-
index =
|
|
45
|
+
index = index_of_parent(parents[-1], child_element)
|
|
78
46
|
combined = next(
|
|
79
47
|
combine_text_segments(
|
|
80
48
|
segments=(t.strip_block_parents() for t in grouped),
|
|
@@ -86,17 +54,3 @@ def _append_text_segments(element: Element, grouped_map: dict[int, list[TextSegm
|
|
|
86
54
|
parent.insert(index + 1, combined_element)
|
|
87
55
|
combined_element.tail = child_element.tail
|
|
88
56
|
child_element.tail = None
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
def _index_of_parent(parent: Element, checked_element: Element) -> int:
|
|
92
|
-
for i, child in enumerate(parent):
|
|
93
|
-
if child == checked_element:
|
|
94
|
-
return i
|
|
95
|
-
raise ValueError("Element not found in parent.")
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
def _append_text(origin_text: str | None, append_text: str) -> str:
|
|
99
|
-
if origin_text is None:
|
|
100
|
-
return append_text
|
|
101
|
-
else:
|
|
102
|
-
return origin_text + append_text
|
|
@@ -1,15 +1,14 @@
|
|
|
1
|
-
from collections.abc import Generator, Iterable
|
|
1
|
+
from collections.abc import Callable, Generator, Iterable
|
|
2
2
|
from typing import TypeVar
|
|
3
3
|
from xml.etree.ElementTree import Element
|
|
4
4
|
|
|
5
|
-
from ..iter_sync import IterSync
|
|
6
5
|
from ..llm import LLM, Message, MessageRole
|
|
7
|
-
from ..
|
|
8
|
-
from
|
|
9
|
-
from .
|
|
10
|
-
from .
|
|
11
|
-
from .
|
|
12
|
-
from .
|
|
6
|
+
from ..segment import BlockSegment, InlineSegment, TextSegment
|
|
7
|
+
from ..xml import decode_friendly, encode_friendly
|
|
8
|
+
from .callbacks import Callbacks, FillFailedEvent, warp_callbacks
|
|
9
|
+
from .hill_climbing import HillClimbing
|
|
10
|
+
from .stream_mapper import InlineSegmentMapping, XMLStreamMapper
|
|
11
|
+
from .submitter import submit_text_segments
|
|
13
12
|
|
|
14
13
|
T = TypeVar("T")
|
|
15
14
|
|
|
@@ -17,66 +16,110 @@ T = TypeVar("T")
|
|
|
17
16
|
class XMLTranslator:
|
|
18
17
|
def __init__(
|
|
19
18
|
self,
|
|
20
|
-
|
|
21
|
-
|
|
19
|
+
translation_llm: LLM,
|
|
20
|
+
fill_llm: LLM,
|
|
22
21
|
target_language: str,
|
|
23
22
|
user_prompt: str | None,
|
|
24
23
|
ignore_translated_error: bool,
|
|
25
24
|
max_retries: int,
|
|
26
25
|
max_fill_displaying_errors: int,
|
|
26
|
+
max_group_tokens: int,
|
|
27
|
+
cache_seed_content: str | None = None,
|
|
27
28
|
) -> None:
|
|
28
|
-
self.
|
|
29
|
-
self.
|
|
29
|
+
self._translation_llm: LLM = translation_llm
|
|
30
|
+
self._fill_llm: LLM = fill_llm
|
|
30
31
|
self._target_language: str = target_language
|
|
31
32
|
self._user_prompt: str | None = user_prompt
|
|
32
33
|
self._ignore_translated_error: bool = ignore_translated_error
|
|
33
34
|
self._max_retries: int = max_retries
|
|
34
35
|
self._max_fill_displaying_errors: int = max_fill_displaying_errors
|
|
36
|
+
self._cache_seed_content: str | None = cache_seed_content
|
|
37
|
+
self._stream_mapper: XMLStreamMapper = XMLStreamMapper(
|
|
38
|
+
encoding=translation_llm.encoding,
|
|
39
|
+
max_group_tokens=max_group_tokens,
|
|
40
|
+
)
|
|
35
41
|
|
|
36
|
-
def
|
|
37
|
-
|
|
42
|
+
def translate_element(
|
|
43
|
+
self,
|
|
44
|
+
element: Element,
|
|
45
|
+
interrupt_source_text_segments: Callable[[Iterable[TextSegment]], Iterable[TextSegment]] | None = None,
|
|
46
|
+
interrupt_translated_text_segments: Callable[[Iterable[TextSegment]], Iterable[TextSegment]] | None = None,
|
|
47
|
+
interrupt_block_element: Callable[[Element], Element] | None = None,
|
|
48
|
+
on_fill_failed: Callable[[FillFailedEvent], None] | None = None,
|
|
49
|
+
) -> Element:
|
|
50
|
+
for translated in self.translate_elements(
|
|
51
|
+
elements=((element),),
|
|
52
|
+
interrupt_source_text_segments=interrupt_source_text_segments,
|
|
53
|
+
interrupt_translated_text_segments=interrupt_translated_text_segments,
|
|
54
|
+
interrupt_block_element=interrupt_block_element,
|
|
55
|
+
on_fill_failed=on_fill_failed,
|
|
56
|
+
):
|
|
38
57
|
return translated
|
|
39
|
-
raise RuntimeError("Translation failed unexpectedly")
|
|
40
58
|
|
|
41
|
-
|
|
42
|
-
self, items: Iterable[tuple[Element, T]]
|
|
43
|
-
) -> Generator[tuple[Element, list[TextSegment], T], None, None]:
|
|
44
|
-
sync: IterSync[tuple[Element, T]] = IterSync()
|
|
45
|
-
text_segments: list[TextSegment] = []
|
|
59
|
+
raise RuntimeError("Translation failed unexpectedly")
|
|
46
60
|
|
|
47
|
-
|
|
48
|
-
|
|
61
|
+
def translate_elements(
|
|
62
|
+
self,
|
|
63
|
+
elements: Iterable[Element],
|
|
64
|
+
interrupt_source_text_segments: Callable[[Iterable[TextSegment]], Iterable[TextSegment]] | None = None,
|
|
65
|
+
interrupt_translated_text_segments: Callable[[Iterable[TextSegment]], Iterable[TextSegment]] | None = None,
|
|
66
|
+
interrupt_block_element: Callable[[Element], Element] | None = None,
|
|
67
|
+
on_fill_failed: Callable[[FillFailedEvent], None] | None = None,
|
|
68
|
+
) -> Generator[Element, None, None]:
|
|
69
|
+
callbacks = warp_callbacks(
|
|
70
|
+
interrupt_source_text_segments=interrupt_source_text_segments,
|
|
71
|
+
interrupt_translated_text_segments=interrupt_translated_text_segments,
|
|
72
|
+
interrupt_block_element=interrupt_block_element,
|
|
73
|
+
on_fill_failed=on_fill_failed,
|
|
74
|
+
)
|
|
75
|
+
for element, mappings in self._stream_mapper.map_stream(
|
|
76
|
+
elements=iter(elements),
|
|
77
|
+
callbacks=callbacks,
|
|
78
|
+
map=lambda inline_segments: self._translate_inline_segments(
|
|
79
|
+
inline_segments=inline_segments,
|
|
80
|
+
callbacks=callbacks,
|
|
81
|
+
),
|
|
49
82
|
):
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
tail_element, _ = sync.tail
|
|
54
|
-
if id(tail_element) == id(text_segment.root):
|
|
55
|
-
break
|
|
56
|
-
tail_element, payload = sync.take()
|
|
57
|
-
yield tail_element, text_segments, payload
|
|
58
|
-
text_segments = []
|
|
59
|
-
text_segments.append(text_segment)
|
|
60
|
-
|
|
61
|
-
while sync.tail is not None:
|
|
62
|
-
tail_element, payload = sync.take()
|
|
63
|
-
yield tail_element, text_segments, payload
|
|
64
|
-
text_segments = []
|
|
65
|
-
|
|
66
|
-
def _translate_text_segments(self, elements: Iterable[Element]):
|
|
67
|
-
for group in self._group_context.split_groups(elements):
|
|
68
|
-
text_segments = list(group)
|
|
69
|
-
fill = XMLFill(text_segments)
|
|
70
|
-
source_text = "".join(self._render_text_segments(text_segments))
|
|
71
|
-
translated_text = self._translate_text(source_text)
|
|
72
|
-
self._fill_into_xml(
|
|
73
|
-
fill=fill,
|
|
74
|
-
source_text=source_text,
|
|
75
|
-
translated_text=translated_text,
|
|
83
|
+
yield submit_text_segments(
|
|
84
|
+
element=element,
|
|
85
|
+
mappings=mappings,
|
|
76
86
|
)
|
|
77
|
-
|
|
87
|
+
|
|
88
|
+
def _translate_inline_segments(
|
|
89
|
+
self,
|
|
90
|
+
inline_segments: list[InlineSegment],
|
|
91
|
+
callbacks: Callbacks,
|
|
92
|
+
) -> list[InlineSegmentMapping | None]:
|
|
93
|
+
hill_climbing = HillClimbing(
|
|
94
|
+
encoding=self._fill_llm.encoding,
|
|
95
|
+
max_fill_displaying_errors=self._max_fill_displaying_errors,
|
|
96
|
+
block_segment=BlockSegment(
|
|
97
|
+
root_tag="xml",
|
|
98
|
+
inline_segments=inline_segments,
|
|
99
|
+
),
|
|
100
|
+
)
|
|
101
|
+
text_segments = (text for inline in inline_segments for text in inline)
|
|
102
|
+
source_text = "".join(self._render_text_segments(text_segments))
|
|
103
|
+
translated_text = self._translate_text(source_text)
|
|
104
|
+
|
|
105
|
+
self._request_and_submit(
|
|
106
|
+
hill_climbing=hill_climbing,
|
|
107
|
+
source_text=source_text,
|
|
108
|
+
translated_text=translated_text,
|
|
109
|
+
callbacks=callbacks,
|
|
110
|
+
)
|
|
111
|
+
mappings: list[InlineSegmentMapping | None] = []
|
|
112
|
+
for mapping in hill_climbing.gen_mappings():
|
|
113
|
+
if mapping:
|
|
114
|
+
_, text_segments = mapping
|
|
115
|
+
if not text_segments:
|
|
116
|
+
mapping = None
|
|
117
|
+
mappings.append(mapping)
|
|
118
|
+
|
|
119
|
+
return mappings
|
|
78
120
|
|
|
79
121
|
def _render_text_segments(self, segments: Iterable[TextSegment]):
|
|
122
|
+
# TODO: 没必要,直接按照新的 inline segment 组织就行了
|
|
80
123
|
iterator = iter(segments)
|
|
81
124
|
segment = next(iterator, None)
|
|
82
125
|
if segment is None:
|
|
@@ -92,88 +135,94 @@ class XMLTranslator:
|
|
|
92
135
|
yield segment.text
|
|
93
136
|
|
|
94
137
|
def _translate_text(self, text: str) -> str:
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
138
|
+
with self._translation_llm.context(cache_seed_content=self._cache_seed_content) as ctx:
|
|
139
|
+
return ctx.request(
|
|
140
|
+
input=[
|
|
141
|
+
Message(
|
|
142
|
+
role=MessageRole.SYSTEM,
|
|
143
|
+
message=self._translation_llm.template("translate").render(
|
|
144
|
+
target_language=self._target_language,
|
|
145
|
+
user_prompt=self._user_prompt,
|
|
146
|
+
),
|
|
102
147
|
),
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
)
|
|
148
|
+
Message(role=MessageRole.USER, message=text),
|
|
149
|
+
]
|
|
150
|
+
)
|
|
107
151
|
|
|
108
|
-
def
|
|
152
|
+
def _request_and_submit(
|
|
153
|
+
self,
|
|
154
|
+
hill_climbing: HillClimbing,
|
|
155
|
+
source_text: str,
|
|
156
|
+
translated_text: str,
|
|
157
|
+
callbacks: Callbacks,
|
|
158
|
+
) -> None:
|
|
109
159
|
user_message = (
|
|
110
160
|
f"Source text:\n{source_text}\n\n"
|
|
111
|
-
f"XML template:\n```XML\n{encode_friendly(
|
|
161
|
+
f"XML template:\n```XML\n{encode_friendly(hill_climbing.request_element())}\n```\n\n"
|
|
112
162
|
f"Translated text:\n{translated_text}"
|
|
113
163
|
)
|
|
114
164
|
fixed_messages: list[Message] = [
|
|
115
165
|
Message(
|
|
116
166
|
role=MessageRole.SYSTEM,
|
|
117
|
-
message=self.
|
|
167
|
+
message=self._fill_llm.template("fill").render(),
|
|
118
168
|
),
|
|
119
169
|
Message(
|
|
120
170
|
role=MessageRole.USER,
|
|
121
171
|
message=user_message,
|
|
122
172
|
),
|
|
123
173
|
]
|
|
124
|
-
|
|
125
|
-
validator = ProgressiveLockingValidator()
|
|
126
174
|
conversation_history: list[Message] = []
|
|
127
|
-
latest_error: ValidationError | None = None
|
|
128
175
|
|
|
129
|
-
with self.
|
|
130
|
-
|
|
131
|
-
# Request LLM response
|
|
132
|
-
response = llm_context.request(
|
|
133
|
-
input=fixed_messages + conversation_history,
|
|
134
|
-
)
|
|
176
|
+
with self._fill_llm.context(cache_seed_content=self._cache_seed_content) as llm_context:
|
|
177
|
+
error_message: str | None = None
|
|
135
178
|
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
179
|
+
for retry_count in range(self._max_retries):
|
|
180
|
+
response = llm_context.request(fixed_messages + conversation_history)
|
|
181
|
+
validated_element = self._extract_xml_element(response)
|
|
182
|
+
error_message = None
|
|
183
|
+
if isinstance(validated_element, str):
|
|
184
|
+
error_message = validated_element
|
|
185
|
+
elif isinstance(validated_element, Element):
|
|
186
|
+
error_message = hill_climbing.submit(validated_element)
|
|
187
|
+
|
|
188
|
+
if error_message is None:
|
|
189
|
+
break
|
|
139
190
|
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
191
|
+
callbacks.on_fill_failed(
|
|
192
|
+
FillFailedEvent(
|
|
193
|
+
error_message=error_message,
|
|
194
|
+
retried_count=retry_count + 1,
|
|
195
|
+
over_maximum_retries=False,
|
|
145
196
|
)
|
|
197
|
+
)
|
|
198
|
+
conversation_history = [
|
|
199
|
+
Message(role=MessageRole.ASSISTANT, message=response),
|
|
200
|
+
Message(role=MessageRole.USER, message=error_message),
|
|
201
|
+
]
|
|
202
|
+
if error_message is not None:
|
|
203
|
+
callbacks.on_fill_failed(
|
|
204
|
+
FillFailedEvent(
|
|
205
|
+
error_message=error_message,
|
|
206
|
+
retried_count=self._max_retries,
|
|
207
|
+
over_maximum_retries=True,
|
|
208
|
+
)
|
|
209
|
+
)
|
|
210
|
+
|
|
211
|
+
def _extract_xml_element(self, text: str) -> Element | str:
|
|
212
|
+
first_xml_element: Element | None = None
|
|
213
|
+
all_xml_elements: int = 0
|
|
146
214
|
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
conversation_history = [
|
|
163
|
-
Message(role=MessageRole.ASSISTANT, message=response),
|
|
164
|
-
Message(role=MessageRole.USER, message=full_error_message),
|
|
165
|
-
]
|
|
166
|
-
|
|
167
|
-
except ValidationError as error:
|
|
168
|
-
# XML extraction or basic validation failed
|
|
169
|
-
latest_error = error
|
|
170
|
-
conversation_history = [
|
|
171
|
-
Message(role=MessageRole.ASSISTANT, message=response),
|
|
172
|
-
Message(role=MessageRole.USER, message=str(error)),
|
|
173
|
-
]
|
|
174
|
-
|
|
175
|
-
message = f"Failed to get valid XML structure after {self._max_retries} attempts"
|
|
176
|
-
if latest_error is None:
|
|
177
|
-
raise ValueError(message)
|
|
178
|
-
else:
|
|
179
|
-
raise ValueError(message) from latest_error
|
|
215
|
+
for xml_element in decode_friendly(text, tags="xml"):
|
|
216
|
+
if first_xml_element is None:
|
|
217
|
+
first_xml_element = xml_element
|
|
218
|
+
all_xml_elements += 1
|
|
219
|
+
|
|
220
|
+
if first_xml_element is None:
|
|
221
|
+
return "No complete <xml>...</xml> block found. Please ensure you have properly closed the XML with </xml> tag." # noqa: E501
|
|
222
|
+
|
|
223
|
+
if all_xml_elements > 1:
|
|
224
|
+
return (
|
|
225
|
+
f"Found {all_xml_elements} <xml>...</xml> blocks. "
|
|
226
|
+
"Please return only one XML block without any examples or explanations."
|
|
227
|
+
)
|
|
228
|
+
return first_xml_element
|