markdown-flow 0.2.19__py3-none-any.whl → 0.2.30__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- markdown_flow/__init__.py +4 -4
- markdown_flow/constants.py +210 -99
- markdown_flow/core.py +605 -209
- markdown_flow/llm.py +4 -3
- markdown_flow/models.py +1 -17
- markdown_flow/parser/__init__.py +38 -0
- markdown_flow/parser/code_fence_utils.py +190 -0
- markdown_flow/parser/interaction.py +354 -0
- markdown_flow/parser/json_parser.py +50 -0
- markdown_flow/parser/output.py +215 -0
- markdown_flow/parser/preprocessor.py +151 -0
- markdown_flow/parser/validation.py +100 -0
- markdown_flow/parser/variable.py +95 -0
- markdown_flow/providers/__init__.py +16 -0
- markdown_flow/providers/config.py +46 -0
- markdown_flow/providers/openai.py +369 -0
- markdown_flow/utils.py +43 -43
- {markdown_flow-0.2.19.dist-info → markdown_flow-0.2.30.dist-info}/METADATA +45 -52
- markdown_flow-0.2.30.dist-info/RECORD +24 -0
- markdown_flow-0.2.19.dist-info/RECORD +0 -13
- {markdown_flow-0.2.19.dist-info → markdown_flow-0.2.30.dist-info}/WHEEL +0 -0
- {markdown_flow-0.2.19.dist-info → markdown_flow-0.2.30.dist-info}/licenses/LICENSE +0 -0
- {markdown_flow-0.2.19.dist-info → markdown_flow-0.2.30.dist-info}/top_level.txt +0 -0
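
The core.py diff below adds instance-level LLM configuration. As a quick orientation, here is a minimal usage sketch of the new 0.2.30 surface inferred from the signatures in the diff; the top-level `markdown_flow` import and the duck-typed provider stub are assumptions, while the `MarkdownFlow` method names and keyword signatures are taken from the code below:

```python
from markdown_flow import MarkdownFlow  # assumed top-level export


class EchoProvider:
    """Toy stand-in for the package's LLMProvider interface; the keyword
    signature (model=/temperature=) mirrors the calls made in core.py below."""

    def complete(self, messages, model=None, temperature=None):
        return f"[{model or 'default-model'}] {messages[-1]['content']}"

    def stream(self, messages, model=None, temperature=None):
        yield self.complete(messages, model=model, temperature=temperature)


mf = MarkdownFlow(
    "Hello {{name}}!",
    llm_provider=EchoProvider(),
    max_context_length=20,  # keep only the 20 most recent context messages (0 = unlimited)
)

# New in 0.2.30: chainable per-instance overrides, forwarded to every
# provider.complete()/provider.stream() call.
mf.set_model("gpt-4o-mini").set_temperature(0.7)

# New debugging helpers: the preprocessed document (fenced code blocks become
# __MDFLOW_CODE_BLOCK_N__ placeholders) and the exact messages built for block 0.
print(mf.get_processed_document())
print(mf.get_content_messages(0, variables={"name": "Ada"}))
```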
markdown_flow/core.py
CHANGED
```diff
@@ -13,32 +13,30 @@ from typing import Any
 from .constants import (
     BLOCK_INDEX_OUT_OF_RANGE_ERROR,
     BLOCK_SEPARATOR,
-
-
-
-    COMPILED_VARIABLE_REFERENCE_CLEANUP_REGEX,
-    COMPILED_WHITESPACE_CLEANUP_REGEX,
+    CONTEXT_BUTTON_OPTIONS_TEMPLATE,
+    CONTEXT_QUESTION_TEMPLATE,
+    DEFAULT_BASE_SYSTEM_PROMPT,
     DEFAULT_INTERACTION_ERROR_PROMPT,
     DEFAULT_INTERACTION_PROMPT,
-    DEFAULT_VALIDATION_SYSTEM_MESSAGE,
     INPUT_EMPTY_ERROR,
     INTERACTION_ERROR_RENDER_INSTRUCTIONS,
     INTERACTION_PARSE_ERROR,
     INTERACTION_PATTERN_NON_CAPTURING,
     INTERACTION_PATTERN_SPLIT,
-    INTERACTION_RENDER_INSTRUCTIONS,
     LLM_PROVIDER_REQUIRED_ERROR,
     OUTPUT_INSTRUCTION_EXPLANATION,
     UNSUPPORTED_PROMPT_TYPE_ERROR,
+    VALIDATION_REQUIREMENTS_TEMPLATE,
+    VALIDATION_TASK_TEMPLATE,
 )
 from .enums import BlockType
 from .exceptions import BlockIndexError
 from .llm import LLMProvider, LLMResult, ProcessMode
-from .models import Block
-from .
+from .models import Block
+from .parser import (
+    CodeBlockPreprocessor,
     InteractionParser,
     InteractionType,
-    extract_interaction_question,
     extract_preserved_content,
     extract_variables_from_text,
     is_preserved_content_block,
@@ -57,51 +55,156 @@ class MarkdownFlow:
 
     _llm_provider: LLMProvider | None
     _document: str
+    _processed_document: str
     _document_prompt: str | None
     _interaction_prompt: str | None
     _interaction_error_prompt: str | None
+    _max_context_length: int
     _blocks: list[Block] | None
-
+    _model: str | None
+    _temperature: float | None
+    _preprocessor: CodeBlockPreprocessor
 
     def __init__(
         self,
         document: str,
         llm_provider: LLMProvider | None = None,
+        base_system_prompt: str | None = None,
         document_prompt: str | None = None,
         interaction_prompt: str | None = None,
         interaction_error_prompt: str | None = None,
+        max_context_length: int = 0,
     ):
         """
         Initialize MarkdownFlow instance.
 
         Args:
             document: Markdown document content
-            llm_provider: LLM provider
+            llm_provider: LLM provider (required for COMPLETE and STREAM modes)
+            base_system_prompt: MarkdownFlow base system prompt (framework-level, content blocks only)
             document_prompt: Document-level system prompt
             interaction_prompt: Interaction content rendering prompt
             interaction_error_prompt: Interaction error rendering prompt
+            max_context_length: Maximum number of context messages to keep (0 = unlimited)
         """
         self._document = document
        self._llm_provider = llm_provider
+        self._base_system_prompt = base_system_prompt or DEFAULT_BASE_SYSTEM_PROMPT
         self._document_prompt = document_prompt
         self._interaction_prompt = interaction_prompt or DEFAULT_INTERACTION_PROMPT
         self._interaction_error_prompt = interaction_error_prompt or DEFAULT_INTERACTION_ERROR_PROMPT
+        self._max_context_length = max_context_length
         self._blocks = None
-        self.
+        self._model: str | None = None
+        self._temperature: float | None = None
+
+        # Preprocess document: extract code blocks and replace with placeholders
+        # This is done once during initialization, similar to Go implementation
+        self._preprocessor = CodeBlockPreprocessor()
+        self._processed_document = self._preprocessor.extract_code_blocks(document)
 
     def set_llm_provider(self, provider: LLMProvider) -> None:
         """Set LLM provider."""
         self._llm_provider = provider
 
+    def get_processed_document(self) -> str:
+        """
+        Get preprocessed document (for debugging and testing).
+
+        Returns the document content after code blocks have been replaced with placeholders.
+
+        Use cases:
+        - Verify that code block preprocessing was executed correctly
+        - Check placeholder format (__MDFLOW_CODE_BLOCK_N__)
+        - Debug preprocessing stage issues
+
+        Returns:
+            Preprocessed document string
+        """
+        return self._processed_document
+
+    def get_content_messages(
+        self,
+        block_index: int,
+        variables: dict[str, str | list[str]] | None,
+        context: list[dict[str, str]] | None = None,
+    ) -> list[dict[str, str]]:
+        """
+        Get content messages (for debugging and inspection).
+
+        Builds and returns the complete message list that will be sent to LLM.
+
+        Use cases:
+        - Debug: View actual content sent to LLM
+        - Verify: Check if code blocks are correctly restored
+        - Inspect: Verify variable replacement and prompt building logic
+        - Review: Confirm system/user message assembly results
+
+        Args:
+            block_index: Block index
+            variables: Variable mapping
+            context: Context message list
+
+        Returns:
+            List of message dictionaries
+        """
+        return self._build_content_messages(block_index, variables, context)
+
+    def set_model(self, model: str) -> "MarkdownFlow":
+        """
+        Set model name for this instance.
+
+        Args:
+            model: Model name to use
+
+        Returns:
+            Self for method chaining
+        """
+        self._model = model
+        return self
+
+    def set_temperature(self, temperature: float) -> "MarkdownFlow":
+        """
+        Set temperature for this instance.
+
+        Args:
+            temperature: Temperature value (typically 0.0-2.0)
+
+        Returns:
+            Self for method chaining
+        """
+        self._temperature = temperature
+        return self
+
+    def get_model(self) -> str | None:
+        """
+        Get model name for this instance.
+
+        Returns:
+            Model name if set, None otherwise
+        """
+        return self._model
+
+    def get_temperature(self) -> float | None:
+        """
+        Get temperature for this instance.
+
+        Returns:
+            Temperature value if set, None otherwise
+        """
+        return self._temperature
+
     def set_prompt(self, prompt_type: str, value: str | None) -> None:
         """
         Set prompt template.
 
         Args:
-            prompt_type: Prompt type ('document', 'interaction', 'interaction_error')
+            prompt_type: Prompt type ('base_system', 'document', 'interaction', 'interaction_error')
             value: Prompt content
         """
-        if prompt_type == "document":
+        if prompt_type == "base_system":
+            self._base_system_prompt = value or DEFAULT_BASE_SYSTEM_PROMPT
+        elif prompt_type == "document":
             self._document_prompt = value
         elif prompt_type == "interaction":
             self._interaction_prompt = value or DEFAULT_INTERACTION_PROMPT
@@ -110,6 +213,44 @@ class MarkdownFlow:
         else:
             raise ValueError(UNSUPPORTED_PROMPT_TYPE_ERROR.format(prompt_type=prompt_type))
 
+    def _truncate_context(
+        self,
+        context: list[dict[str, str]] | None,
+    ) -> list[dict[str, str]] | None:
+        """
+        Filter and truncate context to specified maximum length.
+
+        Processing steps:
+        1. Filter out messages with empty content (empty string or whitespace only)
+        2. Truncate to max_context_length if configured (0 = unlimited)
+
+        Args:
+            context: Original context list
+
+        Returns:
+            Filtered and truncated context. Returns None if no valid messages remain.
+        """
+        if not context:
+            return None
+
+        # Step 1: Filter out messages with empty or whitespace-only content
+        filtered_context = [msg for msg in context if msg.get("content", "").strip()]
+
+        # Return None if no valid messages remain after filtering
+        if not filtered_context:
+            return None
+
+        # Step 2: Truncate to max_context_length if configured
+        if self._max_context_length == 0:
+            # No limit, return all filtered messages
+            return filtered_context
+
+        # Keep the most recent N messages
+        if len(filtered_context) > self._max_context_length:
+            return filtered_context[-self._max_context_length :]
+
+        return filtered_context
+
     @property
     def document(self) -> str:
         """Get document content."""
@@ -125,8 +266,9 @@ class MarkdownFlow:
         if self._blocks is not None:
             return self._blocks
 
-
-
+        # Parse the preprocessed document (code blocks already replaced with placeholders)
+        # The preprocessing was done once during initialization
+        segments = re.split(BLOCK_SEPARATOR, self._processed_document)
         final_blocks: list[Block] = []
 
         for segment in segments:
@@ -167,14 +309,6 @@ class MarkdownFlow:
         """Extract all variable names from the document."""
         return extract_variables_from_text(self._document)
 
-    def set_interaction_validation_config(self, block_index: int, config: InteractionValidationConfig) -> None:
-        """Set validation config for specified interaction block."""
-        self._interaction_configs[block_index] = config
-
-    def get_interaction_validation_config(self, block_index: int) -> InteractionValidationConfig | None:
-        """Get validation config for specified interaction block."""
-        return self._interaction_configs.get(block_index)
-
     # Core unified interface
 
     def process(
@@ -198,6 +332,10 @@ class MarkdownFlow:
         Returns:
             LLMResult or Generator[LLMResult, None, None]
         """
+        # Process base_system_prompt variable replacement
+        if self._base_system_prompt:
+            self._base_system_prompt = replace_variables_in_text(self._base_system_prompt, variables or {})
+
         # Process document_prompt variable replacement
         if self._document_prompt:
             self._document_prompt = replace_variables_in_text(self._document_prompt, variables or {})
@@ -210,7 +348,7 @@ class MarkdownFlow:
         if block.block_type == BlockType.INTERACTION:
             if user_input is None:
                 # Render interaction content
-                return self._process_interaction_render(block_index, mode, variables)
+                return self._process_interaction_render(block_index, mode, context, variables)
             # Process user input
             return self._process_interaction_input(block_index, user_input, mode, context, variables)
 
@@ -231,17 +369,17 @@ class MarkdownFlow:
         variables: dict[str, str | list[str]] | None,
     ):
         """Process content block."""
-        #
-
+        # Truncate context to configured maximum length
+        truncated_context = self._truncate_context(context)
 
-
-
+        # Build messages with context
+        messages = self._build_content_messages(block_index, variables, truncated_context)
 
         if mode == ProcessMode.COMPLETE:
             if not self._llm_provider:
                 raise ValueError(LLM_PROVIDER_REQUIRED_ERROR)
 
-            content = self._llm_provider.complete(messages)
+            content = self._llm_provider.complete(messages, model=self._model, temperature=self._temperature)
             return LLMResult(content=content, prompt=messages[-1]["content"])
 
         if mode == ProcessMode.STREAM:
@@ -249,7 +387,7 @@ class MarkdownFlow:
                 raise ValueError(LLM_PROVIDER_REQUIRED_ERROR)
 
             def stream_generator():
-                for chunk in self._llm_provider.stream(messages):  # type: ignore[attr-defined]
+                for chunk in self._llm_provider.stream(messages, model=self._model, temperature=self._temperature):  # type: ignore[attr-defined]
                     yield LLMResult(content=chunk, prompt=messages[-1]["content"])
 
             return stream_generator()
@@ -264,9 +402,18 @@ class MarkdownFlow:
         # Replace variables
         content = replace_variables_in_text(content, variables or {})
 
+        # Restore code blocks (replace placeholders with original code blocks)
+        content = self._preprocessor.restore_code_blocks(content)
+
         return LLMResult(content=content)
 
-    def _process_interaction_render(
+    def _process_interaction_render(
+        self,
+        block_index: int,
+        mode: ProcessMode,
+        context: list[dict[str, str]] | None = None,
+        variables: dict[str, str | list[str]] | None = None,
+    ):
         """Process interaction content rendering."""
         block = self.get_block(block_index)
 
@@ -277,66 +424,92 @@ class MarkdownFlow:
         processed_block = copy(block)
         processed_block.content = processed_content
 
-        #
-
-        if not
-        #
-            return LLMResult(
-
-
-
+        # Extract translatable content (JSON format)
+        translatable_json, interaction_info = self._extract_translatable_content(processed_block.content)
+        if not interaction_info:
+            # Parsing failed, return the original content
+            return LLMResult(
+                content=processed_block.content,
+                metadata={
+                    "block_type": "interaction",
+                    "block_index": block_index,
+                },
+            )
 
-
+        # If there is no translatable content, return directly
+        if not translatable_json or translatable_json == "{}":
             return LLMResult(
-
+                content=processed_block.content,
                 metadata={
-                    "
-                    "
+                    "block_type": "interaction",
+                    "block_index": block_index,
                 },
             )
 
+        # Build translation messages
+        messages = self._build_translation_messages(translatable_json)
+
         if mode == ProcessMode.COMPLETE:
             if not self._llm_provider:
-                return LLMResult(
+                return LLMResult(
+                    content=processed_block.content,
+                    metadata={
+                        "block_type": "interaction",
+                        "block_index": block_index,
+                    },
+                )
 
-
-
+            # Call the LLM to translate
+            translated_json = self._llm_provider.complete(messages, model=self._model, temperature=self._temperature)
+
+            # Reconstruct the interaction content with the translation result
+            translated_content = self._reconstruct_with_translation(processed_block.content, translatable_json, translated_json, interaction_info)
 
             return LLMResult(
-                content=
+                content=translated_content,
                 prompt=messages[-1]["content"],
                 metadata={
-                    "
-                    "
+                    "block_type": "interaction",
+                    "block_index": block_index,
+                    "original_content": translatable_json,
+                    "translated_content": translated_json,
                 },
             )
 
         if mode == ProcessMode.STREAM:
             if not self._llm_provider:
-                #
-                rendered_content = self._reconstruct_interaction_content(processed_block.content, question_text or "")
-
+                # Fallback handling, return the processed content
                 def stream_generator():
                     yield LLMResult(
-                        content=
+                        content=processed_block.content,
                         prompt=messages[-1]["content"],
+                        metadata={
+                            "block_type": "interaction",
+                            "block_index": block_index,
+                        },
                     )
 
                 return stream_generator()
 
-            #
+            # LLM provider available: collect the full response and return it once
             def stream_generator():
                 full_response = ""
-                for chunk in self._llm_provider.stream(messages):  # type: ignore[attr-defined]
+                for chunk in self._llm_provider.stream(messages, model=self._model, temperature=self._temperature):  # type: ignore[attr-defined]
                    full_response += chunk
 
-                #
-
+                # Reconstruct the interaction content with the translation result
+                translated_content = self._reconstruct_with_translation(processed_block.content, translatable_json, full_response, interaction_info)
 
-                #
+                # Return the complete content at once (not incrementally)
                 yield LLMResult(
-                    content=
+                    content=translated_content,
                     prompt=messages[-1]["content"],
+                    metadata={
+                        "block_type": "interaction",
+                        "block_index": block_index,
+                        "original_content": translatable_json,
+                        "translated_content": full_response,
+                    },
                 )
 
             return stream_generator()
@@ -356,7 +529,7 @@ class MarkdownFlow:
         # Basic validation
         if not user_input or not any(values for values in user_input.values()):
             error_msg = INPUT_EMPTY_ERROR
-            return self._render_error(error_msg, mode)
+            return self._render_error(error_msg, mode, context)
 
         # Get the target variable value from user_input
         target_values = user_input.get(target_variable, [])
@@ -370,24 +543,98 @@ class MarkdownFlow:
 
         if "error" in parse_result:
             error_msg = INTERACTION_PARSE_ERROR.format(error=parse_result["error"])
-            return self._render_error(error_msg, mode)
+            return self._render_error(error_msg, mode, context)
 
         interaction_type = parse_result.get("type")
 
         # Process user input based on interaction type
         if interaction_type in [
-            InteractionType.BUTTONS_ONLY,
             InteractionType.BUTTONS_WITH_TEXT,
-            InteractionType.BUTTONS_MULTI_SELECT,
             InteractionType.BUTTONS_MULTI_WITH_TEXT,
         ]:
-            #
+            # Buttons with text input: smart validation (match buttons first, then LLM validate custom text)
+            buttons = parse_result.get("buttons", [])
+
+            # Step 1: Match button values
+            matched_values, unmatched_values = self._match_button_values(buttons, target_values)
+
+            # Step 2: If there are unmatched values (custom text), validate with LLM
+            if unmatched_values:
+                # Create user_input for LLM validation (only custom text)
+                custom_input = {target_variable: unmatched_values}
+
+                validation_result = self._process_llm_validation(
+                    block_index=block_index,
+                    user_input=custom_input,
+                    target_variable=target_variable,
+                    mode=mode,
+                    context=context,
+                )
+
+                # Handle validation result based on mode
+                if mode == ProcessMode.COMPLETE:
+                    # Check if validation passed
+                    if isinstance(validation_result, LLMResult) and validation_result.variables:
+                        validated_values = validation_result.variables.get(target_variable, [])
+                        # Merge matched button values + validated custom text
+                        all_values = matched_values + validated_values
+                        return LLMResult(
+                            content="",
+                            variables={target_variable: all_values},
+                            metadata={
+                                "interaction_type": str(interaction_type),
+                                "matched_button_values": matched_values,
+                                "validated_custom_values": validated_values,
+                            },
+                        )
+                    # Validation failed, return error
+                    return validation_result
+
+                if mode == ProcessMode.STREAM:
+                    # For stream mode, collect validation result
+                    def stream_merge_generator():
+                        # Consume the validation stream
+                        for result in validation_result:  # type: ignore[attr-defined]
+                            if isinstance(result, LLMResult) and result.variables:
+                                validated_values = result.variables.get(target_variable, [])
+                                all_values = matched_values + validated_values
+                                yield LLMResult(
+                                    content="",
+                                    variables={target_variable: all_values},
+                                    metadata={
+                                        "interaction_type": str(interaction_type),
+                                        "matched_button_values": matched_values,
+                                        "validated_custom_values": validated_values,
+                                    },
+                                )
+                            else:
+                                # Validation failed
+                                yield result
+
+                    return stream_merge_generator()
+            else:
+                # All values matched buttons, return directly
+                return LLMResult(
+                    content="",
+                    variables={target_variable: matched_values},
+                    metadata={
+                        "interaction_type": str(interaction_type),
+                        "all_matched_buttons": True,
+                    },
+                )
+
+        if interaction_type in [
+            InteractionType.BUTTONS_ONLY,
+            InteractionType.BUTTONS_MULTI_SELECT,
+        ]:
+            # Pure button types: only basic button validation (no LLM)
             return self._process_button_validation(
                 parse_result,
                 target_values,
                 target_variable,
                 mode,
                 interaction_type,
+                context,
             )
 
         if interaction_type == InteractionType.NON_ASSIGNMENT_BUTTON:
@@ -403,19 +650,50 @@ class MarkdownFlow:
             )
 
         # Text-only input type: ?[%{{sys_user_nickname}}...question]
-        #
+        # Use LLM validation to check if input is relevant to the question
         if target_values:
-            return
-
-
-
-
-
-                    "values": target_values,
-                },
+            return self._process_llm_validation(
+                block_index=block_index,
+                user_input=user_input,
+                target_variable=target_variable,
+                mode=mode,
+                context=context,
             )
         error_msg = f"No input provided for variable '{target_variable}'"
-        return self._render_error(error_msg, mode)
+        return self._render_error(error_msg, mode, context)
+
+    def _match_button_values(
+        self,
+        buttons: list[dict[str, str]],
+        target_values: list[str],
+    ) -> tuple[list[str], list[str]]:
+        """
+        Match user input values against button options.
+
+        Args:
+            buttons: List of button dictionaries with 'display' and 'value' keys
+            target_values: User input values to match
+
+        Returns:
+            Tuple of (matched_values, unmatched_values)
+            - matched_values: Values that match button options (using button value)
+            - unmatched_values: Values that don't match any button
+        """
+        matched_values = []
+        unmatched_values = []
+
+        for value in target_values:
+            matched = False
+            for button in buttons:
+                if value in [button["display"], button["value"]]:
+                    matched_values.append(button["value"])  # Use button value
+                    matched = True
+                    break
+
+            if not matched:
+                unmatched_values.append(value)
+
+        return matched_values, unmatched_values
 
     def _process_button_validation(
         self,
@@ -424,6 +702,7 @@ class MarkdownFlow:
         target_variable: str,
         mode: ProcessMode,
         interaction_type: InteractionType,
+        context: list[dict[str, str]] | None = None,
     ) -> LLMResult | Generator[LLMResult, None, None]:
         """
         Simplified button validation with new input format.
@@ -434,6 +713,7 @@ class MarkdownFlow:
             target_variable: Target variable name
             mode: Processing mode
            interaction_type: Type of interaction
+            context: Conversation history context (optional)
         """
         buttons = parse_result.get("buttons", [])
         is_multi_select = interaction_type in [
@@ -459,7 +739,7 @@ class MarkdownFlow:
             # Pure button mode requires input
             button_displays = [btn["display"] for btn in buttons]
             error_msg = f"Please select from: {', '.join(button_displays)}"
-            return self._render_error(error_msg, mode)
+            return self._render_error(error_msg, mode, context)
 
         # Validate input values against available buttons
         valid_values = []
@@ -484,7 +764,7 @@ class MarkdownFlow:
         if invalid_values and not allow_text_input:
             button_displays = [btn["display"] for btn in buttons]
             error_msg = f"Invalid options: {', '.join(invalid_values)}. Please select from: {', '.join(button_displays)}"
-            return self._render_error(error_msg, mode)
+            return self._render_error(error_msg, mode, context)
 
         # Success: return validated values
         return LLMResult(
@@ -505,26 +785,18 @@ class MarkdownFlow:
         user_input: dict[str, list[str]],
         target_variable: str,
         mode: ProcessMode,
+        context: list[dict[str, str]] | None = None,
     ) -> LLMResult | Generator[LLMResult, None, None]:
         """Process LLM validation."""
         # Build validation messages
-        messages = self._build_validation_messages(block_index, user_input, target_variable)
-
-        if mode == ProcessMode.PROMPT_ONLY:
-            return LLMResult(
-                prompt=messages[-1]["content"],
-                metadata={
-                    "validation_target": user_input,
-                    "target_variable": target_variable,
-                },
-            )
+        messages = self._build_validation_messages(block_index, user_input, target_variable, context)
 
         if mode == ProcessMode.COMPLETE:
             if not self._llm_provider:
                 # Fallback processing, return variables directly
                 return LLMResult(content="", variables=user_input)  # type: ignore[arg-type]
 
-            llm_response = self._llm_provider.complete(messages)
+            llm_response = self._llm_provider.complete(messages, model=self._model, temperature=self._temperature)
 
             # Parse validation response and convert to LLMResult
             # Use joined target values for fallback; avoids JSON string injection
@@ -538,7 +810,7 @@ class MarkdownFlow:
 
         def stream_generator():
             full_response = ""
-            for chunk in self._llm_provider.stream(messages):  # type: ignore[attr-defined]
+            for chunk in self._llm_provider.stream(messages, model=self._model, temperature=self._temperature):  # type: ignore[attr-defined]
                full_response += chunk
 
             # Parse complete response and convert to LLMResult
@@ -562,26 +834,15 @@ class MarkdownFlow:
         mode: ProcessMode,
     ) -> LLMResult | Generator[LLMResult, None, None]:
         """Process LLM validation with button options (third case)."""
-        #
-        messages = self.
-
-        if mode == ProcessMode.PROMPT_ONLY:
-            return LLMResult(
-                prompt=messages[-1]["content"],
-                metadata={
-                    "validation_target": user_input,
-                    "target_variable": target_variable,
-                    "options": options,
-                    "question": question,
-                },
-            )
+        # Use unified validation message builder (button context will be included automatically)
+        messages = self._build_validation_messages(block_index, user_input, target_variable, context=None)
 
         if mode == ProcessMode.COMPLETE:
             if not self._llm_provider:
                 # Fallback processing, return variables directly
                 return LLMResult(content="", variables=user_input)  # type: ignore[arg-type]
 
-            llm_response = self._llm_provider.complete(messages)
+            llm_response = self._llm_provider.complete(messages, model=self._model, temperature=self._temperature)
 
             # Parse validation response and convert to LLMResult
             # Use joined target values for fallback; avoids JSON string injection
@@ -595,7 +856,7 @@ class MarkdownFlow:
 
         def stream_generator():
             full_response = ""
-            for chunk in self._llm_provider.stream(messages):  # type: ignore[attr-defined]
+            for chunk in self._llm_provider.stream(messages, model=self._model, temperature=self._temperature):  # type: ignore[attr-defined]
                 full_response += chunk
                 # For validation scenario, don't output chunks in real-time, only final result
@@ -612,21 +873,24 @@ class MarkdownFlow:
 
         return stream_generator()
 
-    def _render_error(
+    def _render_error(
+        self,
+        error_message: str,
+        mode: ProcessMode,
+        context: list[dict[str, str]] | None = None,
+    ) -> LLMResult | Generator[LLMResult, None, None]:
         """Render user-friendly error message."""
-
+        # Truncate context to configured maximum length
+        truncated_context = self._truncate_context(context)
 
-
-
-                prompt=messages[-1]["content"],
-                metadata={"original_error": error_message},
-            )
+        # Build error messages with context
+        messages = self._build_error_render_messages(error_message, truncated_context)
 
         if mode == ProcessMode.COMPLETE:
             if not self._llm_provider:
                 return LLMResult(content=error_message)  # Fallback processing
 
-            friendly_error = self._llm_provider.complete(messages)
+            friendly_error = self._llm_provider.complete(messages, model=self._model, temperature=self._temperature)
             return LLMResult(content=friendly_error, prompt=messages[-1]["content"])
 
         if mode == ProcessMode.STREAM:
@@ -634,7 +898,7 @@ class MarkdownFlow:
             return LLMResult(content=error_message)
 
         def stream_generator():
-            for chunk in self._llm_provider.stream(messages):  # type: ignore[attr-defined]
+            for chunk in self._llm_provider.stream(messages, model=self._model, temperature=self._temperature):  # type: ignore[attr-defined]
                 yield LLMResult(content=chunk, prompt=messages[-1]["content"])
 
         return stream_generator()
@@ -645,6 +909,7 @@ class MarkdownFlow:
         self,
         block_index: int,
         variables: dict[str, str | list[str]] | None,
+        context: list[dict[str, str]] | None = None,
    ) -> list[dict[str, str]]:
         """Build content block messages."""
         block = self.get_block(block_index)
@@ -657,120 +922,261 @@ class MarkdownFlow:
         # Replace variables
         block_content = replace_variables_in_text(block_content, variables or {})
 
+        # Restore code blocks (so the LLM sees the real code block content)
+        # Code block preprocessing is to prevent the parser from misinterpreting
+        # MarkdownFlow syntax inside code blocks, but the LLM needs to see
+        # the real content to correctly understand and generate responses
+        block_content = self._preprocessor.restore_code_blocks(block_content)
+
         # Build message array
         messages = []
 
-        #
+        # Build system message with XML tags
+        # Priority order: preserve_or_translate_instruction > base_system > document_prompt
+        system_parts = []
+
+        # 1. Output instruction (highest priority - if preserved content exists)
+        # Note: OUTPUT_INSTRUCTION_EXPLANATION already contains <preserve_or_translate_instruction> tags
+        if has_preserved_content:
+            system_parts.append(OUTPUT_INSTRUCTION_EXPLANATION.strip())
+
+        # 2. Base system prompt (if exists and non-empty)
+        if self._base_system_prompt:
+            system_parts.append(f"<base_system>\n{self._base_system_prompt}\n</base_system>")
+
+        # 3. Document prompt (if exists and non-empty)
         if self._document_prompt:
-
-
-
-
+            system_parts.append(f"<document_prompt>\n{self._document_prompt}\n</document_prompt>")
+
+        # Combine all parts and add as system message
+        if system_parts:
+            system_msg = "\n\n".join(system_parts)
             messages.append({"role": "system", "content": system_msg})
-        elif has_preserved_content:
-            # No document prompt but has preserved content, add explanation alone
-            messages.append({"role": "system", "content": OUTPUT_INSTRUCTION_EXPLANATION.strip()})
 
-        #
-        #
-
-
-
+        # Add conversation history context if provided
+        # Context is inserted after system message and before current user message
+        truncated_context = self._truncate_context(context)
+        if truncated_context:
+            messages.extend(truncated_context)
 
         # Add processed content as user message (as instruction to LLM)
         messages.append({"role": "user", "content": block_content})
 
         return messages
 
-    def
-        """
-        # Check if using custom interaction prompt
-        if self._interaction_prompt != DEFAULT_INTERACTION_PROMPT:
-            # User custom prompt + mandatory direction protection
-            render_prompt = f"""{self._interaction_prompt}"""
-        else:
-            # Use default prompt and instructions
-            render_prompt = f"""{self._interaction_prompt}
-{INTERACTION_RENDER_INSTRUCTIONS}"""
+    def _extract_translatable_content(self, interaction_content: str) -> tuple[str, dict[str, Any] | None]:
+        """Extract the translatable parts of interaction content as JSON
 
-
+        Args:
+            interaction_content: Interaction content string
 
-
-
+        Returns:
+            tuple: (JSON string, InteractionInfo dict)
+        """
+        # Parse the interaction content
+        interaction_parser = InteractionParser()
+        interaction_info = interaction_parser.parse(interaction_content)
+        if not interaction_info:
+            return "{}", None
 
-
+        translatable = {}
 
-
-
-
-
-
-        if config and config.validation_template:
-            # Use custom validation template
-            validation_prompt = config.validation_template
-            user_input_str = json.dumps(user_input, ensure_ascii=False)
-            validation_prompt = validation_prompt.replace("{sys_user_input}", user_input_str)
-            validation_prompt = validation_prompt.replace("{block_content}", block.content)
-            validation_prompt = validation_prompt.replace("{target_variable}", target_variable)
-            system_message = DEFAULT_VALIDATION_SYSTEM_MESSAGE
-        else:
-            # Use smart default validation template
-            from .utils import (
-                extract_interaction_question,
-                generate_smart_validation_template,
-            )
+        # Extract the buttons' display text
+        if interaction_info.get("buttons"):
+            button_texts = [btn["display"] for btn in interaction_info["buttons"]]
+            translatable["buttons"] = button_texts
 
-
-
+        # Extract the question text
+        if interaction_info.get("question"):
+            translatable["question"] = interaction_info["question"]
 
-
-
-
-
-                interaction_question=interaction_question,
-            )
+        # Convert to JSON
+        import json
+
+        json_str = json.dumps(translatable, ensure_ascii=False)
 
-
-        user_input_str = json.dumps(user_input, ensure_ascii=False)
-        validation_prompt = validation_template.replace("{sys_user_input}", user_input_str)
-        validation_prompt = validation_prompt.replace("{block_content}", block.content)
-        validation_prompt = validation_prompt.replace("{target_variable}", target_variable)
-        system_message = DEFAULT_VALIDATION_SYSTEM_MESSAGE
+        return json_str, interaction_info
 
+    def _build_translation_messages(self, translatable_json: str) -> list[dict[str, str]]:
+        """Build the message list used for translation
+
+        Args:
+            translatable_json: JSON string of the translatable content
+
+        Returns:
+            list: Message list
+        """
         messages = []
 
-
-
+        # Build the system message: interaction translation prompt + document prompt (XML format)
+        # interaction_prompt: defines the translation rules and JSON format requirements (includes <interaction_translation_rules> tags)
+        # document_prompt: supplies language instructions (e.g. "output in English"), wrapped in <document_context> tags for the LLM to detect
+        system_content = self._interaction_prompt
+        if self._document_prompt:
+            # Wrap the document prompt in <document_context> tags
+            system_content = f"{self._interaction_prompt}\n\n<document_context>\n{self._document_prompt}\n</document_context>"
+
+        messages.append({"role": "system", "content": system_content})
+
+        # Add the translatable content as the user message
+        messages.append({"role": "user", "content": translatable_json})
 
         return messages
 
-    def
+    def _reconstruct_with_translation(
         self,
+        original_content: str,
+        original_json: str,
+        translated_json: str,
+        interaction_info: dict[str, Any],
+    ) -> str:
+        """Reconstruct the interaction block with the translated content
+
+        Args:
+            original_content: Original interaction content
+            original_json: Original translatable JSON (before translation)
+            translated_json: Translated JSON string
+            interaction_info: Interaction info dict
+
+        Returns:
+            str: Reconstructed interaction content
+        """
+        import json
+
+        # Parse the original JSON
+        try:
+            original = json.loads(original_json)
+        except json.JSONDecodeError:
+            return original_content
+
+        # Parse the translated JSON
+        try:
+            translated = json.loads(translated_json)
+        except json.JSONDecodeError:
+            return original_content
+
+        reconstructed = original_content
+
+        # Replace button display text (handling values intelligently)
+        if "buttons" in translated and interaction_info.get("buttons"):
+            for i, button in enumerate(interaction_info["buttons"]):
+                if i < len(translated["buttons"]):
+                    old_display = button["display"]
+                    new_display = translated["buttons"][i]
+
+                    # Detect whether a translation actually happened
+                    translation_happened = False
+                    if "buttons" in original and i < len(original["buttons"]):
+                        if original["buttons"][i] != new_display:
+                            translation_happened = True
+
+                    # If display and value are separated (display//value format), keep the value
+                    if button["display"] != button["value"]:
+                        # Value already separated, handle as before
+                        # Replacement format: oldDisplay//value -> newDisplay//value
+                        old_pattern = f"{old_display}//{button['value']}"
+                        new_pattern = f"{new_display}//{button['value']}"
+                        reconstructed = reconstructed.replace(old_pattern, new_pattern, 1)
+                    elif translation_happened:
+                        # No value separation, but a translation happened
+                        # Automatically add a value: translated//original
+                        old_pattern = old_display
+                        new_pattern = f"{new_display}//{old_display}"
+                        reconstructed = reconstructed.replace(old_pattern, new_pattern, 1)
+                    else:
+                        # No translation, keep as-is
+                        reconstructed = reconstructed.replace(old_display, new_display, 1)
+
+        # Replace the question text
+        if "question" in translated and interaction_info.get("question"):
+            old_question = interaction_info["question"]
+            new_question = translated["question"]
+            reconstructed = reconstructed.replace(f"...{old_question}", f"...{new_question}", 1)
+
+        return reconstructed
+
+    def _build_validation_messages(
+        self,
+        block_index: int,
         user_input: dict[str, list[str]],
         target_variable: str,
-
-        question: str,
+        context: list[dict[str, str]] | None = None,
     ) -> list[dict[str, str]]:
-        """
-
-        user_input_str = json.dumps(user_input, ensure_ascii=False)
-        validation_prompt = BUTTONS_WITH_TEXT_VALIDATION_TEMPLATE.format(
-            question=question,
-            options=", ".join(options),
-            user_input=user_input_str,
-            target_variable=target_variable,
-        )
+        """
+        Build validation messages with new structure.
 
-
+        System message contains:
+        - VALIDATION_TASK_TEMPLATE (includes task description and output language rules)
+        - Question context (if exists)
+        - Button options context (if exists)
+        - VALIDATION_REQUIREMENTS_TEMPLATE
+        - document_prompt wrapped in <document_context> tags (if exists)
+
+        User message contains:
+        - User input only
+        """
+        from .parser import InteractionParser, extract_interaction_question
+
+        block = self.get_block(block_index)
+
+        # Extract user input values for target variable
+        target_values = user_input.get(target_variable, [])
+        user_input_str = ", ".join(target_values) if target_values else ""
+
+        # Build System Message (contains all validation rules and context)
+        # VALIDATION_TASK_TEMPLATE already includes system message, directly replace variables
+        task_template = VALIDATION_TASK_TEMPLATE.replace("{target_variable}", target_variable)
+        system_parts = [task_template]
+
+        # Extract interaction question
+        interaction_question = extract_interaction_question(block.content)
+
+        # Add question context (if exists)
+        if interaction_question:
+            question_context = CONTEXT_QUESTION_TEMPLATE.format(question=interaction_question)
+            system_parts.append("")
+            system_parts.append(question_context)
+
+        # Parse interaction to extract button information
+        parser = InteractionParser()
+        parse_result = parser.parse(block.content)
+        buttons = parse_result.get("buttons") if "buttons" in parse_result else None
+
+        # Add button options context (if exists)
+        if buttons:
+            button_displays = [btn.get("display", "") for btn in buttons if btn.get("display")]
+            if button_displays:
+                button_options = "、".join(button_displays)
+                button_context = CONTEXT_BUTTON_OPTIONS_TEMPLATE.format(button_options=button_options)
+                system_parts.append("")
+                system_parts.append(button_context)
+
+        # Add extraction requirements (using template)
+        system_parts.append("")
+        system_parts.append(VALIDATION_REQUIREMENTS_TEMPLATE)
+
+        # Add document_prompt (if exists)
         if self._document_prompt:
-
+            system_parts.append("")
+            system_parts.append("<document_context>")
+            system_parts.append(self._document_prompt)
+            system_parts.append("</document_context>")
+
+        system_content = "\n".join(system_parts)
 
-
-        messages
+        # Build message list
+        messages = [
+            {"role": "system", "content": system_content},
+            {"role": "user", "content": user_input_str},  # Only user input
+        ]
 
         return messages
 
-    def _build_error_render_messages(
+    def _build_error_render_messages(
+        self,
+        error_message: str,
+        context: list[dict[str, str]] | None = None,
+    ) -> list[dict[str, str]]:
         """Build error rendering messages."""
         render_prompt = f"""{self._interaction_error_prompt}
@@ -783,24 +1189,14 @@ Original Error: {error_message}
             messages.append({"role": "system", "content": self._document_prompt})
 
         messages.append({"role": "system", "content": render_prompt})
+
+        # Add conversation history context if provided
+        truncated_context = self._truncate_context(context)
+        if truncated_context:
+            messages.extend(truncated_context)
+
         messages.append({"role": "user", "content": error_message})
 
         return messages
 
     # Helper methods
-
-    def _reconstruct_interaction_content(self, original_content: str, rendered_question: str) -> str:
-        """Reconstruct interaction content."""
-        cleaned_question = rendered_question.strip()
-        # Use pre-compiled regex for improved performance
-        cleaned_question = COMPILED_BRACKETS_CLEANUP_REGEX.sub("", cleaned_question)
-        cleaned_question = COMPILED_VARIABLE_REFERENCE_CLEANUP_REGEX.sub("", cleaned_question)
-        cleaned_question = COMPILED_WHITESPACE_CLEANUP_REGEX.sub(" ", cleaned_question).strip()
-
-        match = COMPILED_INTERACTION_CONTENT_RECONSTRUCT_REGEX.search(original_content)
-
-        if match:
-            prefix = match.group(1)
-            suffix = match.group(2)
-            return f"{prefix}{cleaned_question}{suffix}"
-        return original_content  # type: ignore[unreachable]
```
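
Two of the helpers introduced above are pure functions, so their behavior is easy to check outside the package. Here is a standalone restatement of the logic shown in the diff (it mirrors `_truncate_context` and `_match_button_values`; the module-level names are mine, not part of the package's API):

```python
def truncate_context(context, max_context_length=0):
    """Drop empty/whitespace-only messages, then keep the most recent N (0 = unlimited)."""
    if not context:
        return None
    filtered = [msg for msg in context if msg.get("content", "").strip()]
    if not filtered:
        return None
    if max_context_length and len(filtered) > max_context_length:
        return filtered[-max_context_length:]
    return filtered


def match_button_values(buttons, target_values):
    """Split user inputs into (matched canonical button values, unmatched custom text)."""
    matched, unmatched = [], []
    for value in target_values:
        for button in buttons:
            if value in (button["display"], button["value"]):
                matched.append(button["value"])  # canonical value, even when matched by display text
                break
        else:
            unmatched.append(value)
    return matched, unmatched


# One button click plus one free-text answer: the free text is what
# _process_interaction_input would then hand off to LLM validation.
buttons = [{"display": "Yes", "value": "yes"}, {"display": "No", "value": "no"}]
print(match_button_values(buttons, ["Yes", "maybe later"]))  # (['yes'], ['maybe later'])
print(truncate_context([{"role": "user", "content": " "}, {"role": "user", "content": "hi"}], 1))
# [{'role': 'user', 'content': 'hi'}]
```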