markdown-flow 0.2.15__py3-none-any.whl → 0.2.17__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of markdown-flow might be problematic. Click here for more details.
- markdown_flow/__init__.py +1 -1
- markdown_flow/constants.py +40 -15
- markdown_flow/core.py +71 -502
- markdown_flow/llm.py +7 -9
- markdown_flow/utils.py +6 -8
- {markdown_flow-0.2.15.dist-info → markdown_flow-0.2.17.dist-info}/METADATA +36 -118
- markdown_flow-0.2.17.dist-info/RECORD +13 -0
- markdown_flow-0.2.15.dist-info/RECORD +0 -13
- {markdown_flow-0.2.15.dist-info → markdown_flow-0.2.17.dist-info}/WHEEL +0 -0
- {markdown_flow-0.2.15.dist-info → markdown_flow-0.2.17.dist-info}/licenses/LICENSE +0 -0
- {markdown_flow-0.2.15.dist-info → markdown_flow-0.2.17.dist-info}/top_level.txt +0 -0
markdown_flow/core.py
CHANGED
|
@@ -6,7 +6,7 @@ Refactored MarkdownFlow class with built-in LLM processing capabilities and unif
|
|
|
6
6
|
|
|
7
7
|
import json
|
|
8
8
|
import re
|
|
9
|
-
from collections.abc import
|
|
9
|
+
from collections.abc import AsyncGenerator
|
|
10
10
|
from copy import copy
|
|
11
11
|
from typing import Any
|
|
12
12
|
|
|
@@ -28,6 +28,7 @@ from .constants import (
|
|
|
28
28
|
INTERACTION_PATTERN_SPLIT,
|
|
29
29
|
INTERACTION_RENDER_INSTRUCTIONS,
|
|
30
30
|
LLM_PROVIDER_REQUIRED_ERROR,
|
|
31
|
+
OUTPUT_INSTRUCTION_EXPLANATION,
|
|
31
32
|
UNSUPPORTED_PROMPT_TYPE_ERROR,
|
|
32
33
|
)
|
|
33
34
|
from .enums import BlockType
|
|
@@ -176,15 +177,14 @@ class MarkdownFlow:
|
|
|
176
177
|
|
|
177
178
|
# Core unified interface
|
|
178
179
|
|
|
179
|
-
def process(
|
|
180
|
+
async def process(
|
|
180
181
|
self,
|
|
181
182
|
block_index: int,
|
|
182
183
|
mode: ProcessMode = ProcessMode.COMPLETE,
|
|
183
184
|
context: list[dict[str, str]] | None = None,
|
|
184
185
|
variables: dict[str, str | list[str]] | None = None,
|
|
185
186
|
user_input: dict[str, list[str]] | None = None,
|
|
186
|
-
|
|
187
|
-
) -> LLMResult | Generator[LLMResult, None, None]:
|
|
187
|
+
) -> LLMResult | AsyncGenerator[LLMResult, None]:
|
|
188
188
|
"""
|
|
189
189
|
Unified block processing interface.
|
|
190
190
|
|
|
@@ -194,10 +194,9 @@ class MarkdownFlow:
|
|
|
194
194
|
context: Context message list
|
|
195
195
|
variables: Variable mappings
|
|
196
196
|
user_input: User input (for interaction blocks)
|
|
197
|
-
dynamic_interaction_format: Dynamic interaction format for validation
|
|
198
197
|
|
|
199
198
|
Returns:
|
|
200
|
-
LLMResult or
|
|
199
|
+
LLMResult or AsyncGenerator[LLMResult, None]
|
|
201
200
|
"""
|
|
202
201
|
# Process document_prompt variable replacement
|
|
203
202
|
if self._document_prompt:
|
|
@@ -206,70 +205,56 @@ class MarkdownFlow:
|
|
|
206
205
|
block = self.get_block(block_index)
|
|
207
206
|
|
|
208
207
|
if block.block_type == BlockType.CONTENT:
|
|
209
|
-
|
|
210
|
-
if dynamic_interaction_format and user_input:
|
|
211
|
-
return self._process_dynamic_interaction_validation(block_index, dynamic_interaction_format, user_input, mode, context, variables)
|
|
212
|
-
# Normal content processing (possibly with dynamic conversion)
|
|
213
|
-
return self._process_content(block_index, mode, context, variables)
|
|
208
|
+
return await self._process_content(block_index, mode, context, variables)
|
|
214
209
|
|
|
215
210
|
if block.block_type == BlockType.INTERACTION:
|
|
216
211
|
if user_input is None:
|
|
217
212
|
# Render interaction content
|
|
218
|
-
return self._process_interaction_render(block_index, mode, variables)
|
|
213
|
+
return await self._process_interaction_render(block_index, mode, variables)
|
|
219
214
|
# Process user input
|
|
220
|
-
return self._process_interaction_input(block_index, user_input, mode, context, variables)
|
|
215
|
+
return await self._process_interaction_input(block_index, user_input, mode, context, variables)
|
|
221
216
|
|
|
222
217
|
if block.block_type == BlockType.PRESERVED_CONTENT:
|
|
223
218
|
# Preserved content output as-is, no LLM call
|
|
224
|
-
return self._process_preserved_content(block_index, variables)
|
|
219
|
+
return await self._process_preserved_content(block_index, variables)
|
|
225
220
|
|
|
226
221
|
# Handle other types as content
|
|
227
|
-
return self._process_content(block_index, mode, context, variables)
|
|
222
|
+
return await self._process_content(block_index, mode, context, variables)
|
|
228
223
|
|
|
229
224
|
# Internal processing methods
|
|
230
225
|
|
|
231
|
-
def _process_content(
|
|
226
|
+
async def _process_content(
|
|
232
227
|
self,
|
|
233
228
|
block_index: int,
|
|
234
229
|
mode: ProcessMode,
|
|
235
230
|
context: list[dict[str, str]] | None,
|
|
236
231
|
variables: dict[str, str | list[str]] | None,
|
|
237
|
-
) -> LLMResult |
|
|
232
|
+
) -> LLMResult | AsyncGenerator[LLMResult, None]:
|
|
238
233
|
"""Process content block."""
|
|
234
|
+
# Build messages
|
|
235
|
+
messages = self._build_content_messages(block_index, variables)
|
|
239
236
|
|
|
240
|
-
# For PROMPT_ONLY mode, use standard content processing
|
|
241
237
|
if mode == ProcessMode.PROMPT_ONLY:
|
|
242
|
-
messages = self._build_content_messages(block_index, variables)
|
|
243
238
|
return LLMResult(prompt=messages[-1]["content"], metadata={"messages": messages})
|
|
244
239
|
|
|
245
|
-
# For COMPLETE and STREAM modes with LLM provider, use dynamic interaction check
|
|
246
|
-
# LLM will decide whether content needs to be converted to interaction block
|
|
247
|
-
if self._llm_provider:
|
|
248
|
-
block = self.get_block(block_index)
|
|
249
|
-
if block.block_type == BlockType.CONTENT:
|
|
250
|
-
return self._process_with_dynamic_check(block_index, mode, context, variables)
|
|
251
|
-
|
|
252
|
-
# Fallback: Build messages using standard content processing
|
|
253
|
-
messages = self._build_content_messages(block_index, variables)
|
|
254
|
-
|
|
255
240
|
if mode == ProcessMode.COMPLETE:
|
|
256
241
|
if not self._llm_provider:
|
|
257
242
|
raise ValueError(LLM_PROVIDER_REQUIRED_ERROR)
|
|
258
243
|
|
|
259
|
-
|
|
260
|
-
return LLMResult(content=
|
|
244
|
+
content = await self._llm_provider.complete(messages)
|
|
245
|
+
return LLMResult(content=content, prompt=messages[-1]["content"])
|
|
261
246
|
|
|
262
247
|
if mode == ProcessMode.STREAM:
|
|
263
248
|
if not self._llm_provider:
|
|
264
249
|
raise ValueError(LLM_PROVIDER_REQUIRED_ERROR)
|
|
265
250
|
|
|
266
|
-
def stream_generator():
|
|
267
|
-
for chunk in self._llm_provider.stream(messages):
|
|
251
|
+
async def stream_generator():
|
|
252
|
+
async for chunk in self._llm_provider.stream(messages): # type: ignore[attr-defined]
|
|
268
253
|
yield LLMResult(content=chunk, prompt=messages[-1]["content"])
|
|
269
254
|
|
|
270
255
|
return stream_generator()
|
|
271
256
|
|
|
272
|
-
def _process_preserved_content(self, block_index: int, variables: dict[str, str | list[str]] | None) -> LLMResult:
|
|
257
|
+
async def _process_preserved_content(self, block_index: int, variables: dict[str, str | list[str]] | None) -> LLMResult:
|
|
273
258
|
"""Process preserved content block, output as-is without LLM call."""
|
|
274
259
|
block = self.get_block(block_index)
|
|
275
260
|
|
|
@@ -281,7 +266,7 @@ class MarkdownFlow:
|
|
|
281
266
|
|
|
282
267
|
return LLMResult(content=content)
|
|
283
268
|
|
|
284
|
-
def _process_interaction_render(self, block_index: int, mode: ProcessMode, variables: dict[str, str | list[str]] | None = None) -> LLMResult |
|
|
269
|
+
async def _process_interaction_render(self, block_index: int, mode: ProcessMode, variables: dict[str, str | list[str]] | None = None) -> LLMResult | AsyncGenerator[LLMResult, None]:
|
|
285
270
|
"""Process interaction content rendering."""
|
|
286
271
|
block = self.get_block(block_index)
|
|
287
272
|
|
|
@@ -314,8 +299,7 @@ class MarkdownFlow:
|
|
|
314
299
|
if not self._llm_provider:
|
|
315
300
|
return LLMResult(content=processed_block.content) # Fallback processing
|
|
316
301
|
|
|
317
|
-
|
|
318
|
-
rendered_question = result.content
|
|
302
|
+
rendered_question = await self._llm_provider.complete(messages)
|
|
319
303
|
rendered_content = self._reconstruct_interaction_content(processed_block.content, rendered_question)
|
|
320
304
|
|
|
321
305
|
return LLMResult(
|
|
@@ -332,7 +316,7 @@ class MarkdownFlow:
|
|
|
332
316
|
# For interaction blocks, return reconstructed content (one-time output)
|
|
333
317
|
rendered_content = self._reconstruct_interaction_content(processed_block.content, question_text or "")
|
|
334
318
|
|
|
335
|
-
def stream_generator():
|
|
319
|
+
async def stream_generator():
|
|
336
320
|
yield LLMResult(
|
|
337
321
|
content=rendered_content,
|
|
338
322
|
prompt=messages[-1]["content"],
|
|
@@ -341,9 +325,9 @@ class MarkdownFlow:
|
|
|
341
325
|
return stream_generator()
|
|
342
326
|
|
|
343
327
|
# With LLM provider, collect full response then return once
|
|
344
|
-
def stream_generator():
|
|
328
|
+
async def stream_generator():
|
|
345
329
|
full_response = ""
|
|
346
|
-
for chunk in self._llm_provider.stream(messages):
|
|
330
|
+
async for chunk in self._llm_provider.stream(messages): # type: ignore[attr-defined]
|
|
347
331
|
full_response += chunk
|
|
348
332
|
|
|
349
333
|
# Reconstruct final interaction content
|
|
@@ -357,23 +341,22 @@ class MarkdownFlow:
|
|
|
357
341
|
|
|
358
342
|
return stream_generator()
|
|
359
343
|
|
|
360
|
-
def _process_interaction_input(
|
|
344
|
+
async def _process_interaction_input(
|
|
361
345
|
self,
|
|
362
346
|
block_index: int,
|
|
363
347
|
user_input: dict[str, list[str]],
|
|
364
348
|
mode: ProcessMode,
|
|
365
349
|
context: list[dict[str, str]] | None,
|
|
366
350
|
variables: dict[str, str | list[str]] | None = None,
|
|
367
|
-
) -> LLMResult |
|
|
351
|
+
) -> LLMResult | AsyncGenerator[LLMResult, None]:
|
|
368
352
|
"""Process interaction user input."""
|
|
369
|
-
_ = context # Mark as intentionally unused
|
|
370
353
|
block = self.get_block(block_index)
|
|
371
354
|
target_variable = block.variables[0] if block.variables else "user_input"
|
|
372
355
|
|
|
373
356
|
# Basic validation
|
|
374
357
|
if not user_input or not any(values for values in user_input.values()):
|
|
375
358
|
error_msg = INPUT_EMPTY_ERROR
|
|
376
|
-
return self._render_error(error_msg, mode)
|
|
359
|
+
return await self._render_error(error_msg, mode)
|
|
377
360
|
|
|
378
361
|
# Get the target variable value from user_input
|
|
379
362
|
target_values = user_input.get(target_variable, [])
|
|
@@ -387,7 +370,7 @@ class MarkdownFlow:
|
|
|
387
370
|
|
|
388
371
|
if "error" in parse_result:
|
|
389
372
|
error_msg = INTERACTION_PARSE_ERROR.format(error=parse_result["error"])
|
|
390
|
-
return self._render_error(error_msg, mode)
|
|
373
|
+
return await self._render_error(error_msg, mode)
|
|
391
374
|
|
|
392
375
|
interaction_type = parse_result.get("type")
|
|
393
376
|
|
|
@@ -399,7 +382,7 @@ class MarkdownFlow:
|
|
|
399
382
|
InteractionType.BUTTONS_MULTI_WITH_TEXT,
|
|
400
383
|
]:
|
|
401
384
|
# All button types: validate user input against available buttons
|
|
402
|
-
return self._process_button_validation(
|
|
385
|
+
return await self._process_button_validation(
|
|
403
386
|
parse_result,
|
|
404
387
|
target_values,
|
|
405
388
|
target_variable,
|
|
@@ -432,16 +415,16 @@ class MarkdownFlow:
|
|
|
432
415
|
},
|
|
433
416
|
)
|
|
434
417
|
error_msg = f"No input provided for variable '{target_variable}'"
|
|
435
|
-
return self._render_error(error_msg, mode)
|
|
418
|
+
return await self._render_error(error_msg, mode)
|
|
436
419
|
|
|
437
|
-
def _process_button_validation(
|
|
420
|
+
async def _process_button_validation(
|
|
438
421
|
self,
|
|
439
422
|
parse_result: dict[str, Any],
|
|
440
423
|
target_values: list[str],
|
|
441
424
|
target_variable: str,
|
|
442
425
|
mode: ProcessMode,
|
|
443
426
|
interaction_type: InteractionType,
|
|
444
|
-
) -> LLMResult |
|
|
427
|
+
) -> LLMResult | AsyncGenerator[LLMResult, None]:
|
|
445
428
|
"""
|
|
446
429
|
Simplified button validation with new input format.
|
|
447
430
|
|
|
@@ -476,9 +459,9 @@ class MarkdownFlow:
|
|
|
476
459
|
# Pure button mode requires input
|
|
477
460
|
button_displays = [btn["display"] for btn in buttons]
|
|
478
461
|
error_msg = f"Please select from: {', '.join(button_displays)}"
|
|
479
|
-
return self._render_error(error_msg, mode)
|
|
462
|
+
return await self._render_error(error_msg, mode)
|
|
480
463
|
|
|
481
|
-
#
|
|
464
|
+
# Validate input values against available buttons
|
|
482
465
|
valid_values = []
|
|
483
466
|
invalid_values = []
|
|
484
467
|
|
|
@@ -491,30 +474,19 @@ class MarkdownFlow:
|
|
|
491
474
|
break
|
|
492
475
|
|
|
493
476
|
if not matched:
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
return self._process_llm_validation_with_options(
|
|
503
|
-
block_index=0, # Not used in the method
|
|
504
|
-
user_input={target_variable: target_values},
|
|
505
|
-
target_variable=target_variable,
|
|
506
|
-
options=button_displays,
|
|
507
|
-
question=question,
|
|
508
|
-
mode=mode,
|
|
509
|
-
)
|
|
510
|
-
|
|
511
|
-
# Check for validation errors in pure button mode or when text input not allowed
|
|
512
|
-
if invalid_values:
|
|
477
|
+
if allow_text_input:
|
|
478
|
+
# Allow custom text in buttons+text mode
|
|
479
|
+
valid_values.append(value)
|
|
480
|
+
else:
|
|
481
|
+
invalid_values.append(value)
|
|
482
|
+
|
|
483
|
+
# Check for validation errors
|
|
484
|
+
if invalid_values and not allow_text_input:
|
|
513
485
|
button_displays = [btn["display"] for btn in buttons]
|
|
514
486
|
error_msg = f"Invalid options: {', '.join(invalid_values)}. Please select from: {', '.join(button_displays)}"
|
|
515
|
-
return self._render_error(error_msg, mode)
|
|
487
|
+
return await self._render_error(error_msg, mode)
|
|
516
488
|
|
|
517
|
-
# Success: return validated
|
|
489
|
+
# Success: return validated values
|
|
518
490
|
return LLMResult(
|
|
519
491
|
content="",
|
|
520
492
|
variables={target_variable: valid_values},
|
|
@@ -524,17 +496,16 @@ class MarkdownFlow:
|
|
|
524
496
|
"valid_values": valid_values,
|
|
525
497
|
"invalid_values": invalid_values,
|
|
526
498
|
"total_input_count": len(target_values),
|
|
527
|
-
"llm_validated": False,
|
|
528
499
|
},
|
|
529
500
|
)
|
|
530
501
|
|
|
531
|
-
def _process_llm_validation(
|
|
502
|
+
async def _process_llm_validation(
|
|
532
503
|
self,
|
|
533
504
|
block_index: int,
|
|
534
505
|
user_input: dict[str, list[str]],
|
|
535
506
|
target_variable: str,
|
|
536
507
|
mode: ProcessMode,
|
|
537
|
-
) -> LLMResult |
|
|
508
|
+
) -> LLMResult | AsyncGenerator[LLMResult, None]:
|
|
538
509
|
"""Process LLM validation."""
|
|
539
510
|
# Build validation messages
|
|
540
511
|
messages = self._build_validation_messages(block_index, user_input, target_variable)
|
|
@@ -553,8 +524,7 @@ class MarkdownFlow:
|
|
|
553
524
|
# Fallback processing, return variables directly
|
|
554
525
|
return LLMResult(content="", variables=user_input) # type: ignore[arg-type]
|
|
555
526
|
|
|
556
|
-
|
|
557
|
-
llm_response = result.content
|
|
527
|
+
llm_response = await self._llm_provider.complete(messages)
|
|
558
528
|
|
|
559
529
|
# Parse validation response and convert to LLMResult
|
|
560
530
|
# Use joined target values for fallback; avoids JSON string injection
|
|
@@ -566,9 +536,9 @@ class MarkdownFlow:
|
|
|
566
536
|
if not self._llm_provider:
|
|
567
537
|
return LLMResult(content="", variables=user_input) # type: ignore[arg-type]
|
|
568
538
|
|
|
569
|
-
def stream_generator():
|
|
539
|
+
async def stream_generator():
|
|
570
540
|
full_response = ""
|
|
571
|
-
for chunk in self._llm_provider.stream(messages):
|
|
541
|
+
async for chunk in self._llm_provider.stream(messages): # type: ignore[attr-defined]
|
|
572
542
|
full_response += chunk
|
|
573
543
|
|
|
574
544
|
# Parse complete response and convert to LLMResult
|
|
@@ -582,7 +552,7 @@ class MarkdownFlow:
|
|
|
582
552
|
|
|
583
553
|
return stream_generator()
|
|
584
554
|
|
|
585
|
-
def _process_llm_validation_with_options(
|
|
555
|
+
async def _process_llm_validation_with_options(
|
|
586
556
|
self,
|
|
587
557
|
block_index: int,
|
|
588
558
|
user_input: dict[str, list[str]],
|
|
@@ -590,9 +560,8 @@ class MarkdownFlow:
|
|
|
590
560
|
options: list[str],
|
|
591
561
|
question: str,
|
|
592
562
|
mode: ProcessMode,
|
|
593
|
-
) -> LLMResult |
|
|
563
|
+
) -> LLMResult | AsyncGenerator[LLMResult, None]:
|
|
594
564
|
"""Process LLM validation with button options (third case)."""
|
|
595
|
-
_ = block_index # Mark as intentionally unused
|
|
596
565
|
# Build special validation messages containing button option information
|
|
597
566
|
messages = self._build_validation_messages_with_options(user_input, target_variable, options, question)
|
|
598
567
|
|
|
@@ -612,8 +581,7 @@ class MarkdownFlow:
|
|
|
612
581
|
# Fallback processing, return variables directly
|
|
613
582
|
return LLMResult(content="", variables=user_input) # type: ignore[arg-type]
|
|
614
583
|
|
|
615
|
-
|
|
616
|
-
llm_response = result.content
|
|
584
|
+
llm_response = await self._llm_provider.complete(messages)
|
|
617
585
|
|
|
618
586
|
# Parse validation response and convert to LLMResult
|
|
619
587
|
# Use joined target values for fallback; avoids JSON string injection
|
|
@@ -625,9 +593,9 @@ class MarkdownFlow:
|
|
|
625
593
|
if not self._llm_provider:
|
|
626
594
|
return LLMResult(content="", variables=user_input) # type: ignore[arg-type]
|
|
627
595
|
|
|
628
|
-
def stream_generator():
|
|
596
|
+
async def stream_generator():
|
|
629
597
|
full_response = ""
|
|
630
|
-
for chunk in self._llm_provider.stream(messages):
|
|
598
|
+
async for chunk in self._llm_provider.stream(messages): # type: ignore[attr-defined]
|
|
631
599
|
full_response += chunk
|
|
632
600
|
# For validation scenario, don't output chunks in real-time, only final result
|
|
633
601
|
|
|
@@ -644,7 +612,7 @@ class MarkdownFlow:
|
|
|
644
612
|
|
|
645
613
|
return stream_generator()
|
|
646
614
|
|
|
647
|
-
def _render_error(self, error_message: str, mode: ProcessMode) -> LLMResult |
|
|
615
|
+
async def _render_error(self, error_message: str, mode: ProcessMode) -> LLMResult | AsyncGenerator[LLMResult, None]:
|
|
648
616
|
"""Render user-friendly error message."""
|
|
649
617
|
messages = self._build_error_render_messages(error_message)
|
|
650
618
|
|
|
@@ -658,16 +626,15 @@ class MarkdownFlow:
|
|
|
658
626
|
if not self._llm_provider:
|
|
659
627
|
return LLMResult(content=error_message) # Fallback processing
|
|
660
628
|
|
|
661
|
-
|
|
662
|
-
friendly_error = result.content
|
|
629
|
+
friendly_error = await self._llm_provider.complete(messages)
|
|
663
630
|
return LLMResult(content=friendly_error, prompt=messages[-1]["content"])
|
|
664
631
|
|
|
665
632
|
if mode == ProcessMode.STREAM:
|
|
666
633
|
if not self._llm_provider:
|
|
667
634
|
return LLMResult(content=error_message)
|
|
668
635
|
|
|
669
|
-
def stream_generator():
|
|
670
|
-
for chunk in self._llm_provider.stream(messages):
|
|
636
|
+
async def stream_generator():
|
|
637
|
+
async for chunk in self._llm_provider.stream(messages): # type: ignore[attr-defined]
|
|
671
638
|
yield LLMResult(content=chunk, prompt=messages[-1]["content"])
|
|
672
639
|
|
|
673
640
|
return stream_generator()
|
|
@@ -683,8 +650,9 @@ class MarkdownFlow:
|
|
|
683
650
|
block = self.get_block(block_index)
|
|
684
651
|
block_content = block.content
|
|
685
652
|
|
|
686
|
-
# Process output instructions
|
|
687
|
-
|
|
653
|
+
# Process output instructions and detect if preserved content exists
|
|
654
|
+
# Returns: (processed_content, has_preserved_content)
|
|
655
|
+
block_content, has_preserved_content = process_output_instructions(block_content)
|
|
688
656
|
|
|
689
657
|
# Replace variables
|
|
690
658
|
block_content = replace_variables_in_text(block_content, variables or {})
|
|
@@ -692,9 +660,16 @@ class MarkdownFlow:
|
|
|
692
660
|
# Build message array
|
|
693
661
|
messages = []
|
|
694
662
|
|
|
695
|
-
#
|
|
663
|
+
# Conditionally add system prompts
|
|
696
664
|
if self._document_prompt:
|
|
697
|
-
|
|
665
|
+
system_msg = self._document_prompt
|
|
666
|
+
# Only add output instruction explanation when preserved content detected
|
|
667
|
+
if has_preserved_content:
|
|
668
|
+
system_msg += "\n\n" + OUTPUT_INSTRUCTION_EXPLANATION.strip()
|
|
669
|
+
messages.append({"role": "system", "content": system_msg})
|
|
670
|
+
elif has_preserved_content:
|
|
671
|
+
# No document prompt but has preserved content, add explanation alone
|
|
672
|
+
messages.append({"role": "system", "content": OUTPUT_INSTRUCTION_EXPLANATION.strip()})
|
|
698
673
|
|
|
699
674
|
# For most content blocks, historical conversation context is not needed
|
|
700
675
|
# because each document block is an independent instruction
|
|
@@ -827,411 +802,5 @@ Original Error: {error_message}
|
|
|
827
802
|
if match:
|
|
828
803
|
prefix = match.group(1)
|
|
829
804
|
suffix = match.group(2)
|
|
830
|
-
|
|
831
|
-
# suffix format is "original_question]", we only want "]"
|
|
832
|
-
if suffix.endswith("]"):
|
|
833
|
-
clean_suffix = "]"
|
|
834
|
-
else:
|
|
835
|
-
clean_suffix = suffix
|
|
836
|
-
|
|
837
|
-
return f"{prefix}{cleaned_question}{clean_suffix}"
|
|
805
|
+
return f"{prefix}{cleaned_question}{suffix}"
|
|
838
806
|
return original_content # type: ignore[unreachable]
|
|
839
|
-
|
|
840
|
-
# Dynamic Interaction Methods
|
|
841
|
-
|
|
842
|
-
def _process_with_dynamic_check(
|
|
843
|
-
self,
|
|
844
|
-
block_index: int,
|
|
845
|
-
mode: ProcessMode,
|
|
846
|
-
context: list[dict[str, str]] | None,
|
|
847
|
-
variables: dict[str, str | list[str]] | None,
|
|
848
|
-
) -> LLMResult | Generator[LLMResult, None, None]:
|
|
849
|
-
"""Process content with dynamic interaction detection and conversion."""
|
|
850
|
-
|
|
851
|
-
block = self.get_block(block_index)
|
|
852
|
-
messages = self._build_dynamic_check_messages(block, context, variables)
|
|
853
|
-
|
|
854
|
-
# Define Function Calling tools with structured approach
|
|
855
|
-
tools = [
|
|
856
|
-
{
|
|
857
|
-
"type": "function",
|
|
858
|
-
"function": {
|
|
859
|
-
"name": "create_interaction_block",
|
|
860
|
-
"description": "Convert content to interaction block with structured data when it needs to collect user input",
|
|
861
|
-
"parameters": {
|
|
862
|
-
"type": "object",
|
|
863
|
-
"properties": {
|
|
864
|
-
"needs_interaction": {"type": "boolean", "description": "Whether this content needs to be converted to interaction block"},
|
|
865
|
-
"variable_name": {"type": "string", "description": "Name of the variable to collect (without {{}} brackets)"},
|
|
866
|
-
"interaction_type": {
|
|
867
|
-
"type": "string",
|
|
868
|
-
"enum": ["single_select", "multi_select", "text_input", "mixed"],
|
|
869
|
-
"description": "Type of interaction: single_select (|), multi_select (||), text_input (...), mixed (options + text)",
|
|
870
|
-
},
|
|
871
|
-
"options": {"type": "array", "items": {"type": "string"}, "description": "List of selectable options (3-4 specific options based on context)"},
|
|
872
|
-
"allow_text_input": {"type": "boolean", "description": "Whether to include a text input option for 'Other' cases"},
|
|
873
|
-
"text_input_prompt": {"type": "string", "description": "Prompt text for the text input option (e.g., '其他请输入', 'Other, please specify')"},
|
|
874
|
-
},
|
|
875
|
-
"required": ["needs_interaction"],
|
|
876
|
-
},
|
|
877
|
-
},
|
|
878
|
-
}
|
|
879
|
-
]
|
|
880
|
-
|
|
881
|
-
if not self._llm_provider:
|
|
882
|
-
raise ValueError(LLM_PROVIDER_REQUIRED_ERROR)
|
|
883
|
-
|
|
884
|
-
# Call LLM with tools
|
|
885
|
-
result = self._llm_provider.complete(messages, tools)
|
|
886
|
-
|
|
887
|
-
# If interaction was generated through Function Calling, construct the MarkdownFlow format
|
|
888
|
-
if result.transformed_to_interaction and result.metadata and "tool_args" in result.metadata:
|
|
889
|
-
tool_args = result.metadata["tool_args"]
|
|
890
|
-
if tool_args.get("needs_interaction"):
|
|
891
|
-
# Construct MarkdownFlow format from structured data
|
|
892
|
-
interaction_content = self._build_interaction_format(tool_args)
|
|
893
|
-
result.content = interaction_content
|
|
894
|
-
|
|
895
|
-
# If transformed to interaction, return as is
|
|
896
|
-
if result.transformed_to_interaction:
|
|
897
|
-
return result
|
|
898
|
-
|
|
899
|
-
# If not transformed, continue with normal processing using standard content messages
|
|
900
|
-
normal_messages = self._build_content_messages(block_index, variables)
|
|
901
|
-
|
|
902
|
-
if mode == ProcessMode.STREAM:
|
|
903
|
-
|
|
904
|
-
def stream_wrapper():
|
|
905
|
-
stream_generator = self._llm_provider.stream(normal_messages)
|
|
906
|
-
for chunk in stream_generator:
|
|
907
|
-
yield LLMResult(content=chunk)
|
|
908
|
-
|
|
909
|
-
return stream_wrapper()
|
|
910
|
-
|
|
911
|
-
# Complete mode - use normal content processing
|
|
912
|
-
normal_result = self._llm_provider.complete(normal_messages)
|
|
913
|
-
return LLMResult(content=normal_result.content, prompt=normal_messages[-1]["content"], metadata=normal_result.metadata)
|
|
914
|
-
|
|
915
|
-
def _build_dynamic_check_messages(
|
|
916
|
-
self,
|
|
917
|
-
block: "Block",
|
|
918
|
-
context: list[dict[str, str]] | None,
|
|
919
|
-
variables: dict[str, str | list[str]] | None,
|
|
920
|
-
) -> list[dict[str, str]]:
|
|
921
|
-
"""Build messages for dynamic interaction detection."""
|
|
922
|
-
|
|
923
|
-
import json
|
|
924
|
-
|
|
925
|
-
# System prompt for detection
|
|
926
|
-
system_prompt = """You are an intelligent document processing assistant specializing in creating interactive forms.
|
|
927
|
-
|
|
928
|
-
Task: Analyze the given content block and determine if it needs to be converted to an interaction block to collect user information.
|
|
929
|
-
|
|
930
|
-
**ABSOLUTE RULE**: Convert ONLY when ALL THREE mandatory elements are explicitly present:
|
|
931
|
-
1. Storage action word + target connector + variable
|
|
932
|
-
2. No exceptions, no implications, no assumptions
|
|
933
|
-
|
|
934
|
-
**MANDATORY TRIPLE PATTERN (ALL REQUIRED):**
|
|
935
|
-
|
|
936
|
-
**Element 1: Storage Action Words**
|
|
937
|
-
- Chinese: "记录", "保存", "存储", "收集", "采集"
|
|
938
|
-
- English: "save", "store", "record", "collect", "gather"
|
|
939
|
-
|
|
940
|
-
**Element 2: Target Connection Words**
|
|
941
|
-
- Chinese: "到", "为", "在", "至"
|
|
942
|
-
- English: "to", "as", "in", "into"
|
|
943
|
-
|
|
944
|
-
**Element 3: Target Variable**
|
|
945
|
-
- Must contain {{variable_name}} syntax for NEW data storage
|
|
946
|
-
- Variable must be for collecting NEW information, not using existing data
|
|
947
|
-
|
|
948
|
-
**VALID CONVERSION FORMULA:**
|
|
949
|
-
[Storage Word] + [Connector] + {{new_variable}}
|
|
950
|
-
|
|
951
|
-
Examples of VALID patterns:
|
|
952
|
-
- "...记录到{{姓名}}"
|
|
953
|
-
- "...保存为{{偏好}}"
|
|
954
|
-
- "...存储在{{选择}}"
|
|
955
|
-
- "...save to {{preference}}"
|
|
956
|
-
- "...collect as {{user_input}}"
|
|
957
|
-
|
|
958
|
-
**STRICT EXCLUSION RULES:**
|
|
959
|
-
|
|
960
|
-
❌ NEVER convert if missing ANY element:
|
|
961
|
-
- No storage action word = NO conversion
|
|
962
|
-
- No target connector = NO conversion
|
|
963
|
-
- No {{variable}} = NO conversion
|
|
964
|
-
- Using existing {{variable}} instead of collecting new = NO conversion
|
|
965
|
-
|
|
966
|
-
❌ NEVER convert casual conversation:
|
|
967
|
-
- Simple questions without storage intent
|
|
968
|
-
- Introduction requests without persistence
|
|
969
|
-
- General inquiries without data collection
|
|
970
|
-
- Educational or exploratory content
|
|
971
|
-
|
|
972
|
-
❌ NEVER infer or assume storage intent:
|
|
973
|
-
- Don't assume "询问姓名" means "保存姓名"
|
|
974
|
-
- Don't assume "了解偏好" means "记录偏好"
|
|
975
|
-
- Don't assume data collection without explicit storage words
|
|
976
|
-
|
|
977
|
-
**PATTERN ANALYSIS METHOD:**
|
|
978
|
-
1. **Exact Pattern Match**: Search for [Storage Word] + [Connector] + {{variable}}
|
|
979
|
-
2. **No Pattern = No Conversion**: If exact pattern not found, return needs_interaction: false
|
|
980
|
-
3. **Zero Tolerance**: No partial matches, no similar meanings, no interpretations
|
|
981
|
-
|
|
982
|
-
**ULTRA-CONSERVATIVE APPROACH:**
|
|
983
|
-
- If there's ANY doubt about storage intent = DON'T convert
|
|
984
|
-
- If storage pattern is not 100% explicit = DON'T convert
|
|
985
|
-
- If you need to "interpret" or "infer" storage intent = DON'T convert
|
|
986
|
-
- Prefer false negatives over false positives
|
|
987
|
-
|
|
988
|
-
When exact pattern is found, generate structured interaction data. Otherwise, always return needs_interaction: false."""
|
|
989
|
-
|
|
990
|
-
# User message with content and context
|
|
991
|
-
# Build user prompt with document context
|
|
992
|
-
user_prompt_parts = []
|
|
993
|
-
|
|
994
|
-
# Add document-level prompt context if exists
|
|
995
|
-
if self._document_prompt:
|
|
996
|
-
user_prompt_parts.append(f"""Document-level instructions:
|
|
997
|
-
{self._document_prompt}
|
|
998
|
-
|
|
999
|
-
(Note: The above are the user's document-level instructions that provide context and requirements for processing.)
|
|
1000
|
-
""")
|
|
1001
|
-
|
|
1002
|
-
# Prepare content analysis with both original and resolved versions
|
|
1003
|
-
original_content = block.content
|
|
1004
|
-
|
|
1005
|
-
# Create resolved content with variable substitution for better context
|
|
1006
|
-
resolved_content = original_content
|
|
1007
|
-
if variables:
|
|
1008
|
-
from .utils import replace_variables_in_text
|
|
1009
|
-
|
|
1010
|
-
resolved_content = replace_variables_in_text(original_content, variables)
|
|
1011
|
-
|
|
1012
|
-
content_analysis = f"""Current content block to analyze:
|
|
1013
|
-
|
|
1014
|
-
**Original content (shows variable structure):**
|
|
1015
|
-
{original_content}
|
|
1016
|
-
|
|
1017
|
-
**Resolved content (with current variable values):**
|
|
1018
|
-
{resolved_content}
|
|
1019
|
-
|
|
1020
|
-
**Existing variable values:**
|
|
1021
|
-
{json.dumps(variables, ensure_ascii=False) if variables else "None"}"""
|
|
1022
|
-
|
|
1023
|
-
# Add different analysis based on whether content has variables
|
|
1024
|
-
if "{{" in original_content and "}}" in original_content:
|
|
1025
|
-
from .utils import extract_variables_from_text
|
|
1026
|
-
|
|
1027
|
-
content_variables = set(extract_variables_from_text(original_content))
|
|
1028
|
-
|
|
1029
|
-
# Find new variables (not yet collected)
|
|
1030
|
-
new_variables = content_variables - (set(variables.keys()) if variables else set())
|
|
1031
|
-
existing_used_variables = content_variables & (set(variables.keys()) if variables else set())
|
|
1032
|
-
|
|
1033
|
-
content_analysis += f"""
|
|
1034
|
-
|
|
1035
|
-
**Variable analysis:**
|
|
1036
|
-
- Variables used from previous steps: {list(existing_used_variables) if existing_used_variables else "None"}
|
|
1037
|
-
- New variables to collect: {list(new_variables) if new_variables else "None"}
|
|
1038
|
-
|
|
1039
|
-
**Context guidance:**
|
|
1040
|
-
- Use the resolved content to understand the actual context and requirements
|
|
1041
|
-
- Generate options based on the real variable values shown in the resolved content
|
|
1042
|
-
- Collect user input for the new variables identified above"""
|
|
1043
|
-
|
|
1044
|
-
user_prompt_parts.append(content_analysis)
|
|
1045
|
-
|
|
1046
|
-
# Add analysis requirements and structured output guide
|
|
1047
|
-
user_prompt_parts.append("""## Analysis Task:
|
|
1048
|
-
1. Determine if this content needs to be converted to an interaction block
|
|
1049
|
-
2. If conversion is needed, provide structured interaction data
|
|
1050
|
-
|
|
1051
|
-
## Context-based Analysis:
|
|
1052
|
-
- Use the "Resolved content" to understand actual context (e.g., if it shows "川菜", generate Sichuan dish options)
|
|
1053
|
-
- Extract the "New variables to collect" identified in the variable analysis above
|
|
1054
|
-
- Generate 3-4 specific options based on the resolved context and document-level instructions
|
|
1055
|
-
- Follow ALL document-level instruction requirements (language, domain, terminology)
|
|
1056
|
-
|
|
1057
|
-
## Selection Type Decision Logic:
|
|
1058
|
-
Ask: "Can a user realistically want/choose multiple of these options simultaneously?"
|
|
1059
|
-
|
|
1060
|
-
**Use MULTI_SELECT when:**
|
|
1061
|
-
- Food dishes (can order multiple: 宫保鸡丁, 麻婆豆腐)
|
|
1062
|
-
- Programming skills (can know multiple: Python, JavaScript)
|
|
1063
|
-
- Interests/hobbies (can have multiple: 读书, 运动, 旅游)
|
|
1064
|
-
- Product features (can want multiple: 定制颜色, 个性化logo)
|
|
1065
|
-
- Exercise types (can do multiple: 跑步, 游泳, 瑜伽)
|
|
1066
|
-
|
|
1067
|
-
**Use SINGLE_SELECT when:**
|
|
1068
|
-
- Job positions (usually apply for one: 软件工程师 OR 产品经理)
|
|
1069
|
-
- Experience levels (have one current level: Beginner OR Advanced)
|
|
1070
|
-
- Budget ranges (have one range: 5-10万 OR 10-20万)
|
|
1071
|
-
- Education levels (have one highest: Bachelor's OR Master's)
|
|
1072
|
-
|
|
1073
|
-
## Output Instructions:
|
|
1074
|
-
If this content needs interaction, use the create_interaction_block function with:
|
|
1075
|
-
- `needs_interaction`: true/false
|
|
1076
|
-
- `variable_name`: the variable to collect (from "New variables" above)
|
|
1077
|
-
- `interaction_type`: "single_select", "multi_select", "text_input", or "mixed"
|
|
1078
|
-
- `options`: array of 3-4 specific options based on context
|
|
1079
|
-
- `allow_text_input`: true if you want to include "other" option
|
|
1080
|
-
- `text_input_prompt`: text for the "other" option (in appropriate language)
|
|
1081
|
-
|
|
1082
|
-
Analyze the content and provide the structured interaction data.""")
|
|
1083
|
-
|
|
1084
|
-
user_prompt = "\n\n".join(user_prompt_parts)
|
|
1085
|
-
|
|
1086
|
-
messages = [{"role": "system", "content": system_prompt}]
|
|
1087
|
-
|
|
1088
|
-
# Add context if provided
|
|
1089
|
-
if context:
|
|
1090
|
-
messages.extend(context)
|
|
1091
|
-
|
|
1092
|
-
messages.append({"role": "user", "content": user_prompt})
|
|
1093
|
-
|
|
1094
|
-
return messages
|
|
1095
|
-
|
|
1096
|
-
def _build_interaction_format(self, tool_args: dict) -> str:
|
|
1097
|
-
"""Build MarkdownFlow interaction format from structured Function Calling data."""
|
|
1098
|
-
variable_name = tool_args.get("variable_name", "")
|
|
1099
|
-
interaction_type = tool_args.get("interaction_type", "single_select")
|
|
1100
|
-
options = tool_args.get("options", [])
|
|
1101
|
-
allow_text_input = tool_args.get("allow_text_input", False)
|
|
1102
|
-
text_input_prompt = tool_args.get("text_input_prompt", "...请输入")
|
|
1103
|
-
|
|
1104
|
-
if not variable_name:
|
|
1105
|
-
return ""
|
|
1106
|
-
|
|
1107
|
-
# For text_input type, options can be empty
|
|
1108
|
-
if interaction_type != "text_input" and not options:
|
|
1109
|
-
return ""
|
|
1110
|
-
|
|
1111
|
-
# Choose separator based on interaction type
|
|
1112
|
-
if interaction_type in ["multi_select", "mixed"]:
|
|
1113
|
-
separator = "||"
|
|
1114
|
-
else:
|
|
1115
|
-
separator = "|"
|
|
1116
|
-
|
|
1117
|
-
# Build options string
|
|
1118
|
-
if interaction_type == "text_input":
|
|
1119
|
-
# Text input only
|
|
1120
|
-
options_str = f"...{text_input_prompt}"
|
|
1121
|
-
else:
|
|
1122
|
-
# Options with potential text input
|
|
1123
|
-
options_str = separator.join(options)
|
|
1124
|
-
|
|
1125
|
-
if allow_text_input and text_input_prompt:
|
|
1126
|
-
# Ensure text input has ... prefix
|
|
1127
|
-
text_option = text_input_prompt if text_input_prompt.startswith("...") else f"...{text_input_prompt}"
|
|
1128
|
-
options_str += f"{separator}{text_option}"
|
|
1129
|
-
|
|
1130
|
-
return f"?[%{{{{{variable_name}}}}} {options_str}]"
|
|
1131
|
-
|
|
1132
|
-
def _process_dynamic_interaction_validation(
    self,
    block_index: int,
    interaction_format: str,
    user_input: dict[str, list[str]],
    mode: ProcessMode,
    context: list[dict[str, str]] | None,
    variables: dict[str, str | list[str]] | None,
) -> LLMResult:
    """Validate user input for dynamically generated interaction blocks.

    Reuses the same parsing and validation path as statically authored
    interaction blocks, then merges the collected value into the caller's
    existing variable set.
    """
    # Kept for signature parity with the other block processors.
    _ = block_index
    _ = context

    from .utils import InteractionParser

    known_variables = dict(variables or {})

    # Parse the dynamic block with the standard interaction parser.
    parsed = InteractionParser().parse(interaction_format)
    if "error" in parsed:
        return self._render_error(f"Invalid interaction format: {parsed['error']}", mode)

    variable_name = parsed.get("variable")
    interaction_type = parsed.get("type")
    if not variable_name:
        return self._render_error(f"No variable found in interaction format: {interaction_format}", mode)

    submitted = user_input.get(variable_name, [])

    if not submitted:
        # Buttons-with-text variants tolerate an empty submission; record an
        # empty value and flag it in the metadata.
        if interaction_type in (
            InteractionType.BUTTONS_WITH_TEXT,
            InteractionType.BUTTONS_MULTI_WITH_TEXT,
        ):
            known_variables[variable_name] = []
            return LLMResult(
                content="",
                variables=known_variables,
                metadata={
                    "interaction_type": "dynamic_interaction",
                    "empty_input": True,
                },
            )
        return self._render_error(f"No input provided for variable '{variable_name}'", mode)

    button_types = (
        InteractionType.BUTTONS_ONLY,
        InteractionType.BUTTONS_WITH_TEXT,
        InteractionType.BUTTONS_MULTI_SELECT,
        InteractionType.BUTTONS_MULTI_WITH_TEXT,
    )
    if interaction_type in button_types:
        # Delegate to the shared button validator, then fold any variables it
        # produced into the pre-existing set.
        result = self._process_button_validation(
            parsed,
            submitted,
            variable_name,
            mode,
            interaction_type,
        )
        if variables and getattr(result, "variables", None) is not None:
            combined = dict(variables)
            combined.update(result.variables)
            return LLMResult(
                content=result.content,
                variables=combined,
                metadata=result.metadata,
            )
        return result

    if interaction_type == InteractionType.NON_ASSIGNMENT_BUTTON:
        # Non-assignment buttons never write variables; echo the existing set.
        return LLMResult(
            content="",
            variables=dict(variables or {}),
            metadata={
                "interaction_type": "non_assignment_button",
                "user_input": user_input,
            },
        )

    # Plain text input: store the submitted values under the target variable.
    known_variables[variable_name] = submitted
    return LLMResult(
        content="",
        variables=known_variables,
        metadata={
            "interaction_type": "text_only",
            "target_variable": variable_name,
            "values": submitted,
        },
    )
|