realtimex-deeptutor 0.5.0.post1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- realtimex_deeptutor/__init__.py +67 -0
- realtimex_deeptutor-0.5.0.post1.dist-info/METADATA +1612 -0
- realtimex_deeptutor-0.5.0.post1.dist-info/RECORD +276 -0
- realtimex_deeptutor-0.5.0.post1.dist-info/WHEEL +5 -0
- realtimex_deeptutor-0.5.0.post1.dist-info/entry_points.txt +2 -0
- realtimex_deeptutor-0.5.0.post1.dist-info/licenses/LICENSE +661 -0
- realtimex_deeptutor-0.5.0.post1.dist-info/top_level.txt +2 -0
- src/__init__.py +40 -0
- src/agents/__init__.py +24 -0
- src/agents/base_agent.py +657 -0
- src/agents/chat/__init__.py +24 -0
- src/agents/chat/chat_agent.py +435 -0
- src/agents/chat/prompts/en/chat_agent.yaml +35 -0
- src/agents/chat/prompts/zh/chat_agent.yaml +35 -0
- src/agents/chat/session_manager.py +311 -0
- src/agents/co_writer/__init__.py +0 -0
- src/agents/co_writer/edit_agent.py +260 -0
- src/agents/co_writer/narrator_agent.py +423 -0
- src/agents/co_writer/prompts/en/edit_agent.yaml +113 -0
- src/agents/co_writer/prompts/en/narrator_agent.yaml +88 -0
- src/agents/co_writer/prompts/zh/edit_agent.yaml +113 -0
- src/agents/co_writer/prompts/zh/narrator_agent.yaml +88 -0
- src/agents/guide/__init__.py +16 -0
- src/agents/guide/agents/__init__.py +11 -0
- src/agents/guide/agents/chat_agent.py +104 -0
- src/agents/guide/agents/interactive_agent.py +223 -0
- src/agents/guide/agents/locate_agent.py +149 -0
- src/agents/guide/agents/summary_agent.py +150 -0
- src/agents/guide/guide_manager.py +500 -0
- src/agents/guide/prompts/en/chat_agent.yaml +41 -0
- src/agents/guide/prompts/en/interactive_agent.yaml +202 -0
- src/agents/guide/prompts/en/locate_agent.yaml +68 -0
- src/agents/guide/prompts/en/summary_agent.yaml +157 -0
- src/agents/guide/prompts/zh/chat_agent.yaml +41 -0
- src/agents/guide/prompts/zh/interactive_agent.yaml +626 -0
- src/agents/guide/prompts/zh/locate_agent.yaml +68 -0
- src/agents/guide/prompts/zh/summary_agent.yaml +157 -0
- src/agents/ideagen/__init__.py +12 -0
- src/agents/ideagen/idea_generation_workflow.py +426 -0
- src/agents/ideagen/material_organizer_agent.py +173 -0
- src/agents/ideagen/prompts/en/idea_generation.yaml +187 -0
- src/agents/ideagen/prompts/en/material_organizer.yaml +69 -0
- src/agents/ideagen/prompts/zh/idea_generation.yaml +187 -0
- src/agents/ideagen/prompts/zh/material_organizer.yaml +69 -0
- src/agents/question/__init__.py +24 -0
- src/agents/question/agents/__init__.py +18 -0
- src/agents/question/agents/generate_agent.py +381 -0
- src/agents/question/agents/relevance_analyzer.py +207 -0
- src/agents/question/agents/retrieve_agent.py +239 -0
- src/agents/question/coordinator.py +718 -0
- src/agents/question/example.py +109 -0
- src/agents/question/prompts/en/coordinator.yaml +75 -0
- src/agents/question/prompts/en/generate_agent.yaml +77 -0
- src/agents/question/prompts/en/relevance_analyzer.yaml +41 -0
- src/agents/question/prompts/en/retrieve_agent.yaml +32 -0
- src/agents/question/prompts/zh/coordinator.yaml +75 -0
- src/agents/question/prompts/zh/generate_agent.yaml +77 -0
- src/agents/question/prompts/zh/relevance_analyzer.yaml +39 -0
- src/agents/question/prompts/zh/retrieve_agent.yaml +30 -0
- src/agents/research/agents/__init__.py +23 -0
- src/agents/research/agents/decompose_agent.py +507 -0
- src/agents/research/agents/manager_agent.py +228 -0
- src/agents/research/agents/note_agent.py +180 -0
- src/agents/research/agents/rephrase_agent.py +263 -0
- src/agents/research/agents/reporting_agent.py +1333 -0
- src/agents/research/agents/research_agent.py +714 -0
- src/agents/research/data_structures.py +451 -0
- src/agents/research/main.py +188 -0
- src/agents/research/prompts/en/decompose_agent.yaml +89 -0
- src/agents/research/prompts/en/manager_agent.yaml +24 -0
- src/agents/research/prompts/en/note_agent.yaml +121 -0
- src/agents/research/prompts/en/rephrase_agent.yaml +58 -0
- src/agents/research/prompts/en/reporting_agent.yaml +380 -0
- src/agents/research/prompts/en/research_agent.yaml +173 -0
- src/agents/research/prompts/zh/decompose_agent.yaml +89 -0
- src/agents/research/prompts/zh/manager_agent.yaml +24 -0
- src/agents/research/prompts/zh/note_agent.yaml +121 -0
- src/agents/research/prompts/zh/rephrase_agent.yaml +58 -0
- src/agents/research/prompts/zh/reporting_agent.yaml +380 -0
- src/agents/research/prompts/zh/research_agent.yaml +173 -0
- src/agents/research/research_pipeline.py +1309 -0
- src/agents/research/utils/__init__.py +60 -0
- src/agents/research/utils/citation_manager.py +799 -0
- src/agents/research/utils/json_utils.py +98 -0
- src/agents/research/utils/token_tracker.py +297 -0
- src/agents/solve/__init__.py +80 -0
- src/agents/solve/analysis_loop/__init__.py +14 -0
- src/agents/solve/analysis_loop/investigate_agent.py +414 -0
- src/agents/solve/analysis_loop/note_agent.py +190 -0
- src/agents/solve/main_solver.py +862 -0
- src/agents/solve/memory/__init__.py +34 -0
- src/agents/solve/memory/citation_memory.py +353 -0
- src/agents/solve/memory/investigate_memory.py +226 -0
- src/agents/solve/memory/solve_memory.py +340 -0
- src/agents/solve/prompts/en/analysis_loop/investigate_agent.yaml +55 -0
- src/agents/solve/prompts/en/analysis_loop/note_agent.yaml +54 -0
- src/agents/solve/prompts/en/solve_loop/manager_agent.yaml +67 -0
- src/agents/solve/prompts/en/solve_loop/precision_answer_agent.yaml +62 -0
- src/agents/solve/prompts/en/solve_loop/response_agent.yaml +90 -0
- src/agents/solve/prompts/en/solve_loop/solve_agent.yaml +75 -0
- src/agents/solve/prompts/en/solve_loop/tool_agent.yaml +38 -0
- src/agents/solve/prompts/zh/analysis_loop/investigate_agent.yaml +53 -0
- src/agents/solve/prompts/zh/analysis_loop/note_agent.yaml +54 -0
- src/agents/solve/prompts/zh/solve_loop/manager_agent.yaml +66 -0
- src/agents/solve/prompts/zh/solve_loop/precision_answer_agent.yaml +62 -0
- src/agents/solve/prompts/zh/solve_loop/response_agent.yaml +90 -0
- src/agents/solve/prompts/zh/solve_loop/solve_agent.yaml +76 -0
- src/agents/solve/prompts/zh/solve_loop/tool_agent.yaml +41 -0
- src/agents/solve/solve_loop/__init__.py +22 -0
- src/agents/solve/solve_loop/citation_manager.py +74 -0
- src/agents/solve/solve_loop/manager_agent.py +274 -0
- src/agents/solve/solve_loop/precision_answer_agent.py +96 -0
- src/agents/solve/solve_loop/response_agent.py +301 -0
- src/agents/solve/solve_loop/solve_agent.py +325 -0
- src/agents/solve/solve_loop/tool_agent.py +470 -0
- src/agents/solve/utils/__init__.py +64 -0
- src/agents/solve/utils/config_validator.py +313 -0
- src/agents/solve/utils/display_manager.py +223 -0
- src/agents/solve/utils/error_handler.py +363 -0
- src/agents/solve/utils/json_utils.py +98 -0
- src/agents/solve/utils/performance_monitor.py +407 -0
- src/agents/solve/utils/token_tracker.py +541 -0
- src/api/__init__.py +0 -0
- src/api/main.py +240 -0
- src/api/routers/__init__.py +1 -0
- src/api/routers/agent_config.py +69 -0
- src/api/routers/chat.py +296 -0
- src/api/routers/co_writer.py +337 -0
- src/api/routers/config.py +627 -0
- src/api/routers/dashboard.py +18 -0
- src/api/routers/guide.py +337 -0
- src/api/routers/ideagen.py +436 -0
- src/api/routers/knowledge.py +821 -0
- src/api/routers/notebook.py +247 -0
- src/api/routers/question.py +537 -0
- src/api/routers/research.py +394 -0
- src/api/routers/settings.py +164 -0
- src/api/routers/solve.py +305 -0
- src/api/routers/system.py +252 -0
- src/api/run_server.py +61 -0
- src/api/utils/history.py +172 -0
- src/api/utils/log_interceptor.py +21 -0
- src/api/utils/notebook_manager.py +415 -0
- src/api/utils/progress_broadcaster.py +72 -0
- src/api/utils/task_id_manager.py +100 -0
- src/config/__init__.py +0 -0
- src/config/accessors.py +18 -0
- src/config/constants.py +34 -0
- src/config/defaults.py +18 -0
- src/config/schema.py +38 -0
- src/config/settings.py +50 -0
- src/core/errors.py +62 -0
- src/knowledge/__init__.py +23 -0
- src/knowledge/add_documents.py +606 -0
- src/knowledge/config.py +65 -0
- src/knowledge/example_add_documents.py +236 -0
- src/knowledge/extract_numbered_items.py +1039 -0
- src/knowledge/initializer.py +621 -0
- src/knowledge/kb.py +22 -0
- src/knowledge/manager.py +782 -0
- src/knowledge/progress_tracker.py +182 -0
- src/knowledge/start_kb.py +535 -0
- src/logging/__init__.py +103 -0
- src/logging/adapters/__init__.py +17 -0
- src/logging/adapters/lightrag.py +184 -0
- src/logging/adapters/llamaindex.py +141 -0
- src/logging/config.py +80 -0
- src/logging/handlers/__init__.py +20 -0
- src/logging/handlers/console.py +75 -0
- src/logging/handlers/file.py +201 -0
- src/logging/handlers/websocket.py +127 -0
- src/logging/logger.py +709 -0
- src/logging/stats/__init__.py +16 -0
- src/logging/stats/llm_stats.py +179 -0
- src/services/__init__.py +56 -0
- src/services/config/__init__.py +61 -0
- src/services/config/knowledge_base_config.py +210 -0
- src/services/config/loader.py +260 -0
- src/services/config/unified_config.py +603 -0
- src/services/embedding/__init__.py +45 -0
- src/services/embedding/adapters/__init__.py +22 -0
- src/services/embedding/adapters/base.py +106 -0
- src/services/embedding/adapters/cohere.py +127 -0
- src/services/embedding/adapters/jina.py +99 -0
- src/services/embedding/adapters/ollama.py +116 -0
- src/services/embedding/adapters/openai_compatible.py +96 -0
- src/services/embedding/client.py +159 -0
- src/services/embedding/config.py +156 -0
- src/services/embedding/provider.py +119 -0
- src/services/llm/__init__.py +152 -0
- src/services/llm/capabilities.py +313 -0
- src/services/llm/client.py +302 -0
- src/services/llm/cloud_provider.py +530 -0
- src/services/llm/config.py +200 -0
- src/services/llm/error_mapping.py +103 -0
- src/services/llm/exceptions.py +152 -0
- src/services/llm/factory.py +450 -0
- src/services/llm/local_provider.py +347 -0
- src/services/llm/providers/anthropic.py +95 -0
- src/services/llm/providers/base_provider.py +93 -0
- src/services/llm/providers/open_ai.py +83 -0
- src/services/llm/registry.py +71 -0
- src/services/llm/telemetry.py +40 -0
- src/services/llm/types.py +27 -0
- src/services/llm/utils.py +333 -0
- src/services/prompt/__init__.py +25 -0
- src/services/prompt/manager.py +206 -0
- src/services/rag/__init__.py +64 -0
- src/services/rag/components/__init__.py +29 -0
- src/services/rag/components/base.py +59 -0
- src/services/rag/components/chunkers/__init__.py +18 -0
- src/services/rag/components/chunkers/base.py +34 -0
- src/services/rag/components/chunkers/fixed.py +71 -0
- src/services/rag/components/chunkers/numbered_item.py +94 -0
- src/services/rag/components/chunkers/semantic.py +97 -0
- src/services/rag/components/embedders/__init__.py +14 -0
- src/services/rag/components/embedders/base.py +32 -0
- src/services/rag/components/embedders/openai.py +63 -0
- src/services/rag/components/indexers/__init__.py +18 -0
- src/services/rag/components/indexers/base.py +35 -0
- src/services/rag/components/indexers/graph.py +172 -0
- src/services/rag/components/indexers/lightrag.py +156 -0
- src/services/rag/components/indexers/vector.py +146 -0
- src/services/rag/components/parsers/__init__.py +18 -0
- src/services/rag/components/parsers/base.py +35 -0
- src/services/rag/components/parsers/markdown.py +52 -0
- src/services/rag/components/parsers/pdf.py +115 -0
- src/services/rag/components/parsers/text.py +86 -0
- src/services/rag/components/retrievers/__init__.py +18 -0
- src/services/rag/components/retrievers/base.py +34 -0
- src/services/rag/components/retrievers/dense.py +200 -0
- src/services/rag/components/retrievers/hybrid.py +164 -0
- src/services/rag/components/retrievers/lightrag.py +169 -0
- src/services/rag/components/routing.py +286 -0
- src/services/rag/factory.py +234 -0
- src/services/rag/pipeline.py +215 -0
- src/services/rag/pipelines/__init__.py +32 -0
- src/services/rag/pipelines/academic.py +44 -0
- src/services/rag/pipelines/lightrag.py +43 -0
- src/services/rag/pipelines/llamaindex.py +313 -0
- src/services/rag/pipelines/raganything.py +384 -0
- src/services/rag/service.py +244 -0
- src/services/rag/types.py +73 -0
- src/services/search/__init__.py +284 -0
- src/services/search/base.py +87 -0
- src/services/search/consolidation.py +398 -0
- src/services/search/providers/__init__.py +128 -0
- src/services/search/providers/baidu.py +188 -0
- src/services/search/providers/exa.py +194 -0
- src/services/search/providers/jina.py +161 -0
- src/services/search/providers/perplexity.py +153 -0
- src/services/search/providers/serper.py +209 -0
- src/services/search/providers/tavily.py +161 -0
- src/services/search/types.py +114 -0
- src/services/setup/__init__.py +34 -0
- src/services/setup/init.py +285 -0
- src/services/tts/__init__.py +16 -0
- src/services/tts/config.py +99 -0
- src/tools/__init__.py +91 -0
- src/tools/code_executor.py +536 -0
- src/tools/paper_search_tool.py +171 -0
- src/tools/query_item_tool.py +310 -0
- src/tools/question/__init__.py +15 -0
- src/tools/question/exam_mimic.py +616 -0
- src/tools/question/pdf_parser.py +211 -0
- src/tools/question/question_extractor.py +397 -0
- src/tools/rag_tool.py +173 -0
- src/tools/tex_chunker.py +339 -0
- src/tools/tex_downloader.py +253 -0
- src/tools/web_search.py +71 -0
- src/utils/config_manager.py +206 -0
- src/utils/document_validator.py +168 -0
- src/utils/error_rate_tracker.py +111 -0
- src/utils/error_utils.py +82 -0
- src/utils/json_parser.py +110 -0
- src/utils/network/circuit_breaker.py +79 -0
|
@@ -0,0 +1,363 @@
|
|
|
1
|
+
#!/usr/bin/env python
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
Error Handler - Error handling and retry mechanism
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from collections.abc import Callable
|
|
8
|
+
from typing import Any, Optional
|
|
9
|
+
|
|
10
|
+
from pydantic import BaseModel, Field, ValidationError, field_validator, model_validator
|
|
11
|
+
import tenacity
|
|
12
|
+
|
|
13
|
+
from src.config.constants import VALID_INVESTIGATE_TOOLS, VALID_SOLVE_TOOLS
|
|
14
|
+
from src.logging.logger import get_logger
|
|
15
|
+
from src.services.llm.exceptions import LLMParseError
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def _format_validation_errors(e: ValidationError) -> str:
    """Render the errors of a Pydantic ``ValidationError`` as one line.

    Each error becomes ``<dotted location>: <message>``; an empty location is
    shown as ``root``. Entries are joined with ``"; "``.
    """
    rendered = []
    for issue in e.errors():
        location = ".".join(map(str, issue["loc"]))
        rendered.append(f"{location or 'root'}: {issue['msg']}")
    return "; ".join(rendered)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
# Pydantic models for output validation
|
|
26
|
+
class ToolIntent(BaseModel):
    """Model for a single tool intent in the investigate output.

    ``tool_type`` is normalised to lower case and must be one of
    ``VALID_INVESTIGATE_TOOLS``. ``query`` must be non-empty unless the tool
    type is ``"none"``.
    """

    tool_type: str = Field(..., description="Type of tool to use")
    # Pydantic v2 does not run field validators on default values unless
    # validate_default=True. Without it, a payload that omits "query" entirely
    # would skip validate_query_required and be accepted with an empty query.
    query: str = Field("", validate_default=True, description="Query for the tool")
    identifier: Optional[str] = Field(None, description="Optional identifier")

    @field_validator("tool_type")
    @classmethod
    def validate_tool_type(cls, v):
        """Lower-case the tool type and reject values outside the allowed set."""
        if v.lower() not in VALID_INVESTIGATE_TOOLS:
            raise ValueError(f"tool_type must be one of {VALID_INVESTIGATE_TOOLS}, got: {v}")
        return v.lower()

    @field_validator("query")
    @classmethod
    def validate_query_required(cls, v, info):
        """Require a non-empty query for every tool type other than 'none'."""
        # info.data only holds fields that already validated; if tool_type
        # failed validation it is absent and treated as a non-'none' tool.
        tool_type = info.data.get("tool_type", "").lower()
        if tool_type != "none" and not v:
            raise ValueError("query is required for non-none tools")
        return v
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class InvestigateOutput(BaseModel):
    """Schema of the InvestigateAgent reply: reasoning plus at least one tool intent."""

    reasoning: str = Field(default=..., description="Reasoning for the investigation")
    tools: list[ToolIntent] = Field(default=..., min_length=1, description="List of tool intents")

    @field_validator("tools")
    @classmethod
    def validate_tools_consistency(cls, v):
        """Reject a 'none' intent that appears alongside any other intent."""
        if len(v) > 1:
            for intent in v:
                if intent.tool_type == "none":
                    raise ValueError(
                        "When 'none' tool exists, no other tool intents should be provided"
                    )
        return v
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
class Citation(BaseModel):
    """A single citation attached to a note; needs a reference id or a source."""

    reference_id: str | None = None
    source: str | None = None
    content: str | None = None

    @model_validator(mode="after")
    def validate_citation_fields(self):
        """Ensure the citation carries a reference_id or a source (or both)."""
        if self.reference_id or self.source:
            return self
        raise ValueError("citation must contain reference_id or source")
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
class NoteOutput(BaseModel):
    """Schema of the NoteAgent reply: a required summary and optional citations."""

    # Required free-text summary.
    summary: str = Field(default=..., description="Summary of the notes")
    # Defaults to a fresh empty list when the key is absent.
    citations: list[Citation] = Field(description="List of citations", default_factory=list)
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
class ReflectOutput(BaseModel):
    """Schema of the InvestigateReflectAgent reply; all three fields are required."""

    should_stop: bool = Field(default=..., description="Whether to stop the investigation")
    reason: str = Field(default=..., description="Reason for the decision")
    remaining_questions: list[str] = Field(
        default=..., description="List of remaining questions"
    )
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
class PlanStep(BaseModel):
    """One step inside a plan block: an identifier and its description."""

    step_id: str = Field(default=..., description="Step identifier")
    plan: str = Field(default=..., description="Plan description")
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
class PlanBlock(BaseModel):
    """A block of the plan: identifier, output format and at least one step."""

    block_id: str = Field(default=..., description="Block identifier")
    format: str = Field(default=..., description="Output format")
    steps: list[PlanStep] = Field(default=..., description="List of steps", min_length=1)
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
class PlanOutput(BaseModel):
    """Schema of the PlanAgent reply: an answer style and at least one block."""

    answer_style: str = Field(default=..., description="Style of the answer")
    blocks: list[PlanBlock] = Field(
        default=..., description="List of plan blocks", min_length=1
    )
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
class SolveToolCall(BaseModel):
    """A single tool call in the solve output: tool type plus query."""

    tool_type: str = Field(default=..., description="Type of tool to call")
    query: str = Field(default=..., description="Query for the tool call")

    @field_validator("tool_type")
    @classmethod
    def validate_tool_type(cls, v):
        """Return the lower-cased tool type, rejecting anything outside VALID_SOLVE_TOOLS."""
        normalized = v.lower()
        if normalized in VALID_SOLVE_TOOLS:
            return normalized
        raise ValueError(f"tool_type must be one of {VALID_SOLVE_TOOLS}, got: {v}")
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
class SolveOutput(BaseModel):
    """Schema of the SolveAgent reply: a non-empty list of tool calls."""

    tool_calls: list[SolveToolCall] = Field(
        default=..., description="List of tool calls", min_length=1
    )
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
# Initialize module logger
|
|
138
|
+
logger = get_logger("ErrorHandler")
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
def retry_on_parse_error(
    max_retries: int = 2,
    delay: float = 1.0,
    backoff: float = 2.0,
    exceptions: tuple[type[Exception], ...] = (LLMParseError,),
):
    """
    Parse error retry decorator using tenacity.

    The wrapped function is attempted at most ``max_retries + 1`` times. The
    wait before retry *n* is ``delay * backoff ** (n - 1)`` seconds, capped at
    60 seconds.

    Args:
        max_retries: Maximum retry count
        delay: Initial delay time (seconds)
        backoff: Delay multiplier factor
        exceptions: Tuple of exception types to retry on (default: LLMParseError only)

    Returns:
        Decorated function
    """
    # retry_if_exception_type takes ONE argument (a type or a tuple of types).
    # Unpacking the tuple only worked while it held a single element.
    retry_types = tuple(exceptions)

    def _log_retry(retry_state) -> None:
        """Log the failed attempt and the upcoming sleep before tenacity waits."""
        logger.warning(
            f"Parse failed (attempt {retry_state.attempt_number}/{max_retries + 1}), "
            f"retrying in {retry_state.upcoming_sleep:.1f}s... Error: {str(retry_state.outcome.exception())}"
        )

    def decorator(func: Callable):
        # NOTE(review): reraise is left at tenacity's default, so once attempts
        # are exhausted callers see tenacity.RetryError rather than the
        # original exception -- confirm that is what callers expect.
        return tenacity.retry(
            retry=tenacity.retry_if_exception_type(retry_types),
            # multiplier is the base wait and exp_base the growth factor, which
            # matches the documented "initial delay" / "multiplier" semantics.
            wait=tenacity.wait_exponential(multiplier=delay, exp_base=backoff, max=60),
            stop=tenacity.stop_after_attempt(max_retries + 1),
            before_sleep=_log_retry,
        )(func)

    return decorator
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
def validate_output(
|
|
175
|
+
output: dict[str, Any], required_fields: list, field_types: dict[str, type] | None = None
|
|
176
|
+
) -> bool:
|
|
177
|
+
"""
|
|
178
|
+
Validate output contains required fields and correct types
|
|
179
|
+
|
|
180
|
+
Args:
|
|
181
|
+
output: Output dictionary
|
|
182
|
+
required_fields: List of required fields
|
|
183
|
+
field_types: Field type dictionary (optional)
|
|
184
|
+
|
|
185
|
+
Returns:
|
|
186
|
+
bool: Whether valid
|
|
187
|
+
|
|
188
|
+
Raises:
|
|
189
|
+
LLMParseError: Raised when validation fails
|
|
190
|
+
"""
|
|
191
|
+
# Check required fields
|
|
192
|
+
missing_fields = [field for field in required_fields if field not in output]
|
|
193
|
+
|
|
194
|
+
if missing_fields:
|
|
195
|
+
raise LLMParseError(f"Missing required fields: {', '.join(missing_fields)}")
|
|
196
|
+
|
|
197
|
+
# Check field types
|
|
198
|
+
if field_types:
|
|
199
|
+
for field, expected_type in field_types.items():
|
|
200
|
+
if field in output and not isinstance(output[field], expected_type):
|
|
201
|
+
actual_type = type(output[field]).__name__
|
|
202
|
+
expected_type_name = expected_type.__name__
|
|
203
|
+
raise LLMParseError(
|
|
204
|
+
f"Field '{field}' type error: expected {expected_type_name}, got {actual_type}"
|
|
205
|
+
)
|
|
206
|
+
|
|
207
|
+
return True
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
def safe_parse(
    text: str, parser_func: Callable, default: Any = None, raise_on_error: bool = False
) -> Any:
    """
    Run ``parser_func(text)`` and shield the caller from parser failures.

    Args:
        text: Text to parse
        parser_func: Parser function, called with ``text`` as its only argument
        default: Value returned when parsing fails and ``raise_on_error`` is False
        raise_on_error: When True, a failure is re-raised as LLMParseError

    Returns:
        Parsed result or default value

    Raises:
        LLMParseError: If parsing fails and ``raise_on_error`` is True
    """
    try:
        parsed = parser_func(text)
    except Exception as e:
        if not raise_on_error:
            # Best-effort mode: record the failure with traceback, then fall back.
            logger.error(
                f"Parsing failed; falling back to default value {default!r}. This may affect behavior. Error: {e!s}",
                exc_info=True,
            )
            return default
        raise LLMParseError(f"Parsing failed: {e!s}") from e
    return parsed
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
def validate_investigate_output(
    output: dict[str, Any], valid_tools: list[str] = VALID_INVESTIGATE_TOOLS
) -> bool:
    """Validate InvestigateAgent output.

    With the default ``valid_tools`` the InvestigateOutput Pydantic model is
    used; with a custom tool list the same rules are checked by hand against
    that list.

    Args:
        output: Parsed InvestigateAgent reply
        valid_tools: Allowed (lower-case) tool types

    Returns:
        bool: Always True when validation passes

    Raises:
        LLMParseError: Raised when validation fails
    """
    # Check if custom tools are provided
    if valid_tools != VALID_INVESTIGATE_TOOLS:
        # For custom tools, do manual validation
        if not isinstance(output, dict):
            raise LLMParseError(f"Output must be a dictionary, got {type(output).__name__}")
        validate_output(output, ["reasoning"], {"reasoning": str})
        tools = output.get("tools", [])
        if not isinstance(tools, list) or len(tools) < 1:
            raise LLMParseError("tools must be a non-empty list")

        has_none = False
        for i, tool in enumerate(tools):
            if not isinstance(tool, dict):
                raise LLMParseError(f"tool[{i}] must be a dictionary")
            raw_type = tool.get("tool_type", "")
            # An LLM may emit null or a number here; calling .lower() on it
            # would raise AttributeError instead of a parse error.
            tool_type = raw_type.lower() if isinstance(raw_type, str) else raw_type
            if not isinstance(raw_type, str) or tool_type not in valid_tools:
                raise LLMParseError(
                    f"tool[{i}] tool_type must be one of {valid_tools}, got: {tool_type}"
                )
            if tool_type != "none" and not tool.get("query"):
                raise LLMParseError(f"tool[{i}] missing query")
            has_none = has_none or tool_type == "none"

        # Check none tool exclusivity
        if has_none and len(tools) > 1:
            raise LLMParseError("When 'none' tool exists, no other tool intents should be provided")
        return True

    # Use Pydantic for standard validation. model_validate (unlike **output)
    # reports a non-dict payload as a ValidationError rather than a TypeError.
    try:
        InvestigateOutput.model_validate(output)
        return True
    except ValidationError as e:
        error_details = _format_validation_errors(e)
        raise LLMParseError(f"InvestigateAgent output validation failed: {error_details}") from e
|
|
274
|
+
|
|
275
|
+
|
|
276
|
+
def validate_note_output(output: dict[str, Any]) -> bool:
    """Validate NoteAgent output using the NoteOutput Pydantic model.

    Args:
        output: Parsed NoteAgent reply

    Returns:
        bool: Always True when validation passes

    Raises:
        LLMParseError: Raised when validation fails
    """
    try:
        # model_validate turns a non-dict payload (e.g. a JSON array) into a
        # ValidationError; NoteOutput(**output) would raise TypeError instead.
        NoteOutput.model_validate(output)
        return True
    except ValidationError as e:
        error_details = _format_validation_errors(e)
        raise LLMParseError(f"NoteAgent output validation failed: {error_details}") from e
|
|
284
|
+
|
|
285
|
+
|
|
286
|
+
def validate_reflect_output(output: dict[str, Any]) -> bool:
    """Validate InvestigateReflectAgent output using the ReflectOutput Pydantic model.

    Args:
        output: Parsed InvestigateReflectAgent reply

    Returns:
        bool: Always True when validation passes

    Raises:
        LLMParseError: Raised when validation fails
    """
    try:
        # model_validate turns a non-dict payload into a ValidationError;
        # ReflectOutput(**output) would raise TypeError instead.
        ReflectOutput.model_validate(output)
        return True
    except ValidationError as e:
        error_details = _format_validation_errors(e)
        raise LLMParseError(
            f"InvestigateReflectAgent output validation failed: {error_details}"
        ) from e
|
|
296
|
+
|
|
297
|
+
|
|
298
|
+
def validate_plan_output(output: dict[str, Any]) -> bool:
    """Validate PlanAgent output using the PlanOutput Pydantic model.

    Args:
        output: Parsed PlanAgent reply

    Returns:
        bool: Always True when validation passes

    Raises:
        LLMParseError: Raised when validation fails
    """
    try:
        # model_validate turns a non-dict payload into a ValidationError;
        # PlanOutput(**output) would raise TypeError instead.
        PlanOutput.model_validate(output)
        return True
    except ValidationError as e:
        error_details = _format_validation_errors(e)
        raise LLMParseError(f"PlanAgent output validation failed: {error_details}") from e
|
|
306
|
+
|
|
307
|
+
|
|
308
|
+
def validate_solve_output(
    output: dict[str, Any], valid_tool_types: list[str] = VALID_SOLVE_TOOLS
) -> bool:
    """Validate SolveAgent output.

    With the default ``valid_tool_types`` the SolveOutput Pydantic model is
    used; with a custom list the tool calls are checked by hand against it.

    Args:
        output: Parsed SolveAgent reply
        valid_tool_types: Allowed (lower-case) tool types

    Returns:
        bool: Always True when validation passes

    Raises:
        LLMParseError: Raised when validation fails
    """
    # Check if custom tools are provided
    if valid_tool_types != VALID_SOLVE_TOOLS:
        # For custom tools, do manual validation
        if not isinstance(output, dict):
            raise LLMParseError(f"Output must be a dictionary, got {type(output).__name__}")
        validate_output(output, ["tool_calls"], {"tool_calls": list})
        tool_calls = output.get("tool_calls", [])
        if not isinstance(tool_calls, list) or len(tool_calls) < 1:
            raise LLMParseError("tool_calls must be a non-empty list")

        for i, tool_call in enumerate(tool_calls):
            if not isinstance(tool_call, dict):
                raise LLMParseError(f"tool_call[{i}] must be a dictionary")
            if "tool_type" not in tool_call or "query" not in tool_call:
                raise LLMParseError(f"tool_call[{i}] missing required fields: tool_type, query")
            raw_type = tool_call.get("tool_type", "")
            # A null / numeric tool_type must be reported as a parse error,
            # not crash with AttributeError on .lower().
            tool_type = raw_type.lower() if isinstance(raw_type, str) else raw_type
            if not isinstance(raw_type, str) or tool_type not in valid_tool_types:
                raise LLMParseError(
                    f"Invalid tool_type: {tool_type}, must be one of {valid_tool_types}"
                )
        return True

    # Use Pydantic for standard validation. model_validate (unlike **output)
    # reports a non-dict payload as a ValidationError rather than a TypeError.
    try:
        SolveOutput.model_validate(output)
        return True
    except ValidationError as e:
        error_details = _format_validation_errors(e)
        raise LLMParseError(f"SolveAgent output validation failed: {error_details}") from e
|
|
339
|
+
|
|
340
|
+
|
|
341
|
+
def validate_none_tool_constraint(
    tools: list[dict[str, Any]], tool_type_key: str = "tool_type"
) -> None:
    """
    Ensure a 'none' tool is never listed together with other tools.

    Args:
        tools: List of tool dictionaries
        tool_type_key: Key holding the tool type in each dict (default: "tool_type")

    Raises:
        LLMParseError: If a 'none' tool (case-insensitive) appears in a list
            with more than one entry
    """
    found_none = False
    for entry in tools:
        kind = entry.get(tool_type_key)
        # Non-string tool types can never be the 'none' marker.
        if isinstance(kind, str) and kind.lower() == "none":
            found_none = True
            break

    if not found_none or len(tools) <= 1:
        return

    listed_types = [entry.get(tool_type_key) for entry in tools]
    raise LLMParseError(
        f"When 'none' tool exists, no other tool intents should be provided. "
        f"Found {len(tools)} tools with types: {listed_types}"
    )
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
#!/usr/bin/env python
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
JSON Utils - JSON parsing utilities
|
|
5
|
+
Specifically for extracting and parsing JSON data from LLM output,
|
|
6
|
+
supports Markdown code block processing and is robust to Python
|
|
7
|
+
triple-quoted strings generated by LLMs.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import json
|
|
11
|
+
import re
|
|
12
|
+
from typing import Any
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def _escape_triple_quoted_strings(text: str) -> str:
|
|
16
|
+
"""
|
|
17
|
+
Convert Python triple-quoted strings (\"\"\" ... \"\"\")
|
|
18
|
+
into JSON-safe escaped strings.
|
|
19
|
+
|
|
20
|
+
This fixes cases where LLMs generate Python code inside JSON,
|
|
21
|
+
which would otherwise violate JSON syntax.
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
def replacer(match: re.Match) -> str:
|
|
25
|
+
content = match.group(1)
|
|
26
|
+
# json.dumps safely escapes newlines and quotes
|
|
27
|
+
return json.dumps(content)
|
|
28
|
+
|
|
29
|
+
pattern = re.compile(r'"""([\s\S]*?)"""')
|
|
30
|
+
return pattern.sub(replacer, text)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def extract_json_from_text(text: str) -> dict[str, Any] | list[Any] | None:
    """
    Extract JSON object or array from text.

    Candidates are tried in this order and the first one that parses to a
    dict or list wins:
        1. Every ```json ... ``` / ``` ... ``` code block, in order of appearance
        2. The full text
        3. The span from the first "{" to the last "}"
        4. The span from the first "[" to the last "]"

    Python triple-quoted strings are converted to JSON strings before parsing.

    Args:
        text: Original text containing JSON

    Returns:
        Parsed JSON object (dict) or array (list),
        or None if no candidate parses to a dict or list.
    """
    if not text:
        return None

    def _load_container(candidate: str) -> dict[str, Any] | list[Any] | None:
        """Parse candidate; return it only when it is a JSON object or array."""
        try:
            parsed = json.loads(candidate)
        except json.JSONDecodeError:
            return None
        # Scalars (numbers, strings, true/false/null) are valid JSON but are
        # outside this function's documented return contract.
        return parsed if isinstance(parsed, (dict, list)) else None

    # Sanitize triple-quoted strings before any JSON parsing
    text = _escape_triple_quoted_strings(text)

    # 1. Try every Markdown code block, not just the first: a reply may show
    #    a non-JSON snippet before the JSON payload.
    for block in re.finditer(r"```(?:json)?\s*([\s\S]*?)\s*```", text):
        result = _load_container(block.group(1).strip())
        if result is not None:
            return result

    # 2. Try parsing the full text directly
    result = _load_container(text)
    if result is not None:
        return result

    # 3./4. Try the outermost JSON object, then the outermost JSON array
    #       (greedy match: first opening bracket to last closing bracket).
    for outer_pattern in (r"\{[\s\S]*\}", r"\[[\s\S]*\]"):
        outer = re.search(outer_pattern, text)
        if outer:
            result = _load_container(outer.group(0))
            if result is not None:
                return result

    return None
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def clean_json_string(json_str: str) -> str:
    """
    Strip control characters from a JSON string.

    Removes every character in U+0000-U+001F and U+007F-U+009F. This includes
    tab, newline and carriage return.
    """
    doomed = {code: None for code in (*range(0x00, 0x20), *range(0x7F, 0xA0))}
    return json_str.translate(doomed)
|