realtimex-deeptutor 0.5.0.post1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (276) hide show
  1. realtimex_deeptutor/__init__.py +67 -0
  2. realtimex_deeptutor-0.5.0.post1.dist-info/METADATA +1612 -0
  3. realtimex_deeptutor-0.5.0.post1.dist-info/RECORD +276 -0
  4. realtimex_deeptutor-0.5.0.post1.dist-info/WHEEL +5 -0
  5. realtimex_deeptutor-0.5.0.post1.dist-info/entry_points.txt +2 -0
  6. realtimex_deeptutor-0.5.0.post1.dist-info/licenses/LICENSE +661 -0
  7. realtimex_deeptutor-0.5.0.post1.dist-info/top_level.txt +2 -0
  8. src/__init__.py +40 -0
  9. src/agents/__init__.py +24 -0
  10. src/agents/base_agent.py +657 -0
  11. src/agents/chat/__init__.py +24 -0
  12. src/agents/chat/chat_agent.py +435 -0
  13. src/agents/chat/prompts/en/chat_agent.yaml +35 -0
  14. src/agents/chat/prompts/zh/chat_agent.yaml +35 -0
  15. src/agents/chat/session_manager.py +311 -0
  16. src/agents/co_writer/__init__.py +0 -0
  17. src/agents/co_writer/edit_agent.py +260 -0
  18. src/agents/co_writer/narrator_agent.py +423 -0
  19. src/agents/co_writer/prompts/en/edit_agent.yaml +113 -0
  20. src/agents/co_writer/prompts/en/narrator_agent.yaml +88 -0
  21. src/agents/co_writer/prompts/zh/edit_agent.yaml +113 -0
  22. src/agents/co_writer/prompts/zh/narrator_agent.yaml +88 -0
  23. src/agents/guide/__init__.py +16 -0
  24. src/agents/guide/agents/__init__.py +11 -0
  25. src/agents/guide/agents/chat_agent.py +104 -0
  26. src/agents/guide/agents/interactive_agent.py +223 -0
  27. src/agents/guide/agents/locate_agent.py +149 -0
  28. src/agents/guide/agents/summary_agent.py +150 -0
  29. src/agents/guide/guide_manager.py +500 -0
  30. src/agents/guide/prompts/en/chat_agent.yaml +41 -0
  31. src/agents/guide/prompts/en/interactive_agent.yaml +202 -0
  32. src/agents/guide/prompts/en/locate_agent.yaml +68 -0
  33. src/agents/guide/prompts/en/summary_agent.yaml +157 -0
  34. src/agents/guide/prompts/zh/chat_agent.yaml +41 -0
  35. src/agents/guide/prompts/zh/interactive_agent.yaml +626 -0
  36. src/agents/guide/prompts/zh/locate_agent.yaml +68 -0
  37. src/agents/guide/prompts/zh/summary_agent.yaml +157 -0
  38. src/agents/ideagen/__init__.py +12 -0
  39. src/agents/ideagen/idea_generation_workflow.py +426 -0
  40. src/agents/ideagen/material_organizer_agent.py +173 -0
  41. src/agents/ideagen/prompts/en/idea_generation.yaml +187 -0
  42. src/agents/ideagen/prompts/en/material_organizer.yaml +69 -0
  43. src/agents/ideagen/prompts/zh/idea_generation.yaml +187 -0
  44. src/agents/ideagen/prompts/zh/material_organizer.yaml +69 -0
  45. src/agents/question/__init__.py +24 -0
  46. src/agents/question/agents/__init__.py +18 -0
  47. src/agents/question/agents/generate_agent.py +381 -0
  48. src/agents/question/agents/relevance_analyzer.py +207 -0
  49. src/agents/question/agents/retrieve_agent.py +239 -0
  50. src/agents/question/coordinator.py +718 -0
  51. src/agents/question/example.py +109 -0
  52. src/agents/question/prompts/en/coordinator.yaml +75 -0
  53. src/agents/question/prompts/en/generate_agent.yaml +77 -0
  54. src/agents/question/prompts/en/relevance_analyzer.yaml +41 -0
  55. src/agents/question/prompts/en/retrieve_agent.yaml +32 -0
  56. src/agents/question/prompts/zh/coordinator.yaml +75 -0
  57. src/agents/question/prompts/zh/generate_agent.yaml +77 -0
  58. src/agents/question/prompts/zh/relevance_analyzer.yaml +39 -0
  59. src/agents/question/prompts/zh/retrieve_agent.yaml +30 -0
  60. src/agents/research/agents/__init__.py +23 -0
  61. src/agents/research/agents/decompose_agent.py +507 -0
  62. src/agents/research/agents/manager_agent.py +228 -0
  63. src/agents/research/agents/note_agent.py +180 -0
  64. src/agents/research/agents/rephrase_agent.py +263 -0
  65. src/agents/research/agents/reporting_agent.py +1333 -0
  66. src/agents/research/agents/research_agent.py +714 -0
  67. src/agents/research/data_structures.py +451 -0
  68. src/agents/research/main.py +188 -0
  69. src/agents/research/prompts/en/decompose_agent.yaml +89 -0
  70. src/agents/research/prompts/en/manager_agent.yaml +24 -0
  71. src/agents/research/prompts/en/note_agent.yaml +121 -0
  72. src/agents/research/prompts/en/rephrase_agent.yaml +58 -0
  73. src/agents/research/prompts/en/reporting_agent.yaml +380 -0
  74. src/agents/research/prompts/en/research_agent.yaml +173 -0
  75. src/agents/research/prompts/zh/decompose_agent.yaml +89 -0
  76. src/agents/research/prompts/zh/manager_agent.yaml +24 -0
  77. src/agents/research/prompts/zh/note_agent.yaml +121 -0
  78. src/agents/research/prompts/zh/rephrase_agent.yaml +58 -0
  79. src/agents/research/prompts/zh/reporting_agent.yaml +380 -0
  80. src/agents/research/prompts/zh/research_agent.yaml +173 -0
  81. src/agents/research/research_pipeline.py +1309 -0
  82. src/agents/research/utils/__init__.py +60 -0
  83. src/agents/research/utils/citation_manager.py +799 -0
  84. src/agents/research/utils/json_utils.py +98 -0
  85. src/agents/research/utils/token_tracker.py +297 -0
  86. src/agents/solve/__init__.py +80 -0
  87. src/agents/solve/analysis_loop/__init__.py +14 -0
  88. src/agents/solve/analysis_loop/investigate_agent.py +414 -0
  89. src/agents/solve/analysis_loop/note_agent.py +190 -0
  90. src/agents/solve/main_solver.py +862 -0
  91. src/agents/solve/memory/__init__.py +34 -0
  92. src/agents/solve/memory/citation_memory.py +353 -0
  93. src/agents/solve/memory/investigate_memory.py +226 -0
  94. src/agents/solve/memory/solve_memory.py +340 -0
  95. src/agents/solve/prompts/en/analysis_loop/investigate_agent.yaml +55 -0
  96. src/agents/solve/prompts/en/analysis_loop/note_agent.yaml +54 -0
  97. src/agents/solve/prompts/en/solve_loop/manager_agent.yaml +67 -0
  98. src/agents/solve/prompts/en/solve_loop/precision_answer_agent.yaml +62 -0
  99. src/agents/solve/prompts/en/solve_loop/response_agent.yaml +90 -0
  100. src/agents/solve/prompts/en/solve_loop/solve_agent.yaml +75 -0
  101. src/agents/solve/prompts/en/solve_loop/tool_agent.yaml +38 -0
  102. src/agents/solve/prompts/zh/analysis_loop/investigate_agent.yaml +53 -0
  103. src/agents/solve/prompts/zh/analysis_loop/note_agent.yaml +54 -0
  104. src/agents/solve/prompts/zh/solve_loop/manager_agent.yaml +66 -0
  105. src/agents/solve/prompts/zh/solve_loop/precision_answer_agent.yaml +62 -0
  106. src/agents/solve/prompts/zh/solve_loop/response_agent.yaml +90 -0
  107. src/agents/solve/prompts/zh/solve_loop/solve_agent.yaml +76 -0
  108. src/agents/solve/prompts/zh/solve_loop/tool_agent.yaml +41 -0
  109. src/agents/solve/solve_loop/__init__.py +22 -0
  110. src/agents/solve/solve_loop/citation_manager.py +74 -0
  111. src/agents/solve/solve_loop/manager_agent.py +274 -0
  112. src/agents/solve/solve_loop/precision_answer_agent.py +96 -0
  113. src/agents/solve/solve_loop/response_agent.py +301 -0
  114. src/agents/solve/solve_loop/solve_agent.py +325 -0
  115. src/agents/solve/solve_loop/tool_agent.py +470 -0
  116. src/agents/solve/utils/__init__.py +64 -0
  117. src/agents/solve/utils/config_validator.py +313 -0
  118. src/agents/solve/utils/display_manager.py +223 -0
  119. src/agents/solve/utils/error_handler.py +363 -0
  120. src/agents/solve/utils/json_utils.py +98 -0
  121. src/agents/solve/utils/performance_monitor.py +407 -0
  122. src/agents/solve/utils/token_tracker.py +541 -0
  123. src/api/__init__.py +0 -0
  124. src/api/main.py +240 -0
  125. src/api/routers/__init__.py +1 -0
  126. src/api/routers/agent_config.py +69 -0
  127. src/api/routers/chat.py +296 -0
  128. src/api/routers/co_writer.py +337 -0
  129. src/api/routers/config.py +627 -0
  130. src/api/routers/dashboard.py +18 -0
  131. src/api/routers/guide.py +337 -0
  132. src/api/routers/ideagen.py +436 -0
  133. src/api/routers/knowledge.py +821 -0
  134. src/api/routers/notebook.py +247 -0
  135. src/api/routers/question.py +537 -0
  136. src/api/routers/research.py +394 -0
  137. src/api/routers/settings.py +164 -0
  138. src/api/routers/solve.py +305 -0
  139. src/api/routers/system.py +252 -0
  140. src/api/run_server.py +61 -0
  141. src/api/utils/history.py +172 -0
  142. src/api/utils/log_interceptor.py +21 -0
  143. src/api/utils/notebook_manager.py +415 -0
  144. src/api/utils/progress_broadcaster.py +72 -0
  145. src/api/utils/task_id_manager.py +100 -0
  146. src/config/__init__.py +0 -0
  147. src/config/accessors.py +18 -0
  148. src/config/constants.py +34 -0
  149. src/config/defaults.py +18 -0
  150. src/config/schema.py +38 -0
  151. src/config/settings.py +50 -0
  152. src/core/errors.py +62 -0
  153. src/knowledge/__init__.py +23 -0
  154. src/knowledge/add_documents.py +606 -0
  155. src/knowledge/config.py +65 -0
  156. src/knowledge/example_add_documents.py +236 -0
  157. src/knowledge/extract_numbered_items.py +1039 -0
  158. src/knowledge/initializer.py +621 -0
  159. src/knowledge/kb.py +22 -0
  160. src/knowledge/manager.py +782 -0
  161. src/knowledge/progress_tracker.py +182 -0
  162. src/knowledge/start_kb.py +535 -0
  163. src/logging/__init__.py +103 -0
  164. src/logging/adapters/__init__.py +17 -0
  165. src/logging/adapters/lightrag.py +184 -0
  166. src/logging/adapters/llamaindex.py +141 -0
  167. src/logging/config.py +80 -0
  168. src/logging/handlers/__init__.py +20 -0
  169. src/logging/handlers/console.py +75 -0
  170. src/logging/handlers/file.py +201 -0
  171. src/logging/handlers/websocket.py +127 -0
  172. src/logging/logger.py +709 -0
  173. src/logging/stats/__init__.py +16 -0
  174. src/logging/stats/llm_stats.py +179 -0
  175. src/services/__init__.py +56 -0
  176. src/services/config/__init__.py +61 -0
  177. src/services/config/knowledge_base_config.py +210 -0
  178. src/services/config/loader.py +260 -0
  179. src/services/config/unified_config.py +603 -0
  180. src/services/embedding/__init__.py +45 -0
  181. src/services/embedding/adapters/__init__.py +22 -0
  182. src/services/embedding/adapters/base.py +106 -0
  183. src/services/embedding/adapters/cohere.py +127 -0
  184. src/services/embedding/adapters/jina.py +99 -0
  185. src/services/embedding/adapters/ollama.py +116 -0
  186. src/services/embedding/adapters/openai_compatible.py +96 -0
  187. src/services/embedding/client.py +159 -0
  188. src/services/embedding/config.py +156 -0
  189. src/services/embedding/provider.py +119 -0
  190. src/services/llm/__init__.py +152 -0
  191. src/services/llm/capabilities.py +313 -0
  192. src/services/llm/client.py +302 -0
  193. src/services/llm/cloud_provider.py +530 -0
  194. src/services/llm/config.py +200 -0
  195. src/services/llm/error_mapping.py +103 -0
  196. src/services/llm/exceptions.py +152 -0
  197. src/services/llm/factory.py +450 -0
  198. src/services/llm/local_provider.py +347 -0
  199. src/services/llm/providers/anthropic.py +95 -0
  200. src/services/llm/providers/base_provider.py +93 -0
  201. src/services/llm/providers/open_ai.py +83 -0
  202. src/services/llm/registry.py +71 -0
  203. src/services/llm/telemetry.py +40 -0
  204. src/services/llm/types.py +27 -0
  205. src/services/llm/utils.py +333 -0
  206. src/services/prompt/__init__.py +25 -0
  207. src/services/prompt/manager.py +206 -0
  208. src/services/rag/__init__.py +64 -0
  209. src/services/rag/components/__init__.py +29 -0
  210. src/services/rag/components/base.py +59 -0
  211. src/services/rag/components/chunkers/__init__.py +18 -0
  212. src/services/rag/components/chunkers/base.py +34 -0
  213. src/services/rag/components/chunkers/fixed.py +71 -0
  214. src/services/rag/components/chunkers/numbered_item.py +94 -0
  215. src/services/rag/components/chunkers/semantic.py +97 -0
  216. src/services/rag/components/embedders/__init__.py +14 -0
  217. src/services/rag/components/embedders/base.py +32 -0
  218. src/services/rag/components/embedders/openai.py +63 -0
  219. src/services/rag/components/indexers/__init__.py +18 -0
  220. src/services/rag/components/indexers/base.py +35 -0
  221. src/services/rag/components/indexers/graph.py +172 -0
  222. src/services/rag/components/indexers/lightrag.py +156 -0
  223. src/services/rag/components/indexers/vector.py +146 -0
  224. src/services/rag/components/parsers/__init__.py +18 -0
  225. src/services/rag/components/parsers/base.py +35 -0
  226. src/services/rag/components/parsers/markdown.py +52 -0
  227. src/services/rag/components/parsers/pdf.py +115 -0
  228. src/services/rag/components/parsers/text.py +86 -0
  229. src/services/rag/components/retrievers/__init__.py +18 -0
  230. src/services/rag/components/retrievers/base.py +34 -0
  231. src/services/rag/components/retrievers/dense.py +200 -0
  232. src/services/rag/components/retrievers/hybrid.py +164 -0
  233. src/services/rag/components/retrievers/lightrag.py +169 -0
  234. src/services/rag/components/routing.py +286 -0
  235. src/services/rag/factory.py +234 -0
  236. src/services/rag/pipeline.py +215 -0
  237. src/services/rag/pipelines/__init__.py +32 -0
  238. src/services/rag/pipelines/academic.py +44 -0
  239. src/services/rag/pipelines/lightrag.py +43 -0
  240. src/services/rag/pipelines/llamaindex.py +313 -0
  241. src/services/rag/pipelines/raganything.py +384 -0
  242. src/services/rag/service.py +244 -0
  243. src/services/rag/types.py +73 -0
  244. src/services/search/__init__.py +284 -0
  245. src/services/search/base.py +87 -0
  246. src/services/search/consolidation.py +398 -0
  247. src/services/search/providers/__init__.py +128 -0
  248. src/services/search/providers/baidu.py +188 -0
  249. src/services/search/providers/exa.py +194 -0
  250. src/services/search/providers/jina.py +161 -0
  251. src/services/search/providers/perplexity.py +153 -0
  252. src/services/search/providers/serper.py +209 -0
  253. src/services/search/providers/tavily.py +161 -0
  254. src/services/search/types.py +114 -0
  255. src/services/setup/__init__.py +34 -0
  256. src/services/setup/init.py +285 -0
  257. src/services/tts/__init__.py +16 -0
  258. src/services/tts/config.py +99 -0
  259. src/tools/__init__.py +91 -0
  260. src/tools/code_executor.py +536 -0
  261. src/tools/paper_search_tool.py +171 -0
  262. src/tools/query_item_tool.py +310 -0
  263. src/tools/question/__init__.py +15 -0
  264. src/tools/question/exam_mimic.py +616 -0
  265. src/tools/question/pdf_parser.py +211 -0
  266. src/tools/question/question_extractor.py +397 -0
  267. src/tools/rag_tool.py +173 -0
  268. src/tools/tex_chunker.py +339 -0
  269. src/tools/tex_downloader.py +253 -0
  270. src/tools/web_search.py +71 -0
  271. src/utils/config_manager.py +206 -0
  272. src/utils/document_validator.py +168 -0
  273. src/utils/error_rate_tracker.py +111 -0
  274. src/utils/error_utils.py +82 -0
  275. src/utils/json_parser.py +110 -0
  276. src/utils/network/circuit_breaker.py +79 -0
@@ -0,0 +1,363 @@
1
+ #!/usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Error Handler - Error handling and retry mechanism
5
+ """
6
+
7
+ from collections.abc import Callable
8
+ from typing import Any, Optional
9
+
10
+ from pydantic import BaseModel, Field, ValidationError, field_validator, model_validator
11
+ import tenacity
12
+
13
+ from src.config.constants import VALID_INVESTIGATE_TOOLS, VALID_SOLVE_TOOLS
14
+ from src.logging.logger import get_logger
15
+ from src.services.llm.exceptions import LLMParseError
16
+
17
+
18
def _format_validation_errors(e: ValidationError) -> str:
    """Render a Pydantic ``ValidationError`` as a single-line summary.

    Each error becomes ``"<dotted location>: <message>"``; an empty location
    is shown as ``root``. Entries are joined with ``"; "``.
    """
    rendered = []
    for err in e.errors():
        # ``loc`` is a sequence of keys/indices; join them with dots.
        location = ".".join(map(str, err["loc"])) or "root"
        rendered.append(f"{location}: {err['msg']}")
    return "; ".join(rendered)
23
+
24
+
25
+ # Pydantic models for output validation
26
class ToolIntent(BaseModel):
    """A single tool request inside the investigate output.

    ``tool_type`` is normalised to lower case and must be one of
    ``VALID_INVESTIGATE_TOOLS``. ``query`` must be non-empty unless the tool
    type is ``"none"``.
    """

    tool_type: str = Field(..., description="Type of tool to use")
    # validate_default=True forces the ``query`` validator to run even when the
    # key is omitted. Pydantic does not validate default values otherwise, so a
    # payload such as {"tool_type": "<real tool>"} with no query would be
    # accepted despite the "query is required" rule below.
    query: str = Field("", validate_default=True, description="Query for the tool")
    identifier: Optional[str] = Field(None, description="Optional identifier")

    @field_validator("tool_type")
    @classmethod
    def validate_tool_type(cls, v):
        """Lower-case the tool type and reject values outside the allowed set."""
        normalized = v.lower()
        if normalized not in VALID_INVESTIGATE_TOOLS:
            raise ValueError(f"tool_type must be one of {VALID_INVESTIGATE_TOOLS}, got: {v}")
        return normalized

    @field_validator("query")
    @classmethod
    def validate_query_required(cls, v, info):
        """Require a non-empty query for every tool type except ``"none"``."""
        if "tool_type" not in info.data:
            # tool_type is missing or already failed validation; that error is
            # reported on its own, so do not add a misleading query error.
            return v
        # info.data holds the already-normalised (lower-cased) tool_type.
        if info.data["tool_type"] != "none" and not v:
            raise ValueError("query is required for non-none tools")
        return v
47
+
48
+
49
class InvestigateOutput(BaseModel):
    """Schema for the InvestigateAgent result: reasoning plus one or more tool intents."""

    reasoning: str = Field(default=..., description="Reasoning for the investigation")
    tools: list[ToolIntent] = Field(default=..., min_length=1, description="List of tool intents")

    @field_validator("tools")
    @classmethod
    def validate_tools_consistency(cls, v):
        """Reject a tool list that mixes a ``"none"`` intent with other intents."""
        if len(v) <= 1:
            # A single intent can never conflict with itself.
            return v
        for intent in v:
            if intent.tool_type == "none":
                raise ValueError("When 'none' tool exists, no other tool intents should be provided")
        return v
63
+
64
+
65
class Citation(BaseModel):
    """One citation entry in the note output.

    All fields are optional individually, but a citation must carry a
    ``reference_id`` or a ``source`` (or both).
    """

    reference_id: Optional[str] = None
    source: Optional[str] = None
    content: Optional[str] = None

    @model_validator(mode="after")
    def validate_citation_fields(self):
        """Ensure the citation identifies its origin via reference_id or source."""
        if self.reference_id or self.source:
            return self
        raise ValueError("citation must contain reference_id or source")
78
+
79
+
80
class NoteOutput(BaseModel):
    """Schema for the NoteAgent result: a required summary and optional citations."""

    summary: str = Field(default=..., description="Summary of the notes")
    # Defaults to a fresh empty list when the key is absent.
    citations: list[Citation] = Field(default_factory=list, description="List of citations")
85
+
86
+
87
class ReflectOutput(BaseModel):
    """Schema for the InvestigateReflectAgent result; all three fields are required."""

    should_stop: bool = Field(default=..., description="Whether to stop the investigation")
    reason: str = Field(default=..., description="Reason for the decision")
    remaining_questions: list[str] = Field(
        default=..., description="List of remaining questions"
    )
93
+
94
+
95
class PlanStep(BaseModel):
    """A single step inside a plan block: an identifier and its description."""

    step_id: str = Field(default=..., description="Step identifier")
    plan: str = Field(default=..., description="Plan description")
100
+
101
+
102
class PlanBlock(BaseModel):
    """A block of the plan: identifier, output format and at least one step."""

    block_id: str = Field(default=..., description="Block identifier")
    format: str = Field(default=..., description="Output format")
    steps: list[PlanStep] = Field(default=..., min_length=1, description="List of steps")
108
+
109
+
110
class PlanOutput(BaseModel):
    """Schema for the PlanAgent result: an answer style and at least one plan block."""

    answer_style: str = Field(default=..., description="Style of the answer")
    blocks: list[PlanBlock] = Field(default=..., min_length=1, description="List of plan blocks")
115
+
116
+
117
class SolveToolCall(BaseModel):
    """One tool call in the solve output; both the tool type and the query are required."""

    tool_type: str = Field(default=..., description="Type of tool to call")
    query: str = Field(default=..., description="Query for the tool call")

    @field_validator("tool_type")
    @classmethod
    def validate_tool_type(cls, v):
        """Return the lower-cased tool type, or fail if it is not in VALID_SOLVE_TOOLS."""
        lowered = v.lower()
        if lowered in VALID_SOLVE_TOOLS:
            return lowered
        raise ValueError(f"tool_type must be one of {VALID_SOLVE_TOOLS}, got: {v}")
129
+
130
+
131
class SolveOutput(BaseModel):
    """Schema for the SolveAgent result: a non-empty list of tool calls."""

    tool_calls: list[SolveToolCall] = Field(
        default=..., min_length=1, description="List of tool calls"
    )
135
+
136
+
137
+ # Module-level logger named "ErrorHandler"; used by retry_on_parse_error and safe_parse below
138
+ logger = get_logger("ErrorHandler")
139
+
140
+
141
def retry_on_parse_error(
    max_retries: int = 2,
    delay: float = 1.0,
    backoff: float = 2.0,
    exceptions: tuple[type[Exception], ...] = (LLMParseError,),
):
    """
    Parse error retry decorator using tenacity.

    The wrapped function is called up to ``max_retries + 1`` times. Between
    attempts tenacity's ``wait_exponential`` is used with ``multiplier=backoff``,
    ``min=delay`` and ``max=60``; ``delay`` therefore acts as the lower bound of
    each wait and ``backoff`` as the multiplier of the exponential term.

    Args:
        max_retries: Maximum retry count (attempts = max_retries + 1)
        delay: Minimum wait between attempts (seconds)
        backoff: Multiplier applied to the exponential wait
        exceptions: Exception type(s) to retry on (default: LLMParseError only)

    Returns:
        Decorator that wraps a function with the retry policy
    """
    total_attempts = max_retries + 1

    # retry_if_exception_type takes ONE argument (a type or a tuple of types).
    # Splatting the tuple only worked for the single-element default and raised
    # TypeError as soon as two or more exception types were supplied.
    retry_on = exceptions if isinstance(exceptions, type) else tuple(exceptions)

    def _log_retry(retry_state) -> None:
        # Emitted by tenacity right before sleeping ahead of the next attempt.
        logger.warning(
            f"Parse failed (attempt {retry_state.attempt_number}/{total_attempts}), "
            f"retrying in {retry_state.upcoming_sleep:.1f}s... Error: {str(retry_state.outcome.exception())}"
        )

    def decorator(func: Callable):
        return tenacity.retry(
            retry=tenacity.retry_if_exception_type(retry_on),
            wait=tenacity.wait_exponential(multiplier=backoff, min=delay, max=60),
            stop=tenacity.stop_after_attempt(total_attempts),
            before_sleep=_log_retry,
        )(func)

    return decorator
172
+
173
+
174
+ def validate_output(
175
+ output: dict[str, Any], required_fields: list, field_types: dict[str, type] | None = None
176
+ ) -> bool:
177
+ """
178
+ Validate output contains required fields and correct types
179
+
180
+ Args:
181
+ output: Output dictionary
182
+ required_fields: List of required fields
183
+ field_types: Field type dictionary (optional)
184
+
185
+ Returns:
186
+ bool: Whether valid
187
+
188
+ Raises:
189
+ LLMParseError: Raised when validation fails
190
+ """
191
+ # Check required fields
192
+ missing_fields = [field for field in required_fields if field not in output]
193
+
194
+ if missing_fields:
195
+ raise LLMParseError(f"Missing required fields: {', '.join(missing_fields)}")
196
+
197
+ # Check field types
198
+ if field_types:
199
+ for field, expected_type in field_types.items():
200
+ if field in output and not isinstance(output[field], expected_type):
201
+ actual_type = type(output[field]).__name__
202
+ expected_type_name = expected_type.__name__
203
+ raise LLMParseError(
204
+ f"Field '{field}' type error: expected {expected_type_name}, got {actual_type}"
205
+ )
206
+
207
+ return True
208
+
209
+
210
def safe_parse(
    text: str, parser_func: Callable, default: Any = None, raise_on_error: bool = False
) -> Any:
    """
    Run a parser and shield the caller from its exceptions.

    Args:
        text: Text to parse
        parser_func: Parser function, called as ``parser_func(text)``
        default: Value returned when parsing fails and raise_on_error is False
        raise_on_error: When True, re-raise failures as LLMParseError

    Returns:
        Parsed result or default value

    Raises:
        LLMParseError: If parsing fails and raise_on_error is True
    """
    try:
        parsed = parser_func(text)
    except Exception as e:
        if not raise_on_error:
            # Best-effort mode: record the failure with traceback, then fall back.
            logger.error(
                f"Parsing failed; falling back to default value {default!r}. This may affect behavior. Error: {e!s}",
                exc_info=True,
            )
            return default
        raise LLMParseError(f"Parsing failed: {e!s}") from e
    return parsed
236
+
237
+
238
def validate_investigate_output(
    output: dict[str, Any], valid_tools: list[str] = VALID_INVESTIGATE_TOOLS
) -> bool:
    """Validate InvestigateAgent output.

    With the default tool list the ``InvestigateOutput`` Pydantic model is
    used. When a custom ``valid_tools`` list is supplied the same rules are
    checked manually against that list.

    Args:
        output: Parsed agent output
        valid_tools: Allowed tool types (default: VALID_INVESTIGATE_TOOLS)

    Returns:
        bool: Always True when validation succeeds

    Raises:
        LLMParseError: If the output violates the schema
    """
    if valid_tools == VALID_INVESTIGATE_TOOLS:
        # Use Pydantic for standard validation
        try:
            InvestigateOutput(**output)
        except ValidationError as e:
            error_details = _format_validation_errors(e)
            raise LLMParseError(
                f"InvestigateAgent output validation failed: {error_details}"
            ) from e
        return True

    # For custom tools, do manual validation
    validate_output(output, ["reasoning"], {"reasoning": str})
    tools = output.get("tools", [])
    if not isinstance(tools, list) or len(tools) < 1:
        raise LLMParseError("tools must be a non-empty list")

    has_none = False
    for i, tool in enumerate(tools):
        if not isinstance(tool, dict):
            raise LLMParseError(f"tool[{i}] must be a dictionary")
        raw_type = tool.get("tool_type", "")
        # LLM output may carry null/number here; calling .lower() on it would
        # raise AttributeError and bypass handling keyed on LLMParseError.
        if not isinstance(raw_type, str):
            raise LLMParseError(
                f"tool[{i}] tool_type must be a string, got: {type(raw_type).__name__}"
            )
        tool_type = raw_type.lower()
        if tool_type not in valid_tools:
            raise LLMParseError(
                f"tool[{i}] tool_type must be one of {valid_tools}, got: {tool_type}"
            )
        if tool_type != "none" and not tool.get("query"):
            raise LLMParseError(f"tool[{i}] missing query")
        has_none = has_none or tool_type == "none"

    # Check none tool exclusivity (after every entry passed its own checks)
    if has_none and len(tools) > 1:
        raise LLMParseError("When 'none' tool exists, no other tool intents should be provided")
    return True
274
+
275
+
276
def validate_note_output(output: dict[str, Any]) -> bool:
    """Check NoteAgent output against ``NoteOutput``.

    Returns True on success; raises LLMParseError with the formatted
    Pydantic errors otherwise.
    """
    try:
        NoteOutput(**output)
    except ValidationError as exc:
        raise LLMParseError(
            f"NoteAgent output validation failed: {_format_validation_errors(exc)}"
        ) from exc
    return True
284
+
285
+
286
def validate_reflect_output(output: dict[str, Any]) -> bool:
    """Check InvestigateReflectAgent output against ``ReflectOutput``.

    Returns True on success; raises LLMParseError with the formatted
    Pydantic errors otherwise.
    """
    try:
        ReflectOutput(**output)
    except ValidationError as exc:
        summary = _format_validation_errors(exc)
        raise LLMParseError(f"InvestigateReflectAgent output validation failed: {summary}") from exc
    return True
296
+
297
+
298
def validate_plan_output(output: dict[str, Any]) -> bool:
    """Check PlanAgent output against ``PlanOutput``.

    Returns True on success; raises LLMParseError with the formatted
    Pydantic errors otherwise.
    """
    try:
        PlanOutput(**output)
    except ValidationError as exc:
        raise LLMParseError(
            f"PlanAgent output validation failed: {_format_validation_errors(exc)}"
        ) from exc
    return True
306
+
307
+
308
def validate_solve_output(
    output: dict[str, Any], valid_tool_types: list[str] = VALID_SOLVE_TOOLS
) -> bool:
    """Validate SolveAgent output.

    With the default tool list the ``SolveOutput`` Pydantic model is used.
    When a custom ``valid_tool_types`` list is supplied the tool calls are
    checked manually against that list.

    Args:
        output: Parsed agent output
        valid_tool_types: Allowed tool types (default: VALID_SOLVE_TOOLS)

    Returns:
        bool: Always True when validation succeeds

    Raises:
        LLMParseError: If the output violates the schema
    """
    if valid_tool_types == VALID_SOLVE_TOOLS:
        # Use Pydantic for standard validation
        try:
            SolveOutput(**output)
        except ValidationError as e:
            error_details = _format_validation_errors(e)
            raise LLMParseError(f"SolveAgent output validation failed: {error_details}") from e
        return True

    # For custom tools, do manual validation
    validate_output(output, ["tool_calls"], {"tool_calls": list})
    tool_calls = output.get("tool_calls", [])
    if not isinstance(tool_calls, list) or len(tool_calls) < 1:
        raise LLMParseError("tool_calls must be a non-empty list")

    for i, tool_call in enumerate(tool_calls):
        if not isinstance(tool_call, dict):
            raise LLMParseError(f"tool_call[{i}] must be a dictionary")
        if "tool_type" not in tool_call or "query" not in tool_call:
            raise LLMParseError(f"tool_call[{i}] missing required fields: tool_type, query")
        raw_type = tool_call["tool_type"]
        # The key may be present but null/non-string; .lower() on it would
        # raise AttributeError and bypass handling keyed on LLMParseError.
        if not isinstance(raw_type, str):
            raise LLMParseError(
                f"tool_call[{i}] tool_type must be a string, got: {type(raw_type).__name__}"
            )
        tool_type = raw_type.lower()
        if tool_type not in valid_tool_types:
            raise LLMParseError(
                f"Invalid tool_type: {tool_type}, must be one of {valid_tool_types}"
            )
    return True
339
+
340
+
341
def validate_none_tool_constraint(
    tools: list[dict[str, Any]], tool_type_key: str = "tool_type"
) -> None:
    """
    Ensure a 'none' tool entry is the only entry when it is present.

    Args:
        tools: List of tool dictionaries
        tool_type_key: Key holding the tool type in each dict (default: "tool_type")

    Raises:
        LLMParseError: If a 'none' tool appears alongside other tools
    """
    none_present = False
    for entry in tools:
        declared = entry.get(tool_type_key)
        # Only string values can match; the comparison is case-insensitive.
        if isinstance(declared, str) and declared.lower() == "none":
            none_present = True
            break

    if not none_present or len(tools) <= 1:
        return

    declared_types = [entry.get(tool_type_key) for entry in tools]
    raise LLMParseError(
        f"When 'none' tool exists, no other tool intents should be provided. "
        f"Found {len(tools)} tools with types: {declared_types}"
    )
@@ -0,0 +1,98 @@
1
+ #!/usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ JSON Utils - JSON parsing utilities
5
+ Specifically for extracting and parsing JSON data from LLM output,
6
+ supports Markdown code block processing and is robust to Python
7
+ triple-quoted strings generated by LLMs.
8
+ """
9
+
10
+ import json
11
+ import re
12
+ from typing import Any
13
+
14
+
15
+ def _escape_triple_quoted_strings(text: str) -> str:
16
+ """
17
+ Convert Python triple-quoted strings (\"\"\" ... \"\"\")
18
+ into JSON-safe escaped strings.
19
+
20
+ This fixes cases where LLMs generate Python code inside JSON,
21
+ which would otherwise violate JSON syntax.
22
+ """
23
+
24
+ def replacer(match: re.Match) -> str:
25
+ content = match.group(1)
26
+ # json.dumps safely escapes newlines and quotes
27
+ return json.dumps(content)
28
+
29
+ pattern = re.compile(r'"""([\s\S]*?)"""')
30
+ return pattern.sub(replacer, text)
31
+
32
+
33
+ def extract_json_from_text(text: str) -> dict[str, Any] | list[Any] | None:
34
+ """
35
+ Extract JSON object or array from text.
36
+
37
+ Supports:
38
+ 1. ```json ... ``` code blocks
39
+ 2. ``` ... ``` code blocks
40
+ 3. Pure JSON text
41
+ 4. JSON containing Python triple-quoted strings (sanitized)
42
+
43
+ Args:
44
+ text: Original text containing JSON
45
+
46
+ Returns:
47
+ Parsed JSON object (dict) or array (list),
48
+ or None if parsing fails.
49
+ """
50
+ if not text:
51
+ return None
52
+
53
+ # 🔧 FIX: sanitize triple-quoted strings before any JSON parsing
54
+ text = _escape_triple_quoted_strings(text)
55
+
56
+ # 1. Try matching Markdown code blocks
57
+ code_block_pattern = re.compile(r"```(?:json)?\s*([\s\S]*?)\s*```")
58
+ match = code_block_pattern.search(text)
59
+
60
+ if match:
61
+ json_str = match.group(1).strip()
62
+ try:
63
+ return json.loads(json_str)
64
+ except json.JSONDecodeError:
65
+ pass
66
+
67
+ # 2. Try parsing the full text directly
68
+ try:
69
+ return json.loads(text)
70
+ except json.JSONDecodeError:
71
+ pass
72
+
73
+ # 3. Try extracting outermost JSON object
74
+ json_obj_pattern = re.compile(r"\{[\s\S]*\}")
75
+ match_obj = json_obj_pattern.search(text)
76
+ if match_obj:
77
+ try:
78
+ return json.loads(match_obj.group(0))
79
+ except json.JSONDecodeError:
80
+ pass
81
+
82
+ # 4. Try extracting outermost JSON array
83
+ json_arr_pattern = re.compile(r"\[[\s\S]*\]")
84
+ match_arr = json_arr_pattern.search(text)
85
+ if match_arr:
86
+ try:
87
+ return json.loads(match_arr.group(0))
88
+ except json.JSONDecodeError:
89
+ pass
90
+
91
+ return None
92
+
93
+
94
def clean_json_string(json_str: str) -> str:
    """
    Strip control characters from a JSON string.

    Removes code points U+0000-U+001F and U+007F-U+009F (this includes
    tab, newline and carriage return); all other characters are kept.
    """
    # Mapping a code point to None in a translation table deletes it.
    control_chars = dict.fromkeys([*range(0x00, 0x20), *range(0x7F, 0xA0)])
    return json_str.translate(control_chars)