noesium 0.1.0__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff shows the contents of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registry.
Files changed (60)
  1. noesium/agents/askura_agent/__init__.py +22 -0
  2. noesium/agents/askura_agent/askura_agent.py +480 -0
  3. noesium/agents/askura_agent/conversation.py +164 -0
  4. noesium/agents/askura_agent/extractor.py +175 -0
  5. noesium/agents/askura_agent/memory.py +14 -0
  6. noesium/agents/askura_agent/models.py +239 -0
  7. noesium/agents/askura_agent/prompts.py +202 -0
  8. noesium/agents/askura_agent/reflection.py +234 -0
  9. noesium/agents/askura_agent/summarizer.py +30 -0
  10. noesium/agents/askura_agent/utils.py +6 -0
  11. noesium/agents/deep_research/__init__.py +13 -0
  12. noesium/agents/deep_research/agent.py +398 -0
  13. noesium/agents/deep_research/prompts.py +84 -0
  14. noesium/agents/deep_research/schemas.py +42 -0
  15. noesium/agents/deep_research/state.py +54 -0
  16. noesium/agents/search/__init__.py +5 -0
  17. noesium/agents/search/agent.py +474 -0
  18. noesium/agents/search/state.py +28 -0
  19. noesium/core/__init__.py +1 -1
  20. noesium/core/agent/base.py +10 -2
  21. noesium/core/goalith/decomposer/llm_decomposer.py +1 -1
  22. noesium/core/llm/__init__.py +1 -1
  23. noesium/core/llm/base.py +2 -2
  24. noesium/core/llm/litellm.py +42 -21
  25. noesium/core/llm/llamacpp.py +25 -4
  26. noesium/core/llm/ollama.py +43 -22
  27. noesium/core/llm/openai.py +25 -5
  28. noesium/core/llm/openrouter.py +1 -1
  29. noesium/core/toolify/base.py +9 -2
  30. noesium/core/toolify/config.py +2 -2
  31. noesium/core/toolify/registry.py +21 -5
  32. noesium/core/tracing/opik_tracing.py +7 -7
  33. noesium/core/vector_store/__init__.py +2 -2
  34. noesium/core/vector_store/base.py +1 -1
  35. noesium/core/vector_store/pgvector.py +10 -13
  36. noesium/core/vector_store/weaviate.py +2 -1
  37. noesium/toolkits/__init__.py +1 -0
  38. noesium/toolkits/arxiv_toolkit.py +310 -0
  39. noesium/toolkits/audio_aliyun_toolkit.py +441 -0
  40. noesium/toolkits/audio_toolkit.py +370 -0
  41. noesium/toolkits/bash_toolkit.py +332 -0
  42. noesium/toolkits/document_toolkit.py +454 -0
  43. noesium/toolkits/file_edit_toolkit.py +552 -0
  44. noesium/toolkits/github_toolkit.py +395 -0
  45. noesium/toolkits/gmail_toolkit.py +575 -0
  46. noesium/toolkits/image_toolkit.py +425 -0
  47. noesium/toolkits/memory_toolkit.py +398 -0
  48. noesium/toolkits/python_executor_toolkit.py +334 -0
  49. noesium/toolkits/search_toolkit.py +451 -0
  50. noesium/toolkits/serper_toolkit.py +623 -0
  51. noesium/toolkits/tabular_data_toolkit.py +537 -0
  52. noesium/toolkits/user_interaction_toolkit.py +365 -0
  53. noesium/toolkits/video_toolkit.py +168 -0
  54. noesium/toolkits/wikipedia_toolkit.py +420 -0
  55. noesium-0.2.1.dist-info/METADATA +253 -0
  56. {noesium-0.1.0.dist-info → noesium-0.2.1.dist-info}/RECORD +59 -23
  57. {noesium-0.1.0.dist-info → noesium-0.2.1.dist-info}/licenses/LICENSE +1 -1
  58. noesium-0.1.0.dist-info/METADATA +0 -525
  59. {noesium-0.1.0.dist-info → noesium-0.2.1.dist-info}/WHEEL +0 -0
  60. {noesium-0.1.0.dist-info → noesium-0.2.1.dist-info}/top_level.txt +0 -0
noesium/agents/askura_agent/reflection.py
@@ -0,0 +1,234 @@
+import random
+from typing import Any, List, Optional
+
+from noesium.core.llm import BaseLLMClient
+from noesium.core.utils.logging import get_logger
+
+from .models import (
+    AskuraConfig,
+    AskuraState,
+    ConversationContext,
+    ConversationStyle,
+    InformationSlot,
+    KnowledgeGapAnalysis,
+    NextActionPlan,
+)
+from .prompts import get_conversation_analysis_prompts
+from .utils import get_enum_value
+
+logger = get_logger(__name__)
+
+
+class Reflection:
+    def __init__(self, config: AskuraConfig, llm_client: Optional[BaseLLMClient] = None):
+        self.config = config
+        self.llm = llm_client
+
+    def missing_slots(self, state: AskuraState) -> List[InformationSlot]:
+        info = state.extracted_info
+        missing: List[InformationSlot] = []
+        for slot in self.config.information_slots:
+            if slot.required and not self._is_slot_complete(slot, info.get(slot.name)):
+                missing.append(slot)
+        # Highest priority first (larger number means higher priority)
+        missing.sort(key=lambda s: s.priority, reverse=True)
+        return missing
+
+    def _is_slot_complete(self, slot: InformationSlot, value: Any) -> bool:
+        if value in (None, "", [], {}):
+            return False
+        if slot.extraction_model and isinstance(value, dict):
+            try:
+                # Pydantic v2: check required fields on the model
+                required_fields = [
+                    name for name, field in slot.extraction_model.model_fields.items() if field.is_required
+                ]
+                for field_name in required_fields:
+                    if value.get(field_name) in (None, "", [], {}):
+                        return False
+            except Exception:
+                # If introspection fails, fall back to the non-empty check
+                return True
+        return True
+
+    def evaluate_knowledge_gap(self, state: AskuraState, recent_messages: Optional[List[str]] = None) -> KnowledgeGapAnalysis:
+        """Evaluate knowledge gaps using LLM analysis combining all upstream information."""
+        if not self.llm:
+            # Fallback when no LLM is available
+            missing_slots = [s.name for s in self.missing_slots(state)]
+            return KnowledgeGapAnalysis(
+                knowledge_gap_summary=(
+                    f"Missing slots: {', '.join(missing_slots)}"
+                    if missing_slots
+                    else "All required information collected"
+                ),
+                critical_missing_info=missing_slots,
+                suggested_next_topics=missing_slots[:5] if missing_slots else [],
+                readiness_to_proceed=0.0 if missing_slots else 1.0,
+                reasoning="Fallback analysis due to missing LLM client",
+            )
+
+        try:
+            # Format extracted information for the prompt
+            extracted_info_text = ""
+            if state.extracted_info:
+                extracted_info_text = "\n".join([f"- {slot}: {info}" for slot, info in state.extracted_info.items()])
+            else:
+                extracted_info_text = "No information extracted yet"
+
+            # Format missing information for the prompt
+            missing_info_text = ""
+            if state.missing_info:
+                missing_info_text = "\n".join([f"- {slot}: {desc}" for slot, desc in state.missing_info.items()])
+            else:
+                missing_info_text = "All required information collected"
+
+            # Format memory information
+            memory_text = ""
+            if state.memory:
+                memory_text = str(state.memory)
+            else:
+                memory_text = "No memory available"
+
+            # Format recent messages
+            recent_messages_text = ""
+            if recent_messages:
+                recent_messages_text = "\n".join([f"User: {msg}" for msg in recent_messages])
+            else:
+                recent_messages_text = "No recent messages"
+
+            # Get structured prompts for knowledge gap analysis
+            conversation_purpose = state.conversation_context.conversation_purpose
+            system_prompt, user_prompt = get_conversation_analysis_prompts(
+                "knowledge_gap_analysis",
+                conversation_purpose=conversation_purpose,
+                conversation_context=state.conversation_context.to_dict(),
+                extracted_info=extracted_info_text,
+                missing_info=missing_info_text,
+                memory=memory_text,
+                recent_messages=recent_messages_text,
+            )
+
+            # Use structured completion for reliable analysis
+            analysis: KnowledgeGapAnalysis = self.llm.structured_completion(
+                messages=[{"role": "system", "content": system_prompt}, {"role": "user", "content": user_prompt}],
+                response_model=KnowledgeGapAnalysis,
+                temperature=0.3,
+                max_tokens=800,
+            )
+
+            return analysis
+
+        except Exception as e:
+            logger.warning(f"LLM-based knowledge gap analysis failed: {e}, falling back to basic analysis")
+            # Fallback to basic analysis
+            missing_slots = [s.name for s in self.missing_slots(state)]
+            return KnowledgeGapAnalysis(
+                knowledge_gap_summary=(
+                    f"Error in analysis. Missing slots: {', '.join(missing_slots)}"
+                    if missing_slots
+                    else "Analysis error but all slots filled"
+                ),
+                critical_missing_info=missing_slots,
+                suggested_next_topics=missing_slots[:5] if missing_slots else [],
+                readiness_to_proceed=0.0 if missing_slots else 0.5,
+                reasoning=f"Fallback analysis due to error: {str(e)}",
+            )
+
+    def next_action(
+        self,
+        state: AskuraState,
+        context: ConversationContext,
+        recent_messages: List[str],
+        ready_to_summarize: bool = False,
+    ) -> NextActionPlan:
+        """
+        Unified method to determine the next action with intent classification.
+
+        This method combines intent classification and next-action determination
+        into a single LLM call for better consistency and efficiency.
+        """
+        try:
+            # Prepare available actions
+            allowed = list(state.missing_info.keys())
+            if ready_to_summarize:
+                allowed.append("summarize")
+            allowed.extend(["redirect_conversation", "reply_smalltalk"])
+
+            # Get the structured prompt for unified next-action determination
+            recent_messages_text = "\n".join([f"User: {msg}" for msg in recent_messages]) if recent_messages else ""
+
+            system_prompt, user_prompt = get_conversation_analysis_prompts(
+                "determine_next_action",
+                conversation_context=context.to_dict(),
+                available_actions=allowed,
+                ready_to_summarize=ready_to_summarize,
+                recent_messages=recent_messages_text,
+            )
+
+            # Use structured completion with retry for reliable unified analysis
+            result: NextActionPlan = self.llm.structured_completion(
+                messages=[{"role": "system", "content": system_prompt}, {"role": "user", "content": user_prompt}],
+                response_model=NextActionPlan,
+                temperature=0.3,
+                max_tokens=300,
+            )
+
+            # Validate the response
+            if result.next_action not in allowed:
+                raise ValueError(f"LLM returned invalid action: {result.next_action}")
+            return result
+
+        except Exception as e:
+            logger.warning(f"Unified next action determination failed: {e}, falling back to heuristics")
+            # Fallback to heuristic approach
+            next_action = self._get_heuristic_next_action(context, list(state.missing_info.keys()))
+            return NextActionPlan(
+                intent_type="task",
+                next_action=next_action or "summarize",
+                reasoning=f"Heuristic fallback - error: {str(e)}",
+                confidence=0.5,
+                is_smalltalk=False,
+            )
+
+    def _get_heuristic_next_action(self, context: ConversationContext, missing_info: List[str]) -> Optional[str]:
+        """Get the next action using a heuristic approach as fallback."""
+
+        if not missing_info:
+            return "summarize"
+
+        # If the conversation is off-track, prioritize redirecting
+        if context.conversation_on_track_confidence < 0.4:
+            return "redirect_conversation"
+
+        # If the conversation is highly on-track, focus on gathering missing info
+        if context.conversation_on_track_confidence > 0.7:
+            # Prioritize based on conversation style
+            style_value = get_enum_value(context.conversation_style)
+            if style_value == ConversationStyle.DIRECT.value:
+                # Pick randomly from missing info instead of always the first
+                return random.choice(missing_info) if missing_info else None
+            elif style_value == ConversationStyle.EXPLORATORY.value:
+                # For exploratory users, suggest topics they might be interested in
+                return random.choice(missing_info) if missing_info else None
+            elif style_value == ConversationStyle.CASUAL.value:
+                # For casual users, ask easy questions first
+                easy_questions = self._get_easy_questions()
+                for question in easy_questions:
+                    if question in missing_info:
+                        return question
+                # If no easy question is found, pick randomly from missing info
+                return random.choice(missing_info) if missing_info else None
+
+        # For moderate alignment, balance staying on track with gathering info
+        # Pick randomly from missing info
+        return random.choice(missing_info) if missing_info else None
+
+    def _get_easy_questions(self) -> List[str]:
+        """Get the list of easy questions that boost confidence."""
+        easy_questions = []
+        for slot in self.config.information_slots:
+            # Consider questions about preferences and interests as "easy"
+            if any(word in slot.name.lower() for word in ["interest", "preference", "like", "favorite"]):
+                easy_questions.append(f"ask_{slot.name}")
+        return easy_questions
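
A minimal wiring sketch for the new Reflection helper follows. Only the slot attributes the code above actually reads (name, required, priority, extraction_model) are relied on; the AskuraConfig and InformationSlot constructor arguments are assumptions here, since models.py (not shown in this view) defines the real fields:

from pydantic import BaseModel

from noesium.agents.askura_agent.models import AskuraConfig, InformationSlot
from noesium.agents.askura_agent.reflection import Reflection


class TravelPlan(BaseModel):
    destination: str  # required field: the slot stays incomplete until it is filled
    notes: str = ""   # defaulted field: ignored by _is_slot_complete


# Hypothetical constructor arguments; attribute names mirror reflection.py.
config = AskuraConfig(
    information_slots=[
        InformationSlot(name="travel_plan", required=True, priority=2, extraction_model=TravelPlan),
        InformationSlot(name="budget", required=True, priority=1, extraction_model=None),
    ]
)

reflection = Reflection(config)  # no llm_client: all methods take the heuristic fallback paths
# reflection.missing_slots(state) then returns required-but-incomplete slots,
# highest priority first, and evaluate_knowledge_gap(state) reports them.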
noesium/agents/askura_agent/summarizer.py
@@ -0,0 +1,30 @@
+from typing import List, Optional
+
+from noesium.core.llm import BaseLLMClient
+from noesium.core.utils.logging import get_logger
+
+from .models import AskuraConfig, AskuraState
+from .reflection import Reflection
+
+logger = get_logger(__name__)
+
+
+class Summarizer:
+    def __init__(
+        self, config: AskuraConfig, llm_client: Optional[BaseLLMClient] = None, reflection: Optional[Reflection] = None
+    ):
+        self.config = config
+        self.llm = llm_client
+        self.reflection = reflection
+
+    def is_ready_to_summarize(self, state: AskuraState) -> bool:
+        # Summarize only when all required slots are complete
+        return len(self.reflection.missing_slots(state)) == 0 and state.turns > 1
+
+    def summarize(self, state: AskuraState) -> str:
+        information_slots = state.extracted_info
+        summary_parts: List[str] = []
+        for slot in self.config.information_slots:
+            if information_slots.get(slot.name):
+                summary_parts.append(f"{slot.name}: {information_slots[slot.name]}")
+        return "Summary: " + " | ".join(summary_parts) if summary_parts else "Conversation completed."
noesium/agents/askura_agent/utils.py
@@ -0,0 +1,6 @@
+# Handle both enum objects and string values safely
+def get_enum_value(field_value):
+    """Safely extract value from enum or string."""
+    if hasattr(field_value, "value"):
+        return field_value.value
+    return str(field_value)
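
A quick illustration of the helper's two branches, using a stand-in enum (noesium passes ConversationStyle members here):

from enum import Enum

class Color(Enum):  # stand-in for an enum such as ConversationStyle
    RED = "red"

assert get_enum_value(Color.RED) == "red"  # enum branch: returns .value
assert get_enum_value("red") == "red"      # string branch: str() passthrough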
noesium/agents/deep_research/__init__.py
@@ -0,0 +1,13 @@
+"""
+DeepResearchAgent Module
+
+This module provides advanced research capabilities using LangGraph and LLM integration.
+"""
+
+from .agent import DeepResearchAgent
+from .state import ResearchState
+
+__all__ = [
+    "DeepResearchAgent",
+    "ResearchState",
+]
noesium/agents/deep_research/agent.py
@@ -0,0 +1,398 @@
+"""
+DeepResearchAgent implementation using LangGraph and LLM integration.
+Designed as an extensible base class.
+"""
+
+from datetime import datetime
+from typing import Any, Dict, List, Optional, Type
+
+try:
+    from langchain_core.messages import AIMessage, AnyMessage, HumanMessage
+    from langchain_core.runnables import RunnableConfig
+    from langgraph.graph import END, START, StateGraph
+    from langgraph.types import Send
+
+    LANGCHAIN_AVAILABLE = True
+except ImportError:
+    AIMessage = None
+    AnyMessage = None
+    HumanMessage = None
+    RunnableConfig = None
+    StateGraph = None
+    END = None
+    START = None
+    Send = None
+    LANGCHAIN_AVAILABLE = False
+
+from noesium.core.agent import BaseResearcher, ResearchOutput
+from noesium.core.llm import BaseLLMClient
+from noesium.core.utils.logging import get_logger
+from noesium.core.utils.typing import override
+
+from .prompts import answer_instructions, query_writer_instructions, reflection_instructions
+from .schemas import Reflection, SearchQueryList
+from .state import QueryState, ReflectionState, ResearchState, WebSearchState
+
+# Configure logging
+logger = get_logger(__name__)
+
+
+def get_current_date() -> str:
+    """Get the current date in a readable format."""
+    return datetime.now().strftime("%B %d, %Y")
+
+
+class DeepResearchAgent(BaseResearcher):
+    """
+    Advanced research agent using LangGraph and LLM integration.
+    """
+
+    def __init__(
+        self,
+        llm_provider: str = "openai",
+        query_generation_llm: BaseLLMClient | None = None,
+        reflection_llm: BaseLLMClient | None = None,
+        number_of_initial_queries: int = 3,
+        max_research_loops: int = 3,
+        query_generation_temperature: float = 0.7,
+        query_generation_max_tokens: int = 1000,
+        web_search_temperature: float = 0.2,
+        web_search_max_tokens: int = 10000,
+        web_search_citation_enabled: bool = True,
+        reflection_temperature: float = 0.5,
+        reflection_max_tokens: int = 1000,
+        answer_temperature: float = 0.3,
+        answer_max_tokens: int = 100000,
+        search_engines: Optional[List[str]] = None,
+        max_results_per_engine: int = 5,
+        search_timeout: int = 30,
+    ):
+        """
+        Initialize the DeepResearchAgent.
+        """
+        # Initialize base class
+        super().__init__(llm_provider=llm_provider)
+
+        # Override the LLM client with instructor support if needed.
+        # The base class already initializes self.llm, so we can reuse it.
+        self.llm_client = self.llm
+        self.query_generation_llm = query_generation_llm if query_generation_llm else self.llm
+        self.reflection_llm = reflection_llm if reflection_llm else self.llm
+        self.number_of_initial_queries = number_of_initial_queries
+        self.max_research_loops = max_research_loops
+        self.query_generation_temperature = query_generation_temperature
+        self.query_generation_max_tokens = query_generation_max_tokens
+        self.web_search_temperature = web_search_temperature
+        self.web_search_max_tokens = web_search_max_tokens
+        self.web_search_citation_enabled = web_search_citation_enabled
+        self.reflection_temperature = reflection_temperature
+        self.reflection_max_tokens = reflection_max_tokens
+        self.answer_temperature = answer_temperature
+        self.answer_max_tokens = answer_max_tokens
+        self.search_engines = search_engines or ["tavily", "duckduckgo"]
+        self.max_results_per_engine = max_results_per_engine
+        self.search_timeout = search_timeout
+
+        # Load prompts (can be overridden by subclasses)
+        self.prompts = self.get_prompts()
+
+        # Create the research graph
+        self.graph = self._build_graph()
+
+    @override
+    def get_state_class(self) -> Type:
+        """
+        Get the state class for this researcher.
+        Override this method in subclasses for specialized state.
+
+        Returns:
+            The state class to use for the research workflow
+        """
+        return ResearchState
+
+    @override
+    def _build_graph(self) -> StateGraph:
+        """Create the LangGraph research workflow."""
+        state_class = self.get_state_class()
+        workflow = StateGraph(state_class)
+
+        # Add nodes
+        workflow.add_node("generate_query", self._generate_query_node)
+        workflow.add_node("web_research", self._research_node)
+        workflow.add_node("reflection", self._reflection_node)
+        workflow.add_node("finalize_answer", self._finalize_answer_node)
+
+        # Set entry point
+        workflow.add_edge(START, "generate_query")
+
+        # Add conditional edges
+        workflow.add_conditional_edges("generate_query", self._continue_to_web_research, ["web_research"])
+        workflow.add_edge("web_research", "reflection")
+        workflow.add_conditional_edges("reflection", self._evaluate_research, ["web_research", "finalize_answer"])
+        workflow.add_edge("finalize_answer", END)
+
+        return workflow.compile()
+
+    @override
+    async def research(
+        self,
+        user_message: str,
+        context: Optional[Dict[str, Any]] = None,
+        config: Optional[RunnableConfig] = None,
+    ) -> ResearchOutput:
+        """
+        Research a topic and return structured results.
+
+        Args:
+            user_message: User's research request
+            context: Additional context for research
+            config: Optional RunnableConfig for runtime configuration
+
+        Returns:
+            ResearchOutput with content and sources
+        """
+        try:
+            # Initialize state (can be customized by subclasses)
+            initial_state = {
+                "messages": [HumanMessage(content=user_message)],
+                "context": context,
+                "search_query": [],
+                "web_research_result": [],
+                "sources_gathered": [],
+                "initial_search_query_count": self.number_of_initial_queries,
+                "max_research_loops": self.max_research_loops,
+                "research_loop_count": 0,
+            }
+            # Run the research graph with optional runtime configuration
+            if config:
+                result = await self.graph.ainvoke(initial_state, config=config)
+            else:
+                result = await self.graph.ainvoke(initial_state)
+
+            # Extract the final AI message
+            final_message = None
+            for message in reversed(result["messages"]):
+                if isinstance(message, AIMessage):
+                    final_message = message.content
+                    break
+
+            return ResearchOutput(
+                content=final_message or "Research completed",
+                sources=result.get("sources_gathered", []),
+                summary="Research completed for the requested topic",
+                timestamp=datetime.now(),
+            )
+
+        except Exception as e:
+            logger.error(f"Error in research: {e}")
+            raise RuntimeError(f"Research failed: {str(e)}")
+
+    def get_prompts(self) -> Dict[str, str]:
+        """
+        Get prompts for the researcher.
+        Override this method in subclasses for specialized prompts.
+
+        Returns:
+            Dictionary containing all prompts for the research workflow
+        """
+        return {
+            "query_writer": query_writer_instructions,
+            "reflection": reflection_instructions,
+            "answer": answer_instructions,
+        }
+
+    def _preprocess_research_topic(self, messages: List[AnyMessage]) -> str:
+        """
+        Get the research topic from the messages.
+
+        Args:
+            messages: List of messages from the conversation
+
+        Returns:
+            Formatted research topic string
+        """
+        # If the request has a history, combine the messages into a single string
+        if len(messages) == 1:
+            research_topic = messages[-1].content
+        else:
+            research_topic = ""
+            for message in messages:
+                if isinstance(message, HumanMessage):
+                    research_topic += f"User: {message.content}\n"
+                elif isinstance(message, AIMessage):
+                    research_topic += f"Assistant: {message.content}\n"
+        return research_topic
+
+    def _generate_query_node(self, state: ResearchState, config: RunnableConfig) -> QueryState:
+        """Generate search queries based on the user request using instructor structured output."""
+        # Get the research topic from messages (can be customized by subclasses)
+        research_topic = self._preprocess_research_topic(state["messages"])
+
+        # Format the prompt
+        current_date = get_current_date()
+        formatted_prompt = self.prompts["query_writer"].format(
+            current_date=current_date,
+            research_topic=research_topic,
+            number_queries=state.get("initial_search_query_count", self.number_of_initial_queries),
+        )
+
+        try:
+            # Generate queries using instructor with structured output
+            result: SearchQueryList = self.llm_client.structured_completion(
+                messages=[{"role": "user", "content": formatted_prompt}],
+                response_model=SearchQueryList,
+                temperature=self.query_generation_temperature,
+                max_tokens=self.query_generation_max_tokens,
+            )
+
+            logger.info(f"Generated {len(result.query)} queries: {result.query}, rationale: {result.rationale}")
+
+            # Create the query list with rationale
+            query_list = [{"query": q, "rationale": result.rationale} for q in result.query]
+            return {"query_list": query_list}
+
+        except Exception as e:
+            raise RuntimeError(f"Error in structured query generation: {e}")
+
+    def _continue_to_web_research(self, state: QueryState) -> List[Send]:
+        """Send queries to web research nodes."""
+        return [
+            Send(
+                "web_research",
+                WebSearchState(search_query=item["query"], id=str(idx)),
+            )
+            for idx, item in enumerate(state["query_list"])
+        ]
+
+    async def _research_node(self, state: WebSearchState, config: RunnableConfig) -> ResearchState:
+        """Perform web research using the configured search engines (via WizSearch)."""
+        search_query = state["search_query"]
+
+        try:
+            from wizsearch import WizSearch, WizSearchConfig
+
+            omnisearch = WizSearch(
+                config=WizSearchConfig(
+                    enabled_engines=self.search_engines,
+                    max_results_per_engine=self.max_results_per_engine,
+                    timeout=self.search_timeout,
+                )
+            )
+
+            result = await omnisearch.search(query=search_query)
+
+            # Convert SearchResult objects to dictionaries for compatibility
+            sources_gathered = []
+            for source in result.sources:
+                sources_gathered.append(source.model_dump())
+
+            # Generate a research summary based on the actual search results
+            if sources_gathered:
+                # Create a summary from the actual content found
+                content_summary = "\n\n".join(
+                    [f"Source: {s['title']}\n{s['content']}" for s in sources_gathered[:5]]  # Use top 5 results
+                )
+
+                summary_prompt = f"""
+                Based on the following search results for "{search_query}", provide a concise and accurate research summary:
+
+                {content_summary}
+
+                Please provide a well-structured summary that:
+                1. Addresses the search query directly
+                2. Synthesizes information from multiple sources
+                3. Highlights key findings and insights
+                4. Maintains factual accuracy based on the provided content
+                """
+
+                search_summary = self.llm_client.completion(
+                    messages=[{"role": "user", "content": summary_prompt}],
+                    temperature=self.web_search_temperature,
+                    max_tokens=self.web_search_max_tokens,
+                )
+            else:
+                search_summary = f"No relevant sources found for: {search_query}"
+
+            return ResearchState(
+                sources_gathered=sources_gathered,
+                search_query=[search_query],
+                search_summaries=[search_summary],
+            )
+
+        except Exception as e:
+            logger.error(f"Error in web search: {e}")
+            raise RuntimeError(f"Web search failed: {str(e)}")
+
+    def _reflection_node(self, state: ResearchState, config: RunnableConfig) -> ReflectionState:
+        """Reflect on research results and identify gaps using instructor structured output."""
+        # Increment the research loop count
+        research_loop_count = state.get("research_loop_count", 0) + 1
+
+        # Format the prompt
+        current_date = get_current_date()
+        research_topic = self._preprocess_research_topic(state["messages"])
+        summaries = "\n\n---\n\n".join(state.get("search_summaries", []))
+
+        formatted_prompt = self.prompts["reflection"].format(
+            current_date=current_date,
+            research_topic=research_topic,
+            summaries=summaries,
+        )
+
+        try:
+            # Use instructor for reflection and evaluation with structured output
+            result: Reflection = self.llm_client.structured_completion(
+                messages=[{"role": "user", "content": formatted_prompt}],
+                response_model=Reflection,
+                temperature=self.reflection_temperature,
+                max_tokens=self.reflection_max_tokens,
+            )
+
+            return ReflectionState(
+                is_sufficient=result.is_sufficient,
+                knowledge_gap=result.knowledge_gap,
+                follow_up_queries=result.follow_up_queries,
+                research_loop_count=research_loop_count,
+                number_of_ran_queries=len(state.get("search_query", [])),
+            )
+
+        except Exception as e:
+            raise RuntimeError(f"Error in structured reflection: {e}")
+
+    def _evaluate_research(self, state: ReflectionState, config: RunnableConfig):
+        """Evaluate the research state and decide the next step."""
+
+        if state["is_sufficient"] or state["research_loop_count"] >= self.max_research_loops:
+            return "finalize_answer"
+        else:
+            return [
+                Send(
+                    "web_research",
+                    WebSearchState(search_query=follow_up_query, id=str(state["number_of_ran_queries"] + int(idx))),
+                )
+                for idx, follow_up_query in enumerate(state["follow_up_queries"])
+            ]
+
+    def _finalize_answer_node(self, state: ResearchState, config: RunnableConfig):
+        """Finalize the research answer with advanced formatting and citations."""
+        current_date = get_current_date()
+        research_topic = self._preprocess_research_topic(state["messages"])
+        summaries = "\n\n---\n\n".join(state.get("search_summaries", []))
+
+        formatted_prompt = self.prompts["answer"].format(
+            current_date=current_date,
+            research_topic=research_topic,
+            summaries=summaries,
+        )
+
+        # Generate the final answer using the LLM
+        final_answer = self.llm_client.completion(
+            messages=[{"role": "user", "content": formatted_prompt}],
+            temperature=self.answer_temperature,
+            max_tokens=self.answer_max_tokens,
+        )
+
+        return {
+            "messages": [AIMessage(content=final_answer)],
+            "sources_gathered": state.get("sources_gathered", []),
+        }
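
Putting the pieces above together, a minimal end-to-end invocation might look like the sketch below. The constructor defaults come from __init__ as shown; the query string is illustrative, and attribute access on ResearchOutput (content, sources) follows the keyword arguments used in research():

import asyncio

from noesium.agents.deep_research import DeepResearchAgent


async def main() -> None:
    agent = DeepResearchAgent(
        llm_provider="openai",          # default provider shown in __init__
        number_of_initial_queries=3,
        max_research_loops=2,
        search_engines=["duckduckgo"],  # forwarded to WizSearchConfig.enabled_engines
    )
    output = await agent.research("state of open-source deep research agents")
    print(output.content)               # final AIMessage content from finalize_answer
    print(f"{len(output.sources)} sources gathered")


asyncio.run(main())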