alita-sdk 0.3.176__py3-none-any.whl → 0.3.177__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- alita_sdk/community/__init__.py +7 -17
- alita_sdk/tools/carrier/api_wrapper.py +6 -0
- alita_sdk/tools/carrier/backend_tests_tool.py +308 -7
- alita_sdk/tools/carrier/carrier_sdk.py +18 -0
- alita_sdk/tools/carrier/tools.py +2 -1
- {alita_sdk-0.3.176.dist-info → alita_sdk-0.3.177.dist-info}/METADATA +1 -2
- {alita_sdk-0.3.176.dist-info → alita_sdk-0.3.177.dist-info}/RECORD +10 -41
- alita_sdk/community/browseruse/__init__.py +0 -73
- alita_sdk/community/browseruse/api_wrapper.py +0 -288
- alita_sdk/community/deep_researcher/__init__.py +0 -70
- alita_sdk/community/deep_researcher/agents/__init__.py +0 -1
- alita_sdk/community/deep_researcher/agents/baseclass.py +0 -182
- alita_sdk/community/deep_researcher/agents/knowledge_gap_agent.py +0 -74
- alita_sdk/community/deep_researcher/agents/long_writer_agent.py +0 -251
- alita_sdk/community/deep_researcher/agents/planner_agent.py +0 -124
- alita_sdk/community/deep_researcher/agents/proofreader_agent.py +0 -80
- alita_sdk/community/deep_researcher/agents/thinking_agent.py +0 -64
- alita_sdk/community/deep_researcher/agents/tool_agents/__init__.py +0 -20
- alita_sdk/community/deep_researcher/agents/tool_agents/crawl_agent.py +0 -87
- alita_sdk/community/deep_researcher/agents/tool_agents/search_agent.py +0 -96
- alita_sdk/community/deep_researcher/agents/tool_selector_agent.py +0 -83
- alita_sdk/community/deep_researcher/agents/utils/__init__.py +0 -0
- alita_sdk/community/deep_researcher/agents/utils/parse_output.py +0 -148
- alita_sdk/community/deep_researcher/agents/writer_agent.py +0 -63
- alita_sdk/community/deep_researcher/api_wrapper.py +0 -116
- alita_sdk/community/deep_researcher/deep_research.py +0 -185
- alita_sdk/community/deep_researcher/examples/deep_example.py +0 -30
- alita_sdk/community/deep_researcher/examples/iterative_example.py +0 -34
- alita_sdk/community/deep_researcher/examples/report_plan_example.py +0 -27
- alita_sdk/community/deep_researcher/iterative_research.py +0 -419
- alita_sdk/community/deep_researcher/llm_config.py +0 -87
- alita_sdk/community/deep_researcher/main.py +0 -67
- alita_sdk/community/deep_researcher/tools/__init__.py +0 -2
- alita_sdk/community/deep_researcher/tools/crawl_website.py +0 -109
- alita_sdk/community/deep_researcher/tools/web_search.py +0 -294
- alita_sdk/community/deep_researcher/utils/__init__.py +0 -0
- alita_sdk/community/deep_researcher/utils/md_to_pdf.py +0 -8
- alita_sdk/community/deep_researcher/utils/os.py +0 -21
- {alita_sdk-0.3.176.dist-info → alita_sdk-0.3.177.dist-info}/WHEEL +0 -0
- {alita_sdk-0.3.176.dist-info → alita_sdk-0.3.177.dist-info}/licenses/LICENSE +0 -0
- {alita_sdk-0.3.176.dist-info → alita_sdk-0.3.177.dist-info}/top_level.txt +0 -0
alita_sdk/community/deep_researcher/iterative_research.py
@@ -1,419 +0,0 @@
-from __future__ import annotations
-import asyncio
-import time
-from typing import Dict, List, Optional, Any
-from agents import custom_span, gen_trace_id, trace
-from .agents.baseclass import ResearchRunner
-from .agents.writer_agent import init_writer_agent
-from .agents.knowledge_gap_agent import KnowledgeGapOutput, init_knowledge_gap_agent
-from .agents.tool_selector_agent import AgentTask, AgentSelectionPlan, init_tool_selector_agent
-from .agents.thinking_agent import init_thinking_agent
-from .agents.tool_agents import init_tool_agents, ToolAgentOutput
-from pydantic import BaseModel, Field
-from .llm_config import LLMConfig, create_default_config
-
-
-class IterationData(BaseModel):
-    """Data for a single iteration of the research loop."""
-    gap: str = Field(description="The gap addressed in the iteration", default_factory=list)
-    tool_calls: List[str] = Field(description="The tool calls made", default_factory=list)
-    findings: List[str] = Field(description="The findings collected from tool calls", default_factory=list)
-    thought: List[str] = Field(description="The thinking done to reflect on the success of the iteration and next steps", default_factory=list)
-
-
-class Conversation(BaseModel):
-    """A conversation between the user and the iterative researcher."""
-    history: List[IterationData] = Field(description="The data for each iteration of the research loop", default_factory=list)
-
-    def add_iteration(self, iteration_data: Optional[IterationData] = None):
-        if iteration_data is None:
-            iteration_data = IterationData()
-        self.history.append(iteration_data)
-
-    def set_latest_gap(self, gap: str):
-        self.history[-1].gap = gap
-
-    def set_latest_tool_calls(self, tool_calls: List[str]):
-        self.history[-1].tool_calls = tool_calls
-
-    def set_latest_findings(self, findings: List[str]):
-        self.history[-1].findings = findings
-
-    def set_latest_thought(self, thought: str):
-        self.history[-1].thought = thought
-
-    def get_latest_gap(self) -> str:
-        return self.history[-1].gap
-
-    def get_latest_tool_calls(self) -> List[str]:
-        return self.history[-1].tool_calls
-
-    def get_latest_findings(self) -> List[str]:
-        return self.history[-1].findings
-
-    def get_latest_thought(self) -> str:
-        return self.history[-1].thought
-
-    def get_all_findings(self) -> List[str]:
-        return [finding for iteration_data in self.history for finding in iteration_data.findings]
-
-    def compile_conversation_history(self) -> str:
-        """Compile the conversation history into a string."""
-        conversation = ""
-        for iteration_num, iteration_data in enumerate(self.history):
-            conversation += f"[ITERATION {iteration_num + 1}]\n\n"
-            if iteration_data.thought:
-                conversation += f"{self.get_thought_string(iteration_num)}\n\n"
-            if iteration_data.gap:
-                conversation += f"{self.get_task_string(iteration_num)}\n\n"
-            if iteration_data.tool_calls:
-                conversation += f"{self.get_action_string(iteration_num)}\n\n"
-            if iteration_data.findings:
-                conversation += f"{self.get_findings_string(iteration_num)}\n\n"
-
-        return conversation
-
-    def get_task_string(self, iteration_num: int) -> str:
-        """Get the task for the current iteration."""
-        if self.history[iteration_num].gap:
-            return f"<task>\nAddress this knowledge gap: {self.history[iteration_num].gap}\n</task>"
-        return ""
-
-    def get_action_string(self, iteration_num: int) -> str:
-        """Get the action for the current iteration."""
-        if self.history[iteration_num].tool_calls:
-            joined_calls = '\n'.join(self.history[iteration_num].tool_calls)
-            return (
-                "<action>\nCalling the following tools to address the knowledge gap:\n"
-                f"{joined_calls}\n</action>"
-            )
-        return ""
-
-    def get_findings_string(self, iteration_num: int) -> str:
-        """Get the findings for the current iteration."""
-        if self.history[iteration_num].findings:
-            joined_findings = '\n\n'.join(self.history[iteration_num].findings)
-            return f"<findings>\n{joined_findings}\n</findings>"
-        return ""
-
-    def get_thought_string(self, iteration_num: int) -> str:
-        """Get the thought for the current iteration."""
-        if self.history[iteration_num].thought:
-            return f"<thought>\n{self.history[iteration_num].thought}\n</thought>"
-        return ""
-
-    def latest_task_string(self) -> str:
-        """Get the latest task."""
-        return self.get_task_string(len(self.history) - 1)
-
-    def latest_action_string(self) -> str:
-        """Get the latest action."""
-        return self.get_action_string(len(self.history) - 1)
-
-    def latest_findings_string(self) -> str:
-        """Get the latest findings."""
-        return self.get_findings_string(len(self.history) - 1)
-
-    def latest_thought_string(self) -> str:
-        """Get the latest thought."""
-        return self.get_thought_string(len(self.history) - 1)
-
-
-class IterativeResearcher:
-    """Manager for the iterative research workflow that conducts research on a topic or subtopic by running a continuous research loop."""
-
-    def __init__(
-        self,
-        max_iterations: int = 5,
-        max_time_minutes: int = 10,
-        verbose: bool = True,
-        tracing: bool = False,
-        config: Optional[LLMConfig] = None,
-        llm: Optional[Any] = None,
-        alita: Optional[Any] = None
-    ):
-        self.max_iterations: int = max_iterations
-        self.max_time_minutes: int = max_time_minutes
-        self.start_time: float = None
-        self.iteration: int = 0
-        self.conversation: Conversation = Conversation()
-        self.should_continue: bool = True
-        self.verbose: bool = verbose
-        self.tracing: bool = tracing
-        self.alita = alita
-
-        # Initialize config with langchain LLM if provided
-        if llm is not None:
-            self.config = create_default_config(langchain_llm=llm)
-        elif config is not None:
-            self.config = config
-        else:
-            self.config = create_default_config()
-
-        # Initialize all the agents
-        self.knowledge_gap_agent = init_knowledge_gap_agent(self.config)
-        self.tool_selector_agent = init_tool_selector_agent(self.config)
-        self.thinking_agent = init_thinking_agent(self.config)
-        self.writer_agent = init_writer_agent(self.config)
-        self.tool_agents = init_tool_agents(self.config)
-
-    async def run(
-        self,
-        query: str,
-        output_length: str = "", # A text description of the desired output length, can be left blank
-        output_instructions: str = "", # Instructions for the final report (e.g. don't include any headings, just a couple of paragraphs of text)
-        background_context: str = "",
-    ) -> str:
-        """Run the deep research workflow for a given query."""
-        self.start_time = time.time()
-
-        if self.tracing:
-            trace_id = gen_trace_id()
-            workflow_trace = trace("iterative_researcher", trace_id=trace_id)
-            print(f"View trace: https://platform.openai.com/traces/trace?trace_id={trace_id}")
-            workflow_trace.start(mark_as_current=True)
-
-        self._log_message("=== Starting Iterative Research Workflow ===")
-
-        # Iterative research loop
-        while self.should_continue and self._check_constraints():
-            self.iteration += 1
-            self._log_message(f"\n=== Starting Iteration {self.iteration} ===")
-
-            # Set up blank IterationData for this iteration
-            self.conversation.add_iteration()
-
-            # 1. Generate observations
-            observations: str = await self._generate_observations(query, background_context=background_context)
-
-            # 2. Evaluate current gaps in the research
-            evaluation: KnowledgeGapOutput = await self._evaluate_gaps(query, background_context=background_context)
-
-            # Check if we should continue or break the loop
-            if not evaluation.research_complete:
-                next_gap = evaluation.outstanding_gaps[0]
-
-                # 3. Select agents to address knowledge gap
-                selection_plan: AgentSelectionPlan = await self._select_agents(next_gap, query, background_context=background_context)
-
-                # 4. Run the selected agents to gather information
-                results: Dict[str, ToolAgentOutput] = await self._execute_tools(selection_plan.tasks)
-            else:
-                self.should_continue = False
-                self._log_message("=== IterativeResearcher Marked As Complete - Finalizing Output ===")
-
-        # Create final report
-        report = await self._create_final_report(query, length=output_length, instructions=output_instructions)
-
-        elapsed_time = time.time() - self.start_time
-        self._log_message(f"IterativeResearcher completed in {int(elapsed_time // 60)} minutes and {int(elapsed_time % 60)} seconds after {self.iteration} iterations.")
-
-        if self.tracing:
-            workflow_trace.finish(reset_current=True)
-
-        return report
-
-    def _check_constraints(self) -> bool:
-        """Check if we've exceeded our constraints (max iterations or time)."""
-        if self.iteration >= self.max_iterations:
-            self._log_message("\n=== Ending Research Loop ===")
-            self._log_message(f"Reached maximum iterations ({self.max_iterations})")
-            return False
-
-        elapsed_minutes = (time.time() - self.start_time) / 60
-        if elapsed_minutes >= self.max_time_minutes:
-            self._log_message("\n=== Ending Research Loop ===")
-            self._log_message(f"Reached maximum time ({self.max_time_minutes} minutes)")
-            return False
-
-        return True
-
-    async def _evaluate_gaps(
-        self,
-        query: str,
-        background_context: str = ""
-    ) -> KnowledgeGapOutput:
-        """Evaluate the current state of research and identify knowledge gaps."""
-
-        background = f"BACKGROUND CONTEXT:\n{background_context}" if background_context else ""
-
-        input_str = f"""
-        Current Iteration Number: {self.iteration}
-        Time Elapsed: {(time.time() - self.start_time) / 60:.2f} minutes of maximum {self.max_time_minutes} minutes
-
-        ORIGINAL QUERY:
-        {query}
-
-        {background}
-
-        HISTORY OF ACTIONS, FINDINGS AND THOUGHTS:
-        {self.conversation.compile_conversation_history() or "No previous actions, findings or thoughts available."}
-        """
-
-        result = await ResearchRunner.run(
-            self.knowledge_gap_agent,
-            input_str,
-        )
-
-        evaluation = result.final_output_as(KnowledgeGapOutput)
-
-        if not evaluation.research_complete:
-            next_gap = evaluation.outstanding_gaps[0]
-            self.conversation.set_latest_gap(next_gap)
-            self._log_message(self.conversation.latest_task_string())
-
-        return evaluation
-
-    async def _select_agents(
-        self,
-        gap: str,
-        query: str,
-        background_context: str = ""
-    ) -> AgentSelectionPlan:
-        """Select agents to address the identified knowledge gap."""
-
-        background = f"BACKGROUND CONTEXT:\n{background_context}" if background_context else ""
-
-        input_str = f"""
-        ORIGINAL QUERY:
-        {query}
-
-        KNOWLEDGE GAP TO ADDRESS:
-        {gap}
-
-        {background}
-
-        HISTORY OF ACTIONS, FINDINGS AND THOUGHTS:
-        {self.conversation.compile_conversation_history() or "No previous actions, findings or thoughts available."}
-        """
-
-        result = await ResearchRunner.run(
-            self.tool_selector_agent,
-            input_str,
-        )
-
-        selection_plan = result.final_output_as(AgentSelectionPlan)
-
-        # Add the tool calls to the conversation
-        self.conversation.set_latest_tool_calls([
-            f"[Agent] {task.agent} [Query] {task.query} [Entity] {task.entity_website if task.entity_website else 'null'}" for task in selection_plan.tasks
-        ])
-        self._log_message(self.conversation.latest_action_string())
-
-        return selection_plan
-
-    async def _execute_tools(self, tasks: List[AgentTask]) -> Dict[str, ToolAgentOutput]:
-        """Execute the selected tools concurrently to gather information."""
-        with custom_span("Execute Tool Agents"):
-            # Create a task for each agent
-            async_tasks = []
-            for task in tasks:
-                async_tasks.append(self._run_agent_task(task))
-
-            # Run all tasks concurrently
-            num_completed = 0
-            results = {}
-            for future in asyncio.as_completed(async_tasks):
-                gap, agent_name, result = await future
-                results[f"{agent_name}_{gap}"] = result
-                num_completed += 1
-                self._log_message(f"<processing>\nTool execution progress: {num_completed}/{len(async_tasks)}\n</processing>")
-
-            # Add findings from the tool outputs to the conversation
-            findings = []
-            for tool_output in results.values():
-                findings.append(tool_output.output)
-            self.conversation.set_latest_findings(findings)
-
-            return results
-
-    async def _run_agent_task(self, task: AgentTask) -> tuple[str, str, ToolAgentOutput]:
-        """Run a single agent task and return the result."""
-        try:
-            agent_name = task.agent
-            agent = self.tool_agents.get(agent_name)
-            if agent:
-                result = await ResearchRunner.run(
-                    agent,
-                    task.model_dump_json(),
-                )
-                # Extract ToolAgentOutput from RunResult
-                output = result.final_output_as(ToolAgentOutput)
-            else:
-                output = ToolAgentOutput(
-                    output=f"No implementation found for agent {agent_name}",
-                    sources=[]
-                )
-
-            return task.gap, agent_name, output
-        except Exception as e:
-            error_output = ToolAgentOutput(
-                output=f"Error executing {task.agent} for gap '{task.gap}': {str(e)}",
-                sources=[]
-            )
-            return task.gap, task.agent, error_output
-
-    async def _generate_observations(self, query: str, background_context: str = "") -> str:
-        """Generate observations from the current state of the research."""
-
-        background = f"BACKGROUND CONTEXT:\n{background_context}" if background_context else ""
-
-        input_str = f"""
-        You are starting iteration {self.iteration} of your research process.
-
-        ORIGINAL QUERY:
-        {query}
-
-        {background}
-
-        HISTORY OF ACTIONS, FINDINGS AND THOUGHTS:
-        {self.conversation.compile_conversation_history() or "No previous actions, findings or thoughts available."}
-        """
-        result = await ResearchRunner.run(
-            self.thinking_agent,
-            input_str,
-        )
-
-        # Add the observations to the conversation
-        observations = result.final_output
-        self.conversation.set_latest_thought(observations)
-        self._log_message(self.conversation.latest_thought_string())
-        return observations
-
-    async def _create_final_report(
-        self,
-        query: str,
-        length: str = "",
-        instructions: str = ""
-    ) -> str:
-        """Create the final response from the completed draft."""
-        self._log_message("=== Drafting Final Response ===")
-
-        length_str = f"* The full response should be approximately {length}.\n" if length else ""
-        instructions_str = f"* {instructions}" if instructions else ""
-        guidelines_str = ("\n\nGUIDELINES:\n" + length_str + instructions_str).strip('\n') if length or instructions else ""
-
-        all_findings = '\n\n'.join(self.conversation.get_all_findings()) or "No findings available yet."
-
-        input_str = f"""
-        Provide a response based on the query and findings below with as much detail as possible. {guidelines_str}
-
-        QUERY: {query}
-
-        FINDINGS:
-        {all_findings}
-        """
-
-        result = await ResearchRunner.run(
-            self.writer_agent,
-            input_str,
-        )
-
-        self._log_message("Final response from IterativeResearcher created successfully")
-
-        return result.final_output
-
-    def _log_message(self, message: str) -> None:
-        """Log a message if verbose is True"""
-        if self.verbose:
-            print(message)
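For reference, here is a minimal sketch of how the removed `IterativeResearcher` was driven, valid only against alita-sdk 0.3.176 since the `deep_researcher` package no longer exists in 0.3.177; `ChatOpenAI` is an assumed stand-in for any LangChain chat model.

```python
# Sketch, not part of the package: exercises the constructor and run()
# signatures shown in the deleted file above. Requires alita-sdk==0.3.176.
import asyncio
from langchain_openai import ChatOpenAI  # assumption: any LangChain chat model works here
from alita_sdk.community.deep_researcher.iterative_research import IterativeResearcher

researcher = IterativeResearcher(
    max_iterations=3,
    max_time_minutes=5,
    verbose=True,
    llm=ChatOpenAI(model="gpt-4o-mini"),  # wrapped internally via create_default_config()
)

report = asyncio.run(researcher.run(
    "How do solid-state batteries differ from lithium-ion?",
    output_length="two paragraphs",
))
print(report)
```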
alita_sdk/community/deep_researcher/llm_config.py
@@ -1,87 +0,0 @@
-from typing import Optional, Any
-from dotenv import load_dotenv
-from .utils.os import get_env_with_prefix
-
-load_dotenv(override=True)
-
-# Only keeping the necessary environment variable for search provider
-SEARCH_PROVIDER = get_env_with_prefix("SEARCH_PROVIDER", "serper")
-
-class LLMConfig:
-    def __init__(
-        self,
-        search_provider: str,
-        langchain_llm: Any,
-    ):
-        self.search_provider = search_provider
-        self.reasoning_model = LangchainModelAdapter(langchain_llm)
-        self.main_model = LangchainModelAdapter(langchain_llm)
-        self.fast_model = LangchainModelAdapter(langchain_llm)
-
-
-def create_default_config(langchain_llm: Any) -> LLMConfig:
-    """Create a default config using a Langchain LLM"""
-    return LLMConfig(
-        search_provider=SEARCH_PROVIDER,
-        langchain_llm=langchain_llm
-    )
-
-
-class LangchainModelAdapter:
-    """Adapter class to make Langchain LLMs work with the DeepResearcher framework"""
-
-    def __init__(self, langchain_llm):
-        self.langchain_llm = langchain_llm
-        self._client = type('DummyClient', (), {'_base_url': 'langchain'})()
-
-    async def agenerate_response(self, messages, **kwargs):
-        """Adapter method to match the expected interface"""
-        from langchain_core.messages import HumanMessage, AIMessage, SystemMessage
-
-        # Convert message format to Langchain format
-        lc_messages = []
-        for message in messages:
-            role = message.get('role', '')
-            content = message.get('content', '')
-
-            if role == 'system':
-                lc_messages.append(SystemMessage(content=content))
-            elif role == 'user':
-                lc_messages.append(HumanMessage(content=content))
-            elif role == 'assistant':
-                lc_messages.append(AIMessage(content=content))
-
-        # Use langchain LLM to generate response
-        response = await self.langchain_llm.ainvoke(lc_messages)
-
-        # Return in format compatible with the existing code
-        return type('Response', (), {
-            'choices': [
-                type('Choice', (), {
-                    'message': type('Message', (), {
-                        'content': response.content,
-                        'role': 'assistant'
-                    })
-                })
-            ]
-        })
-
-    async def agenerate_text(self, prompt, **kwargs):
-        """Simple text completion adapter method"""
-        from langchain_core.messages import HumanMessage
-
-        response = await self.langchain_llm.ainvoke([HumanMessage(content=prompt)])
-
-        # Return in format compatible with the existing code
-        return type('Response', (), {
-            'choices': [
-                type('Choice', (), {
-                    'text': response.content
-                })
-            ]
-        })
-
-    def supports_json_mode(self):
-        """Check if the model supports JSON mode"""
-        # Most Langchain LLMs support structured output, so return True by default
-        return True
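The adapter fakes an OpenAI-style response object with classes built on the fly via `type()` rather than defining dataclasses. A self-contained sketch of that same pattern, runnable with no dependencies:

```python
# Standalone illustration of the type(...) response-shaping trick used by
# LangchainModelAdapter above; nothing here depends on alita-sdk or LangChain.
def shape_response(content: str):
    # type(name, bases, namespace) creates a throwaway class whose class
    # attributes mimic the OpenAI response shape for read-only access.
    return type('Response', (), {
        'choices': [
            type('Choice', (), {
                'message': type('Message', (), {
                    'content': content,
                    'role': 'assistant',
                })
            })
        ]
    })

resp = shape_response("hello")
assert resp.choices[0].message.content == "hello"  # reads like an OpenAI response
```

Note that the adapter returns the class itself rather than an instance; attribute reads work either way, and reading attributes is all the calling code does.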
alita_sdk/community/deep_researcher/main.py
@@ -1,67 +0,0 @@
-import asyncio
-import argparse
-from .iterative_research import IterativeResearcher
-from .deep_research import DeepResearcher
-from typing import Literal
-from dotenv import load_dotenv
-
-load_dotenv(override=True)
-
-
-async def main() -> None:
-    parser = argparse.ArgumentParser(description="Deep Research Assistant")
-    parser.add_argument("--query", type=str, help="Research query")
-    parser.add_argument("--model", type=str, choices=["deep", "simple"],
-                        help="Mode of research (deep or simple)", default="deep")
-    parser.add_argument("--max-iterations", type=int, default=5,
-                        help="Maximum number of iterations for deep research")
-    parser.add_argument("--max-time", type=int, default=10,
-                        help="Maximum time in minutes for deep research")
-    parser.add_argument("--output-length", type=str, default="5 pages",
-                        help="Desired output length for the report")
-    parser.add_argument("--output-instructions", type=str, default="",
-                        help="Additional instructions for the report")
-    parser.add_argument("--verbose", action="store_true",
-                        help="Print status updates to the console")
-    parser.add_argument("--tracing", action="store_true",
-                        help="Enable tracing for the research (only valid for OpenAI models)")
-
-    args = parser.parse_args()
-
-    # If no query is provided via command line, prompt the user
-    query = args.query if args.query else input("What would you like to research? ")
-
-    print(f"Starting deep research on: {query}")
-    print(f"Max iterations: {args.max_iterations}, Max time: {args.max_time} minutes")
-
-    if args.model == "deep":
-        manager = DeepResearcher(
-            max_iterations=args.max_iterations,
-            max_time_minutes=args.max_time,
-            verbose=args.verbose,
-            tracing=args.tracing
-        )
-        report = await manager.run(query)
-    else:
-        manager = IterativeResearcher(
-            max_iterations=args.max_iterations,
-            max_time_minutes=args.max_time,
-            verbose=args.verbose,
-            tracing=args.tracing
-        )
-        report = await manager.run(
-            query,
-            output_length=args.output_length,
-            output_instructions=args.output_instructions
-        )
-
-    print("\n=== Final Report ===")
-    print(report)
-
-# Command line entry point
-def cli_entry():
-    """Entry point for the command-line interface."""
-    asyncio.run(main())
-
-if __name__ == "__main__":
-    cli_entry()
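The removed main.py doubled as a console entry point. A hedged sketch of invoking it in-process against 0.3.176 follows; the flag values are illustrative, and note that in this snapshot `create_default_config()` takes a required LangChain LLM, so the CLI path as shipped appears to presume extra wiring elsewhere.

```python
# Sketch: drive the removed CLI programmatically (alita-sdk==0.3.176 only).
# argparse reads sys.argv, so patching it is equivalent to running:
#   python -m alita_sdk.community.deep_researcher.main --query "..." --model simple
# Caveat: create_default_config() in the deleted llm_config.py requires a
# langchain_llm argument, so this path may still need an LLM supplied.
import sys
from alita_sdk.community.deep_researcher.main import cli_entry

sys.argv = [
    "deep_researcher",
    "--query", "state of solid-state battery research",
    "--model", "simple",
    "--max-iterations", "3",
    "--verbose",
]
cli_entry()
```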
alita_sdk/community/deep_researcher/tools/crawl_website.py
@@ -1,109 +0,0 @@
-from typing import List, Set, Union
-from urllib.parse import urlparse, urljoin
-from bs4 import BeautifulSoup
-import aiohttp
-from .web_search import scrape_urls, ssl_context, ScrapeResult, WebpageSnippet
-from agents import function_tool
-
-
-@function_tool
-async def crawl_website(starting_url: str) -> Union[List[ScrapeResult], str]:
-    """Crawls the pages of a website starting with the starting_url and then descending into the pages linked from there.
-    Prioritizes links found in headers/navigation, then body links, then subsequent pages.
-
-    Args:
-        starting_url: Starting URL to scrape
-
-    Returns:
-        List of ScrapeResult objects which have the following fields:
-            - url: The URL of the web page
-            - title: The title of the web page
-            - description: The description of the web page
-            - text: The text content of the web page
-    """
-    if not starting_url:
-        return "Empty URL provided"
-
-    # Ensure URL has a protocol
-    if not starting_url.startswith(('http://', 'https://')):
-        starting_url = 'http://' + starting_url
-
-    max_pages = 10
-    base_domain = urlparse(starting_url).netloc
-
-    async def extract_links(html: str, current_url: str) -> tuple[List[str], List[str]]:
-        """Extract prioritized links from HTML content"""
-        soup = BeautifulSoup(html, 'html.parser')
-        nav_links = set()
-        body_links = set()
-
-        # Find navigation/header links
-        for nav_element in soup.find_all(['nav', 'header']):
-            for a in nav_element.find_all('a', href=True):
-                link = urljoin(current_url, a['href'])
-                if urlparse(link).netloc == base_domain:
-                    nav_links.add(link)
-
-        # Find remaining body links
-        for a in soup.find_all('a', href=True):
-            link = urljoin(current_url, a['href'])
-            if urlparse(link).netloc == base_domain and link not in nav_links:
-                body_links.add(link)
-
-        return list(nav_links), list(body_links)
-
-    async def fetch_page(url: str) -> str:
-        """Fetch HTML content from a URL"""
-        connector = aiohttp.TCPConnector(ssl=ssl_context)
-        async with aiohttp.ClientSession(connector=connector) as session:
-            try:
-                async with session.get(url, timeout=30) as response:
-                    if response.status == 200:
-                        return await response.text()
-            except Exception as e:
-                print(f"Error fetching {url}: {str(e)}")
-            return "Error fetching page"
-
-    # Initialize with starting URL
-    queue: List[str] = [starting_url]
-    next_level_queue: List[str] = []
-    all_pages_to_scrape: Set[str] = set([starting_url])
-
-    # Breadth-first crawl
-    while queue and len(all_pages_to_scrape) < max_pages:
-        current_url = queue.pop(0)
-
-        # Fetch and process the page
-        html_content = await fetch_page(current_url)
-        if html_content:
-            nav_links, body_links = await extract_links(html_content, current_url)
-
-            # Add unvisited nav links to current queue (higher priority)
-            remaining_slots = max_pages - len(all_pages_to_scrape)
-            for link in nav_links:
-                link = link.rstrip('/')
-                if link not in all_pages_to_scrape and remaining_slots > 0:
-                    queue.append(link)
-                    all_pages_to_scrape.add(link)
-                    remaining_slots -= 1
-
-            # Add unvisited body links to next level queue (lower priority)
-            for link in body_links:
-                link = link.rstrip('/')
-                if link not in all_pages_to_scrape and remaining_slots > 0:
-                    next_level_queue.append(link)
-                    all_pages_to_scrape.add(link)
-                    remaining_slots -= 1
-
-            # If current queue is empty, add next level links
-            if not queue:
-                queue = next_level_queue
-                next_level_queue = []
-
-    # Convert set to list for final processing
-    pages_to_scrape = list(all_pages_to_scrape)[:max_pages]
-    pages_to_scrape = [WebpageSnippet(url=page, title="", description="") for page in pages_to_scrape]
-
-    # Use scrape_urls to get the content for all discovered pages
-    result = await scrape_urls(pages_to_scrape)
-    return result
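The core of the removed crawler is its two-tier link priority: same-domain links found under `<nav>` or `<header>` are queued ahead of links found in the body. A self-contained sketch of just that extraction step (beautifulsoup4 is the only dependency; the HTML is a made-up fixture):

```python
# Standalone illustration of the nav-first link extraction used by
# crawl_website above.
from urllib.parse import urljoin, urlparse
from bs4 import BeautifulSoup

HTML = """
<nav><a href="/docs">Docs</a><a href="https://other.example/x">External</a></nav>
<p>See <a href="/blog/post-1">a post</a> or <a href="/docs">the docs</a>.</p>
"""

def extract_links(html: str, current_url: str) -> tuple[list[str], list[str]]:
    base_domain = urlparse(current_url).netloc
    soup = BeautifulSoup(html, 'html.parser')
    nav_links, body_links = set(), set()

    # Pass 1: same-domain links inside <nav>/<header> get crawl priority.
    for nav_element in soup.find_all(['nav', 'header']):
        for a in nav_element.find_all('a', href=True):
            link = urljoin(current_url, a['href'])
            if urlparse(link).netloc == base_domain:
                nav_links.add(link)

    # Pass 2: remaining same-domain links anywhere else in the document.
    for a in soup.find_all('a', href=True):
        link = urljoin(current_url, a['href'])
        if urlparse(link).netloc == base_domain and link not in nav_links:
            body_links.add(link)

    return sorted(nav_links), sorted(body_links)

nav, body = extract_links(HTML, "https://site.example/")
print(nav)   # ['https://site.example/docs']
print(body)  # ['https://site.example/blog/post-1']
```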