alita-sdk 0.3.175__py3-none-any.whl → 0.3.177__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. alita_sdk/community/__init__.py +7 -17
  2. alita_sdk/tools/carrier/api_wrapper.py +6 -0
  3. alita_sdk/tools/carrier/backend_tests_tool.py +308 -7
  4. alita_sdk/tools/carrier/carrier_sdk.py +18 -0
  5. alita_sdk/tools/carrier/create_ui_test_tool.py +90 -109
  6. alita_sdk/tools/carrier/run_ui_test_tool.py +311 -184
  7. alita_sdk/tools/carrier/tools.py +2 -1
  8. alita_sdk/tools/confluence/api_wrapper.py +1 -0
  9. {alita_sdk-0.3.175.dist-info → alita_sdk-0.3.177.dist-info}/METADATA +2 -2
  10. {alita_sdk-0.3.175.dist-info → alita_sdk-0.3.177.dist-info}/RECORD +13 -44
  11. alita_sdk/community/browseruse/__init__.py +0 -73
  12. alita_sdk/community/browseruse/api_wrapper.py +0 -288
  13. alita_sdk/community/deep_researcher/__init__.py +0 -70
  14. alita_sdk/community/deep_researcher/agents/__init__.py +0 -1
  15. alita_sdk/community/deep_researcher/agents/baseclass.py +0 -182
  16. alita_sdk/community/deep_researcher/agents/knowledge_gap_agent.py +0 -74
  17. alita_sdk/community/deep_researcher/agents/long_writer_agent.py +0 -251
  18. alita_sdk/community/deep_researcher/agents/planner_agent.py +0 -124
  19. alita_sdk/community/deep_researcher/agents/proofreader_agent.py +0 -80
  20. alita_sdk/community/deep_researcher/agents/thinking_agent.py +0 -64
  21. alita_sdk/community/deep_researcher/agents/tool_agents/__init__.py +0 -20
  22. alita_sdk/community/deep_researcher/agents/tool_agents/crawl_agent.py +0 -87
  23. alita_sdk/community/deep_researcher/agents/tool_agents/search_agent.py +0 -96
  24. alita_sdk/community/deep_researcher/agents/tool_selector_agent.py +0 -83
  25. alita_sdk/community/deep_researcher/agents/utils/__init__.py +0 -0
  26. alita_sdk/community/deep_researcher/agents/utils/parse_output.py +0 -148
  27. alita_sdk/community/deep_researcher/agents/writer_agent.py +0 -63
  28. alita_sdk/community/deep_researcher/api_wrapper.py +0 -116
  29. alita_sdk/community/deep_researcher/deep_research.py +0 -185
  30. alita_sdk/community/deep_researcher/examples/deep_example.py +0 -30
  31. alita_sdk/community/deep_researcher/examples/iterative_example.py +0 -34
  32. alita_sdk/community/deep_researcher/examples/report_plan_example.py +0 -27
  33. alita_sdk/community/deep_researcher/iterative_research.py +0 -419
  34. alita_sdk/community/deep_researcher/llm_config.py +0 -87
  35. alita_sdk/community/deep_researcher/main.py +0 -67
  36. alita_sdk/community/deep_researcher/tools/__init__.py +0 -2
  37. alita_sdk/community/deep_researcher/tools/crawl_website.py +0 -109
  38. alita_sdk/community/deep_researcher/tools/web_search.py +0 -294
  39. alita_sdk/community/deep_researcher/utils/__init__.py +0 -0
  40. alita_sdk/community/deep_researcher/utils/md_to_pdf.py +0 -8
  41. alita_sdk/community/deep_researcher/utils/os.py +0 -21
  42. {alita_sdk-0.3.175.dist-info → alita_sdk-0.3.177.dist-info}/WHEEL +0 -0
  43. {alita_sdk-0.3.175.dist-info → alita_sdk-0.3.177.dist-info}/licenses/LICENSE +0 -0
  44. {alita_sdk-0.3.175.dist-info → alita_sdk-0.3.177.dist-info}/top_level.txt +0 -0
@@ -1,182 +0,0 @@
1
- from typing import Any, Callable, Optional, List, Dict, Union, TypeVar, Generic, Type
2
- from pydantic import BaseModel
3
- import asyncio
4
- import json
5
-
6
- # LangChain imports
7
- from langchain_core.tools import BaseTool
8
- from langchain_core.messages import AIMessage, HumanMessage, SystemMessage, BaseMessage
9
- from langchain_core.prompts import ChatPromptTemplate
10
- from langchain.agents import AgentExecutor
11
- from langchain_core.runnables import RunnablePassthrough
12
- from langchain.agents.format_scratchpad import format_to_openai_functions
13
- from langchain.agents.output_parsers import OpenAIFunctionsAgentOutputParser
14
- from langchain_core.runnables.base import RunnableSerializable
15
-
16
- # Type variable for parameterizing the output type
17
- TContext = TypeVar("TContext")
18
-
19
class ResearchRunner:
    """
    Thin entry point that executes a ResearchAgent and wraps whatever it
    returns in a RunResult.
    """

    @classmethod
    async def run(cls, agent, user_message: str, **kwargs) -> 'RunResult':
        """
        Execute ``agent`` on ``user_message`` and return the wrapped result.

        Args:
            agent: The agent to run; must be a ResearchAgent instance
            user_message: The user message to send to the agent
            **kwargs: Accepted for call-site compatibility; not used here

        Returns:
            A RunResult whose ``final_output`` is the value produced by
            ``agent.arun``

        Raises:
            TypeError: If ``agent`` is not a ResearchAgent
        """
        if isinstance(agent, ResearchAgent):
            agent_output = await agent.arun(user_message)
            return RunResult(final_output=agent_output)

        raise TypeError("Agent must be a ResearchAgent")
42
-
43
class RunResult:
    """
    A simple class to maintain compatibility with the previous API
    while using LangChain agents under the hood.

    It stores the agent's final output and offers a best-effort conversion
    of that output to a requested type.
    """

    def __init__(self, final_output: Any):
        self.final_output = final_output

    def final_output_as(self, output_type: Type[Any]) -> Any:
        """
        Convert the final output to the specified type.

        Strategies are tried in this order:
          1. Return the output unchanged if it already is an ``output_type``.
          2. If the output is a string, parse the whole string as JSON and
             call ``output_type(**parsed)``.
          3. If that fails, look for a ```json fenced block inside the string
             and build ``output_type`` from its contents.
          4. Otherwise wrap the raw output under the key ``"output"``.

        Args:
            output_type: The type to convert to

        Returns:
            An instance of output_type

        Raises:
            ValueError: If every strategy fails; the underlying error is
                chained as ``__cause__``
        """
        if isinstance(self.final_output, output_type):
            return self.final_output

        if isinstance(self.final_output, str):
            # Local import: the enclosing module does not import ``re``.
            import re

            candidates = [self.final_output]
            # Tolerate any whitespace (CRLF, spaces, none) around the fenced
            # payload instead of requiring exactly one "\n" on each side.
            fenced = re.search(r'```json\s*(.*?)\s*```', self.final_output, re.DOTALL)
            if fenced:
                candidates.append(fenced.group(1))

            for candidate in candidates:
                try:
                    return output_type(**json.loads(candidate))
                except Exception:
                    # Deliberately broad: both JSON decoding and an arbitrary
                    # ``output_type`` constructor may fail in many ways, and
                    # any failure just means "try the next strategy".
                    continue

        # If all else fails, try to initialize with the entire output
        try:
            if hasattr(output_type, "model_validate"):
                return output_type.model_validate({"output": self.final_output})
            return output_type(output=self.final_output)
        except Exception as e:
            raise ValueError(f"Could not convert output to {output_type.__name__}: {e}") from e
91
-
92
class ResearchAgent(Generic[TContext]):
    """
    LangChain-based agent for research tasks that supports both structured output
    and custom output parsing.

    The agent is built once in ``__init__`` and stored on ``self.agent``;
    ``arun`` invokes it and post-processes the text output.
    """

    def __init__(
        self,
        name: str,
        instructions: str,
        tools: List[BaseTool],
        model: Any,
        output_type: Optional[Type[BaseModel]] = None,
        output_parser: Optional[Callable[[str], Any]] = None
    ):
        """
        Args:
            name: Name of the agent (stored, not otherwise used in this class)
            instructions: System prompt text
            tools: Tools handed to the AgentExecutor
            model: Runnable placed after the prompt in the chain
            output_type: Optional pydantic model the output is validated into
            output_parser: Optional callable applied to the raw output;
                takes precedence over ``output_type``
        """
        self.name = name
        self.instructions = instructions
        self.tools = tools
        self.model = model
        self.output_type = output_type
        self.output_parser = output_parser

        # Create the LangChain agent
        self.agent = self._create_agent()

    def _create_agent(self) -> RunnableSerializable:
        """
        Create a LangChain agent with the specified configuration.
        """
        # The instructions are passed as a ready-made SystemMessage rather
        # than a ("system", text) template: the prompt text may contain
        # literal braces (e.g. an embedded JSON schema) which a template
        # would try to interpret as input variables.
        prompt = ChatPromptTemplate.from_messages([
            SystemMessage(content=self.instructions),
            ("human", "{input}"),
            # NOTE(review): the scratchpad is rendered into a single AI
            # message here; a MessagesPlaceholder may be what was intended.
            ("ai", "{agent_scratchpad}")
        ])

        # Create the LangChain agent
        agent = (
            {
                # Select the user's text; passing the whole input mapping
                # through would render the dict itself into "{input}".
                "input": lambda x: x["input"],
                "agent_scratchpad": lambda x: format_to_openai_functions(x["intermediate_steps"])
            }
            | prompt
            | self.model
            | OpenAIFunctionsAgentOutputParser()
        )

        # Create the agent executor
        return AgentExecutor(
            agent=agent,
            tools=self.tools,
            verbose=True,
            handle_parsing_errors=True
        )

    async def arun(self, user_input: str) -> Any:
        """
        Run the agent asynchronously with the given user input.

        Args:
            user_input: The user input to send to the agent

        Returns:
            The parsed output if ``output_parser`` is set; otherwise an
            ``output_type`` instance if validation succeeds; otherwise the raw
            output. Any exception raised along the way is not propagated:
            the string ``"Error: <message>"`` is returned instead.
        """
        try:
            # Run the agent
            result = await self.agent.ainvoke({"input": user_input, "intermediate_steps": []})
            output = result.get("output", "")

            # Apply output parser if specified
            if self.output_parser is not None:
                return self.output_parser(output)

            # Try to convert to output_type if specified
            if self.output_type is not None:
                try:
                    return self.output_type.model_validate_json(output)
                except Exception:
                    try:
                        return self.output_type.model_validate({"output": output})
                    except Exception:
                        # Best effort: fall through to the raw output below
                        pass

            # Otherwise return the raw output
            return output
        except Exception as e:
            return f"Error: {str(e)}"
@@ -1,74 +0,0 @@
1
- """
2
- Agent used to evaluate the state of the research report (typically done in a loop) and identify knowledge gaps that still
3
- need to be addressed.
4
-
5
- The Agent takes as input a string in the following format:
6
- ===========================================================
7
- ORIGINAL QUERY: <original user query>
8
-
9
- HISTORY OF ACTIONS, FINDINGS AND THOUGHTS: <breakdown of activities and findings carried out so far>
10
- ===========================================================
11
-
12
- The Agent then:
13
- 1. Carefully reviews the current draft and assesses its completeness in answering the original query
14
- 2. Identifies specific knowledge gaps that still exist and need to be filled
15
- 3. Returns a KnowledgeGapOutput object
16
- """
17
-
18
- from pydantic import BaseModel, Field
19
- from typing import List, Optional, Any
20
- from langchain_core.tools import BaseTool
21
- from .baseclass import ResearchAgent
22
- from ..llm_config import LLMConfig, model_supports_structured_output
23
- from datetime import datetime
24
- from .utils.parse_output import create_type_parser
25
-
26
# Structured result of one knowledge-gap evaluation. Its JSON schema is
# interpolated into the INSTRUCTIONS prompt in this module.
class KnowledgeGapOutput(BaseModel):
    """Output from the Knowledge Gap Agent"""
    # True when the research loop can stop
    research_complete: bool = Field(description="Whether the research and findings are complete enough to end the research loop")
    # Gaps to hand to the next agent
    outstanding_gaps: List[str] = Field(description="List of knowledge gaps that still need to be addressed")
30
-
31
-
32
# System prompt for the knowledge gap agent.
# This is an f-string evaluated once, when the module is imported, so the
# date and the KnowledgeGapOutput JSON schema embedded below are fixed at
# that moment.
INSTRUCTIONS = f"""
You are a Research State Evaluator. Today's date is {datetime.now().strftime("%Y-%m-%d")}.
Your job is to critically analyze the current state of a research report,
identify what knowledge gaps still exist and determine the best next step to take.

You will be given:
1. The original user query and any relevant background context to the query
2. A full history of the tasks, actions, findings and thoughts you've made up until this point in the research process

Your task is to:
1. Carefully review the findings and thoughts, particularly from the latest iteration, and assess their completeness in answering the original query
2. Determine if the findings are sufficiently complete to end the research loop
3. If not, identify up to 3 knowledge gaps that need to be addressed in sequence in order to continue with research - these should be relevant to the original query

Be specific in the gaps you identify and include relevant information as this will be passed onto another agent to process without additional context.

Only output JSON. Follow the JSON schema below. Do not output anything else. I will be parsing this with Pydantic so output valid JSON only:
{KnowledgeGapOutput.model_json_schema()}
"""
51
-
52
def init_knowledge_gap_agent(config: LLMConfig) -> ResearchAgent:
    """
    Build the agent that evaluates outstanding knowledge gaps.

    The fast model from ``config`` is used. When that model exposes a
    ``langchain_llm`` attribute, the wrapped LLM is used together with
    ``output_type``; otherwise the model is used directly and the output is
    handled by a parser created for KnowledgeGapOutput.

    Args:
        config: The LLM configuration to use

    Returns:
        A ResearchAgent that can evaluate knowledge gaps
    """
    selected_model = config.fast_model

    if hasattr(selected_model, 'langchain_llm'):
        llm = selected_model.langchain_llm
        structured_type = KnowledgeGapOutput
        parser = None
    else:
        llm = selected_model
        structured_type = None
        parser = create_type_parser(KnowledgeGapOutput)

    return ResearchAgent(
        name="KnowledgeGapAgent",
        instructions=INSTRUCTIONS,
        tools=[],  # this agent works without tools
        model=llm,
        output_type=structured_type,
        output_parser=parser,
    )
@@ -1,251 +0,0 @@
1
- """
2
- Agent used to synthesize a final report by iteratively writing each section of the report.
3
- Used to produce long reports given drafts of each section. Broadly aligned with the methodology described here:
4
-
5
-
6
- The LongWriterAgent takes as input a string in the following format:
7
- ===========================================================
8
- ORIGINAL QUERY: <original user query>
9
-
10
- CURRENT REPORT DRAFT: <current working draft of the report, all sections up to the current one being written>
11
-
12
- TITLE OF NEXT SECTION TO WRITE: <title of the next section of the report to be written>
13
-
14
- DRAFT OF NEXT SECTION: <draft of the next section of the report>
15
- ===========================================================
16
-
17
- The Agent then:
18
- 1. Reads the current draft and the draft of the next section
19
- 2. Writes the next section of the report
20
- 3. Produces an updated draft of the new section to fit the flow of the report
21
- 4. Returns the updated draft of the new section along with references/citations
22
- """
23
- from .baseclass import ResearchAgent, ResearchRunner
24
- from ..llm_config import LLMConfig, model_supports_structured_output
25
- from .utils.parse_output import create_type_parser
26
- from datetime import datetime
27
- from pydantic import BaseModel, Field
28
- from .proofreader_agent import ReportDraft
29
- from typing import List, Tuple, Dict
30
- import re
31
-
32
-
33
# Structured result of writing one report section. Its JSON schema is
# interpolated into the INSTRUCTIONS prompt in this module.
class LongWriterOutput(BaseModel):
    # Final markdown text of the section
    next_section_markdown: str = Field(description="The final draft of the next section in markdown format")
    # Entries in the form shown in the prompt example, e.g. "[1] https://..."
    references: List[str] = Field(description="A list of URLs and their corresponding reference numbers for the section")
36
-
37
-
38
# System prompt for the long writer agent.
# This is an f-string evaluated once, when the module is imported, so the
# date and the LongWriterOutput JSON schema embedded below are fixed at that
# moment.
INSTRUCTIONS = f"""
You are an expert report writer tasked with iteratively writing each section of a report.
Today's date is {datetime.now().strftime('%Y-%m-%d')}.
You will be provided with:
1. The original research query
2. A final draft of the report containing the table of contents and all sections written up until this point (in the first iteration there will be no sections written yet)
3. A first draft of the next section of the report to be written

OBJECTIVE:
1. Write a final draft of the next section of the report with numbered citations in square brackets in the body of the report
2. Produce a list of references to be appended to the end of the report

CITATIONS/REFERENCES:
The citations should be in numerical order, written in numbered square brackets in the body of the report.
Separately, a list of all URLs and their corresponding reference numbers will be included at the end of the report.
Follow the example below for formatting.

LongWriterOutput(
next_section_markdown="The company specializes in IT consulting [1](https://example.com/first-source-url). It operates in the software services market which is expected to grow at 10% per year [2](https://example.com/second-source-url).",
references=["[1] https://example.com/first-source-url", "[2] https://example.com/second-source-url"]
)

GUIDELINES:
- You can reformat and reorganize the flow of the content and headings within a section to flow logically, but DO NOT remove details that were included in the first draft
- Only remove text from the first draft if it is already mentioned earlier in the report, or if it should be covered in a later section per the table of contents
- Ensure the heading for the section matches the table of contents
- Format the final output and references section as markdown
- Do not include a title for the reference section, just a list of numbered references

Only output JSON. Follow the JSON schema below. Do not output anything else. I will be parsing this with Pydantic so output valid JSON only:
{LongWriterOutput.model_json_schema()}
"""
70
-
71
def init_long_writer_agent(config: LLMConfig) -> ResearchAgent:
    """
    Build the agent that writes the report section by section.

    The fast model from ``config`` is used. When that model exposes a
    ``langchain_llm`` attribute, the wrapped LLM is used together with
    ``output_type``; otherwise the model is used directly and the output is
    handled by a parser created for LongWriterOutput.

    Args:
        config: The LLM configuration

    Returns:
        A ResearchAgent capable of writing long-form content
    """
    selected_model = config.fast_model

    if hasattr(selected_model, 'langchain_llm'):
        llm = selected_model.langchain_llm
        structured_type = LongWriterOutput
        parser = None
    else:
        llm = selected_model
        structured_type = None
        parser = create_type_parser(LongWriterOutput)

    return ResearchAgent(
        name="LongWriterAgent",
        instructions=INSTRUCTIONS,
        tools=[],  # this agent works without tools
        model=llm,
        output_type=structured_type,
        output_parser=parser,
    )
94
-
95
-
96
async def write_next_section(
    long_writer_agent: ResearchAgent,
    original_query: str,
    report_draft: str,
    next_section_title: str,
    next_section_draft: str,
) -> LongWriterOutput:
    """
    Ask the long writer agent for the final version of one section.

    Args:
        long_writer_agent: Agent that performs the writing
        original_query: The user's original research query
        report_draft: Report text written so far; when empty the prompt
            says "No draft yet"
        next_section_title: Title of the section to write
        next_section_draft: First draft of that section

    Returns:
        The agent's result converted to a LongWriterOutput
    """
    current_draft = report_draft or "No draft yet"

    prompt = f"""
    <ORIGINAL QUERY>
    {original_query}
    </ORIGINAL QUERY>

    <CURRENT REPORT DRAFT>
    {current_draft}
    </CURRENT REPORT DRAFT>

    <TITLE OF NEXT SECTION TO WRITE>
    {next_section_title}
    </TITLE OF NEXT SECTION TO WRITE>

    <DRAFT OF NEXT SECTION>
    {next_section_draft}
    </DRAFT OF NEXT SECTION>
    """

    run_result = await ResearchRunner.run(long_writer_agent, prompt)
    return run_result.final_output_as(LongWriterOutput)
129
-
130
-
131
async def write_report(
    long_writer_agent: ResearchAgent,
    original_query: str,
    report_title: str,
    report_draft: ReportDraft,
) -> str:
    """
    Assemble the final report by writing its sections one after another.

    The report starts with the title and a numbered table of contents.
    Each section is then written with the report-so-far as context, its
    references are merged into the running list, its headings are rebased,
    and it is appended. The merged reference list closes the report.

    Args:
        long_writer_agent: Agent that writes each section
        original_query: The user's original research query
        report_title: Main heading of the report
        report_draft: Draft whose ``sections`` supply titles and content

    Returns:
        The full report as markdown
    """
    toc_entries = [
        f"{position}. {section.section_title}"
        for position, section in enumerate(report_draft.sections, start=1)
    ]
    final_draft = f"# {report_title}\n\n" + "## Table of Contents\n\n" + "\n".join(toc_entries) + "\n\n"
    all_references = []

    for section in report_draft.sections:
        # The draft so far is passed in so the new section can fit its flow
        written = await write_next_section(
            long_writer_agent,
            original_query,
            final_draft,
            section.section_title,
            section.section_content,
        )
        renumbered, all_references = reformat_references(
            written.next_section_markdown,
            written.references,
            all_references,
        )
        final_draft += reformat_section_headings(renumbered) + '\n\n'

    # Close the report with the merged reference list
    return final_draft + '## References:\n\n' + ' \n'.join(all_references)
157
-
158
-
159
def reformat_references(
    section_markdown: str,
    section_references: List[str],
    all_references: List[str]
) -> Tuple[str, List[str]]:
    """
    Re-number, de-duplicate and re-format references as a new section is added to the report draft.

    Args:
        section_markdown: Markdown of the new section containing inline
            references in square brackets, e.g. [1], [2]
        section_references: References of the new section, e.g.
            ["[1] https://example1.com", "[2] https://example2.com"]
        all_references: References covering all prior sections of the report.
            This list is extended in place with any new references.

    Returns:
        A tuple of:
        1. The section markdown with its inline reference numbers mapped to
           report-wide numbers. URLs already in the report reuse their
           number; new URLs continue from the highest existing number.
           Inline numbers with no matching section reference are removed.
        2. ``all_references`` (the same list object), including the new
           section's references
    """
    def parse_references(ref_list: List[str]) -> List[Tuple[int, str]]:
        # Split each "[n] url" entry into (n, url); malformed entries are
        # reported and skipped.
        parsed = []
        for ref in ref_list:
            number, bracket, url = ref.partition(']')
            try:
                if not bracket:
                    # No closing bracket at all: there is no URL part
                    raise ValueError(ref)
                parsed.append((int(number.strip('[')), url.strip()))
            except ValueError:
                print(f"Invalid reference format: {ref}")
        return parsed

    # URL -> number for everything already in the report
    report_ref_map: Dict[str, int] = {
        url: ref_num for ref_num, url in parse_references(all_references)
    }
    ref_count = max(report_ref_map.values(), default=0)

    # Section number -> report number. Keyed by the section's own number so
    # that two section references pointing at the same URL both stay mapped.
    section_to_report_ref_map: Dict[int, int] = {}
    for section_ref_num, url in parse_references(section_references):
        if url not in report_ref_map:
            # If the reference is not in the report, add it to the report
            ref_count += 1
            report_ref_map[url] = ref_count
            all_references.append(f"[{ref_count}] {url}")
        section_to_report_ref_map[section_ref_num] = report_ref_map[url]

    def replace_reference(match):
        mapped_ref_num = section_to_report_ref_map.get(int(match.group(1)))
        if mapped_ref_num:
            return f'[{mapped_ref_num}]'
        return ''

    # Replace all references in a single pass so freshly written numbers
    # are never re-mapped
    section_markdown = re.sub(r'\[(\d+)\]', replace_reference, section_markdown)

    return section_markdown, all_references
215
-
216
-
217
def reformat_section_headings(section_markdown: str) -> str:
    """
    Rebase a section's markdown headings so that its first heading becomes level 2.

    Every heading is shifted by the same amount as the first one, and no
    heading ends up above level 2.

    E.g. this:
    # Big Title
    Some content
    ## Subsection

    Becomes this:
    ## Big Title
    Some content
    ### Subsection

    Text that is blank or contains no heading is returned unchanged.
    """
    if section_markdown.strip() == "":
        return section_markdown

    leading = re.search(r'^(#+)\s', section_markdown, re.MULTILINE)
    if leading is None:
        return section_markdown

    # Offset that moves the first heading to exactly level 2
    shift = 2 - len(leading.group(1))

    def rebase(found):
        depth = len(found.group(1)) + shift
        if depth < 2:
            depth = 2
        return f"{'#' * depth} {found.group(2)}"

    # One pass over all headings
    return re.sub(r'^(#+)\s(.+)$', rebase, section_markdown, flags=re.MULTILINE)
@@ -1,124 +0,0 @@
1
- """
2
- Agent used to produce an initial outline of the report, including a list of section titles and the key question to be
3
- addressed in each section.
4
-
5
- The Agent takes as input a string in the following format:
6
- ===========================================================
7
- QUERY: <original user query>
8
- ===========================================================
9
-
10
- The Agent then outputs a ReportPlan object, which includes:
11
- 1. A summary of initial background context (if needed), based on web searches and/or crawling
12
- 2. An outline of the report that includes a list of section titles and the key question to be addressed in each section
13
- """
14
-
15
- from pydantic import BaseModel, Field
16
- from typing import List, Optional, Any
17
- from langchain_core.tools import BaseTool, Tool
18
- from .baseclass import ResearchAgent
19
- from ..llm_config import LLMConfig, model_supports_structured_output
20
- from .utils.parse_output import create_type_parser
21
- from datetime import datetime
22
-
23
-
24
# One entry of the report outline: a section title paired with the question
# that section should answer.
class ReportPlanSection(BaseModel):
    """A section of the report that needs to be written"""
    title: str = Field(description="The title of the section")
    key_question: str = Field(description="The key question to be addressed in the section")
28
-
29
-
30
# Structured result of the planner agent. Its JSON schema is interpolated
# into the INSTRUCTIONS prompt in this module.
class ReportPlan(BaseModel):
    """Output from the Report Planner Agent"""
    # Shared context for the agents that research the sections
    background_context: str = Field(description="A summary of supporting context that can be passed onto the research agents")
    # Sections of the planned report
    report_outline: List[ReportPlanSection] = Field(description="List of sections that need to be written in the report")
    # Main heading of the report
    report_title: str = Field(description="The title of the report")
35
-
36
-
37
# System prompt for the planner agent.
# This is an f-string evaluated once, when the module is imported, so the
# date and the ReportPlan JSON schema embedded below are fixed at that moment.
INSTRUCTIONS = f"""
You are a research manager, managing a team of research agents. Today's date is {datetime.now().strftime("%Y-%m-%d")}.
Given a research query, your job is to produce an initial outline of the report (section titles and key questions),
as well as some background context. Each section will be assigned to a different researcher in your team who will then
carry out research on the section.

You will be given:
- An initial research query

Your task is to:
1. Produce 1-2 paragraphs of initial background context (if needed) on the query by running web searches or crawling websites
2. Produce an outline of the report that includes a list of section titles and the key question to be addressed in each section
3. Provide a title for the report that will be used as the main heading

Guidelines:
- Each section should cover a single topic/question that is independent of other sections
- The key question for each section should include both the NAME and DOMAIN NAME / WEBSITE (if available and applicable) if it is related to a company, product or similar
- The background_context should not be more than 2 paragraphs
- The background_context should be very specific to the query and include any information that is relevant for researchers across all sections of the report
- The background_context should be drawn only from web search or crawl results rather than prior knowledge (i.e. it should only be included if you have called tools)
- For example, if the query is about a company, the background context should include some basic information about what the company does
- DO NOT do more than 2 tool calls

Only output JSON. Follow the JSON schema below. Do not output anything else. I will be parsing this with Pydantic so output valid JSON only:
{ReportPlan.model_json_schema()}
"""
63
-
64
def init_planner_agent(config: LLMConfig) -> ResearchAgent:
    """
    Initialize the planner agent with the appropriate tools and configuration.

    The reasoning model from ``config`` is used. The agent receives two
    async-only tools, ``web_search`` and ``crawl_website``. When the model
    exposes a ``langchain_llm`` attribute, the wrapped LLM is used together
    with ``output_type``; otherwise the model is used directly and the output
    is handled by a parser created for ReportPlan.

    Args:
        config: The LLM configuration

    Returns:
        A configured ResearchAgent for planning research
    """
    selected_model = config.reasoning_model

    # Web search tool wrapper
    async def web_search_wrapper(query: str) -> str:
        """Search the web for information on a specific query."""
        # Import here to avoid circular imports. The tools package sits next
        # to ``llm_config`` (imported above as ``..llm_config``), hence ``..``.
        from ..tools import web_search
        results = await web_search(query)
        # Format the results into a readable format
        # NOTE(review): assumes each result is a mapping with 'title', 'url'
        # and 'snippet' keys - confirm against web_search.
        return "\n\n".join(
            f"Title: {result['title']}\nURL: {result['url']}\nSnippet: {result['snippet']}"
            for result in results
        )

    # Crawl website tool wrapper
    async def crawl_website_wrapper(url: str) -> str:
        """Crawl a website and extract its main content."""
        # Import here to avoid circular imports
        from ..tools import crawl_website
        return await crawl_website(url)

    # Create LangChain Tool objects. The wrappers are coroutines, so they are
    # registered only as ``coroutine``; using one as the synchronous ``func``
    # would hand back an un-awaited coroutine object instead of a result.
    web_search_tool = Tool(
        name="web_search",
        description="Search the web for information on a specific query - provide a query with 3-6 words as input",
        func=None,
        coroutine=web_search_wrapper
    )

    crawl_tool = Tool(
        name="crawl_website",
        description="Crawl a website for information relevant to the query - provide a starting URL as input",
        func=None,
        coroutine=crawl_website_wrapper
    )

    # Determine whether to use structured output
    use_output_parser = not hasattr(selected_model, 'langchain_llm')

    return ResearchAgent(
        name="PlannerAgent",
        instructions=INSTRUCTIONS,
        tools=[web_search_tool, crawl_tool],
        model=selected_model.langchain_llm if hasattr(selected_model, 'langchain_llm') else selected_model,
        output_type=ReportPlan if not use_output_parser else None,
        output_parser=create_type_parser(ReportPlan) if use_output_parser else None
    )