alita-sdk 0.3.176__py3-none-any.whl → 0.3.177__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- alita_sdk/community/__init__.py +7 -17
- alita_sdk/tools/carrier/api_wrapper.py +6 -0
- alita_sdk/tools/carrier/backend_tests_tool.py +308 -7
- alita_sdk/tools/carrier/carrier_sdk.py +18 -0
- alita_sdk/tools/carrier/tools.py +2 -1
- {alita_sdk-0.3.176.dist-info → alita_sdk-0.3.177.dist-info}/METADATA +1 -2
- {alita_sdk-0.3.176.dist-info → alita_sdk-0.3.177.dist-info}/RECORD +10 -41
- alita_sdk/community/browseruse/__init__.py +0 -73
- alita_sdk/community/browseruse/api_wrapper.py +0 -288
- alita_sdk/community/deep_researcher/__init__.py +0 -70
- alita_sdk/community/deep_researcher/agents/__init__.py +0 -1
- alita_sdk/community/deep_researcher/agents/baseclass.py +0 -182
- alita_sdk/community/deep_researcher/agents/knowledge_gap_agent.py +0 -74
- alita_sdk/community/deep_researcher/agents/long_writer_agent.py +0 -251
- alita_sdk/community/deep_researcher/agents/planner_agent.py +0 -124
- alita_sdk/community/deep_researcher/agents/proofreader_agent.py +0 -80
- alita_sdk/community/deep_researcher/agents/thinking_agent.py +0 -64
- alita_sdk/community/deep_researcher/agents/tool_agents/__init__.py +0 -20
- alita_sdk/community/deep_researcher/agents/tool_agents/crawl_agent.py +0 -87
- alita_sdk/community/deep_researcher/agents/tool_agents/search_agent.py +0 -96
- alita_sdk/community/deep_researcher/agents/tool_selector_agent.py +0 -83
- alita_sdk/community/deep_researcher/agents/utils/__init__.py +0 -0
- alita_sdk/community/deep_researcher/agents/utils/parse_output.py +0 -148
- alita_sdk/community/deep_researcher/agents/writer_agent.py +0 -63
- alita_sdk/community/deep_researcher/api_wrapper.py +0 -116
- alita_sdk/community/deep_researcher/deep_research.py +0 -185
- alita_sdk/community/deep_researcher/examples/deep_example.py +0 -30
- alita_sdk/community/deep_researcher/examples/iterative_example.py +0 -34
- alita_sdk/community/deep_researcher/examples/report_plan_example.py +0 -27
- alita_sdk/community/deep_researcher/iterative_research.py +0 -419
- alita_sdk/community/deep_researcher/llm_config.py +0 -87
- alita_sdk/community/deep_researcher/main.py +0 -67
- alita_sdk/community/deep_researcher/tools/__init__.py +0 -2
- alita_sdk/community/deep_researcher/tools/crawl_website.py +0 -109
- alita_sdk/community/deep_researcher/tools/web_search.py +0 -294
- alita_sdk/community/deep_researcher/utils/__init__.py +0 -0
- alita_sdk/community/deep_researcher/utils/md_to_pdf.py +0 -8
- alita_sdk/community/deep_researcher/utils/os.py +0 -21
- {alita_sdk-0.3.176.dist-info → alita_sdk-0.3.177.dist-info}/WHEEL +0 -0
- {alita_sdk-0.3.176.dist-info → alita_sdk-0.3.177.dist-info}/licenses/LICENSE +0 -0
- {alita_sdk-0.3.176.dist-info → alita_sdk-0.3.177.dist-info}/top_level.txt +0 -0
alita_sdk/community/deep_researcher/agents/long_writer_agent.py
@@ -1,251 +0,0 @@
-"""
-Agent used to synthesize a final report by iteratively writing each section of the report.
-Used to produce long reports given drafts of each section. Broadly aligned with the methodology described here:
-
-
-The LongWriterAgent takes as input a string in the following format:
-===========================================================
-ORIGINAL QUERY: <original user query>
-
-CURRENT REPORT DRAFT: <current working draft of the report, all sections up to the current one being written>
-
-TITLE OF NEXT SECTION TO WRITE: <title of the next section of the report to be written>
-
-DRAFT OF NEXT SECTION: <draft of the next section of the report>
-===========================================================
-
-The Agent then:
-1. Reads the current draft and the draft of the next section
-2. Writes the next section of the report
-3. Produces an updated draft of the new section to fit the flow of the report
-4. Returns the updated draft of the new section along with references/citations
-"""
-from .baseclass import ResearchAgent, ResearchRunner
-from ..llm_config import LLMConfig, model_supports_structured_output
-from .utils.parse_output import create_type_parser
-from datetime import datetime
-from pydantic import BaseModel, Field
-from .proofreader_agent import ReportDraft
-from typing import List, Tuple, Dict
-import re
-
-
-class LongWriterOutput(BaseModel):
-    next_section_markdown: str = Field(description="The final draft of the next section in markdown format")
-    references: List[str] = Field(description="A list of URLs and their corresponding reference numbers for the section")
-
-
-INSTRUCTIONS = f"""
-You are an expert report writer tasked with iteratively writing each section of a report.
-Today's date is {datetime.now().strftime('%Y-%m-%d')}.
-You will be provided with:
-1. The original research query
-2. A final draft of the report containing the table of contents and all sections written up until this point (in the first iteration there will be no sections written yet)
-3. A first draft of the next section of the report to be written
-
-OBJECTIVE:
-1. Write a final draft of the next section of the report with numbered citations in square brackets in the body of the report
-2. Produce a list of references to be appended to the end of the report
-
-CITATIONS/REFERENCES:
-The citations should be in numerical order, written in numbered square brackets in the body of the report.
-Separately, a list of all URLs and their corresponding reference numbers will be included at the end of the report.
-Follow the example below for formatting.
-
-LongWriterOutput(
-    next_section_markdown="The company specializes in IT consulting [1](https://example.com/first-source-url). It operates in the software services market which is expected to grow at 10% per year [2](https://example.com/second-source-url).",
-    references=["[1] https://example.com/first-source-url", "[2] https://example.com/second-source-url"]
-)
-
-GUIDELINES:
-- You can reformat and reorganize the flow of the content and headings within a section to flow logically, but DO NOT remove details that were included in the first draft
-- Only remove text from the first draft if it is already mentioned earlier in the report, or if it should be covered in a later section per the table of contents
-- Ensure the heading for the section matches the table of contents
-- Format the final output and references section as markdown
-- Do not include a title for the reference section, just a list of numbered references
-
-Only output JSON. Follow the JSON schema below. Do not output anything else. I will be parsing this with Pydantic so output valid JSON only:
-{LongWriterOutput.model_json_schema()}
-"""
-
-def init_long_writer_agent(config: LLMConfig) -> ResearchAgent:
-    """
-    Initialize the long writer agent.
-
-    Args:
-        config: The LLM configuration
-
-    Returns:
-        A ResearchAgent capable of writing long-form content
-    """
-    selected_model = config.fast_model
-
-    # Determine whether to use structured output
-    use_output_parser = not hasattr(selected_model, 'langchain_llm')
-
-    return ResearchAgent(
-        name="LongWriterAgent",
-        instructions=INSTRUCTIONS,
-        tools=[],  # No tools needed for this agent
-        model=selected_model.langchain_llm if hasattr(selected_model, 'langchain_llm') else selected_model,
-        output_type=LongWriterOutput if not use_output_parser else None,
-        output_parser=create_type_parser(LongWriterOutput) if use_output_parser else None
-    )
-
-
-async def write_next_section(
-    long_writer_agent: ResearchAgent,
-    original_query: str,
-    report_draft: str,
-    next_section_title: str,
-    next_section_draft: str,
-) -> LongWriterOutput:
-    """Write the next section of the report"""
-
-    user_message = f"""
-<ORIGINAL QUERY>
-{original_query}
-</ORIGINAL QUERY>
-
-<CURRENT REPORT DRAFT>
-{report_draft or "No draft yet"}
-</CURRENT REPORT DRAFT>
-
-<TITLE OF NEXT SECTION TO WRITE>
-{next_section_title}
-</TITLE OF NEXT SECTION TO WRITE>
-
-<DRAFT OF NEXT SECTION>
-{next_section_draft}
-</DRAFT OF NEXT SECTION>
-    """
-
-    result = await ResearchRunner.run(
-        long_writer_agent,
-        user_message,
-    )
-
-    return result.final_output_as(LongWriterOutput)
-
-
-async def write_report(
-    long_writer_agent: ResearchAgent,
-    original_query: str,
-    report_title: str,
-    report_draft: ReportDraft,
-) -> str:
-    """Write the final report by iteratively writing each section"""
-
-    # Initialize the final draft of the report with the title and table of contents
-    final_draft = f"# {report_title}\n\n" + "## Table of Contents\n\n" + "\n".join([f"{i+1}. {section.section_title}" for i, section in enumerate(report_draft.sections)]) + "\n\n"
-    all_references = []
-
-    for section in report_draft.sections:
-        # Produce the final draft of each section and add it to the report with corresponding references
-        next_section_draft = await write_next_section(long_writer_agent, original_query, final_draft, section.section_title, section.section_content)
-        section_markdown, all_references = reformat_references(
-            next_section_draft.next_section_markdown,
-            next_section_draft.references,
-            all_references
-        )
-        section_markdown = reformat_section_headings(section_markdown)
-        final_draft += section_markdown + '\n\n'
-
-    # Add the final references to the end of the report
-    final_draft += '## References:\n\n' + ' \n'.join(all_references)
-    return final_draft
-
-
-def reformat_references(
-    section_markdown: str,
-    section_references: List[str],
-    all_references: List[str]
-) -> Tuple[str, List[str]]:
-    """
-    This method gracefully handles the re-numbering, de-duplication and re-formatting of references as new sections are added to the report draft.
-    It takes as input:
-    1. The markdown content of the new section containing inline references in square brackets, e.g. [1], [2]
-    2. The list of references for the new section, e.g. ["[1] https://example1.com", "[2] https://example2.com"]
-    3. The list of references covering all prior sections of the report
-
-    It returns:
-    1. The updated markdown content of the new section with the references re-numbered and de-duplicated, such that they increment from the previous references
-    2. The updated list of references for the full report, to include the new section's references
-    """
-    def convert_ref_list_to_map(ref_list: List[str]) -> Dict[str, str]:
-        ref_map = {}
-        for ref in ref_list:
-            try:
-                ref_num = int(ref.split(']')[0].strip('['))
-                url = ref.split(']', 1)[1].strip()
-                ref_map[url] = ref_num
-            except ValueError:
-                print(f"Invalid reference format: {ref}")
-                continue
-        return ref_map
-
-    section_ref_map = convert_ref_list_to_map(section_references)
-    report_ref_map = convert_ref_list_to_map(all_references)
-    section_to_report_ref_map = {}
-
-    report_urls = set(report_ref_map.keys())
-    ref_count = max(report_ref_map.values() or [0])
-    for url, section_ref_num in section_ref_map.items():
-        if url in report_urls:
-            section_to_report_ref_map[section_ref_num] = report_ref_map[url]
-        else:
-            # If the reference is not in the report, add it to the report
-            ref_count += 1
-            section_to_report_ref_map[section_ref_num] = ref_count
-            all_references.append(f"[{ref_count}] {url}")
-
-    def replace_reference(match):
-        # Extract the reference number from the match
-        ref_num = int(match.group(1))
-        # Look up the new reference number
-        mapped_ref_num = section_to_report_ref_map.get(ref_num)
-        if mapped_ref_num:
-            return f'[{mapped_ref_num}]'
-        return ''
-
-    # Replace all references in a single pass using a replacement function
-    section_markdown = re.sub(r'\[(\d+)\]', replace_reference, section_markdown)
-
-    return section_markdown, all_references
-
-
-def reformat_section_headings(section_markdown: str) -> str:
-    """
-    Reformat the headings of a section to be consistent with the report, by rebasing the section's heading to be a level-2 heading
-
-    E.g. this:
-    # Big Title
-    Some content
-    ## Subsection
-
-    Becomes this:
-    ## Big Title
-    Some content
-    ### Subsection
-    """
-    # If the section is empty, return as-is
-    if not section_markdown.strip():
-        return section_markdown
-
-    # Find the first heading level
-    first_heading_match = re.search(r'^(#+)\s', section_markdown, re.MULTILINE)
-    if not first_heading_match:
-        return section_markdown
-
-    # Calculate the level adjustment needed
-    first_heading_level = len(first_heading_match.group(1))
-    level_adjustment = 2 - first_heading_level
-
-    def adjust_heading_level(match):
-        hashes = match.group(1)
-        content = match.group(2)
-        new_level = max(2, len(hashes) + level_adjustment)
-        return '#' * new_level + ' ' + content
-
-    # Apply the heading adjustment to all headings in one pass
-    return re.sub(r'^(#+)\s(.+)$', adjust_heading_level, section_markdown, flags=re.MULTILINE)
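
For reference, the two pure helpers at the bottom of this deleted module, reformat_references and reformat_section_headings, have no LLM dependency and can be exercised directly against the 0.3.176 wheel (the import below no longer resolves in 0.3.177). A minimal sketch, with behavior traced from the code above; the sample strings and URLs are hypothetical:

# Runs against alita-sdk==0.3.176 only; the module is gone in 0.3.177.
from alita_sdk.community.deep_researcher.agents.long_writer_agent import (
    reformat_references,
    reformat_section_headings,
)

section_md = "The SDK ships agent tooling [1] and carrier tests [2]."
section_refs = ["[1] https://example.com/a", "[2] https://example.com/b"]
all_refs = ["[1] https://example.com/b"]  # URL b was already cited as [1]

merged_md, all_refs = reformat_references(section_md, section_refs, all_refs)
# URL a is new and becomes [2]; URL b de-duplicates back to [1]:
# merged_md == "The SDK ships agent tooling [2] and carrier tests [1]."
# all_refs  == ["[1] https://example.com/b", "[2] https://example.com/a"]

# Headings are rebased so every section slots in under the report's H1 title:
print(reformat_section_headings("# Big Title\nSome content\n## Subsection"))
# -> "## Big Title\nSome content\n### Subsection"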
alita_sdk/community/deep_researcher/agents/planner_agent.py
@@ -1,124 +0,0 @@
-"""
-Agent used to produce an initial outline of the report, including a list of section titles and the key question to be
-addressed in each section.
-
-The Agent takes as input a string in the following format:
-===========================================================
-QUERY: <original user query>
-===========================================================
-
-The Agent then outputs a ReportPlan object, which includes:
-1. A summary of initial background context (if needed), based on web searches and/or crawling
-2. An outline of the report that includes a list of section titles and the key question to be addressed in each section
-"""
-
-from pydantic import BaseModel, Field
-from typing import List, Optional, Any
-from langchain_core.tools import BaseTool, Tool
-from .baseclass import ResearchAgent
-from ..llm_config import LLMConfig, model_supports_structured_output
-from .utils.parse_output import create_type_parser
-from datetime import datetime
-
-
-class ReportPlanSection(BaseModel):
-    """A section of the report that needs to be written"""
-    title: str = Field(description="The title of the section")
-    key_question: str = Field(description="The key question to be addressed in the section")
-
-
-class ReportPlan(BaseModel):
-    """Output from the Report Planner Agent"""
-    background_context: str = Field(description="A summary of supporting context that can be passed onto the research agents")
-    report_outline: List[ReportPlanSection] = Field(description="List of sections that need to be written in the report")
-    report_title: str = Field(description="The title of the report")
-
-
-INSTRUCTIONS = f"""
-You are a research manager, managing a team of research agents. Today's date is {datetime.now().strftime("%Y-%m-%d")}.
-Given a research query, your job is to produce an initial outline of the report (section titles and key questions),
-as well as some background context. Each section will be assigned to a different researcher in your team who will then
-carry out research on the section.
-
-You will be given:
-- An initial research query
-
-Your task is to:
-1. Produce 1-2 paragraphs of initial background context (if needed) on the query by running web searches or crawling websites
-2. Produce an outline of the report that includes a list of section titles and the key question to be addressed in each section
-3. Provide a title for the report that will be used as the main heading
-
-Guidelines:
-- Each section should cover a single topic/question that is independent of other sections
-- The key question for each section should include both the NAME and DOMAIN NAME / WEBSITE (if available and applicable) if it is related to a company, product or similar
-- The background_context should not be more than 2 paragraphs
-- The background_context should be very specific to the query and include any information that is relevant for researchers across all sections of the report
-- The background_context should draw only from web search or crawl results rather than prior knowledge (i.e. it should only be included if you have called tools)
-- For example, if the query is about a company, the background context should include some basic information about what the company does
-- DO NOT do more than 2 tool calls
-
-Only output JSON. Follow the JSON schema below. Do not output anything else. I will be parsing this with Pydantic so output valid JSON only:
-{ReportPlan.model_json_schema()}
-"""
-
-def init_planner_agent(config: LLMConfig) -> ResearchAgent:
-    """
-    Initialize the planner agent with the appropriate tools and configuration.
-
-    Args:
-        config: The LLM configuration
-
-    Returns:
-        A configured ResearchAgent for planning research
-    """
-    selected_model = config.reasoning_model
-
-    # Create LangChain tools for web search and website crawling
-
-    # Web search tool wrapper
-    async def web_search_wrapper(query: str) -> str:
-        """Search the web for information on a specific query."""
-        # Import here to avoid circular imports
-        from ...tools import web_search
-        results = await web_search(query)
-        # Format the results into a readable format
-        formatted_results = "\n\n".join([
-            f"Title: {result['title']}\nURL: {result['url']}\nSnippet: {result['snippet']}"
-            for result in results
-        ])
-        return formatted_results
-
-    # Crawl website tool wrapper
-    async def crawl_website_wrapper(url: str) -> str:
-        """Crawl a website and extract its main content."""
-        # Import here to avoid circular imports
-        from ...tools import crawl_website
-        result = await crawl_website(url)
-        return result
-
-    # Create LangChain Tool objects
-    web_search_tool = Tool(
-        name="web_search",
-        description="Search the web for information on a specific query - provide a query with 3-6 words as input",
-        func=web_search_wrapper,
-        coroutine=web_search_wrapper
-    )
-
-    crawl_tool = Tool(
-        name="crawl_website",
-        description="Crawl a website for information relevant to the query - provide a starting URL as input",
-        func=crawl_website_wrapper,
-        coroutine=crawl_website_wrapper
-    )
-
-    # Determine whether to use structured output
-    use_output_parser = not hasattr(selected_model, 'langchain_llm')
-
-    return ResearchAgent(
-        name="PlannerAgent",
-        instructions=INSTRUCTIONS,
-        tools=[web_search_tool, crawl_tool],
-        model=selected_model.langchain_llm if hasattr(selected_model, 'langchain_llm') else selected_model,
-        output_type=ReportPlan if not use_output_parser else None,
-        output_parser=create_type_parser(ReportPlan) if use_output_parser else None
-    )
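
The deleted planner was the pipeline's entry point, turning a bare query into a ReportPlan. A hedged sketch of driving it, using only APIs visible in this diff (ResearchRunner and final_output_as appear in long_writer_agent.py above); constructing the LLMConfig is not shown, since llm_config.py's interface is outside these hunks:

import asyncio

from alita_sdk.community.deep_researcher.agents.baseclass import ResearchRunner
from alita_sdk.community.deep_researcher.agents.planner_agent import (
    ReportPlan,
    init_planner_agent,
)

async def plan_report(config) -> ReportPlan:
    # config: an LLMConfig instance (see the also-deleted llm_config.py)
    agent = init_planner_agent(config)
    # Input shape per the module docstring: "QUERY: <original user query>"
    result = await ResearchRunner.run(agent, "QUERY: What does example.com sell?")
    return result.final_output_as(ReportPlan)

# plan = asyncio.run(plan_report(config))
# plan.report_title, plan.background_context, plan.report_outline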
alita_sdk/community/deep_researcher/agents/proofreader_agent.py
@@ -1,80 +0,0 @@
-"""
-Agent used to produce the final draft of a report given initial drafts of each section.
-
-The Agent takes as input the original user query and a stringified object of type ReportDraft.model_dump_json() (defined below).
-
-====
-QUERY: <original user query>
-
-REPORT DRAFT: <stringified ReportDraft object containing all draft sections>
-====
-
-The Agent then outputs the final markdown for the report as a string.
-"""
-
-from pydantic import BaseModel, Field
-from typing import List
-from .baseclass import ResearchAgent
-from ..llm_config import LLMConfig
-from datetime import datetime
-from langchain_core.tools import BaseTool
-
-
-class ReportDraftSection(BaseModel):
-    """A section of the report that needs to be written"""
-    section_title: str = Field(description="The title of the section")
-    section_content: str = Field(description="The content of the section")
-
-
-class ReportDraft(BaseModel):
-    """Output from the Report Planner Agent"""
-    sections: List[ReportDraftSection] = Field(description="List of sections that are in the report")
-
-
-INSTRUCTIONS = f"""
-You are a research expert who proofreads and edits research reports.
-Today's date is {datetime.now().strftime("%Y-%m-%d")}.
-
-You are given:
-1. The original query topic for the report
-2. A first draft of the report in ReportDraft format containing each section in sequence
-
-Your task is to:
-1. **Combine sections:** Concatenate the sections into a single string
-2. **Add section titles:** Add the section titles to the beginning of each section in markdown format, as well as a main title for the report
-3. **De-duplicate:** Remove duplicate content across sections to avoid repetition
-4. **Remove irrelevant sections:** If any sections or sub-sections are completely irrelevant to the query, remove them
-5. **Refine wording:** Edit the wording of the report to be polished, concise and punchy, but **without eliminating any detail** or large chunks of text
-6. **Add a summary:** Add a short report summary / outline to the beginning of the report to provide an overview of the sections and what is discussed
-7. **Preserve sources:** Preserve all sources / references - move the long list of references to the end of the report
-8. **Update reference numbers:** Continue to include reference numbers in square brackets ([1], [2], [3], etc.) in the main body of the report, but update the numbering to match the new order of references at the end of the report
-9. **Output final report:** Output the final report in markdown format (do not wrap it in a code block)
-
-Guidelines:
-- Do not add any new facts or data to the report
-- Do not remove any content from the report unless it is very clearly wrong, contradictory or irrelevant
-- Remove or reformat any redundant or excessive headings, and ensure that the final nesting of heading levels is correct
-- Ensure that the final report flows well and has a logical structure
-- Include all sources and references that are present in the final report
-"""
-
-def init_proofreader_agent(config: LLMConfig) -> ResearchAgent:
-    """
-    Initialize the proofreader agent.
-
-    Args:
-        config: The LLM configuration to use
-
-    Returns:
-        A ResearchAgent that can proofread and edit research reports
-    """
-    selected_model = config.fast_model
-
-    return ResearchAgent(
-        name="ProofreaderAgent",
-        instructions=INSTRUCTIONS,
-        tools=[],  # No tools needed for this agent
-        model=selected_model.langchain_llm if hasattr(selected_model, 'langchain_llm') else selected_model,
-        output_type=None,  # Direct string output
-        output_parser=None
-    )
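
Per its docstring, the proofreader consumed the original query plus a stringified ReportDraft and returned raw markdown (note output_type=None above). A sketch of assembling that input from the types defined in this file; the section contents are placeholders:

from alita_sdk.community.deep_researcher.agents.proofreader_agent import (
    ReportDraft,
    ReportDraftSection,
)

draft = ReportDraft(sections=[
    ReportDraftSection(section_title="Introduction", section_content="..."),
    ReportDraftSection(section_title="Findings", section_content="..."),
])

# Input shape per the module docstring; the agent replies with final markdown.
user_message = (
    "QUERY: <original user query>\n\n"
    f"REPORT DRAFT: {draft.model_dump_json()}"
)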
alita_sdk/community/deep_researcher/agents/thinking_agent.py
@@ -1,64 +0,0 @@
-"""
-Agent used to reflect on the research process so far and share your latest thoughts.
-
-The Agent takes as input a string in the following format:
-===========================================================
-ORIGINAL QUERY: <original user query>
-
-BACKGROUND CONTEXT: <supporting background context related to the original query>
-
-HISTORY OF ACTIONS, FINDINGS AND THOUGHTS: <a log of prior iterations of the research process>
-===========================================================
-
-The Agent then outputs a string containing its latest thoughts on the research process.
-"""
-from .baseclass import ResearchAgent
-from ..llm_config import LLMConfig
-from datetime import datetime
-from langchain_core.tools import BaseTool
-
-INSTRUCTIONS = f"""
-You are a research expert who is managing a research process in iterations. Today's date is {datetime.now().strftime("%Y-%m-%d")}.
-
-You are given:
-1. The original research query along with some supporting background context
-2. A history of the tasks, actions, findings and thoughts you've made up until this point in the research process (on iteration 1 you will be at the start of the research process, so this will be empty)
-
-Your objective is to reflect on the research process so far and share your latest thoughts.
-
-Specifically, your thoughts should include reflections on questions such as:
-- What have you learned from the last iteration?
-- What new areas would you like to explore next, or existing topics you'd like to go deeper into?
-- Were you able to retrieve the information you were looking for in the last iteration?
-- If not, should we change our approach or move to the next topic?
-- Is there any info that is contradictory or conflicting?
-
-Guidelines:
-- Share your stream of consciousness on the above questions as raw text
-- Keep your response concise and informal
-- Focus most of your thoughts on the most recent iteration and how that influences this next iteration
-- Our aim is to do very deep and thorough research - bear this in mind when reflecting on the research process
-- DO NOT produce a draft of the final report. This is not your job.
-- If this is the first iteration (i.e. no data from prior iterations), provide thoughts on what info we need to gather in the first iteration to get started
-"""
-
-def init_thinking_agent(config: LLMConfig) -> ResearchAgent:
-    """
-    Initialize the thinking agent for reflection and meta-cognition.
-
-    Args:
-        config: The LLM configuration to use
-
-    Returns:
-        A ResearchAgent that can reflect on the research process
-    """
-    selected_model = config.reasoning_model
-
-    return ResearchAgent(
-        name="ThinkingAgent",
-        instructions=INSTRUCTIONS,
-        tools=[],  # No tools needed for this agent
-        model=selected_model.langchain_llm if hasattr(selected_model, 'langchain_llm') else selected_model,
-        output_type=None,  # Direct string output
-        output_parser=None
-    )
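
The thinking agent had a string-in, string-out contract, so the only structure was the prompt shape from its docstring. A sketch of that input (placeholder values):

# Output is a raw reflections string (output_type=None, output_parser=None).
iteration_log = ""  # empty on the first iteration, per the instructions above
user_message = (
    "ORIGINAL QUERY: <original user query>\n\n"
    "BACKGROUND CONTEXT: <supporting background context>\n\n"
    f"HISTORY OF ACTIONS, FINDINGS AND THOUGHTS: {iteration_log}"
)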
alita_sdk/community/deep_researcher/agents/tool_agents/__init__.py
@@ -1,20 +0,0 @@
-from pydantic import BaseModel, Field
-
-class ToolAgentOutput(BaseModel):
-    """Standard output for all tool agents"""
-    output: str
-    sources: list[str] = Field(default_factory=list)
-
-from .search_agent import init_search_agent
-from .crawl_agent import init_crawl_agent
-from ...llm_config import LLMConfig
-from ..baseclass import ResearchAgent
-
-def init_tool_agents(config: LLMConfig) -> dict[str, ResearchAgent]:
-    search_agent = init_search_agent(config)
-    crawl_agent = init_crawl_agent(config)
-
-    return {
-        "WebSearchAgent": search_agent,
-        "SiteCrawlerAgent": crawl_agent,
-    }
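
Callers looked agents up by the registry keys defined above; a sketch against the 0.3.176 layout (LLMConfig construction not shown):

from alita_sdk.community.deep_researcher.agents.tool_agents import init_tool_agents

def get_tool_agents(config):  # config: an LLMConfig
    agents = init_tool_agents(config)
    search_agent = agents["WebSearchAgent"]   # from the also-deleted search_agent.py
    crawl_agent = agents["SiteCrawlerAgent"]  # defined in crawl_agent.py below
    return search_agent, crawl_agent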
alita_sdk/community/deep_researcher/agents/tool_agents/crawl_agent.py
@@ -1,87 +0,0 @@
-"""
-Agent used to crawl a website and return the results.
-
-The CrawlAgent takes as input a string in the format of AgentTask.model_dump_json(), or can take a simple starting url string as input
-
-The Agent then:
-1. Uses the crawl_website tool to crawl the website
-2. Writes a 3+ paragraph summary of the crawled contents
-3. Includes citations/URLs in brackets next to information sources
-4. Returns the formatted summary as a string
-"""
-
-from langchain_core.tools import Tool
-from typing import Dict, Any
-
-from . import ToolAgentOutput
-from ...llm_config import LLMConfig
-from ..baseclass import ResearchAgent
-from ..utils.parse_output import create_type_parser
-
-INSTRUCTIONS = f"""
-You are a web crawling agent that crawls the contents of a website and answers a query based on the crawled contents. Follow these steps exactly:
-
-* From the provided information, use the 'entity_website' as the starting_url for the web crawler
-* Crawl the website using the crawl_website tool
-* After using the crawl_website tool, write a 3+ paragraph summary that captures the main points from the crawled contents
-* In your summary, try to comprehensively answer/address the 'gaps' and 'query' provided (if available)
-* If the crawled contents are not relevant to the 'gaps' or 'query', simply write "No relevant results found"
-* Use headings and bullets to organize the summary if needed
-* Include citations/URLs in brackets next to all associated information in your summary
-* Only run the crawler once
-
-Only output JSON. Follow the JSON schema below. Do not output anything else. I will be parsing this with Pydantic so output valid JSON only:
-{ToolAgentOutput.model_json_schema()}
-"""
-
-def init_crawl_agent(config: LLMConfig) -> ResearchAgent:
-    """
-    Initialize a crawl agent using LangChain tools.
-
-    Args:
-        config: The LLM configuration to use
-
-    Returns:
-        A ResearchAgent that can crawl websites
-    """
-    # Create a LangChain wrapper around the crawl_website tool
-    async def crawl_website_wrapper(starting_url: str, max_links: int = 5) -> str:
-        """
-        Crawl a website and extract its main content.
-
-        Args:
-            starting_url: The URL to start crawling from
-            max_links: Maximum number of links to follow from the starting page
-
-        Returns:
-            The extracted content from the website
-        """
-        # Import inside function to avoid circular imports
-        from ...tools import crawl_website
-
-        # Use the original crawl_website function
-        result = await crawl_website(starting_url, max_links)
-        return result
-
-    # Create a LangChain Tool
-    crawl_tool = Tool(
-        name="crawl_website",
-        description="Crawls a website and extracts its main content starting from the provided URL",
-        func=crawl_website_wrapper,
-        coroutine=crawl_website_wrapper,
-    )
-
-    # Use our adapter to initialize the agent with the LangChain tool
-    selected_model = config.fast_model
-
-    # Determine whether to use structured output
-    use_output_parser = not hasattr(selected_model, 'langchain_llm')
-
-    return ResearchAgent(
-        name="SiteCrawlerAgent",
-        instructions=INSTRUCTIONS,
-        tools=[crawl_tool],
-        model=selected_model.langchain_llm if hasattr(selected_model, 'langchain_llm') else selected_model,
-        output_type=ToolAgentOutput if not use_output_parser else None,
-        output_parser=create_type_parser(ToolAgentOutput) if use_output_parser else None
-    )
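
Per its docstring, the crawl agent accepted either an AgentTask JSON dump (AgentTask itself is not shown in this diff) or a bare starting URL. A hedged sketch of the bare-URL path, reusing the runner API seen in long_writer_agent.py:

import asyncio

from alita_sdk.community.deep_researcher.agents.baseclass import ResearchRunner
from alita_sdk.community.deep_researcher.agents.tool_agents import ToolAgentOutput
from alita_sdk.community.deep_researcher.agents.tool_agents.crawl_agent import (
    init_crawl_agent,
)

async def summarize_site(config, url: str) -> ToolAgentOutput:
    agent = init_crawl_agent(config)               # config: an LLMConfig
    result = await ResearchRunner.run(agent, url)  # bare URL input, per docstring
    return result.final_output_as(ToolAgentOutput)

# out = asyncio.run(summarize_site(config, "https://example.com"))
# out.output -> the 3+ paragraph summary; out.sources -> cited URLs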