alita-sdk 0.3.176__py3-none-any.whl → 0.3.178__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. alita_sdk/community/__init__.py +7 -17
  2. alita_sdk/tools/__init__.py +22 -18
  3. alita_sdk/tools/carrier/api_wrapper.py +6 -0
  4. alita_sdk/tools/carrier/backend_tests_tool.py +308 -7
  5. alita_sdk/tools/carrier/carrier_sdk.py +18 -0
  6. alita_sdk/tools/carrier/tools.py +2 -1
  7. {alita_sdk-0.3.176.dist-info → alita_sdk-0.3.178.dist-info}/METADATA +1 -2
  8. {alita_sdk-0.3.176.dist-info → alita_sdk-0.3.178.dist-info}/RECORD +11 -42
  9. alita_sdk/community/browseruse/__init__.py +0 -73
  10. alita_sdk/community/browseruse/api_wrapper.py +0 -288
  11. alita_sdk/community/deep_researcher/__init__.py +0 -70
  12. alita_sdk/community/deep_researcher/agents/__init__.py +0 -1
  13. alita_sdk/community/deep_researcher/agents/baseclass.py +0 -182
  14. alita_sdk/community/deep_researcher/agents/knowledge_gap_agent.py +0 -74
  15. alita_sdk/community/deep_researcher/agents/long_writer_agent.py +0 -251
  16. alita_sdk/community/deep_researcher/agents/planner_agent.py +0 -124
  17. alita_sdk/community/deep_researcher/agents/proofreader_agent.py +0 -80
  18. alita_sdk/community/deep_researcher/agents/thinking_agent.py +0 -64
  19. alita_sdk/community/deep_researcher/agents/tool_agents/__init__.py +0 -20
  20. alita_sdk/community/deep_researcher/agents/tool_agents/crawl_agent.py +0 -87
  21. alita_sdk/community/deep_researcher/agents/tool_agents/search_agent.py +0 -96
  22. alita_sdk/community/deep_researcher/agents/tool_selector_agent.py +0 -83
  23. alita_sdk/community/deep_researcher/agents/utils/__init__.py +0 -0
  24. alita_sdk/community/deep_researcher/agents/utils/parse_output.py +0 -148
  25. alita_sdk/community/deep_researcher/agents/writer_agent.py +0 -63
  26. alita_sdk/community/deep_researcher/api_wrapper.py +0 -116
  27. alita_sdk/community/deep_researcher/deep_research.py +0 -185
  28. alita_sdk/community/deep_researcher/examples/deep_example.py +0 -30
  29. alita_sdk/community/deep_researcher/examples/iterative_example.py +0 -34
  30. alita_sdk/community/deep_researcher/examples/report_plan_example.py +0 -27
  31. alita_sdk/community/deep_researcher/iterative_research.py +0 -419
  32. alita_sdk/community/deep_researcher/llm_config.py +0 -87
  33. alita_sdk/community/deep_researcher/main.py +0 -67
  34. alita_sdk/community/deep_researcher/tools/__init__.py +0 -2
  35. alita_sdk/community/deep_researcher/tools/crawl_website.py +0 -109
  36. alita_sdk/community/deep_researcher/tools/web_search.py +0 -294
  37. alita_sdk/community/deep_researcher/utils/__init__.py +0 -0
  38. alita_sdk/community/deep_researcher/utils/md_to_pdf.py +0 -8
  39. alita_sdk/community/deep_researcher/utils/os.py +0 -21
  40. {alita_sdk-0.3.176.dist-info → alita_sdk-0.3.178.dist-info}/WHEEL +0 -0
  41. {alita_sdk-0.3.176.dist-info → alita_sdk-0.3.178.dist-info}/licenses/LICENSE +0 -0
  42. {alita_sdk-0.3.176.dist-info → alita_sdk-0.3.178.dist-info}/top_level.txt +0 -0
@@ -1,80 +0,0 @@
1
- """
2
- Agent used to produce the final draft of a report given initial drafts of each section.
3
-
4
- The Agent takes as input the original user query and a stringified object of type ReportDraft.model_dump_json() (defined below).
5
-
6
- ====
7
- QUERY: <original user query>
8
-
9
- REPORT DRAFT: <stringified ReportDraft object containing all draft sections>
10
- ====
11
-
12
- The Agent then outputs the final markdown for the report as a string.
13
- """
14
-
15
- from pydantic import BaseModel, Field
16
- from typing import List
17
- from .baseclass import ResearchAgent
18
- from ..llm_config import LLMConfig
19
- from datetime import datetime
20
- from langchain_core.tools import BaseTool
21
-
22
-
23
# One titled section of a report draft: a title plus the section's body text.
# NOTE: the docstring and Field descriptions below are left exactly as written;
# pydantic exposes them through the model's JSON schema, so they are runtime
# text rather than ordinary comments.
class ReportDraftSection(BaseModel):
    """A section of the report that needs to be written"""
    section_title: str = Field(description="The title of the section")
    section_content: str = Field(description="The content of the section")
27
-
28
-
29
# Container for the ordered list of draft sections that make up a report.
# NOTE(review): the docstring mentions the "Report Planner Agent", while the
# module docstring describes ReportDraft as the proofreader's input; the
# docstring is schema-visible text, so it is left untouched here - confirm
# which wording is intended.
class ReportDraft(BaseModel):
    """Output from the Report Planner Agent"""
    sections: List[ReportDraftSection] = Field(description="List of sections that are in the report")
32
-
33
-
34
# Instructions handed to the ProofreaderAgent (see init_proofreader_agent).
# This is an f-string evaluated when the assignment runs, so the date embedded
# in the prompt is fixed at that moment rather than refreshed per agent call.
INSTRUCTIONS = f"""
You are a research expert who proofreads and edits research reports.
Today's date is {datetime.now().strftime("%Y-%m-%d")}.

You are given:
1. The original query topic for the report
2. A first draft of the report in ReportDraft format containing each section in sequence

Your task is to:
1. **Combine sections:** Concatenate the sections into a single string
2. **Add section titles:** Add the section titles to the beginning of each section in markdown format, as well as a main title for the report
3. **De-duplicate:** Remove duplicate content across sections to avoid repetition
4. **Remove irrelevant sections:** If any sections or sub-sections are completely irrelevant to the query, remove them
5. **Refine wording:** Edit the wording of the report to be polished, concise and punchy, but **without eliminating any detail** or large chunks of text
6. **Add a summary:** Add a short report summary / outline to the beginning of the report to provide an overview of the sections and what is discussed
7. **Preserve sources:** Preserve all sources / references - move the long list of references to the end of the report
8. **Update reference numbers:** Continue to include reference numbers in square brackets ([1], [2], [3], etc.) in the main body of the report, but update the numbering to match the new order of references at the end of the report
9. **Output final report:** Output the final report in markdown format (do not wrap it in a code block)

Guidelines:
- Do not add any new facts or data to the report
- Do not remove any content from the report unless it is very clearly wrong, contradictory or irrelevant
- Remove or reformat any redundant or excessive headings, and ensure that the final nesting of heading levels is correct
- Ensure that the final report flows well and has a logical structure
- Include all sources and references that are present in the final report
"""
60
-
61
def init_proofreader_agent(config: LLMConfig) -> ResearchAgent:
    """
    Build the agent that polishes a report draft into its final form.

    Args:
        config: LLM configuration; its ``fast_model`` drives this agent.

    Returns:
        A tool-less ResearchAgent named "ProofreaderAgent" that returns a plain string.
    """
    llm = config.fast_model
    # Prefer the wrapped LangChain model when the configured model exposes one.
    if hasattr(llm, 'langchain_llm'):
        llm = llm.langchain_llm

    return ResearchAgent(
        name="ProofreaderAgent",
        instructions=INSTRUCTIONS,
        tools=[],  # this agent works on text only
        model=llm,
        output_type=None,  # raw string result, no structured output
        output_parser=None,
    )
@@ -1,64 +0,0 @@
1
- """
2
- Agent used to reflect on the research process so far and share your latest thoughts.
3
-
4
- The Agent takes as input a string in the following format:
5
- ===========================================================
6
- ORIGINAL QUERY: <original user query>
7
-
8
- BACKGROUND CONTEXT: <supporting background context related to the original query>
9
-
10
- HISTORY OF ACTIONS, FINDINGS AND THOUGHTS: <a log of prior iterations of the research process>
11
- ===========================================================
12
-
13
- The Agent then outputs a string containing its latest thoughts on the research process.
14
- """
15
- from .baseclass import ResearchAgent
16
- from ..llm_config import LLMConfig
17
- from datetime import datetime
18
- from langchain_core.tools import BaseTool
19
-
20
# Instructions handed to the ThinkingAgent (see init_thinking_agent).
# This is an f-string evaluated when the assignment runs, so the date embedded
# in the prompt is fixed at that moment rather than refreshed per agent call.
INSTRUCTIONS = f"""
You are a research expert who is managing a research process in iterations. Today's date is {datetime.now().strftime("%Y-%m-%d")}.

You are given:
1. The original research query along with some supporting background context
2. A history of the tasks, actions, findings and thoughts you've made up until this point in the research process (on iteration 1 you will be at the start of the research process, so this will be empty)

Your objective is to reflect on the research process so far and share your latest thoughts.

Specifically, your thoughts should include reflections on questions such as:
- What have you learned from the last iteration?
- What new areas would you like to explore next, or existing topics you'd like to go deeper into?
- Were you able to retrieve the information you were looking for in the last iteration?
- If not, should we change our approach or move to the next topic?
- Is there any info that is contradictory or conflicting?

Guidelines:
- Share you stream of consciousness on the above questions as raw text
- Keep your response concise and informal
- Focus most of your thoughts on the most recent iteration and how that influences this next iteration
- Our aim is to do very deep and thorough research - bear this in mind when reflecting on the research process
- DO NOT produce a draft of the final report. This is not your job.
- If this is the first iteration (i.e. no data from prior iterations), provide thoughts on what info we need to gather in the first iteration to get started
"""
44
-
45
def init_thinking_agent(config: LLMConfig) -> ResearchAgent:
    """
    Build the agent that reflects on the research progress between iterations.

    Args:
        config: LLM configuration; its ``reasoning_model`` drives this agent.

    Returns:
        A tool-less ResearchAgent named "ThinkingAgent" that returns a plain string.
    """
    llm = config.reasoning_model
    # Prefer the wrapped LangChain model when the configured model exposes one.
    if hasattr(llm, 'langchain_llm'):
        llm = llm.langchain_llm

    return ResearchAgent(
        name="ThinkingAgent",
        instructions=INSTRUCTIONS,
        tools=[],  # reflection needs no tools
        model=llm,
        output_type=None,  # raw string result, no structured output
        output_parser=None,
    )
@@ -1,20 +0,0 @@
1
- from pydantic import BaseModel, Field
2
-
3
# Shared result shape for the tool agents: the agent's text answer plus the
# list of source strings it drew on (empty list when none are given).
# Defined before the sibling-module imports that follow it in this file; those
# modules import ToolAgentOutput back from this package.
# NOTE: the docstring is left untouched because the sibling agents interpolate
# this model's JSON schema into their prompts.
class ToolAgentOutput(BaseModel):
    """Standard output for all tool agents"""
    output: str
    sources: list[str] = Field(default_factory=list)
7
-
8
- from .search_agent import init_search_agent
9
- from .crawl_agent import init_crawl_agent
10
- from ...llm_config import LLMConfig
11
- from ..baseclass import ResearchAgent
12
-
13
def init_tool_agents(config: LLMConfig) -> dict[str, ResearchAgent]:
    """
    Create every tool agent and index them by agent name.

    Args:
        config: LLM configuration forwarded to each agent factory.

    Returns:
        Mapping with the keys "WebSearchAgent" and "SiteCrawlerAgent".
    """
    # The search agent is built first, then the crawl agent.
    return {
        "WebSearchAgent": init_search_agent(config),
        "SiteCrawlerAgent": init_crawl_agent(config),
    }
@@ -1,87 +0,0 @@
1
- """
2
- Agent used to crawl a website and return the results.
3
-
4
- The CrawlAgent takes as input a string in the format of AgentTask.model_dump_json(), or can take a simple starting url string as input
5
-
6
- The Agent then:
7
- 1. Uses the crawl_website tool to crawl the website
8
- 2. Writes a 3+ paragraph summary of the crawled contents
9
- 3. Includes citations/URLs in brackets next to information sources
10
- 4. Returns the formatted summary as a string
11
- """
12
-
13
- from langchain_core.tools import Tool
14
- from typing import Dict, Any
15
-
16
- from . import ToolAgentOutput
17
- from ...llm_config import LLMConfig
18
- from ..baseclass import ResearchAgent
19
- from ..utils.parse_output import create_type_parser
20
-
21
# Instructions handed to the SiteCrawlerAgent (see init_crawl_agent).
# The f-string is evaluated when the assignment runs and embeds the string form
# of ToolAgentOutput.model_json_schema() at the end of the prompt.
# NOTE(review): if model_json_schema() returns a dict, the prompt will show its
# Python repr rather than JSON text - confirm that is acceptable.
INSTRUCTIONS = f"""
You are a web crawling agent that crawls the contents of a website and answers a query based on the crawled contents. Follow these steps exactly:

* From the provided information, use the 'entity_website' as the starting_url for the web crawler
* Crawl the website using the crawl_website tool
* After using the crawl_website tool, write a 3+ paragraph summary that captures the main points from the crawled contents
* In your summary, try to comprehensively answer/address the 'gaps' and 'query' provided (if available)
* If the crawled contents are not relevant to the 'gaps' or 'query', simply write "No relevant results found"
* Use headings and bullets to organize the summary if needed
* Include citations/URLs in brackets next to all associated information in your summary
* Only run the crawler once

Only output JSON. Follow the JSON schema below. Do not output anything else. I will be parsing this with Pydantic so output valid JSON only:
{ToolAgentOutput.model_json_schema()}
"""
36
-
37
def init_crawl_agent(config: LLMConfig) -> ResearchAgent:
    """
    Build the site-crawling agent around a LangChain ``Tool``.

    Args:
        config: LLM configuration; its ``fast_model`` drives this agent.

    Returns:
        A ResearchAgent named "SiteCrawlerAgent" equipped with the crawl_website tool.
    """
    async def crawl_website_wrapper(starting_url: str, max_links: int = 5) -> str:
        """
        Delegate to the project's ``crawl_website`` coroutine.

        Args:
            starting_url: URL where the crawl begins.
            max_links: Upper bound on links followed from the starting page.

        Returns:
            Whatever ``crawl_website`` produces for the crawled site.
        """
        # Deferred import: importing at module level would create a cycle.
        from ...tools import crawl_website

        return await crawl_website(starting_url, max_links)

    # The same coroutine function is registered for both the sync and async slots.
    # NOTE(review): a synchronous invocation through `func` would receive an
    # un-awaited coroutine unless the agent handles that - confirm with ResearchAgent.
    crawl_tool = Tool(
        name="crawl_website",
        description="Crawls a website and extracts its main content starting from the provided URL",
        func=crawl_website_wrapper,
        coroutine=crawl_website_wrapper,
    )

    fast_model = config.fast_model

    if hasattr(fast_model, 'langchain_llm'):
        # Wrapped model: hand over the LangChain LLM and request structured output.
        llm = fast_model.langchain_llm
        output_type = ToolAgentOutput
        output_parser = None
    else:
        # Bare model: fall back to parsing the text reply into ToolAgentOutput.
        llm = fast_model
        output_type = None
        output_parser = create_type_parser(ToolAgentOutput)

    return ResearchAgent(
        name="SiteCrawlerAgent",
        instructions=INSTRUCTIONS,
        tools=[crawl_tool],
        model=llm,
        output_type=output_type,
        output_parser=output_parser,
    )
@@ -1,96 +0,0 @@
1
- """
2
- Agent used to perform web searches and summarize the results.
3
-
4
- The SearchAgent takes as input a string in the format of AgentTask.model_dump_json(), or can take a simple query string as input
5
-
6
- The Agent then:
7
- 1. Uses the web_search tool to retrieve search results
8
- 2. Analyzes the retrieved information
9
- 3. Writes a 3+ paragraph summary of the search results
10
- 4. Includes citations/URLs in brackets next to information sources
11
- 5. Returns the formatted summary as a string
12
-
13
- The agent can use either OpenAI's built-in web search capability or a custom
14
- web search implementation based on environment configuration.
15
- """
16
-
17
- from langchain_core.tools import Tool
18
- from typing import Dict, Any, List
19
-
20
- from . import ToolAgentOutput
21
- from ...llm_config import LLMConfig
22
- from ..baseclass import ResearchAgent
23
- from ..utils.parse_output import create_type_parser
24
-
25
# Instructions handed to the WebSearchAgent (see init_search_agent).
# The f-string is evaluated when the assignment runs and embeds the string form
# of ToolAgentOutput.model_json_schema() at the end of the prompt.
# NOTE(review): if model_json_schema() returns a dict, the prompt will show its
# Python repr rather than JSON text - confirm that is acceptable.
INSTRUCTIONS = f"""You are a research assistant that specializes in retrieving and summarizing information from the web.

OBJECTIVE:
Given an AgentTask, follow these steps:
- Convert the 'query' into an optimized SERP search term for Google, limited to 3-5 words
- If an 'entity_website' is provided, make sure to include the domain name in your optimized Google search term
- Enter the optimized search term into the web_search tool
- After using the web_search tool, write a 3+ paragraph summary that captures the main points from the search results

GUIDELINES:
- In your summary, try to comprehensively answer/address the 'gap' provided (which is the objective of the search)
- The summary should always quote detailed facts, figures and numbers where these are available
- If the search results are not relevant to the search term or do not address the 'gap', simply write "No relevant results found"
- Use headings and bullets to organize the summary if needed
- Include citations/URLs in brackets next to all associated information in your summary
- Do not make additional searches

Only output JSON. Follow the JSON schema below. Do not output anything else. I will be parsing this with Pydantic so output valid JSON only:
{ToolAgentOutput.model_json_schema()}
"""
45
-
46
def init_search_agent(config: LLMConfig) -> ResearchAgent:
    """
    Build the web-search agent around a LangChain ``Tool``.

    Args:
        config: LLM configuration; its ``fast_model`` drives this agent.

    Returns:
        A ResearchAgent named "WebSearchAgent" equipped with the web_search tool.
    """
    async def web_search_wrapper(query: str, num_results: int = 8) -> List[Dict[str, Any]]:
        """
        Delegate to the project's ``web_search`` coroutine.

        Args:
            query: Search term to look up.
            num_results: How many results to request.

        Returns:
            Whatever ``web_search`` produces for the query.
        """
        # Deferred import: importing at module level would create a cycle.
        from ...tools import web_search

        return await web_search(query, num_results)

    # The same coroutine function is registered for both the sync and async slots.
    # NOTE(review): a synchronous invocation through `func` would receive an
    # un-awaited coroutine unless the agent handles that - confirm with ResearchAgent.
    web_search_tool = Tool(
        name="web_search",
        description="Search the web for information on a specific query. Returns a list of search results.",
        func=web_search_wrapper,
        coroutine=web_search_wrapper,
    )

    fast_model = config.fast_model

    if hasattr(fast_model, 'langchain_llm'):
        # Wrapped model: hand over the LangChain LLM and request structured output.
        llm = fast_model.langchain_llm
        output_type = ToolAgentOutput
        output_parser = None
    else:
        # Bare model: fall back to parsing the text reply into ToolAgentOutput.
        llm = fast_model
        output_type = None
        output_parser = create_type_parser(ToolAgentOutput)

    return ResearchAgent(
        name="WebSearchAgent",
        instructions=INSTRUCTIONS,
        tools=[web_search_tool],
        model=llm,
        output_type=output_type,
        output_parser=output_parser,
    )
@@ -1,83 +0,0 @@
1
- """
2
- Agent used to determine which specialized agents should be used to address knowledge gaps.
3
-
4
- The Agent takes as input a string in the following format:
5
- ===========================================================
6
- ORIGINAL QUERY: <original user query>
7
-
8
- KNOWLEDGE GAP TO ADDRESS: <knowledge gap that needs to be addressed>
9
-
10
- BACKGROUND CONTEXT: <supporting background context related to the original query>
11
-
12
- HISTORY OF ACTIONS, FINDINGS AND THOUGHTS: <a log of prior iterations of the research process>
13
- ===========================================================
14
-
15
- The Agent then:
16
- 1. Analyzes the knowledge gap to determine which agents are best suited to address it
17
- 2. Returns an AgentSelectionPlan object containing a list of AgentTask objects
18
-
19
- The available agents are:
20
- - WebSearchAgent: General web search for broad topics
21
- - SiteCrawlerAgent: Crawl the pages of a specific website to retrieve information about it
22
- """
23
-
24
- from pydantic import BaseModel, Field
25
- from typing import List, Optional
26
- from ..llm_config import LLMConfig, model_supports_structured_output
27
- from datetime import datetime
28
- from .baseclass import ResearchAgent
29
- from .utils.parse_output import create_type_parser
30
-
31
-
32
# A single unit of work for one tool agent: which agent to run, the query to
# give it, and optionally the gap being addressed and the entity's website.
# `gap` and `entity_website` default to None; `agent` and `query` have no default.
# NOTE: the docstring and Field descriptions are left untouched; this model is
# nested in AgentSelectionPlan, whose JSON schema is interpolated into the
# tool-selector prompt in this module.
class AgentTask(BaseModel):
    """A task for a specific agent to address knowledge gaps"""
    gap: Optional[str] = Field(description="The knowledge gap being addressed", default=None)
    agent: str = Field(description="The name of the agent to use")
    query: str = Field(description="The specific query for the agent")
    entity_website: Optional[str] = Field(description="The website of the entity being researched, if known", default=None)
38
-
39
-
40
# Structured result of the tool selector: the list of AgentTask items to run.
# NOTE: the docstring and Field description are left untouched; this model's
# JSON schema is interpolated into the INSTRUCTIONS prompt in this module.
class AgentSelectionPlan(BaseModel):
    """Plan for which agents to use for knowledge gaps"""
    tasks: List[AgentTask] = Field(description="List of agent tasks to address knowledge gaps")
43
-
44
-
45
# Instructions handed to the ToolSelectorAgent (see init_tool_selector_agent).
# The f-string is evaluated when the assignment runs, so the embedded date is
# fixed at that moment; it also embeds the string form of
# AgentSelectionPlan.model_json_schema() at the end of the prompt.
# NOTE(review): if model_json_schema() returns a dict, the prompt will show its
# Python repr rather than JSON text - confirm that is acceptable.
INSTRUCTIONS = f"""
You are an Tool Selector responsible for determining which specialized agents should address a knowledge gap in a research project.
Today's date is {datetime.now().strftime("%Y-%m-%d")}.

You will be given:
1. The original user query
2. A knowledge gap identified in the research
3. A full history of the tasks, actions, findings and thoughts you've made up until this point in the research process

Your task is to decide:
1. Which specialized agents are best suited to address the gap
2. What specific queries should be given to the agents (keep this short - 3-6 words)

Available specialized agents:
- WebSearchAgent: General web search for broad topics (can be called multiple times with different queries)
- SiteCrawlerAgent: Crawl the pages of a specific website to retrieve information about it - use this if you want to find out something about a particular company, entity or product

Guidelines:
- Aim to call at most 3 agents at a time in your final output
- You can list the WebSearchAgent multiple times with different queries if needed to cover the full scope of the knowledge gap
- Be specific and concise (3-6 words) with the agent queries - they should target exactly what information is needed
- If you know the website or domain name of an entity being researched, always include it in the query
- If a gap doesn't clearly match any agent's capability, default to the WebSearchAgent
- Use the history of actions / tool calls as a guide - try not to repeat yourself if an approach didn't work previously

Only output JSON. Follow the JSON schema below. Do not output anything else. I will be parsing this with Pydantic so output valid JSON only:
{AgentSelectionPlan.model_json_schema()}
"""
73
-
74
def init_tool_selector_agent(config: LLMConfig) -> ResearchAgent:
    """
    Build the agent that decides which tool agents should tackle a knowledge gap.

    Args:
        config: LLM configuration; its ``reasoning_model`` drives this agent.

    Returns:
        A ResearchAgent named "ToolSelectorAgent" that yields an AgentSelectionPlan,
        via structured output when the model supports it, otherwise via a text parser.
    """
    reasoning_model = config.reasoning_model

    if model_supports_structured_output(reasoning_model):
        output_type = AgentSelectionPlan
        output_parser = None
    else:
        output_type = None
        output_parser = create_type_parser(AgentSelectionPlan)

    # The model is passed through as configured (no unwrapping here).
    return ResearchAgent(
        name="ToolSelectorAgent",
        instructions=INSTRUCTIONS,
        model=reasoning_model,
        output_type=output_type,
        output_parser=output_parser,
    )
@@ -1,148 +0,0 @@
1
- import json
2
- import re
3
- from typing import Type, Any, Callable, TypeVar
4
-
5
- from pydantic import BaseModel
6
-
7
- T = TypeVar('T', bound=BaseModel)
8
-
9
-
10
class OutputParserError(Exception):
    """
    Signals that model output could not be parsed.

    Carries the error ``message`` and, optionally, the ``output`` text that
    failed to parse so it can be shown alongside the message.
    """

    def __init__(self, message, output=None):
        super().__init__(message)
        self.message = message
        self.output = output

    def __str__(self):
        # Only a non-empty output is appended to the message.
        if not self.output:
            return self.message
        return f"{self.message}\nProblematic output: {self.output}"
23
-
24
-
25
def find_json_in_string(string: str) -> str:
    """
    Extract the first balanced ``{...}`` group from a string.

    Used to pull a JSON object out of surrounding text. The result is not
    validated as JSON.

    Unmatched closing braces that appear before any opening brace are ignored,
    and braces inside double-quoted strings within the group are not counted,
    so a value such as ``{"a": "}"}`` is returned whole.

    Example:
        string = "bla bla bla {this is {some} text{{}and it's sneaky}} because {it's} confusing"
        output = "{this is {some} text{{}and it's sneaky}}"

    Args:
        string: Text that may contain a brace-delimited object.

    Returns:
        The substring from the first opening brace to its matching closing
        brace, or an empty string when no complete group exists.
    """
    depth = 0
    start_index = None
    in_string = False
    escaped = False

    for i, c in enumerate(string):
        if in_string:
            # Inside a double-quoted string: only look for its terminator.
            if escaped:
                escaped = False
            elif c == "\\":
                escaped = True
            elif c == '"':
                in_string = False
            continue

        if c == '"':
            # Quotes only matter once we are inside a brace group; prose before
            # the object may contain arbitrary quotation marks.
            if depth > 0:
                in_string = True
        elif c == "{":
            if depth == 0:
                start_index = i
            depth += 1
        elif c == "}" and depth > 0:
            # A closer with depth == 0 is a stray brace and is skipped, so it
            # cannot push the counter negative.
            depth -= 1
            if depth == 0:
                return string[start_index:i + 1]

    # No complete set of braces was found.
    return ""
50
-
51
-
52
def parse_json_output(output: str) -> Any:
    """
    Parse a string as JSON, tolerating code fences and surrounding text.

    Attempts, in order:
      1. the whole string as JSON;
      2. the contents of the first triple-backtick code block, when one exists
         (a leading ``json``/``JSON`` language tag is dropped);
      3. the first balanced ``{...}`` group found by ``find_json_in_string``.

    Args:
        output: Text expected to contain JSON.

    Returns:
        The decoded JSON value.

    Raises:
        OutputParserError: If none of the attempts yields valid JSON.
    """
    # 1. The string may already be plain JSON.
    try:
        return json.loads(output)
    except json.JSONDecodeError:
        pass

    # 2. Look inside a code block, but only when a fence is actually present;
    #    indexing the split result unconditionally would raise IndexError for
    #    unfenced text and skip the fallback below.
    fence_parts = output.split("```")
    if len(fence_parts) > 1:
        fenced = fence_parts[1]
        if fenced.startswith(("json", "JSON")):
            fenced = fenced[4:].strip()
        try:
            return json.loads(fenced)
        except json.JSONDecodeError:
            pass

    # 3. Last attempt: locate a brace-delimited object anywhere in the text.
    candidate = find_json_in_string(output)
    if candidate:
        try:
            return json.loads(candidate)
        except json.JSONDecodeError as err:
            raise OutputParserError("Failed to parse output as JSON", output) from err

    # Nothing worked.
    raise OutputParserError("Failed to parse output as JSON", output)
81
-
82
-
83
def create_type_parser(model_class: Type[T]) -> Callable[[str], T]:
    """
    Build a lenient text-to-model parser for ``model_class``.

    The returned function tries progressively looser strategies so that the
    varied formats an LLM may reply with can still be turned into a model.

    Args:
        model_class: The Pydantic model class to produce.

    Returns:
        A callable taking the raw text and returning a ``model_class`` instance.
    """
    def parser(text: str) -> T:
        """
        Convert ``text`` into a ``model_class`` instance.

        Args:
            text: Raw text to interpret.

        Returns:
            An instance of the model class.

        Raises:
            ValueError: If every strategy, including the empty-instance
                fallback, fails.
        """
        # Strategy 1: the text is itself valid JSON for the model.
        try:
            return model_class.model_validate_json(text)
        except Exception:
            pass

        # Strategy 2: JSON wrapped in a markdown code fence.
        fenced = re.search(r"```(?:json)?\n(.*?)\n```", text, re.DOTALL)
        if fenced is not None:
            try:
                return model_class.model_validate_json(fenced.group(1).strip())
            except Exception:
                pass

        # Strategy 3: widest brace-to-brace span anywhere in the text.
        try:
            braced = re.search(r"(\{.*\})", text, re.DOTALL)
            if braced is not None:
                return model_class.model_validate(json.loads(braced.group(1)))
        except Exception:
            pass

        # Strategy 4: treat the whole text as the model's 'output' field.
        try:
            if 'output' in model_class.model_fields:
                return model_class(output=text)
        except Exception:
            pass

        # Strategy 5: default-constructed instance, with 'output' set if present.
        try:
            fallback = model_class()
            if hasattr(fallback, 'output'):
                setattr(fallback, 'output', text)
            return fallback
        except Exception as exc:
            raise ValueError(f"Could not parse output to {model_class.__name__}: {exc}")

    return parser
@@ -1,63 +0,0 @@
1
- """
2
- Agent used to synthesize a final report based on provided findings.
3
-
4
- The WriterAgent takes as input a string in the following format:
5
- ===========================================================
6
- QUERY: <original user query>
7
-
8
- FINDINGS: <findings from the iterative research process>
9
- ===========================================================
10
-
11
- The Agent then:
12
- 1. Generates a comprehensive markdown report based on all available information
13
- 2. Includes proper citations for sources in the format [1], [2], etc.
14
- 3. Returns a string containing the markdown formatted report
15
- """
16
- from .baseclass import ResearchAgent
17
- from ..llm_config import LLMConfig
18
- from datetime import datetime
19
- from langchain_core.tools import BaseTool
20
-
21
# Instructions handed to the WriterAgent (see init_writer_agent).
# This is an f-string evaluated when the assignment runs, so the date embedded
# in the prompt is fixed at that moment rather than refreshed per agent call.
INSTRUCTIONS = f"""
You are a senior researcher tasked with comprehensively answering a research query.
Today's date is {datetime.now().strftime('%Y-%m-%d')}.
You will be provided with the original query along with research findings put together by a research assistant.
Your objective is to generate the final response in markdown format.
The response should be as lengthy and detailed as possible with the information provided, focusing on answering the original query.
In your final output, include references to the source URLs for all information and data gathered.
This should be formatted in the form of a numbered square bracket next to the relevant information,
followed by a list of URLs at the end of the response, per the example below.

EXAMPLE REFERENCE FORMAT:
The company has XYZ products [1]. It operates in the software services market which is expected to grow at 10% per year [2].

References:
[1] https://example.com/first-source-url
[2] https://example.com/second-source-url

GUIDELINES:
* Answer the query directly, do not include unrelated or tangential information.
* Adhere to any instructions on the length of your final response if provided in the user prompt.
* If any additional guidelines are provided in the user prompt, follow them exactly and give them precedence over these system instructions.
"""
43
-
44
def init_writer_agent(config: LLMConfig) -> ResearchAgent:
    """
    Build the agent that writes the final report from the research findings.

    Args:
        config: LLM configuration; its ``main_model`` drives this agent.

    Returns:
        A tool-less ResearchAgent named "WriterAgent" that returns a plain string.
    """
    llm = config.main_model
    # Prefer the wrapped LangChain model when the configured model exposes one.
    if hasattr(llm, 'langchain_llm'):
        llm = llm.langchain_llm

    return ResearchAgent(
        name="WriterAgent",
        instructions=INSTRUCTIONS,
        tools=[],  # writing needs no tools
        model=llm,
        output_type=None,  # raw string result, no structured output
        output_parser=None,
    )