PyPI - alita-sdk - Versions diffs - 0.3.176__py3-none-any.whl → 0.3.177__py3-none-any.whl - Mend

alita-sdk 0.3.176__py3-none-any.whl → 0.3.177__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (41) hide show

alita_sdk/community/deep_researcher/agents/tool_agents/search_agent.py DELETED Viewed

@@ -1,96 +0,0 @@
-"""
-Agent used to perform web searches and summarize the results.
-The SearchAgent takes as input a string in the format of AgentTask.model_dump_json(), or can take a simple query string as input
-The Agent then:
-1. Uses the web_search tool to retrieve search results
-2. Analyzes the retrieved information
-3. Writes a 3+ paragraph summary of the search results
-4. Includes citations/URLs in brackets next to information sources
-5. Returns the formatted summary as a string
-The agent can use either OpenAI's built-in web search capability or a custom
-web search implementation based on environment configuration.
-"""
-from langchain_core.tools import Tool
-from typing import Dict, Any, List
-from . import ToolAgentOutput
-from ...llm_config import LLMConfig
-from ..baseclass import ResearchAgent
-from ..utils.parse_output import create_type_parser
-INSTRUCTIONS = f"""You are a research assistant that specializes in retrieving and summarizing information from the web.
-OBJECTIVE:
-Given an AgentTask, follow these steps:
-- Convert the 'query' into an optimized SERP search term for Google, limited to 3-5 words
-- If an 'entity_website' is provided, make sure to include the domain name in your optimized Google search term
-- Enter the optimized search term into the web_search tool
-- After using the web_search tool, write a 3+ paragraph summary that captures the main points from the search results
-GUIDELINES:
-- In your summary, try to comprehensively answer/address the 'gap' provided (which is the objective of the search)
-- The summary should always quote detailed facts, figures and numbers where these are available
-- If the search results are not relevant to the search term or do not address the 'gap', simply write "No relevant results found"
-- Use headings and bullets to organize the summary if needed
-- Include citations/URLs in brackets next to all associated information in your summary
-- Do not make additional searches
-Only output JSON. Follow the JSON schema below. Do not output anything else. I will be parsing this with Pydantic so output valid JSON only:
-{ToolAgentOutput.model_json_schema()}
-"""
-def init_search_agent(config: LLMConfig) -> ResearchAgent:
-    """
-    Initialize a search agent using LangChain tools.
-    Args:
-        config: The LLM configuration to use
-    Returns:
-        A ResearchAgent that can search the web and summarize results
-    """
-    # Create a LangChain wrapper around the web_search tool
-    async def web_search_wrapper(query: str, num_results: int = 8) -> List[Dict[str, Any]]:
-        """
-        Perform a web search and return the results.
-        Args:
-            query: The query to search for
-            num_results: Number of results to return
-        Returns:
-            A list of search results with title, url, and snippet
-        """
-        # Import here to avoid circular imports
-        from ...tools import web_search
-        # Use the original web_search function
-        results = await web_search(query, num_results)
-        return results
-    # Create a LangChain Tool
-    web_search_tool = Tool(
-        name="web_search",
-        description="Search the web for information on a specific query. Returns a list of search results.",
-        func=web_search_wrapper,
-        coroutine=web_search_wrapper,
-    )
-    # Use our adapter to initialize the agent with the LangChain tool
-    selected_model = config.fast_model
-    # Determine whether to use structured output
-    use_output_parser = not hasattr(selected_model, 'langchain_llm')
-    return ResearchAgent(
-        name="WebSearchAgent",
-        instructions=INSTRUCTIONS,
-        tools=[web_search_tool],
-        model=selected_model.langchain_llm if hasattr(selected_model, 'langchain_llm') else selected_model,
-        output_type=ToolAgentOutput if not use_output_parser else None,
-        output_parser=create_type_parser(ToolAgentOutput) if use_output_parser else None
-    )

alita_sdk/community/deep_researcher/agents/tool_selector_agent.py DELETED Viewed

@@ -1,83 +0,0 @@
-"""
-Agent used to determine which specialized agents should be used to address knowledge gaps.
-The Agent takes as input a string in the following format:
-===========================================================
-ORIGINAL QUERY: <original user query>
-KNOWLEDGE GAP TO ADDRESS: <knowledge gap that needs to be addressed>
-BACKGROUND CONTEXT: <supporting background context related to the original query>
-HISTORY OF ACTIONS, FINDINGS AND THOUGHTS: <a log of prior iterations of the research process>
-===========================================================
-The Agent then:
-1. Analyzes the knowledge gap to determine which agents are best suited to address it
-2. Returns an AgentSelectionPlan object containing a list of AgentTask objects
-The available agents are:
-- WebSearchAgent: General web search for broad topics
-- SiteCrawlerAgent: Crawl the pages of a specific website to retrieve information about it
-"""
-from pydantic import BaseModel, Field
-from typing import List, Optional
-from ..llm_config import LLMConfig, model_supports_structured_output
-from datetime import datetime
-from .baseclass import ResearchAgent
-from .utils.parse_output import create_type_parser
-class AgentTask(BaseModel):
-    """A task for a specific agent to address knowledge gaps"""
-    gap: Optional[str] = Field(description="The knowledge gap being addressed", default=None)
-    agent: str = Field(description="The name of the agent to use")
-    query: str = Field(description="The specific query for the agent")
-    entity_website: Optional[str] = Field(description="The website of the entity being researched, if known", default=None)
-class AgentSelectionPlan(BaseModel):
-    """Plan for which agents to use for knowledge gaps"""
-    tasks: List[AgentTask] = Field(description="List of agent tasks to address knowledge gaps")
-INSTRUCTIONS = f"""
-You are an Tool Selector responsible for determining which specialized agents should address a knowledge gap in a research project.
-Today's date is {datetime.now().strftime("%Y-%m-%d")}.
-You will be given:
-1. The original user query
-2. A knowledge gap identified in the research
-3. A full history of the tasks, actions, findings and thoughts you've made up until this point in the research process
-Your task is to decide:
-1. Which specialized agents are best suited to address the gap
-2. What specific queries should be given to the agents (keep this short - 3-6 words)
-Available specialized agents:
-- WebSearchAgent: General web search for broad topics (can be called multiple times with different queries)
-- SiteCrawlerAgent: Crawl the pages of a specific website to retrieve information about it - use this if you want to find out something about a particular company, entity or product
-Guidelines:
-- Aim to call at most 3 agents at a time in your final output
-- You can list the WebSearchAgent multiple times with different queries if needed to cover the full scope of the knowledge gap
-- Be specific and concise (3-6 words) with the agent queries - they should target exactly what information is needed
-- If you know the website or domain name of an entity being researched, always include it in the query
-- If a gap doesn't clearly match any agent's capability, default to the WebSearchAgent
-- Use the history of actions / tool calls as a guide - try not to repeat yourself if an approach didn't work previously
-Only output JSON. Follow the JSON schema below. Do not output anything else. I will be parsing this with Pydantic so output valid JSON only:
-{AgentSelectionPlan.model_json_schema()}
-"""
-def init_tool_selector_agent(config: LLMConfig) -> ResearchAgent:
-    selected_model = config.reasoning_model
-    return ResearchAgent(
-        name="ToolSelectorAgent",
-        instructions=INSTRUCTIONS,
-        model=selected_model,
-        output_type=AgentSelectionPlan if model_supports_structured_output(selected_model) else None,
-        output_parser=create_type_parser(AgentSelectionPlan) if not model_supports_structured_output(selected_model) else None
-    )

alita_sdk/community/deep_researcher/agents/utils/__init__.py DELETED Viewed

File without changes

alita_sdk/community/deep_researcher/agents/utils/parse_output.py DELETED Viewed

@@ -1,148 +0,0 @@
-import json
-import re
-from typing import Type, Any, Callable, TypeVar
-from pydantic import BaseModel
-T = TypeVar('T', bound=BaseModel)
-class OutputParserError(Exception):
-    """
-    Exception raised when the output parser fails to parse the output.
-    """
-    def __init__(self, message, output=None):
-        self.message = message
-        self.output = output
-        super().__init__(self.message)
-    def __str__(self):
-        if self.output:
-            return f"{self.message}\nProblematic output: {self.output}"
-        return self.message
-def find_json_in_string(string: str) -> str:
-    """
-    Method to extract all text in the left-most brace that appears in a string.
-    Used to extract JSON from a string (note that this function does not validate the JSON).
-    Example:
-        string = "bla bla bla {this is {some} text{{}and it's sneaky}} because {it's} confusing"
-        output = "{this is {some} text{{}and it's sneaky}}"
-    """
-    stack = 0
-    start_index = None
-    for i, c in enumerate(string):
-        if c == '{':
-            if stack == 0:
-                start_index = i  # Start index of the first '{'
-            stack += 1  # Push to stack
-        elif c == '}':
-            stack -= 1  # Pop stack
-            if stack == 0:
-                # Return the substring from the start of the first '{' to the current '}'
-                return string[start_index:i + 1] if start_index is not None else ""
-    # If no complete set of braces is found, return an empty string
-    return ""
-def parse_json_output(output: str) -> Any:
-    """Take a string output and parse it as JSON"""
-    # First try to load the string as JSON
-    try:
-        return json.loads(output)
-    except json.JSONDecodeError as e:
-        pass
-    # If that fails, assume that the output is in a code block - remove the code block markers and try again
-    parsed_output = output
-    parsed_output = parsed_output.split("```")[1]
-    parsed_output = parsed_output.split("```")[0]
-    if parsed_output.startswith("json") or parsed_output.startswith("JSON"):
-        parsed_output = parsed_output[4:].strip()
-    try:
-        return json.loads(parsed_output)
-    except json.JSONDecodeError:
-        pass
-    # As a last attempt, try to manually find the JSON object in the output and parse it
-    parsed_output = find_json_in_string(output)
-    if parsed_output:
-        try:
-            return json.loads(parsed_output)
-        except json.JSONDecodeError:
-            raise OutputParserError(f"Failed to parse output as JSON", output)
-    # If all fails, raise an error
-    raise OutputParserError(f"Failed to parse output as JSON", output)
-def create_type_parser(model_class: Type[T]) -> Callable[[str], T]:
-    """
-    Creates a parser function that attempts to parse the output into the given model class.
-    This handles various formats that might be returned by the LLM.
-    Args:
-        model_class: The Pydantic model class to parse the output into
-    Returns:
-        A function that takes a string and returns an instance of the model class
-    """
-    def parser(text: str) -> T:
-        """
-        Parse the output into the model class.
-        Args:
-            text: The text to parse
-        Returns:
-            An instance of the model class
-        """
-        # First try direct JSON parsing
-        try:
-            return model_class.model_validate_json(text)
-        except Exception:
-            pass
-        # Try to extract JSON from markdown codeblocks
-        json_match = re.search(r"```(?:json)?\n(.*?)\n```", text, re.DOTALL)
-        if json_match:
-            try:
-                json_str = json_match.group(1).strip()
-                return model_class.model_validate_json(json_str)
-            except Exception:
-                pass
-        # Try to parse the entire text as a JSON object
-        try:
-            # Look for JSON-like patterns
-            json_pattern = r"(\{.*\})"
-            match = re.search(json_pattern, text, re.DOTALL)
-            if match:
-                json_str = match.group(1)
-                parsed = json.loads(json_str)
-                return model_class.model_validate(parsed)
-        except Exception:
-            pass
-        # Fall back to creating an instance with the text as output
-        try:
-            # Check if model has 'output' field
-            if 'output' in model_class.model_fields:
-                return model_class(output=text)
-        except Exception:
-            pass
-        # Last resort: just try to create an empty instance and set attributes
-        try:
-            instance = model_class()
-            if hasattr(instance, 'output'):
-                setattr(instance, 'output', text)
-            return instance
-        except Exception as e:
-            raise ValueError(f"Could not parse output to {model_class.__name__}: {e}")
-    return parser

alita_sdk/community/deep_researcher/agents/writer_agent.py DELETED Viewed

@@ -1,63 +0,0 @@
-"""
-Agent used to synthesize a final report based on provided findings.
-The WriterAgent takes as input a string in the following format:
-===========================================================
-QUERY: <original user query>
-FINDINGS: <findings from the iterative research process>
-===========================================================
-The Agent then:
-1. Generates a comprehensive markdown report based on all available information
-2. Includes proper citations for sources in the format [1], [2], etc.
-3. Returns a string containing the markdown formatted report
-"""
-from .baseclass import ResearchAgent
-from ..llm_config import LLMConfig
-from datetime import datetime
-from langchain_core.tools import BaseTool
-INSTRUCTIONS = f"""
-You are a senior researcher tasked with comprehensively answering a research query.
-Today's date is {datetime.now().strftime('%Y-%m-%d')}.
-You will be provided with the original query along with research findings put together by a research assistant.
-Your objective is to generate the final response in markdown format.
-The response should be as lengthy and detailed as possible with the information provided, focusing on answering the original query.
-In your final output, include references to the source URLs for all information and data gathered.
-This should be formatted in the form of a numbered square bracket next to the relevant information,
-followed by a list of URLs at the end of the response, per the example below.
-EXAMPLE REFERENCE FORMAT:
-The company has XYZ products [1]. It operates in the software services market which is expected to grow at 10% per year [2].
-References:
-[1] https://example.com/first-source-url
-[2] https://example.com/second-source-url
-GUIDELINES:
-* Answer the query directly, do not include unrelated or tangential information.
-* Adhere to any instructions on the length of your final response if provided in the user prompt.
-* If any additional guidelines are provided in the user prompt, follow them exactly and give them precedence over these system instructions.
-"""
-def init_writer_agent(config: LLMConfig) -> ResearchAgent:
-    """
-    Initialize the writer agent.
-    Args:
-        config: The LLM configuration to use
-    Returns:
-        A ResearchAgent that can generate comprehensive research reports
-    """
-    selected_model = config.main_model
-    return ResearchAgent(
-        name="WriterAgent",
-        instructions=INSTRUCTIONS,
-        tools=[],  # No tools needed for this agent
-        model=selected_model.langchain_llm if hasattr(selected_model, 'langchain_llm') else selected_model,
-        output_type=None,  # Direct string output
-        output_parser=None
-    )

alita_sdk/community/deep_researcher/api_wrapper.py DELETED Viewed

@@ -1,116 +0,0 @@
-from typing import Any, Optional, Dict
-import asyncio
-import json
-from pydantic import create_model, Field
-from alita_sdk.tools.elitea_base import BaseToolApiWrapper
-from .deep_research import DeepResearcher
-from .iterative_research import IterativeResearcher
-from .llm_config import LLMConfig, create_default_config
-from langchain_core.language_models.llms import BaseLLM
-from langchain_core.language_models.chat_models import BaseChatModel
-class DeepResearcherWrapper(BaseToolApiWrapper):
-    """Wrapper for deep_researcher module to be used as a LangChain toolkit."""
-    alita: Any = None
-    llm: Optional[BaseLLM | BaseChatModel] = None
-    max_iterations: int = 5
-    max_time_minutes: int = 10
-    verbose: bool = False
-    tracing: bool = False
-    config: Optional[LLMConfig] = None
-    def __init__(self, **kwargs):
-        super().__init__(**kwargs)
-        # Initialize the config if not provided
-        if not self.config:
-            self.config = create_default_config(langchain_llm=self.llm)
-        # Override llm in config if provided
-        elif self.llm and not self.config.langchain_llm:
-            # Create a new config with the langchain_llm
-            self.config = create_default_config(langchain_llm=self.llm)
-    def _setup_deep_researcher(self) -> DeepResearcher:
-        """Initialize a DeepResearcher instance with current settings."""
-        return DeepResearcher(
-            max_iterations=self.max_iterations,
-            max_time_minutes=self.max_time_minutes,
-            verbose=self.verbose,
-            tracing=self.tracing,
-            config=self.config,
-            llm=self.llm,
-            alita=self.alita
-        )
-    def _setup_iterative_researcher(self) -> IterativeResearcher:
-        """Initialize an IterativeResearcher instance with current settings."""
-        return IterativeResearcher(
-            max_iterations=self.max_iterations,
-            max_time_minutes=self.max_time_minutes,
-            verbose=self.verbose,
-            tracing=self.tracing,
-            config=self.config,
-            llm=self.llm,
-            alita=self.alita
-        )
-    def run_deep_research(self, query: str) -> str:
-        """
-        Run deep research on a query, breaking it down into sections and iteratively researching each part.
-        Args:
-            query: The research query
-        Returns:
-            Comprehensive research report
-        """
-        researcher = self._setup_deep_researcher()
-        return asyncio.run(researcher.run(query))
-    def run_iterative_research(self, query: str, output_length: str = "5 pages", output_instructions: str = "", background_context: str = "") -> str:
-        """
-        Run iterative research on a query, conducting multiple iterations to address knowledge gaps.
-        Args:
-            query: The research query
-            output_length: Desired length of the output (e.g., "5 pages", "2 paragraphs")
-            output_instructions: Additional instructions for output formatting
-            background_context: Additional context to provide for the research
-        Returns:
-            Research report based on iterative findings
-        """
-        researcher = self._setup_iterative_researcher()
-        return asyncio.run(researcher.run(
-            query=query,
-            output_length=output_length,
-            output_instructions=output_instructions,
-            background_context=background_context
-        ))
-    def get_available_tools(self):
-        """Return the list of available tools."""
-        return [
-            {
-                "name": "run_deep_research",
-                "ref": self.run_deep_research,
-                "description": self.run_deep_research.__doc__,
-                "args_schema": create_model(
-                    "DeepResearchModel",
-                    query=(str, Field(description="The research query to investigate thoroughly"))
-                )
-            },
-            {
-                "name": "run_iterative_research",
-                "ref": self.run_iterative_research,
-                "description": self.run_iterative_research.__doc__,
-                "args_schema": create_model(
-                    "IterativeResearchModel",
-                    query=(str, Field(description="The research query to investigate")),
-                    output_length=(str, Field(description="Desired length of the output (e.g., '5 pages', '2 paragraphs')", default="5 pages")),
-                    output_instructions=(str, Field(description="Additional instructions for output formatting", default="")),
-                    background_context=(str, Field(description="Additional context to provide for the research", default=""))
-                )
-            }
-        ]

alita-sdk 0.3.176__py3-none-any.whl → 0.3.177__py3-none-any.whl

alita-sdk 0.3.176__py3-none-any.whl → 0.3.177__py3-none-any.whl