PyPI - alita-sdk - Versions diffs - 0.3.176__py3-none-any.whl → 0.3.177__py3-none-any.whl - Mend

alita-sdk 0.3.176__py3-none-any.whl → 0.3.177__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (41) hide show

alita_sdk/community/browseruse/api_wrapper.py DELETED Viewed

@@ -1,288 +0,0 @@
-from datetime import datetime
-from typing import Dict, List, Any, Optional, Type
-from pydantic import BaseModel, Field
-from browser_use import Agent, ActionResult, Browser, BrowserConfig, BrowserContextConfig
-from browser_use.agent.views import AgentHistoryList
-from playwright._impl._api_structures import ProxySettings
-from alita_sdk.tools.elitea_base import BaseToolApiWrapper
-from pydantic import create_model, Field, model_validator
-from tempfile import TemporaryDirectory, NamedTemporaryFile
-from browser_use.controller.service import Controller
-from langchain_core.callbacks import dispatch_custom_event
-from pyobjtojson import obj_to_json
-import os
-import asyncio
-import socket
-from browser_use.browser.utils.screen_resolution import get_screen_resolution, get_window_adjustments
-from playwright.async_api import Playwright, Browser as PlaywrightBrowser
-from browser_use.browser.chrome import (
-	CHROME_ARGS,
-	CHROME_DEBUG_PORT,
-	CHROME_DETERMINISTIC_RENDERING_ARGS,
-	CHROME_DISABLE_SECURITY_ARGS,
-	CHROME_DOCKER_ARGS,
-	CHROME_HEADLESS_ARGS,
-)
-IN_DOCKER = os.environ.get('IN_DOCKER', 'false').lower()[0] in 'ty1'
-class BrowserEx(Browser):
-    def __init__(self, config: BrowserConfig):
-        super().__init__(config)
-        self.config = config
-    async def _setup_builtin_browser(self, playwright: Playwright) -> PlaywrightBrowser:
-        """Sets up and returns a Playwright Browser instance with anti-detection measures."""
-        assert self.config.browser_binary_path is None, 'browser_binary_path should be None if trying to use the builtin browsers'
-		# Use the configured window size from new_context_config if available
-        if (
-			not self.config.headless
-			and hasattr(self.config, 'new_context_config')
-			and hasattr(self.config.new_context_config, 'browser_window_size')
-		):
-            screen_size = self.config.new_context_config.browser_window_size.model_dump()
-            offset_x, offset_y = get_window_adjustments()
-        elif self.config.headless:
-            screen_size = {'width': 1920, 'height': 1080}
-            offset_x, offset_y = 0, 0
-        else:
-            screen_size = get_screen_resolution()
-            offset_x, offset_y = get_window_adjustments()
-        chrome_args = {
-			f'--remote-debugging-port={self.config.chrome_remote_debugging_port}',
-			*CHROME_ARGS,
-			*(CHROME_DOCKER_ARGS if IN_DOCKER else []),
-			*(CHROME_HEADLESS_ARGS if self.config.headless else []),
-			*(CHROME_DISABLE_SECURITY_ARGS if self.config.disable_security else []),
-			*(CHROME_DETERMINISTIC_RENDERING_ARGS if self.config.deterministic_rendering else []),
-			f'--window-position={offset_x},{offset_y}',
-			f'--window-size={screen_size["width"]},{screen_size["height"]}',
-			*self.config.extra_browser_args,
-		}
-		# check if chrome remote debugging port is already taken,
-		# if so remove the remote-debugging-port arg to prevent conflicts
-        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
-            if s.connect_ex(('localhost', self.config.chrome_remote_debugging_port)) == 0:
-                chrome_args.remove(f'--remote-debugging-port={self.config.chrome_remote_debugging_port}')
-        browser_class = getattr(playwright, self.config.browser_class)
-        args = {
-			'chromium': list(chrome_args),
-			'firefox': [
-				*{
-					'-no-remote',
-					*self.config.extra_browser_args,
-				}
-			],
-			'webkit': [
-				*{
-					'--no-startup-window',
-					*self.config.extra_browser_args,
-				}
-			],
-		}
-        browser = await browser_class.launch(
-			headless=self.config.headless,
-			channel='chromium',
-			args=args[self.config.browser_class],
-			proxy=self.config.proxy.model_dump() if self.config.proxy else None,
-			handle_sigterm=False,
-			handle_sigint=False,
-		)
-        return browser
-BrowserTask = create_model(
-    "BrowserTask",
-    task=(str, Field(description="Task to perform")),
-    max_steps=(Optional[int], Field(description="Maximum number of steps to perform")),
-    debug=(Optional[bool], Field(description="Whether debug mode is enabled")),
-    __config__=Field(description="Browser Use API Wrapper")
-)
-BrowserTasks = create_model(
-    "BrowserTasks",
-    tasks=(List[str], Field(description="List of tasks to perform")),
-    max_steps=(Optional[int], Field(description="Maximum number of steps to perform")),
-    debug=(Optional[bool], Field(description="Whether debug mode is enabled")),
-    __config__=Field(description="Browser Use API Wrapper")
-)
-async def thinking_processor(agent):
-    """Hook to be called after each step."""
-    if hasattr(agent, "state"):
-        history = agent.state.history
-    else:
-        history = None
-        return
-    # Process model thoughts
-    model_thoughts = obj_to_json(
-        obj=history.model_thoughts(),
-        check_circular=False
-    )
-    if len(model_thoughts) > 0:
-        model_thoughts_last_elem = model_thoughts[-1]
-        evalualtion = model_thoughts_last_elem.get('evaluation_previous_goal')
-        memory = model_thoughts_last_elem.get('memory')
-        next_goal = model_thoughts_last_elem.get('next_goal')
-        dispatch_custom_event(
-            name="thinking_step",
-            data={
-                "message": f"**Memory** : \n\n{memory}\n\n**Evaluation goal**:\n\n{evalualtion}\n\n**Next goal**:\n\n{next_goal}",
-                "tool_name": "task",
-                "toolkit": "browser_use"
-            }
-        )
-class DoneResult(BaseModel):
-	title: str
-	comments: str
-	hours_since_start: int
-gif_default_location = './agent_history.gif'
-default_bucket = 'browseruse'
-class BrowserUseAPIWrapper(BaseToolApiWrapper):
-    """Wrapper for Browser Use API."""
-    headless: bool = True
-    width: int = 1280
-    height: int = 800
-    use_vision: bool = False
-    trace_actions: bool = False
-    trace_actions_path: Optional[str] = None
-    cookies: Optional[Dict[str, Any]] = None
-    disable_security: bool = True
-    proxy: Any = None
-    extra_chromium_args: List[str] = []
-    client: Any = None # AlitaClient
-    artifact: Any = None # Artifact
-    llm: Any = None # LLMLikeObject
-    bucket: str = None
-    proxy_settings: Any = None
-    validate_output: bool = False
-    planner_llm: Any = None
-    browser_window_size: Dict[str, int] = None
-    @model_validator(mode='before')
-    @classmethod
-    def validate_toolkit(cls, values):
-        """Validate toolkit parameters."""
-        values['proxy'] = ProxySettings(**values['proxy']) if values.get('proxy') else None
-        values['extra_chromium_args'] = values.get('extra_chromium_args') or []
-        values['browser_window_size'] = {"width": values.get('width', 1280), "height": values.get('height', 800)}
-        values['artifact'] = values.get('client').artifact(values.get('bucket', default_bucket))
-        return values
-    def _create_browser(self):
-        cookies_file = None
-        if self.cookies:
-            cookies_file = NamedTemporaryFile(delete=False)
-            cookies_file.write(self.cookies)
-            cookies_file.close()
-        context_config = BrowserContextConfig(
-                cookies_file=cookies_file,
-                wait_for_network_idle_page_load_time=10.0, # TODO: Make this configurable
-                highlight_elements=True,
-                browser_window_size=self.browser_window_size
-            )
-        browser_config = BrowserConfig(
-            headless=self.headless,
-            browser_class='chromium', # TODO: Make this configurable
-            disable_security=self.disable_security,
-            extra_chromium_args=self.extra_chromium_args,
-            proxy=self.proxy,
-            new_context_config=context_config
-        )
-        return BrowserEx(config=browser_config)
-    def task(self, task: str, max_steps: Optional[int] = 20, debug: Optional[bool] = False):
-        """Perform a task using the browser."""
-        return asyncio.run(self._tasks([task], max_steps, debug))
-    async def _tasks(self, tasks: List[str], max_steps: Optional[int] = 20, debug: Optional[bool] = False):
-        browser = self._create_browser()
-        context_config = BrowserContextConfig(
-                wait_for_network_idle_page_load_time=10.0, # TODO: Make this configurable
-                highlight_elements=True,
-                browser_window_size=self.browser_window_size
-            )
-        async with await browser.new_context(context_config) as browser:
-            start = tasks[0]
-            if len(tasks) == 1:
-                tasks = []
-            agent = Agent(
-                task=start,
-                llm=self.llm,
-                browser_context=browser,
-                max_actions_per_step=20,
-                use_vision=self.use_vision,
-                save_conversation_path=None,
-                generate_gif=True,
-                planner_llm=self.planner_llm,
-                controller=Controller(),
-                message_context = "Carefully check every step, and make sure to provide detailed feedback on the results.",
-                validate_output=self.validate_output
-            )
-            for task in tasks:
-                agent.add_new_task(task)
-            history: AgentHistoryList = await agent.run(
-                max_steps=max_steps,
-                on_step_end=thinking_processor
-                )
-        await browser.close()
-        files = self._save_execution(history.model_dump_json())
-        return {
-            "run_data": str(history.extracted_content()),
-            "files": files
-        }
-    def _save_execution(self, data_content: Any):
-        """Saves tasks execution gif"""
-        try:
-            with open(gif_default_location, 'rb') as file:
-                artifact_data = file.read()
-        except FileNotFoundError:
-            artifact_data = None
-        filename = f"tasks_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
-        files = []
-        if data_content:
-            self.artifact.create(f'{filename}.json', data_content)
-            files.append(f'{filename}.json')
-        if artifact_data:
-            self.artifact.create(f'{filename}.gif', artifact_data)
-            files.append(f'{filename}.gif')
-        return files
-    def tasks(self, tasks: List[str], max_steps: Optional[int] = 20, debug: Optional[bool] = False):
-        """Perform a list of tasks using the browser."""
-        return asyncio.run(self._tasks(tasks, max_steps, debug))
-    def get_available_tools(self):
-        return [
-            {
-                "name": "task",
-                "description": self.task.__doc__,
-                "args_schema": BrowserTask,
-                "ref": self.task
-            },
-            {
-                "name": "tasks",
-                "description": self.tasks.__doc__,
-                "args_schema": BrowserTasks,
-                "ref": self.tasks
-            }
-        ]

alita_sdk/community/deep_researcher/__init__.py DELETED Viewed

@@ -1,70 +0,0 @@
-from .deep_research import DeepResearcher
-from .iterative_research import IterativeResearcher
-from .agents.baseclass import ResearchRunner
-from .llm_config import LLMConfig
-__all__ = ["DeepResearcher", "IterativeResearcher", "ResearchRunner", "LLMConfig"]
-from typing import Any, List, Literal, Optional
-from langchain_core.tools import BaseToolkit, BaseTool
-from pydantic import BaseModel, ConfigDict, create_model, Field
-from .api_wrapper import DeepResearcherWrapper
-from ..base.tool import BaseAction
-from ..utils import clean_string, TOOLKIT_SPLITTER, get_max_toolkit_length
-name = "deep_researcher"
-def get_tools(tool):
-    return DeepResearcherToolkit().get_toolkit(
-        selected_tools=tool['settings'].get('selected_tools', []),
-        max_iterations=tool['settings'].get('max_iterations', 5),
-        max_time_minutes=tool['settings'].get('max_time_minutes', 10),
-        verbose=tool['settings'].get('verbose', False),
-        tracing=tool['settings'].get('tracing', False),
-        alita=tool['settings'].get('alita', None),
-        llm=tool['settings'].get('llm', None),
-        toolkit_name=tool.get('toolkit_name')
-    ).get_tools()
-class DeepResearcherToolkit(BaseToolkit):
-    tools: List[BaseTool] = []
-    toolkit_max_length: int = 0
-    @staticmethod
-    def toolkit_config_schema() -> BaseModel:
-        selected_tools = {x['name']: x['args_schema'].schema() for x in DeepResearcherWrapper.model_construct().get_available_tools()}
-        DeepResearcherToolkit.toolkit_max_length = get_max_toolkit_length(selected_tools)
-        return create_model(
-            name,
-            max_iterations=(int, Field(default=5, title="Max iterations", description="Maximum number of iterations for research", json_schema_extra={'toolkit_name': True, 'max_toolkit_length': DeepResearcherToolkit.toolkit_max_length})),
-            max_time_minutes=(int, Field(default=10, title="Max time (minutes)", description="Maximum time in minutes for research")),
-            verbose=(bool, Field(default=False, title="Verbose", description="Print status updates to the console")),
-            tracing=(bool, Field(default=False, title="Tracing", description="Enable tracing (only for OpenAI models)")),
-            selected_tools=(List[Literal[tuple(selected_tools)]], Field(default=[], json_schema_extra={'args_schemas': selected_tools})),
-            __config__=ConfigDict(json_schema_extra={'metadata': {"label": "Deep Researcher", "icon_url": "research-icon.svg"}})
-        )
-    @classmethod
-    def get_toolkit(cls, selected_tools: list[str] | None = None, toolkit_name: Optional[str] = None, **kwargs):
-        if selected_tools is None:
-            selected_tools = []
-        deep_researcher_api_wrapper = DeepResearcherWrapper(**kwargs)
-        prefix = clean_string(toolkit_name, cls.toolkit_max_length) + TOOLKIT_SPLITTER if toolkit_name else ''
-        available_tools = deep_researcher_api_wrapper.get_available_tools()
-        tools = []
-        for tool in available_tools:
-            if selected_tools and tool["name"] not in selected_tools:
-                continue
-            tools.append(BaseAction(
-                api_wrapper=deep_researcher_api_wrapper,
-                name=prefix + tool["name"],
-                description=tool["description"],
-                args_schema=tool["args_schema"]
-            ))
-        return cls(tools=tools)
-    def get_tools(self):
-        return self.tools

alita_sdk/community/deep_researcher/agents/__init__.py DELETED Viewed

@@ -1 +0,0 @@
-from .baseclass import ResearchAgent, ResearchRunner

alita_sdk/community/deep_researcher/agents/baseclass.py DELETED Viewed

@@ -1,182 +0,0 @@
-from typing import Any, Callable, Optional, List, Dict, Union, TypeVar, Generic, Type
-from pydantic import BaseModel
-import asyncio
-import json
-# LangChain imports
-from langchain_core.tools import BaseTool
-from langchain_core.messages import AIMessage, HumanMessage, SystemMessage, BaseMessage
-from langchain_core.prompts import ChatPromptTemplate
-from langchain.agents import AgentExecutor
-from langchain_core.runnables import RunnablePassthrough
-from langchain.agents.format_scratchpad import format_to_openai_functions
-from langchain.agents.output_parsers import OpenAIFunctionsAgentOutputParser
-from langchain_core.runnables.base import RunnableSerializable
-# Type variable for parameterizing the output type
-TContext = TypeVar("TContext")
-class ResearchRunner:
-    """
-    LangChain-based runner for research agents that supports both structured output
-    and custom output parsing.
-    """
-    @classmethod
-    async def run(cls, agent, user_message: str, **kwargs) -> 'RunResult':
-        """
-        Run the agent with the given user message and return the result.
-        Args:
-            agent: The agent to run
-            user_message: The user message to send to the agent
-        Returns:
-            A RunResult containing the final output
-        """
-        if not isinstance(agent, ResearchAgent):
-            raise TypeError("Agent must be a ResearchAgent")
-        result = await agent.arun(user_message)
-        return RunResult(final_output=result)
-class RunResult:
-    """
-    A simple class to maintain compatibility with the previous API
-    while using LangChain agents under the hood.
-    """
-    def __init__(self, final_output: Any):
-        self.final_output = final_output
-    def final_output_as(self, output_type: Type[Any]) -> Any:
-        """
-        Convert the final output to the specified type.
-        Args:
-            output_type: The type to convert to
-        Returns:
-            An instance of output_type
-        """
-        if isinstance(self.final_output, output_type):
-            return self.final_output
-        if isinstance(self.final_output, str):
-            try:
-                # Try to parse as JSON if it's a string
-                parsed = json.loads(self.final_output)
-                return output_type(**parsed)
-            except Exception:
-                # If that fails, try to parse the string for JSON
-                try:
-                    # Look for JSON-like content in the string
-                    import re
-                    json_match = re.search(r'```json\n(.*?)\n```', self.final_output, re.DOTALL)
-                    if json_match:
-                        json_str = json_match.group(1)
-                        parsed = json.loads(json_str)
-                        return output_type(**parsed)
-                except Exception:
-                    pass
-        # If all else fails, try to initialize with the entire output as a string
-        try:
-            if hasattr(output_type, "model_validate"):
-                return output_type.model_validate({"output": self.final_output})
-            else:
-                return output_type(output=self.final_output)
-        except Exception as e:
-            raise ValueError(f"Could not convert output to {output_type.__name__}: {e}")
-class ResearchAgent(Generic[TContext]):
-    """
-    LangChain-based agent for research tasks that supports both structured output
-    and custom output parsing.
-    """
-    def __init__(
-        self,
-        name: str,
-        instructions: str,
-        tools: List[BaseTool],
-        model: Any,
-        output_type: Optional[Type[BaseModel]] = None,
-        output_parser: Optional[Callable[[str], Any]] = None
-    ):
-        self.name = name
-        self.instructions = instructions
-        self.tools = tools
-        self.model = model
-        self.output_type = output_type
-        self.output_parser = output_parser
-        # Create the LangChain agent
-        self.agent = self._create_agent()
-    def _create_agent(self) -> RunnableSerializable:
-        """
-        Create a LangChain agent with the specified configuration.
-        """
-        # Create the system prompt
-        system_prompt = self.instructions
-        # Create the prompt template
-        prompt = ChatPromptTemplate.from_messages([
-            ("system", system_prompt),
-            ("human", "{input}"),
-            ("ai", "{agent_scratchpad}")
-        ])
-        # Create the LangChain agent
-        agent = (
-            {
-                "input": RunnablePassthrough(),
-                "agent_scratchpad": lambda x: format_to_openai_functions(x["intermediate_steps"])
-            }
-            | prompt
-            | self.model
-            | OpenAIFunctionsAgentOutputParser()
-        )
-        # Create the agent executor
-        return AgentExecutor(
-            agent=agent,
-            tools=self.tools,
-            verbose=True,
-            handle_parsing_errors=True
-        )
-    async def arun(self, user_input: str) -> Any:
-        """
-        Run the agent asynchronously with the given user input.
-        Args:
-            user_input: The user input to send to the agent
-        Returns:
-            The agent's output
-        """
-        try:
-            # Run the agent
-            result = await self.agent.ainvoke({"input": user_input, "intermediate_steps": []})
-            output = result.get("output", "")
-            # Apply output parser if specified
-            if self.output_parser is not None:
-                return self.output_parser(output)
-            # Try to convert to output_type if specified
-            if self.output_type is not None:
-                try:
-                    return self.output_type.model_validate_json(output)
-                except Exception:
-                    try:
-                        return self.output_type.model_validate({"output": output})
-                    except Exception:
-                        pass
-            # Otherwise return the raw output
-            return output
-        except Exception as e:
-            return f"Error: {str(e)}"

alita_sdk/community/deep_researcher/agents/knowledge_gap_agent.py DELETED Viewed

@@ -1,74 +0,0 @@
-"""
-Agent used to evaluate the state of the research report (typically done in a loop) and identify knowledge gaps that still
-need to be addressed.
-The Agent takes as input a string in the following format:
-===========================================================
-ORIGINAL QUERY: <original user query>
-HISTORY OF ACTIONS, FINDINGS AND THOUGHTS: <breakdown of activities and findings carried out so far>
-===========================================================
-The Agent then:
-1. Carefully reviews the current draft and assesses its completeness in answering the original query
-2. Identifies specific knowledge gaps that still exist and need to be filled
-3. Returns a KnowledgeGapOutput object
-"""
-from pydantic import BaseModel, Field
-from typing import List, Optional, Any
-from langchain_core.tools import BaseTool
-from .baseclass import ResearchAgent
-from ..llm_config import LLMConfig, model_supports_structured_output
-from datetime import datetime
-from .utils.parse_output import create_type_parser
-class KnowledgeGapOutput(BaseModel):
-    """Output from the Knowledge Gap Agent"""
-    research_complete: bool = Field(description="Whether the research and findings are complete enough to end the research loop")
-    outstanding_gaps: List[str] = Field(description="List of knowledge gaps that still need to be addressed")
-INSTRUCTIONS = f"""
-You are a Research State Evaluator. Today's date is {datetime.now().strftime("%Y-%m-%d")}.
-Your job is to critically analyze the current state of a research report,
-identify what knowledge gaps still exist and determine the best next step to take.
-You will be given:
-1. The original user query and any relevant background context to the query
-2. A full history of the tasks, actions, findings and thoughts you've made up until this point in the research process
-Your task is to:
-1. Carefully review the findings and thoughts, particularly from the latest iteration, and assess their completeness in answering the original query
-2. Determine if the findings are sufficiently complete to end the research loop
-3. If not, identify up to 3 knowledge gaps that need to be addressed in sequence in order to continue with research - these should be relevant to the original query
-Be specific in the gaps you identify and include relevant information as this will be passed onto another agent to process without additional context.
-Only output JSON. Follow the JSON schema below. Do not output anything else. I will be parsing this with Pydantic so output valid JSON only:
-{KnowledgeGapOutput.model_json_schema()}
-"""
-def init_knowledge_gap_agent(config: LLMConfig) -> ResearchAgent:
-    """
-    Initialize the knowledge gap agent.
-    Args:
-        config: The LLM configuration to use
-    Returns:
-        A ResearchAgent that can evaluate knowledge gaps
-    """
-    selected_model = config.fast_model
-    # Determine whether to use structured output based on if we have a Langchain LLM
-    use_output_parser = not hasattr(selected_model, 'langchain_llm')
-    return ResearchAgent(
-        name="KnowledgeGapAgent",
-        instructions=INSTRUCTIONS,
-        tools=[],  # No tools needed for this agent
-        model=selected_model.langchain_llm if hasattr(selected_model, 'langchain_llm') else selected_model,
-        output_type=KnowledgeGapOutput if not use_output_parser else None,
-        output_parser=create_type_parser(KnowledgeGapOutput) if use_output_parser else None
-    )

alita-sdk 0.3.176__py3-none-any.whl → 0.3.177__py3-none-any.whl

alita-sdk 0.3.176__py3-none-any.whl → 0.3.177__py3-none-any.whl