PyPI - notte-agent - Versions diffs - 0.0.dev0__py3-none-any.whl - Mend

notte-agent 0.0.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (34) hide show

notte_agent/README.md +58 -0
notte_agent/__init__.py +7 -0
notte_agent/common/__init__.py +0 -0
notte_agent/common/base.py +14 -0
notte_agent/common/captcha_detector.py +87 -0
notte_agent/common/config.py +219 -0
notte_agent/common/conversation.py +246 -0
notte_agent/common/notifier.py +55 -0
notte_agent/common/parser.py +78 -0
notte_agent/common/perception.py +21 -0
notte_agent/common/prompt.py +15 -0
notte_agent/common/safe_executor.py +100 -0
notte_agent/common/trajectory_history.py +100 -0
notte_agent/common/types.py +41 -0
notte_agent/common/validator.py +90 -0
notte_agent/falco/__init__.py +0 -0
notte_agent/falco/agent.py +343 -0
notte_agent/falco/perception.py +83 -0
notte_agent/falco/prompt.py +132 -0
notte_agent/falco/prompts/system_prompt_multi_actions.md +107 -0
notte_agent/falco/prompts/system_prompt_single_action.md +107 -0
notte_agent/falco/trajectory_history.py +42 -0
notte_agent/falco/types.py +132 -0
notte_agent/gufo/__init__.py +0 -0
notte_agent/gufo/agent.py +180 -0
notte_agent/gufo/parser.py +79 -0
notte_agent/gufo/perception.py +53 -0
notte_agent/gufo/prompt.py +61 -0
notte_agent/gufo/system.md +8 -0
notte_agent/main.py +77 -0
notte_agent/py.typed +0 -0
notte_agent-0.0.dev0.dist-info/METADATA +8 -0
notte_agent-0.0.dev0.dist-info/RECORD +34 -0
notte_agent-0.0.dev0.dist-info/WHEEL +4 -0

notte_agent/common/parser.py ADDED Viewed

@@ -0,0 +1,78 @@
+import json
+import re
+from abc import ABC, abstractmethod
+from typing import Literal
+from notte_core.actions.base import ExecutableAction
+from notte_core.controller.actions import (
+    BaseAction,
+    CompletionAction,
+    GotoAction,
+    ScrapeAction,
+)
+from pydantic import BaseModel
+class NotteStepAgentOutput(BaseModel):
+    observe: GotoAction | None = None
+    step: ExecutableAction | None = None
+    scrape: ScrapeAction | None = None
+    completion: CompletionAction | None = None
+    @property
+    def endpoint(self) -> Literal["observe", "step", "scrape", "done"] | None:
+        if self.observe is not None:
+            return "observe"
+        elif self.step is not None:
+            return "step"
+        elif self.scrape is not None:
+            return "scrape"
+        elif self.completion is not None:
+            return "done"
+        else:
+            return None
+    @property
+    def action(self) -> BaseAction | None:
+        if self.observe is not None:
+            return self.observe
+        elif self.step is not None:
+            return self.step
+        elif self.scrape is not None:
+            return self.scrape
+        else:
+            return None
+class ParameterizedAction(BaseModel):
+    # Pairs an action identifier with an optional mapping of string parameters.
+    action_id: str
+    # Parameter name -> string value; defaults to None.
+    params: dict[str, str] | None = None
+class BaseParser(ABC):
+    @abstractmethod
+    def parse(self, text: str) -> NotteStepAgentOutput | None:
+        raise NotImplementedError
+    @abstractmethod
+    def example_format(self, endpoint: Literal["observe", "step", "scrape"]) -> str | None:
+        raise NotImplementedError
+    @staticmethod
+    def search_pattern(text: str, tag: str) -> str | None:
+        pattern = re.compile(rf"<{tag}>(.*?)</{tag}>", re.IGNORECASE | re.DOTALL)
+        match = pattern.search(text)
+        return match.group(1).strip() if match else None
+    @staticmethod
+    def parse_json(text: str, tag: str | None = None) -> dict[str, str]:
+        if tag is not None:
+            _text = BaseParser.search_pattern(text, tag)
+            if _text is None:
+                raise ValueError(f"No text found within <{tag}> tags")
+            text = _text
+        try:
+            data: dict[str, str] = json.loads(text)
+        except json.JSONDecodeError:
+            raise ValueError("Invalid JSON in action")
+        return data

notte_agent/common/perception.py ADDED Viewed

@@ -0,0 +1,21 @@
+from abc import ABC, abstractmethod
+from notte_core.browser.observation import Observation
+class BasePerception(ABC):
+    @abstractmethod
+    def perceive_metadata(self, obs: Observation) -> str:
+        pass
+    @abstractmethod
+    def perceive_actions(self, obs: Observation) -> str:
+        pass
+    @abstractmethod
+    def perceive_data(self, obs: Observation) -> str:
+        pass
+    @abstractmethod
+    def perceive(self, obs: Observation) -> str:
+        pass

notte_agent/common/prompt.py ADDED Viewed

@@ -0,0 +1,15 @@
+from abc import ABC, abstractmethod
+class BasePrompt(ABC):
+    @abstractmethod
+    def system(self) -> str:
+        pass
+    @abstractmethod
+    def output_format_rules(self) -> str:
+        pass
+    @abstractmethod
+    def select_action_rules(self) -> str:
+        pass

notte_agent/common/safe_executor.py ADDED Viewed

@@ -0,0 +1,100 @@
+from collections.abc import Awaitable
+from typing import Callable, Generic, TypeVar, final
+from notte_core.errors.base import NotteBaseError
+from notte_core.errors.provider import RateLimitError
+from pydantic import BaseModel
+from pydantic_core import ValidationError
+S = TypeVar("S")  # Source type: the input value passed to the wrapped callable
+T = TypeVar("T")  # Target type: the value the wrapped callable produces
+class ExecutionStatus(BaseModel, Generic[S, T]):
+    input: S
+    output: T | None
+    success: bool
+    message: str
+    def get(self) -> T:
+        if self.output is None or not self.success:
+            raise ValueError(f"Execution failed with message: {self.message}")
+        return self.output
+class StepExecutionFailure(NotteBaseError):
+    def __init__(self, message: str):
+        super().__init__(
+            user_message=message,
+            agent_message=message,
+            dev_message=message,
+        )
+class MaxConsecutiveFailuresError(NotteBaseError):
+    def __init__(self, max_failures: int):
+        self.max_failures: int = max_failures
+        message = f"Max consecutive failures reached in a single step: {max_failures}."
+        super().__init__(
+            user_message=message,
+            agent_message=message,
+            dev_message=message,
+        )
+@final
+class SafeActionExecutor(Generic[S, T]):
+    def __init__(
+        self,
+        func: Callable[[S], Awaitable[T]],
+        max_consecutive_failures: int = 3,
+        raise_on_failure: bool = True,
+    ) -> None:
+        self.func = func
+        self.max_consecutive_failures = max_consecutive_failures
+        self.consecutive_failures = 0
+        self.raise_on_failure = raise_on_failure
+    def reset(self) -> None:
+        self.consecutive_failures = 0
+    def on_failure(self, input_data: S, error_msg: str, e: Exception) -> ExecutionStatus[S, T]:
+        self.consecutive_failures += 1
+        if self.consecutive_failures >= self.max_consecutive_failures:
+            raise MaxConsecutiveFailuresError(self.max_consecutive_failures) from e
+        if self.raise_on_failure:
+            raise StepExecutionFailure(error_msg) from e
+        return ExecutionStatus(
+            input=input_data,
+            output=None,
+            success=False,
+            message=error_msg,
+        )
+    async def execute(self, input_data: S) -> ExecutionStatus[S, T]:
+        try:
+            result = await self.func(input_data)
+            self.consecutive_failures = 0
+            return ExecutionStatus(
+                input=input_data,
+                success=True,
+                output=result,
+                message=f"Successfully executed action with input: {input_data}",
+            )
+        except RateLimitError as e:
+            return self.on_failure(input_data, "Rate limit reached. Waiting before retry.", e)
+        except NotteBaseError as e:
+            # When raise_on_failure is True, we use the dev message to give more details to the user
+            msg = e.dev_message if self.raise_on_failure else e.agent_message
+            return self.on_failure(input_data, msg, e)
+        except ValidationError as e:
+            return self.on_failure(
+                input_data,
+                (
+                    "JSON Schema Validation error: The output format is invalid. "
+                    f"Please ensure your response follows the expected schema. Details: {str(e)}"
+                ),
+                e,
+            )
+        except Exception as e:
+            return self.on_failure(input_data, f"An unexpected error occurred: {e}", e)

notte_agent/common/trajectory_history.py ADDED Viewed

@@ -0,0 +1,100 @@
+from abc import ABC, abstractmethod
+from typing import Generic
+from notte_core.browser.observation import Observation
+from notte_core.common.tracer import TStepAgentOutput
+from notte_core.controller.actions import BaseAction, GotoAction
+from pydantic import BaseModel, Field
+from notte_agent.common.safe_executor import ExecutionStatus
+# ExecutionStatus specialised to a BaseAction input and an Observation output.
+ExecutionStepStatus = ExecutionStatus[BaseAction, Observation]
+class TrajectoryStep(BaseModel, Generic[TStepAgentOutput]):
+    agent_response: TStepAgentOutput
+    results: list[ExecutionStepStatus]
+    def observations(self) -> list[Observation]:
+        return [result.output for result in self.results if result.output is not None]
+def trim_message(message: str, max_length: int | None = None) -> str:
+    if max_length is None or len(message) <= max_length:
+        return message
+    return f"...{message[-max_length:]}"
+class TrajectoryHistory(BaseModel, ABC, Generic[TStepAgentOutput]):  # type: ignore[reportUnsafeMultipleInheritance]
+    steps: list[TrajectoryStep[TStepAgentOutput]] = Field(default_factory=list)
+    max_error_length: int | None = None
+    def reset(self) -> None:
+        self.steps = []
+    def perceive(self) -> str:
+        steps = "\n".join([self.perceive_step(step, step_idx=i) for i, step in enumerate(self.steps)])
+        return f"""
+[Start of action execution history memory]
+{steps or self.start_rules()}
+[End of action execution history memory]
+    """
+    def start_rules(self) -> str:
+        return f"""
+No action executed so far...
+Your first action should always be a `{GotoAction.name()}` action with a url related to the task.
+You should reflect what url best fits the task you are trying to solve to start the task, e.g.
+- flight search task => https://www.google.com/travel/flights
+- go to reddit => https://www.reddit.com
+- ...
+ONLY if you have ABSOLUTELY no idea what to do, you can use `https://www.google.com` as the default url.
+THIS SHOULD BE THE LAST RESORT.
+"""
+    def perceive_step_result(
+        self,
+        result: ExecutionStepStatus,
+        include_ids: bool = False,
+        include_data: bool = False,
+    ) -> str:
+        action = result.input
+        id_str = f" with id={action.id}" if include_ids else ""
+        if not result.success:
+            err_msg = trim_message(result.message, self.max_error_length)
+            return f"❌ action '{action.name()}'{id_str} failed with error: {err_msg}"
+        success_msg = f"✅ action '{action.name()}'{id_str} succeeded: '{action.execution_message()}'"
+        data = result.get().data
+        if include_data and data is not None and data.structured is not None and data.structured.data is not None:
+            return f"{success_msg}\n\nExtracted JSON data:\n{data.structured.data.model_dump_json()}"
+        return success_msg
+    @abstractmethod
+    def perceive_step(
+        self,
+        step: TrajectoryStep[TStepAgentOutput],
+        step_idx: int = 0,
+        include_ids: bool = False,
+        include_data: bool = True,
+    ) -> str:
+        raise NotImplementedError
+    @abstractmethod
+    def add_output(self, output: TStepAgentOutput) -> None:
+        raise NotImplementedError
+    def add_step(self, step: ExecutionStepStatus) -> None:
+        if len(self.steps) == 0:
+            raise ValueError("Cannot add step to empty trajectory. Use `add_output` first.")
+        else:
+            self.steps[-1].results.append(step)
+    def observations(self) -> list[Observation]:
+        return [obs for step in self.steps for obs in step.observations()]
+    def last_obs(self) -> Observation | None:
+        for step in self.steps[::-1]:
+            for step_result in step.results[::-1]:
+                if step_result.success and step_result.output is not None:
+                    return step_result.output
+        return None

notte_agent/common/types.py ADDED Viewed

@@ -0,0 +1,41 @@
+from __future__ import annotations
+from litellm import AllMessageValues
+from notte_browser.session import TrajectoryStep
+from notte_core.common.tracer import LlmUsageDictTracer
+from notte_core.utils.webp_replay import ScreenshotReplay, WebpReplay
+from pydantic import BaseModel
+from typing_extensions import override
+from notte_agent.common.trajectory_history import TrajectoryStep as AgentTrajectoryStep
+class AgentResponse(BaseModel):
+    success: bool
+    answer: str
+    session_trajectory: list[TrajectoryStep]
+    agent_trajectory: list[AgentTrajectoryStep[BaseModel]]
+    messages: list[AllMessageValues] | None = None
+    llm_usage: list[LlmUsageDictTracer.LlmUsage]
+    duration_in_s: float = -1
+    @override
+    def __str__(self) -> str:
+        return (
+            f"AgentResponse(success={self.success}, duration_in_s={round(self.duration_in_s, 2)}, answer={self.answer})"
+        )
+    def replay(self) -> WebpReplay:
+        screenshots: list[bytes] = [
+            obs.screenshot
+            for step in self.agent_trajectory
+            for obs in step.observations()
+            if obs.screenshot is not None
+        ]
+        if len(screenshots) == 0:
+            raise ValueError("No screenshots found in agent trajectory")
+        return ScreenshotReplay.from_bytes(screenshots).get()
+    @override
+    def __repr__(self) -> str:
+        return self.__str__()

notte_agent/common/validator.py ADDED Viewed

@@ -0,0 +1,90 @@
+from typing import final
+import chevron
+from notte_browser.session import TrajectoryStep
+from notte_core.controller.actions import CompletionAction
+from notte_core.llms.engine import LLMEngine
+from pydantic import BaseModel
+from notte_agent.common.conversation import Conversation
+from notte_agent.common.perception import BasePerception
+# Mustache template for the validator's system prompt. It is rendered with
+# chevron in CompletionValidator.validate, which supplies `task` and `example`.
+# `{{&example}}` is the unescaped-variable form, so the example JSON is
+# inserted without HTML escaping.
+system_rules = """
+You are a validator of an agent who interacts with a browser.
+Validate if the output of last action is what the user wanted and if the task is completed.
+If the task is unclear defined, you can let it pass.
+But if something is missing or the image does not show what was requested dont let it pass.
+Try to understand the page and help the model with suggestions like scroll, do x, ... to get the solution right.
+Task to validate: {{task}}.
+Return a JSON object with 2 keys: `is_valid` and `reason`:
+- `is_valid` is a boolean that indicates if the output is correct.
+- `reason` is a string that explains why it is valid or not.
+Example:
+```json
+{{&example}}
+```
+Your turn:
+"""
+class CompletionValidation(BaseModel):
+    # Verdict returned by the validator LLM; mirrors the two keys requested in
+    # the system prompt template.
+    is_valid: bool
+    # Explanation of why the output was judged valid or not.
+    reason: str
+@final
+class CompletionValidator:
+    def __init__(
+        self,
+        llm: LLMEngine,
+        perception: BasePerception,
+        use_vision: bool = True,
+        include_attributes: bool = True,
+    ):
+        self.use_vision = use_vision
+        self.include_attributes = include_attributes
+        self.llm: LLMEngine = llm
+        self.conv: Conversation = Conversation()
+        self.perception: BasePerception = perception
+    @staticmethod
+    def example() -> CompletionValidation:
+        return CompletionValidation(
+            is_valid=False,
+            reason="The user wanted to search for 'cat photos', but the agent searched for 'dog photos' instead.",
+        )
+    def validation_message(
+        self,
+        output: CompletionAction,
+        step: TrajectoryStep,
+    ) -> str:
+        return f"""
+Last observation:
+{self.perception.perceive(step.obs)}
+Last action:
+{step.action.model_dump_json(exclude_unset=True)}
+Agent task output:
+{output}
+"""
+    def validate(
+        self,
+        task: str,
+        output: CompletionAction,
+        step: TrajectoryStep,
+    ) -> CompletionValidation:
+        """Validate the output of the last action is what the user wanted"""
+        self.conv.reset()
+        system_prompt = chevron.render(system_rules, {"task": task, "example": self.example().model_dump_json()})
+        self.conv.add_system_message(content=system_prompt)
+        self.conv.add_user_message(content=self.validation_message(output, step))
+        answer: CompletionValidation = self.llm.structured_completion(self.conv.messages(), CompletionValidation)
+        return answer

notte_agent/falco/__init__.py ADDED Viewed

File without changes