notte-agent 0.0.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,78 @@
1
+ import json
2
+ import re
3
+ from abc import ABC, abstractmethod
4
+ from typing import Literal
5
+
6
+ from notte_core.actions.base import ExecutableAction
7
+ from notte_core.controller.actions import (
8
+ BaseAction,
9
+ CompletionAction,
10
+ GotoAction,
11
+ ScrapeAction,
12
+ )
13
+ from pydantic import BaseModel
14
+
15
+
16
class NotteStepAgentOutput(BaseModel):
    """Structured output of the agent for a single step.

    Each field corresponds to one kind of request. Nothing here enforces that
    only one field is set; when several are set, `endpoint` and `action`
    resolve them in the fixed order observe, step, scrape, completion.
    """

    observe: GotoAction | None = None
    step: ExecutableAction | None = None
    scrape: ScrapeAction | None = None
    completion: CompletionAction | None = None

    @property
    def endpoint(self) -> Literal["observe", "step", "scrape", "done"] | None:
        """Name of the endpoint selected by the first populated field.

        A populated `completion` field maps to "done". Returns None when no
        field is populated.
        """
        if self.observe is not None:
            return "observe"
        if self.step is not None:
            return "step"
        if self.scrape is not None:
            return "scrape"
        return "done" if self.completion is not None else None

    @property
    def action(self) -> BaseAction | None:
        """First populated action among `observe`, `step` and `scrape`.

        The `completion` field is not considered here, so the result is None
        when only `completion` (or nothing) is set.
        """
        for candidate in (self.observe, self.step, self.scrape):
            if candidate is not None:
                return candidate
        return None
45
+
46
+
47
class ParameterizedAction(BaseModel):
    """An action reference together with optional string parameters."""

    # Identifier of the action being referred to.
    action_id: str
    # Optional mapping of parameter name to value; None when not provided.
    params: dict[str, str] | None = None
50
+
51
+
52
+ class BaseParser(ABC):
53
+ @abstractmethod
54
+ def parse(self, text: str) -> NotteStepAgentOutput | None:
55
+ raise NotImplementedError
56
+
57
+ @abstractmethod
58
+ def example_format(self, endpoint: Literal["observe", "step", "scrape"]) -> str | None:
59
+ raise NotImplementedError
60
+
61
+ @staticmethod
62
+ def search_pattern(text: str, tag: str) -> str | None:
63
+ pattern = re.compile(rf"<{tag}>(.*?)</{tag}>", re.IGNORECASE | re.DOTALL)
64
+ match = pattern.search(text)
65
+ return match.group(1).strip() if match else None
66
+
67
+ @staticmethod
68
+ def parse_json(text: str, tag: str | None = None) -> dict[str, str]:
69
+ if tag is not None:
70
+ _text = BaseParser.search_pattern(text, tag)
71
+ if _text is None:
72
+ raise ValueError(f"No text found within <{tag}> tags")
73
+ text = _text
74
+ try:
75
+ data: dict[str, str] = json.loads(text)
76
+ except json.JSONDecodeError:
77
+ raise ValueError("Invalid JSON in action")
78
+ return data
@@ -0,0 +1,21 @@
1
+ from abc import ABC, abstractmethod
2
+
3
+ from notte_core.browser.observation import Observation
4
+
5
+
6
class BasePerception(ABC):
    """Abstract interface rendering an `Observation` as text.

    Each method takes an observation and returns a string; what the string
    contains is defined by the concrete subclass.
    """

    @abstractmethod
    def perceive_metadata(self, obs: Observation) -> str:
        """Return a textual rendering of the observation's metadata."""

    @abstractmethod
    def perceive_actions(self, obs: Observation) -> str:
        """Return a textual rendering of the observation's actions."""

    @abstractmethod
    def perceive_data(self, obs: Observation) -> str:
        """Return a textual rendering of the observation's data."""

    @abstractmethod
    def perceive(self, obs: Observation) -> str:
        """Return the overall textual rendering of the observation."""
@@ -0,0 +1,15 @@
1
+ from abc import ABC, abstractmethod
2
+
3
+
4
class BasePrompt(ABC):
    """Abstract interface for the textual pieces of an agent prompt."""

    @abstractmethod
    def system(self) -> str:
        """Return the system prompt text."""

    @abstractmethod
    def output_format_rules(self) -> str:
        """Return the rules describing the expected output format."""

    @abstractmethod
    def select_action_rules(self) -> str:
        """Return the rules describing how an action should be selected."""
@@ -0,0 +1,100 @@
1
+ from collections.abc import Awaitable
2
+ from typing import Callable, Generic, TypeVar, final
3
+
4
+ from notte_core.errors.base import NotteBaseError
5
+ from notte_core.errors.provider import RateLimitError
6
+ from pydantic import BaseModel
7
+ from pydantic_core import ValidationError
8
+
9
S = TypeVar("S")  # Source type: the input passed to the wrapped callable
T = TypeVar("T")  # Target type: the output produced by the wrapped callable
11
+
12
+
13
class ExecutionStatus(BaseModel, Generic[S, T]):
    """Outcome of one execution: the input, the output (if any) and a message."""

    input: S
    output: T | None
    success: bool
    message: str

    def get(self) -> T:
        """Return the output of a successful execution.

        Raises:
            ValueError: If the execution was not successful or produced no output.
        """
        if self.success and self.output is not None:
            return self.output
        raise ValueError(f"Execution failed with message: {self.message}")
23
+
24
+
25
class StepExecutionFailure(NotteBaseError):
    """Error raised when a single step fails; one message is used for every audience."""

    def __init__(self, message: str):
        # The same text is forwarded as the user, agent and dev message.
        super().__init__(user_message=message, agent_message=message, dev_message=message)
32
+
33
+
34
class MaxConsecutiveFailuresError(NotteBaseError):
    """Error raised once the allowed number of consecutive failures is reached."""

    def __init__(self, max_failures: int):
        # Keep the limit on the instance so handlers can inspect it.
        self.max_failures: int = max_failures
        text = f"Max consecutive failures reached in a single step: {max_failures}."
        super().__init__(user_message=text, agent_message=text, dev_message=text)
43
+
44
+
45
+ @final
46
+ class SafeActionExecutor(Generic[S, T]):
47
+ def __init__(
48
+ self,
49
+ func: Callable[[S], Awaitable[T]],
50
+ max_consecutive_failures: int = 3,
51
+ raise_on_failure: bool = True,
52
+ ) -> None:
53
+ self.func = func
54
+ self.max_consecutive_failures = max_consecutive_failures
55
+ self.consecutive_failures = 0
56
+ self.raise_on_failure = raise_on_failure
57
+
58
+ def reset(self) -> None:
59
+ self.consecutive_failures = 0
60
+
61
+ def on_failure(self, input_data: S, error_msg: str, e: Exception) -> ExecutionStatus[S, T]:
62
+ self.consecutive_failures += 1
63
+ if self.consecutive_failures >= self.max_consecutive_failures:
64
+ raise MaxConsecutiveFailuresError(self.max_consecutive_failures) from e
65
+ if self.raise_on_failure:
66
+ raise StepExecutionFailure(error_msg) from e
67
+ return ExecutionStatus(
68
+ input=input_data,
69
+ output=None,
70
+ success=False,
71
+ message=error_msg,
72
+ )
73
+
74
+ async def execute(self, input_data: S) -> ExecutionStatus[S, T]:
75
+ try:
76
+ result = await self.func(input_data)
77
+ self.consecutive_failures = 0
78
+ return ExecutionStatus(
79
+ input=input_data,
80
+ success=True,
81
+ output=result,
82
+ message=f"Successfully executed action with input: {input_data}",
83
+ )
84
+ except RateLimitError as e:
85
+ return self.on_failure(input_data, "Rate limit reached. Waiting before retry.", e)
86
+ except NotteBaseError as e:
87
+ # When raise_on_failure is True, we use the dev message to give more details to the user
88
+ msg = e.dev_message if self.raise_on_failure else e.agent_message
89
+ return self.on_failure(input_data, msg, e)
90
+ except ValidationError as e:
91
+ return self.on_failure(
92
+ input_data,
93
+ (
94
+ "JSON Schema Validation error: The output format is invalid. "
95
+ f"Please ensure your response follows the expected schema. Details: {str(e)}"
96
+ ),
97
+ e,
98
+ )
99
+ except Exception as e:
100
+ return self.on_failure(input_data, f"An unexpected error occurred: {e}", e)
@@ -0,0 +1,100 @@
1
+ from abc import ABC, abstractmethod
2
+ from typing import Generic
3
+
4
+ from notte_core.browser.observation import Observation
5
+ from notte_core.common.tracer import TStepAgentOutput
6
+ from notte_core.controller.actions import BaseAction, GotoAction
7
+ from pydantic import BaseModel, Field
8
+
9
+ from notte_agent.common.safe_executor import ExecutionStatus
10
+
11
# Execution status specialised for agent steps: the input is a `BaseAction`
# and the output is an `Observation`.
ExecutionStepStatus = ExecutionStatus[BaseAction, Observation]
12
+
13
+
14
class TrajectoryStep(BaseModel, Generic[TStepAgentOutput]):
    """One agent response together with the execution results recorded for it."""

    agent_response: TStepAgentOutput
    results: list[ExecutionStepStatus]

    def observations(self) -> list[Observation]:
        """Return the outputs of all results that have one, in order."""
        collected: list[Observation] = []
        for result in self.results:
            if result.output is not None:
                collected.append(result.output)
        return collected
20
+
21
+
22
+ def trim_message(message: str, max_length: int | None = None) -> str:
23
+ if max_length is None or len(message) <= max_length:
24
+ return message
25
+ return f"...{message[-max_length:]}"
26
+
27
+
28
class TrajectoryHistory(BaseModel, ABC, Generic[TStepAgentOutput]):  # type: ignore[reportUnsafeMultipleInheritance]
    """Ordered record of agent steps and their execution results.

    Subclasses define how one step is rendered (`perceive_step`) and how a new
    agent output opens a step (`add_output`).
    """

    steps: list[TrajectoryStep[TStepAgentOutput]] = Field(default_factory=list)
    # Passed to `trim_message` when rendering error messages; None disables trimming.
    max_error_length: int | None = None

    def reset(self) -> None:
        """Drop every recorded step."""
        self.steps = []

    def perceive(self) -> str:
        """Render the whole history, or the start rules when it is empty."""
        rendered = [self.perceive_step(step, step_idx=idx) for idx, step in enumerate(self.steps)]
        steps = "\n".join(rendered)
        return f"""
[Start of action execution history memory]
{steps or self.start_rules()}
[End of action execution history memory]
"""

    def start_rules(self) -> str:
        """Return the instructions shown when no action has been executed yet."""
        return f"""
No action executed so far...
Your first action should always be a `{GotoAction.name()}` action with a url related to the task.
You should reflect what url best fits the task you are trying to solve to start the task, e.g.
- flight search task => https://www.google.com/travel/flights
- go to reddit => https://www.reddit.com
- ...
ONLY if you have ABSOLUTELY no idea what to do, you can use `https://www.google.com` as the default url.
THIS SHOULD BE THE LAST RESORT.
"""

    def perceive_step_result(
        self,
        result: ExecutionStepStatus,
        include_ids: bool = False,
        include_data: bool = False,
    ) -> str:
        """Render one execution result as a success or failure line.

        With `include_data`, structured data found on a successful result is
        appended as JSON.
        """
        action = result.input
        id_str = f" with id={action.id}" if include_ids else ""
        if not result.success:
            err_msg = trim_message(result.message, self.max_error_length)
            return f"❌ action '{action.name()}'{id_str} failed with error: {err_msg}"

        success_msg = f"✅ action '{action.name()}'{id_str} succeeded: '{action.execution_message()}'"
        data = result.get().data
        has_structured = data is not None and data.structured is not None and data.structured.data is not None
        if include_data and has_structured:
            return f"{success_msg}\n\nExtracted JSON data:\n{data.structured.data.model_dump_json()}"
        return success_msg

    @abstractmethod
    def perceive_step(
        self,
        step: TrajectoryStep[TStepAgentOutput],
        step_idx: int = 0,
        include_ids: bool = False,
        include_data: bool = True,
    ) -> str:
        """Render a single step as text."""
        raise NotImplementedError

    @abstractmethod
    def add_output(self, output: TStepAgentOutput) -> None:
        """Record a new agent output."""
        raise NotImplementedError

    def add_step(self, step: ExecutionStepStatus) -> None:
        """Append an execution result to the most recent step.

        Raises:
            ValueError: If no step has been recorded yet.
        """
        if not self.steps:
            raise ValueError("Cannot add step to empty trajectory. Use `add_output` first.")
        self.steps[-1].results.append(step)

    def observations(self) -> list[Observation]:
        """Return every observation across all steps, in order."""
        collected: list[Observation] = []
        for step in self.steps:
            collected.extend(step.observations())
        return collected

    def last_obs(self) -> Observation | None:
        """Return the output of the most recent successful result, if any."""
        for step in reversed(self.steps):
            for step_result in reversed(step.results):
                if step_result.success and step_result.output is not None:
                    return step_result.output
        return None
@@ -0,0 +1,41 @@
1
+ from __future__ import annotations
2
+
3
+ from litellm import AllMessageValues
4
+ from notte_browser.session import TrajectoryStep
5
+ from notte_core.common.tracer import LlmUsageDictTracer
6
+ from notte_core.utils.webp_replay import ScreenshotReplay, WebpReplay
7
+ from pydantic import BaseModel
8
+ from typing_extensions import override
9
+
10
+ from notte_agent.common.trajectory_history import TrajectoryStep as AgentTrajectoryStep
11
+
12
+
13
class AgentResponse(BaseModel):
    """Final result of an agent run: answer, trajectories, messages and usage."""

    success: bool
    answer: str
    session_trajectory: list[TrajectoryStep]
    agent_trajectory: list[AgentTrajectoryStep[BaseModel]]
    messages: list[AllMessageValues] | None = None
    llm_usage: list[LlmUsageDictTracer.LlmUsage]
    duration_in_s: float = -1

    @override
    def __str__(self) -> str:
        """Short summary with success flag, rounded duration and answer."""
        duration = round(self.duration_in_s, 2)
        return f"AgentResponse(success={self.success}, duration_in_s={duration}, answer={self.answer})"

    def replay(self) -> WebpReplay:
        """Build a replay from the screenshots found in the agent trajectory.

        Raises:
            ValueError: If no observation carries a screenshot.
        """
        screenshots: list[bytes] = []
        for step in self.agent_trajectory:
            for obs in step.observations():
                if obs.screenshot is not None:
                    screenshots.append(obs.screenshot)
        if not screenshots:
            raise ValueError("No screenshots found in agent trajectory")
        return ScreenshotReplay.from_bytes(screenshots).get()

    @override
    def __repr__(self) -> str:
        """Same text as `__str__`."""
        return self.__str__()
@@ -0,0 +1,90 @@
1
+ from typing import final
2
+
3
+ import chevron
4
+ from notte_browser.session import TrajectoryStep
5
+ from notte_core.controller.actions import CompletionAction
6
+ from notte_core.llms.engine import LLMEngine
7
+ from pydantic import BaseModel
8
+
9
+ from notte_agent.common.conversation import Conversation
10
+ from notte_agent.common.perception import BasePerception
11
+
12
# Mustache template for the validator's system prompt. `CompletionValidator.validate`
# renders it with chevron, supplying `task` and `example`. `{{&example}}` uses the
# Mustache unescaped form so the example JSON is inserted without HTML escaping.
system_rules = """
You are a validator of an agent who interacts with a browser.
Validate if the output of last action is what the user wanted and if the task is completed.
If the task is unclear defined, you can let it pass.
But if something is missing or the image does not show what was requested dont let it pass.
Try to understand the page and help the model with suggestions like scroll, do x, ... to get the solution right.

Task to validate: {{task}}.

Return a JSON object with 2 keys: `is_valid` and `reason`:
- `is_valid` is a boolean that indicates if the output is correct.
- `reason` is a string that explains why it is valid or not.

Example:
```json
{{&example}}
```

Your turn:
"""
32
+
33
+
34
class CompletionValidation(BaseModel):
    """Verdict returned by the completion validator."""

    # Whether the validated output is considered correct.
    is_valid: bool
    # Explanation of why the output is or is not valid.
    reason: str
37
+
38
+
39
@final
class CompletionValidator:
    """Ask an LLM whether the agent's completion output satisfies the task."""

    def __init__(
        self,
        llm: LLMEngine,
        perception: BasePerception,
        use_vision: bool = True,
        include_attributes: bool = True,
    ):
        self.llm: LLMEngine = llm
        self.perception: BasePerception = perception
        # One conversation object is kept and reset at the start of each validation.
        self.conv: Conversation = Conversation()
        self.use_vision = use_vision
        self.include_attributes = include_attributes

    @staticmethod
    def example() -> CompletionValidation:
        """Return the sample verdict embedded in the system prompt."""
        reason = "The user wanted to search for 'cat photos', but the agent searched for 'dog photos' instead."
        return CompletionValidation(is_valid=False, reason=reason)

    def validation_message(
        self,
        output: CompletionAction,
        step: TrajectoryStep,
    ) -> str:
        """Build the user message: last observation, last action and agent output."""
        observation_text = self.perception.perceive(step.obs)
        action_json = step.action.model_dump_json(exclude_unset=True)
        return f"""
Last observation:
{observation_text}

Last action:
{action_json}

Agent task output:
{output}
"""

    def validate(
        self,
        task: str,
        output: CompletionAction,
        step: TrajectoryStep,
    ) -> CompletionValidation:
        """Validate the output of the last action is what the user wanted"""
        self.conv.reset()
        template_values = {"task": task, "example": self.example().model_dump_json()}
        self.conv.add_system_message(content=chevron.render(system_rules, template_values))
        self.conv.add_user_message(content=self.validation_message(output, step))
        return self.llm.structured_completion(self.conv.messages(), CompletionValidation)
File without changes