notte-agent 1.4.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. notte_agent-1.4.0/.gitignore +179 -0
  2. notte_agent-1.4.0/PKG-INFO +8 -0
  3. notte_agent-1.4.0/README.md +0 -0
  4. notte_agent-1.4.0/pyproject.toml +22 -0
  5. notte_agent-1.4.0/src/notte_agent/README.md +58 -0
  6. notte_agent-1.4.0/src/notte_agent/__init__.py +7 -0
  7. notte_agent-1.4.0/src/notte_agent/common/__init__.py +0 -0
  8. notte_agent-1.4.0/src/notte_agent/common/base.py +9 -0
  9. notte_agent-1.4.0/src/notte_agent/common/config.py +213 -0
  10. notte_agent-1.4.0/src/notte_agent/common/conversation.py +246 -0
  11. notte_agent-1.4.0/src/notte_agent/common/notifier.py +54 -0
  12. notte_agent-1.4.0/src/notte_agent/common/parser.py +78 -0
  13. notte_agent-1.4.0/src/notte_agent/common/perception.py +21 -0
  14. notte_agent-1.4.0/src/notte_agent/common/prompt.py +15 -0
  15. notte_agent-1.4.0/src/notte_agent/common/safe_executor.py +100 -0
  16. notte_agent-1.4.0/src/notte_agent/common/trajectory_history.py +100 -0
  17. notte_agent-1.4.0/src/notte_agent/common/types.py +41 -0
  18. notte_agent-1.4.0/src/notte_agent/common/validator.py +90 -0
  19. notte_agent-1.4.0/src/notte_agent/falco/__init__.py +0 -0
  20. notte_agent-1.4.0/src/notte_agent/falco/agent.py +324 -0
  21. notte_agent-1.4.0/src/notte_agent/falco/perception.py +86 -0
  22. notte_agent-1.4.0/src/notte_agent/falco/prompt.py +132 -0
  23. notte_agent-1.4.0/src/notte_agent/falco/prompts/system_prompt_multi_actions.md +107 -0
  24. notte_agent-1.4.0/src/notte_agent/falco/prompts/system_prompt_single_action.md +107 -0
  25. notte_agent-1.4.0/src/notte_agent/falco/trajectory_history.py +42 -0
  26. notte_agent-1.4.0/src/notte_agent/falco/types.py +132 -0
  27. notte_agent-1.4.0/src/notte_agent/gufo/__init__.py +0 -0
  28. notte_agent-1.4.0/src/notte_agent/gufo/agent.py +180 -0
  29. notte_agent-1.4.0/src/notte_agent/gufo/parser.py +79 -0
  30. notte_agent-1.4.0/src/notte_agent/gufo/perception.py +58 -0
  31. notte_agent-1.4.0/src/notte_agent/gufo/prompt.py +61 -0
  32. notte_agent-1.4.0/src/notte_agent/gufo/system.md +8 -0
  33. notte_agent-1.4.0/src/notte_agent/main.py +59 -0
  34. notte_agent-1.4.0/src/notte_agent/py.typed +0 -0
@@ -0,0 +1,179 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py,cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # UV
98
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ #uv.lock
102
+
103
+ # poetry
104
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
105
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
106
+ # commonly ignored for libraries.
107
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
108
+ #poetry.lock
109
+
110
+ # pdm
111
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
112
+ #pdm.lock
113
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
114
+ # in version control.
115
+ # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
116
+ .pdm.toml
117
+ .pdm-python
118
+ .pdm-build/
119
+
120
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
121
+ __pypackages__/
122
+
123
+ # Celery stuff
124
+ celerybeat-schedule
125
+ celerybeat.pid
126
+
127
+ # SageMath parsed files
128
+ *.sage.py
129
+
130
+ # Environments
131
+ .env
132
+ .venv
133
+ env/
134
+ venv/
135
+ ENV/
136
+ env.bak/
137
+ venv.bak/
138
+
139
+ # Spyder project settings
140
+ .spyderproject
141
+ .spyproject
142
+
143
+ # Rope project settings
144
+ .ropeproject
145
+
146
+ # mkdocs documentation
147
+ /site
148
+
149
+ # mypy
150
+ .mypy_cache/
151
+ .dmypy.json
152
+ dmypy.json
153
+
154
+ # Pyre type checker
155
+ .pyre/
156
+
157
+ # pytype static type analyzer
158
+ .pytype/
159
+
160
+ # Cython debug symbols
161
+ cython_debug/
162
+
163
+ # PyCharm
164
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
165
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
166
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
167
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
168
+ #.idea/
169
+
170
+ ignore.*
171
+ llm_usage.jsonl
172
+ llm_parsing_error.jsonl
173
+ traces/
174
+
175
+ **/__pycache__/**
176
+ .DS_Store
177
+ **/.DS_Store
178
+ old
179
+ notebook
@@ -0,0 +1,8 @@
1
+ Metadata-Version: 2.4
2
+ Name: notte-agent
3
+ Version: 1.4.0
4
+ Summary: Notte Web AI Agents
5
+ Author-email: Notte Team <hello@notte.cc>
6
+ Requires-Python: >=3.11
7
+ Requires-Dist: notte-browser>=1.3.3
8
+ Requires-Dist: notte-core>=1.3.3
File without changes
@@ -0,0 +1,22 @@
1
+ [project]
2
+ name = "notte-agent"
3
+ version = "1.4.0"
4
+ description = "Notte Web AI Agents"
5
+ readme = "README.md"
6
+ authors = [
7
+ { name = "Notte Team ", email = "hello@notte.cc" }
8
+ ]
9
+ packages = [
10
+ { include = "notte_agent", from = "src" },
11
+ ]
12
+
13
+ requires-python = ">=3.11"
14
+ dependencies = [
15
+ "notte_core>=1.3.3",
16
+ "notte_browser>=1.3.3",
17
+ ]
18
+
19
+
20
+ [build-system]
21
+ requires = ["hatchling"]
22
+ build-backend = "hatchling.build"
@@ -0,0 +1,58 @@
1
+ # How to build an LLM agent with *Notte*
2
+
3
+ This guide explains how to build a custom LLM agent using *Notte*. The example in `agent.py` demonstrates a basic implementation that you can customize for your specific needs.
4
+
5
+ ## Overview
6
+
7
+ *Notte* provides a flexible environment for web automation that can be controlled through an API. To build an agent with *Notte*, you need:
8
+
9
+ 1. An agent implementation that coordinates between your LLM and the *Notte* environment
10
+ 2. A parser that formats *Notte*'s outputs into prompts suitable for your LLM
11
+ 3. A way to interpret the LLM's responses back into *Notte* commands
12
+
13
+ ## Key Components
14
+
15
+ ### Agent
16
+
17
+ The `Agent` class in `agent.py` shows how to:
18
+ - Initialize a connection to your LLM service
19
+ - Manage the conversation flow between the LLM and *Notte*
20
+ - Track the state of task completion
21
+
22
+ ### Parser
23
+
24
+ The parser is crucial for translating between *Notte* and your LLM. You'll need to:
25
+
26
+ 1. Create a custom parser (by extending `BaseNotteParser` or implementing the `Parser` interface)
27
+ 2. Define how to format:
28
+ - Observations from web pages
29
+ - Available actions
30
+ - Data extraction results
31
+ - Task completion status
32
+
33
+ The provided `BaseNotteParser` is a simple example that you should modify based on your needs. Consider:
34
+ - The prompt format your LLM works best with
35
+ - How to structure web observations for your specific tasks
36
+ - What action format makes sense for your use case
37
+ - How to handle task completion and data extraction
38
+
39
+ ## Example Implementation
40
+
41
+ See `agent.py` for a basic implementation. Key points to customize:
42
+ - The parser implementation
43
+ - The prompt engineering in the conversation flow
44
+ - How task completion is determined
45
+ - Error handling and retry logic
46
+
47
+ ## Best Practices
48
+
49
+ 1. **Custom Parser**: Don't just use the `BaseNotteParser` as-is. Create your own parser that:
50
+ - Formats observations in a way that makes sense for your LLM
51
+ - Structures action possibilities clearly
52
+ - Handles task-specific data extraction
53
+
54
+ 2. **Prompt Engineering**: Carefully design your system prompt and conversation flow
55
+
56
+ 3. **Error Handling**: Add robust error handling for both LLM and *Notte* interactions
57
+
58
+ 4. **Testing**: Test your parser and agent with different scenarios
@@ -0,0 +1,7 @@
1
+ from notte_core import check_notte_version
2
+
3
+ from notte_agent.main import Agent
4
+
5
+ __version__ = check_notte_version("notte_agent")
6
+
7
+ __all__ = ["Agent"]
File without changes
@@ -0,0 +1,9 @@
1
+ from abc import ABC, abstractmethod
2
+
3
+ from notte_agent.common.types import AgentResponse
4
+
5
+
6
class BaseAgent(ABC):
    """Abstract interface shared by agent implementations.

    Concrete agents subclass this and implement the asynchronous ``run``
    coroutine; the class cannot be instantiated until they do.
    """

    @abstractmethod
    async def run(self, task: str, url: str | None = None) -> AgentResponse:
        """Execute ``task`` and return the agent's response.

        Args:
            task: Description of the task the agent should carry out.
            url: Optional URL associated with the task.
                NOTE(review): presumably the page the agent starts from -
                confirm against the concrete implementations.

        Returns:
            The ``AgentResponse`` produced by the concrete agent.
        """
@@ -0,0 +1,213 @@
1
+ from abc import ABC, abstractmethod
2
+ from argparse import ArgumentParser, Namespace
3
+ from collections.abc import Callable
4
+ from enum import StrEnum
5
+ from typing import Any, ClassVar, Self, get_origin, get_type_hints
6
+
7
+ from notte_browser.env import NotteEnvConfig
8
+ from notte_core.common.config import FrozenConfig
9
+ from notte_core.llms.engine import LlmModel
10
+ from notte_sdk.types import DEFAULT_MAX_NB_STEPS
11
+ from pydantic import Field, model_validator
12
+
13
+
14
+ class RaiseCondition(StrEnum):
15
+ """How to raise an error when the agent fails to complete a step.
16
+
17
+ Either immediately upon failure, after retry, or never.
18
+ """
19
+
20
+ IMMEDIATELY = "immediately"
21
+ RETRY = "retry"
22
+ NEVER = "never"
23
+
24
+
25
+ class DefaultAgentArgs(StrEnum):
26
+ ENV_DISABLE_WEB_SECURITY = "disable_web_security"
27
+ ENV_HEADLESS = "headless"
28
+ ENV_PERCEPTION_MODEL = "perception_model"
29
+ ENV_MAX_STEPS = "max_steps"
30
+
31
+ def with_prefix(self: Self, prefix: str = "env") -> str:
32
+ return f"{prefix}.{self.value}"
33
+
34
+
35
class AgentConfig(FrozenConfig, ABC):
    """Base configuration shared by agents.

    The ``env`` field is never supplied directly by users: the ``set_env``
    validator fills it from the subclass's ``default_env`` unless
    ``force_env`` is set. The fluent helpers (``model``, ``use_vision``,
    ``dev_mode``, ``map_env``...) each return a new configuration obtained
    through ``_copy_and_validate``, which is inherited from ``FrozenConfig``.
    """

    # make env private to avoid exposing the NotteEnvConfig class
    env: NotteEnvConfig = Field(init=False)
    reasoning_model: str = Field(
        default=LlmModel.default(), description="The model to use for reasoning (i.e taking actions)."
    )
    include_screenshot: bool = Field(default=False, description="Whether to include a screenshot in the response.")
    max_history_tokens: int | None = Field(
        default=None,
        description="The maximum number of tokens in the history. When the history exceeds this limit, the oldest messages are discarded.",
    )
    max_error_length: int = Field(
        default=500, description="The maximum length of an error message to be forwarded to the reasoning model."
    )
    raise_condition: RaiseCondition = Field(
        default=RaiseCondition.RETRY, description="How to raise an error when the agent fails to complete a step."
    )
    max_consecutive_failures: int = Field(
        default=3, description="The maximum number of consecutive failures before the agent gives up."
    )
    force_env: bool | None = Field(
        default=None,
        description="Whether to allow the user to set the environment.",
    )

    @classmethod
    @abstractmethod
    def default_env(cls) -> NotteEnvConfig:
        """Return the environment configuration used when none is forced."""
        raise NotImplementedError("Subclasses must implement this method")

    @model_validator(mode="before")
    @classmethod
    def set_env(cls, values: dict[str, Any]) -> dict[str, Any]:
        """Populate ``env`` before field validation.

        Args:
            values: Raw input data for the model.

        Returns:
            A new mapping containing an ``env`` entry. When the caller passed
            ``env`` together with a truthy ``force_env``, the ``force_env``
            marker is dropped and the given ``env`` is kept; otherwise ``env``
            is set from ``default_env()``.

        Raises:
            ValueError: If ``env`` is supplied without a truthy ``force_env``.
        """
        # Work on a shallow copy: mutating the caller's mapping would leave an
        # "env" key behind, so validating the same dict twice would raise.
        values = dict(values)
        if "env" not in values:
            values["env"] = cls.default_env()  # Set the env field using the subclass's method
            return values
        if not values.get("force_env"):
            raise ValueError("Env should not be set by the user. Set `default_env` instead.")
        del values["force_env"]
        return values

    def groq(self: Self, deep: bool = True) -> Self:
        """Shortcut for ``model(LlmModel.groq, deep=deep)``."""
        return self.model(LlmModel.groq, deep=deep)

    def openai(self: Self, deep: bool = True) -> Self:
        """Shortcut for ``model(LlmModel.openai, deep=deep)``."""
        return self.model(LlmModel.openai, deep=deep)

    def gemini(self: Self, deep: bool = True) -> Self:
        """Shortcut for ``model(LlmModel.gemini, deep=deep)``."""
        return self.model(LlmModel.gemini, deep=deep)

    def cerebras(self: Self, deep: bool = True) -> Self:
        """Shortcut for ``model(LlmModel.cerebras, deep=deep)``."""
        return self.model(LlmModel.cerebras, deep=deep)

    def model(self: Self, model: LlmModel, deep: bool = True) -> Self:
        """Return a copy that uses ``model`` for reasoning.

        ``max_history_tokens`` is set to ``LlmModel.context_length(model)``.

        Args:
            model: The model to switch to.
            deep: When true, the model is also applied to the environment
                configuration through ``env.model(model)``.
        """
        config = self._copy_and_validate(reasoning_model=model, max_history_tokens=LlmModel.context_length(model))
        if deep:
            config = config.map_env(lambda env: env.model(model))
        return config

    def use_vision(self: Self, value: bool = True) -> Self:
        """Return a copy with ``include_screenshot`` set to ``value``."""
        return self._copy_and_validate(include_screenshot=value)

    def dev_mode(self: Self) -> Self:
        """Return a copy that raises immediately on failure, forwards error
        messages of up to 1000 characters and uses ``env.dev_mode()``."""
        return self._copy_and_validate(
            raise_condition=RaiseCondition.IMMEDIATELY,
            max_error_length=1000,
            env=self.env.dev_mode(),
            force_env=True,  # required so that `set_env` accepts the explicit env
        )

    def set_raise_condition(self: Self, value: RaiseCondition) -> Self:
        """Return a copy with ``raise_condition`` set to ``value``."""
        return self._copy_and_validate(raise_condition=value)

    def map_env(self: Self, env: Callable[[NotteEnvConfig], NotteEnvConfig]) -> Self:
        """Return a copy whose environment is ``env(self.env)``.

        Args:
            env: Function that receives the current environment configuration
                and returns the one to use instead.
        """
        # force_env=True lets the `set_env` validator accept the explicit env.
        return self._copy_and_validate(env=env(self.env), force_env=True)

    @staticmethod
    def _parse_bool(value: str) -> bool:
        """Convert a command-line string into a boolean.

        ``bool`` cannot be used as an argparse ``type`` because every
        non-empty string, including ``"False"``, is truthy.

        Raises:
            argparse.ArgumentTypeError: If ``value`` is not a recognised
                boolean spelling.
        """
        from argparse import ArgumentTypeError

        normalized = value.strip().lower()
        if normalized in {"1", "true", "t", "yes", "y", "on"}:
            return True
        # The empty string stays falsy, as it was with `bool("")`.
        if normalized in {"", "0", "false", "f", "no", "n", "off"}:
            return False
        raise ArgumentTypeError(f"expected a boolean value, got {value!r}")

    @staticmethod
    def _get_arg_type(python_type: Any) -> Any:
        """Maps Python types to argparse types.

        ``X | None`` annotations are unwrapped to ``X`` first. ``bool`` maps
        to a string-to-bool parser; ``str``, ``int`` and ``float`` map to
        themselves; anything else falls back to ``str``.
        """
        from types import UnionType
        from typing import Union, get_args

        if get_origin(python_type) in (Union, UnionType):
            members = [arg for arg in get_args(python_type) if arg is not type(None)]
            if len(members) == 1:
                python_type = members[0]

        if python_type is bool:
            return AgentConfig._parse_bool
        if python_type is str or python_type is int or python_type is float:
            return python_type
        return str

    @staticmethod
    def create_base_parser() -> ArgumentParser:
        """Creates a base ArgumentParser holding the ``env.*`` options."""
        parser = ArgumentParser()
        _ = parser.add_argument(
            f"--{DefaultAgentArgs.ENV_HEADLESS.with_prefix()}",
            action="store_true",
            help="Whether to run the browser in headless mode.",
        )
        _ = parser.add_argument(
            f"--{DefaultAgentArgs.ENV_DISABLE_WEB_SECURITY.with_prefix()}",
            action="store_true",
            help="Whether disable web security.",
        )
        _ = parser.add_argument(
            f"--{DefaultAgentArgs.ENV_PERCEPTION_MODEL.with_prefix()}",
            type=str,
            default=None,
            help="The model to use for perception.",
        )
        _ = parser.add_argument(
            f"--{DefaultAgentArgs.ENV_MAX_STEPS.with_prefix()}",
            type=int,
            default=DEFAULT_MAX_NB_STEPS,
            help="The maximum number of steps the agent can take.",
        )
        return parser

    @classmethod
    def create_parser(cls) -> ArgumentParser:
        """Creates an ArgumentParser with all the fields from the config.

        Every model field except ``env`` becomes a ``--field-name`` option
        whose default and help text come from the field definition.
        """
        parser = cls.create_base_parser()
        hints = get_type_hints(cls)

        for field_name, field_info in cls.model_fields.items():
            if field_name == "env":
                continue
            field_type = hints.get(field_name)
            if get_origin(field_type) is ClassVar:
                continue

            default = field_info.default
            help_text = field_info.description or "no description available"

            _ = parser.add_argument(
                f"--{field_name.replace('_', '-')}",
                type=cls._get_arg_type(field_type),
                default=default,
                help=f"{help_text} (default: {default})",
            )

        return parser

    @classmethod
    def from_args(cls: type[Self], args: Namespace) -> Self:
        """Creates an AgentConfig from a Namespace of arguments.

        Attributes whose name starts with ``env.`` are applied to the
        environment configuration; all others are passed to the constructor.
        ``task`` and ``env.window.headless`` are ignored.

        The return type will match the class that called this method.
        """
        disallowed_args = {"task", "env.window.headless"}
        env_prefix = "env."

        env_args: dict[str, Any] = {}
        agent_args: dict[str, Any] = {}
        for key, value in vars(args).items():
            if key in disallowed_args:
                continue
            if key.startswith(env_prefix):
                # Strip only the leading namespace, not every "env." occurrence.
                env_args[key.removeprefix(env_prefix).replace("-", "_")] = value
            else:
                agent_args[key.replace("-", "_")] = value

        def update_env(env: NotteEnvConfig) -> NotteEnvConfig:
            # Copy so the options captured above are not consumed by this call.
            remaining = dict(env_args)
            operations: list[Callable[[NotteEnvConfig], NotteEnvConfig]] = []

            # These two options are applied through dedicated methods instead
            # of being passed to `_copy_and_validate` as keyword arguments.
            if DefaultAgentArgs.ENV_HEADLESS in remaining:
                headless = remaining.pop(DefaultAgentArgs.ENV_HEADLESS)
                operations.append(lambda cfg: cfg.headless(headless))
            if DefaultAgentArgs.ENV_DISABLE_WEB_SECURITY in remaining:
                disable_web_security = remaining.pop(DefaultAgentArgs.ENV_DISABLE_WEB_SECURITY)
                operations.append(
                    lambda cfg: cfg.disable_web_security() if disable_web_security else cfg.enable_web_security()
                )

            env = env._copy_and_validate(**remaining)
            for operation in operations:
                env = operation(env)
            return env

        return cls(**agent_args).map_env(update_env)