notte-core 0.0.dev0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- notte_core-0.0.dev0/.gitignore +179 -0
- notte_core-0.0.dev0/PKG-INFO +19 -0
- notte_core-0.0.dev0/README.md +0 -0
- notte_core-0.0.dev0/pyproject.toml +38 -0
- notte_core-0.0.dev0/src/notte_core/__init__.py +32 -0
- notte_core-0.0.dev0/src/notte_core/actions/__init__.py +0 -0
- notte_core-0.0.dev0/src/notte_core/actions/base.py +314 -0
- notte_core-0.0.dev0/src/notte_core/actions/space.py +106 -0
- notte_core-0.0.dev0/src/notte_core/browser/__init__.py +0 -0
- notte_core-0.0.dev0/src/notte_core/browser/dom_tree.py +602 -0
- notte_core-0.0.dev0/src/notte_core/browser/node_type.py +393 -0
- notte_core-0.0.dev0/src/notte_core/browser/observation.py +64 -0
- notte_core-0.0.dev0/src/notte_core/browser/snapshot.py +100 -0
- notte_core-0.0.dev0/src/notte_core/common/__init__.py +0 -0
- notte_core-0.0.dev0/src/notte_core/common/config.py +30 -0
- notte_core-0.0.dev0/src/notte_core/common/logging.py +21 -0
- notte_core-0.0.dev0/src/notte_core/common/resource.py +68 -0
- notte_core-0.0.dev0/src/notte_core/common/telemetry.py +125 -0
- notte_core-0.0.dev0/src/notte_core/common/tracer.py +190 -0
- notte_core-0.0.dev0/src/notte_core/controller/__init__.py +0 -0
- notte_core-0.0.dev0/src/notte_core/controller/actions.py +344 -0
- notte_core-0.0.dev0/src/notte_core/controller/proxy.py +156 -0
- notte_core-0.0.dev0/src/notte_core/controller/space.py +177 -0
- notte_core-0.0.dev0/src/notte_core/credentials/__init__.py +0 -0
- notte_core-0.0.dev0/src/notte_core/credentials/base.py +660 -0
- notte_core-0.0.dev0/src/notte_core/credentials/types.py +100 -0
- notte_core-0.0.dev0/src/notte_core/data/__init__.py +0 -0
- notte_core-0.0.dev0/src/notte_core/data/space.py +104 -0
- notte_core-0.0.dev0/src/notte_core/errors/__init__.py +0 -0
- notte_core-0.0.dev0/src/notte_core/errors/actions.py +59 -0
- notte_core-0.0.dev0/src/notte_core/errors/base.py +86 -0
- notte_core-0.0.dev0/src/notte_core/errors/llm.py +55 -0
- notte_core-0.0.dev0/src/notte_core/errors/processing.py +85 -0
- notte_core-0.0.dev0/src/notte_core/errors/provider.py +80 -0
- notte_core-0.0.dev0/src/notte_core/errors/validation.py +31 -0
- notte_core-0.0.dev0/src/notte_core/llms/__init__.py +0 -0
- notte_core-0.0.dev0/src/notte_core/llms/config/endpoints.csv +9 -0
- notte_core-0.0.dev0/src/notte_core/llms/engine.py +272 -0
- notte_core-0.0.dev0/src/notte_core/llms/logging.py +79 -0
- notte_core-0.0.dev0/src/notte_core/llms/prompt.py +67 -0
- notte_core-0.0.dev0/src/notte_core/llms/prompts/action-listing/anthropic/user.md +126 -0
- notte_core-0.0.dev0/src/notte_core/llms/prompts/action-listing/optim/user.md +128 -0
- notte_core-0.0.dev0/src/notte_core/llms/prompts/action-listing/simple/user.md +26 -0
- notte_core-0.0.dev0/src/notte_core/llms/prompts/action-listing-incr/user.md +135 -0
- notte_core-0.0.dev0/src/notte_core/llms/prompts/data-extraction/all_data/user.md +178 -0
- notte_core-0.0.dev0/src/notte_core/llms/prompts/data-extraction/only_main_content/user.md +157 -0
- notte_core-0.0.dev0/src/notte_core/llms/prompts/data-extraction/two_sections/user.md +48 -0
- notte_core-0.0.dev0/src/notte_core/llms/prompts/data-extraction/user.md +86 -0
- notte_core-0.0.dev0/src/notte_core/llms/prompts/debug-failing-action-exec/user.md +55 -0
- notte_core-0.0.dev0/src/notte_core/llms/prompts/document-category/base/user.md +58 -0
- notte_core-0.0.dev0/src/notte_core/llms/prompts/document-category/optim/user.md +38 -0
- notte_core-0.0.dev0/src/notte_core/llms/prompts/extract-json-schema/multi-entity/system.md +42 -0
- notte_core-0.0.dev0/src/notte_core/llms/prompts/extract-json-schema/multi-entity/user.md +1 -0
- notte_core-0.0.dev0/src/notte_core/llms/prompts/extract-without-json-schema/system.md +40 -0
- notte_core-0.0.dev0/src/notte_core/llms/prompts/extract-without-json-schema/user.md +1 -0
- notte_core-0.0.dev0/src/notte_core/llms/prompts/generate-json-schema/system.md +25 -0
- notte_core-0.0.dev0/src/notte_core/llms/prompts/generate-json-schema/user.md +1 -0
- notte_core-0.0.dev0/src/notte_core/llms/service.py +121 -0
- notte_core-0.0.dev0/src/notte_core/py.typed +0 -0
- notte_core-0.0.dev0/src/notte_core/utils/__init__.py +0 -0
- notte_core-0.0.dev0/src/notte_core/utils/code.py +22 -0
- notte_core-0.0.dev0/src/notte_core/utils/image.py +118 -0
- notte_core-0.0.dev0/src/notte_core/utils/platform.py +13 -0
- notte_core-0.0.dev0/src/notte_core/utils/pydantic_schema.py +98 -0
- notte_core-0.0.dev0/src/notte_core/utils/singleton.py +13 -0
- notte_core-0.0.dev0/src/notte_core/utils/url.py +89 -0
- notte_core-0.0.dev0/src/notte_core/utils/webp_replay.py +124 -0
|
@@ -0,0 +1,179 @@
|
|
|
1
|
+
# Byte-compiled / optimized / DLL files
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*$py.class
|
|
5
|
+
|
|
6
|
+
# C extensions
|
|
7
|
+
*.so
|
|
8
|
+
|
|
9
|
+
# Distribution / packaging
|
|
10
|
+
.Python
|
|
11
|
+
build/
|
|
12
|
+
develop-eggs/
|
|
13
|
+
dist/
|
|
14
|
+
downloads/
|
|
15
|
+
eggs/
|
|
16
|
+
.eggs/
|
|
17
|
+
lib/
|
|
18
|
+
lib64/
|
|
19
|
+
parts/
|
|
20
|
+
sdist/
|
|
21
|
+
var/
|
|
22
|
+
wheels/
|
|
23
|
+
share/python-wheels/
|
|
24
|
+
*.egg-info/
|
|
25
|
+
.installed.cfg
|
|
26
|
+
*.egg
|
|
27
|
+
MANIFEST
|
|
28
|
+
|
|
29
|
+
# PyInstaller
|
|
30
|
+
# Usually these files are written by a python script from a template
|
|
31
|
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
|
32
|
+
*.manifest
|
|
33
|
+
*.spec
|
|
34
|
+
|
|
35
|
+
# Installer logs
|
|
36
|
+
pip-log.txt
|
|
37
|
+
pip-delete-this-directory.txt
|
|
38
|
+
|
|
39
|
+
# Unit test / coverage reports
|
|
40
|
+
htmlcov/
|
|
41
|
+
.tox/
|
|
42
|
+
.nox/
|
|
43
|
+
.coverage
|
|
44
|
+
.coverage.*
|
|
45
|
+
.cache
|
|
46
|
+
nosetests.xml
|
|
47
|
+
coverage.xml
|
|
48
|
+
*.cover
|
|
49
|
+
*.py,cover
|
|
50
|
+
.hypothesis/
|
|
51
|
+
.pytest_cache/
|
|
52
|
+
cover/
|
|
53
|
+
|
|
54
|
+
# Translations
|
|
55
|
+
*.mo
|
|
56
|
+
*.pot
|
|
57
|
+
|
|
58
|
+
# Django stuff:
|
|
59
|
+
*.log
|
|
60
|
+
local_settings.py
|
|
61
|
+
db.sqlite3
|
|
62
|
+
db.sqlite3-journal
|
|
63
|
+
|
|
64
|
+
# Flask stuff:
|
|
65
|
+
instance/
|
|
66
|
+
.webassets-cache
|
|
67
|
+
|
|
68
|
+
# Scrapy stuff:
|
|
69
|
+
.scrapy
|
|
70
|
+
|
|
71
|
+
# Sphinx documentation
|
|
72
|
+
docs/_build/
|
|
73
|
+
|
|
74
|
+
# PyBuilder
|
|
75
|
+
.pybuilder/
|
|
76
|
+
target/
|
|
77
|
+
|
|
78
|
+
# Jupyter Notebook
|
|
79
|
+
.ipynb_checkpoints
|
|
80
|
+
|
|
81
|
+
# IPython
|
|
82
|
+
profile_default/
|
|
83
|
+
ipython_config.py
|
|
84
|
+
|
|
85
|
+
# pyenv
|
|
86
|
+
# For a library or package, you might want to ignore these files since the code is
|
|
87
|
+
# intended to run in multiple environments; otherwise, check them in:
|
|
88
|
+
# .python-version
|
|
89
|
+
|
|
90
|
+
# pipenv
|
|
91
|
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
|
92
|
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
|
93
|
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
|
94
|
+
# install all needed dependencies.
|
|
95
|
+
#Pipfile.lock
|
|
96
|
+
|
|
97
|
+
# UV
|
|
98
|
+
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
|
|
99
|
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
|
100
|
+
# commonly ignored for libraries.
|
|
101
|
+
#uv.lock
|
|
102
|
+
|
|
103
|
+
# poetry
|
|
104
|
+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
|
105
|
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
|
106
|
+
# commonly ignored for libraries.
|
|
107
|
+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
|
108
|
+
#poetry.lock
|
|
109
|
+
|
|
110
|
+
# pdm
|
|
111
|
+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
|
112
|
+
#pdm.lock
|
|
113
|
+
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
|
114
|
+
# in version control.
|
|
115
|
+
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
|
|
116
|
+
.pdm.toml
|
|
117
|
+
.pdm-python
|
|
118
|
+
.pdm-build/
|
|
119
|
+
|
|
120
|
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
|
121
|
+
__pypackages__/
|
|
122
|
+
|
|
123
|
+
# Celery stuff
|
|
124
|
+
celerybeat-schedule
|
|
125
|
+
celerybeat.pid
|
|
126
|
+
|
|
127
|
+
# SageMath parsed files
|
|
128
|
+
*.sage.py
|
|
129
|
+
|
|
130
|
+
# Environments
|
|
131
|
+
.env
|
|
132
|
+
.venv
|
|
133
|
+
env/
|
|
134
|
+
venv/
|
|
135
|
+
ENV/
|
|
136
|
+
env.bak/
|
|
137
|
+
venv.bak/
|
|
138
|
+
|
|
139
|
+
# Spyder project settings
|
|
140
|
+
.spyderproject
|
|
141
|
+
.spyproject
|
|
142
|
+
|
|
143
|
+
# Rope project settings
|
|
144
|
+
.ropeproject
|
|
145
|
+
|
|
146
|
+
# mkdocs documentation
|
|
147
|
+
/site
|
|
148
|
+
|
|
149
|
+
# mypy
|
|
150
|
+
.mypy_cache/
|
|
151
|
+
.dmypy.json
|
|
152
|
+
dmypy.json
|
|
153
|
+
|
|
154
|
+
# Pyre type checker
|
|
155
|
+
.pyre/
|
|
156
|
+
|
|
157
|
+
# pytype static type analyzer
|
|
158
|
+
.pytype/
|
|
159
|
+
|
|
160
|
+
# Cython debug symbols
|
|
161
|
+
cython_debug/
|
|
162
|
+
|
|
163
|
+
# PyCharm
|
|
164
|
+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
|
165
|
+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
|
166
|
+
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
|
167
|
+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
|
168
|
+
#.idea/
|
|
169
|
+
|
|
170
|
+
ignore.*
|
|
171
|
+
llm_usage.jsonl
|
|
172
|
+
llm_parsing_error.jsonl
|
|
173
|
+
traces/
|
|
174
|
+
|
|
175
|
+
**/__pycache__/**
|
|
176
|
+
.DS_Store
|
|
177
|
+
**/.DS_Store
|
|
178
|
+
old
|
|
179
|
+
notebook
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: notte-core
|
|
3
|
+
Version: 0.0.dev0
|
|
4
|
+
Summary: The web browser for LLMs agents
|
|
5
|
+
Author-email: Notte Team <hello@notte.cc>
|
|
6
|
+
Requires-Python: >=3.11
|
|
7
|
+
Requires-Dist: chevron>=0.14.0
|
|
8
|
+
Requires-Dist: litellm>=1.61.16
|
|
9
|
+
Requires-Dist: llamux>=0.1.9
|
|
10
|
+
Requires-Dist: loguru>=0.7.3
|
|
11
|
+
Requires-Dist: pillow>=11.1.0
|
|
12
|
+
Requires-Dist: posthog>=3.0.1
|
|
13
|
+
Requires-Dist: pydantic>=2.10.6
|
|
14
|
+
Requires-Dist: pyotp>=2.9.0
|
|
15
|
+
Requires-Dist: python-dotenv>=1.0.1
|
|
16
|
+
Requires-Dist: requests>=2.32.3
|
|
17
|
+
Requires-Dist: tldextract>=5.3.0
|
|
18
|
+
Provides-Extra: server
|
|
19
|
+
Requires-Dist: litellm[proxy]>=1.61.16; extra == 'server'
|
|
File without changes
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "notte-core"
|
|
3
|
+
version = "0.0.dev"
|
|
4
|
+
description = "The web browser for LLMs agents"
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
authors = [
|
|
7
|
+
{ name = "Notte Team ", email = "hello@notte.cc" }
|
|
8
|
+
]
|
|
9
|
+
packages = [
|
|
10
|
+
{ include = "notte_core", from = "src" },
|
|
11
|
+
]
|
|
12
|
+
|
|
13
|
+
requires-python = ">=3.11"
|
|
14
|
+
dependencies = [
|
|
15
|
+
"chevron>=0.14.0",
|
|
16
|
+
"litellm>=1.61.16",
|
|
17
|
+
"llamux>=0.1.9",
|
|
18
|
+
"loguru>=0.7.3",
|
|
19
|
+
"pillow>=11.1.0",
|
|
20
|
+
"posthog>=3.0.1",
|
|
21
|
+
"pydantic>=2.10.6",
|
|
22
|
+
"pyotp>=2.9.0",
|
|
23
|
+
"python-dotenv>=1.0.1",
|
|
24
|
+
"requests>=2.32.3",
|
|
25
|
+
"tldextract>=5.3.0",
|
|
26
|
+
]
|
|
27
|
+
|
|
28
|
+
[project.optional-dependencies]
|
|
29
|
+
server = [
|
|
30
|
+
"litellm[proxy]>=1.61.16",
|
|
31
|
+
]
|
|
32
|
+
|
|
33
|
+
[build-system]
|
|
34
|
+
requires = ["hatchling"]
|
|
35
|
+
build-backend = "hatchling.build"
|
|
36
|
+
|
|
37
|
+
[tool.uv.sources]
|
|
38
|
+
maincontentextractor = { git = "https://github.com/HawkClaws/main_content_extractor", rev = "7c3ed7f6ed7f6c10223a3357d43ab741663bc812" }
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
from importlib import metadata
|
|
2
|
+
|
|
3
|
+
from notte_core.errors.base import ErrorConfig, ErrorMessageMode, ErrorMode
|
|
4
|
+
|
|
5
|
+
__version__ = metadata.version("notte_core")
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def set_error_mode(mode: ErrorMode) -> None:
    """Choose which flavour of error message the package emits.

    The value is forwarded unchanged to `ErrorConfig.set_message_mode`.

    Args:
        mode: Either 'developer', 'user' or 'agent'.
    """
    ErrorConfig.set_message_mode(mode)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def check_notte_version(package_name: str) -> str:
    """Verify that an installed package has the same version as notte_core.

    Args:
        package_name: Distribution name passed to `importlib.metadata.version`.

    Returns:
        The installed version string of `package_name`.

    Raises:
        ValueError: If that version is not equal to this module's `__version__`.
    """
    package_version = metadata.version(package_name)
    # Happy path first: identical version strings mean the packages are in sync.
    if __version__ == package_version:
        return package_version
    raise ValueError(
        f"Version mismatch between notte_core and {package_name}: {__version__} != {package_version}. Please update your packages."
    )
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
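# Default to developer mode (NOTE(review): this comment previously said "user mode", but the call below selects DEVELOPER — confirm which is intended)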
|
|
27
|
+
ErrorConfig.set_message_mode(ErrorMessageMode.DEVELOPER.value)
|
|
28
|
+
|
|
29
|
+
# Initialize telemetry
|
|
30
|
+
# This import only initializes the module, actual tracking will be disabled
|
|
31
|
+
# if ANONYMIZED_TELEMETRY=false is set or if PostHog is not installed
|
|
32
|
+
from notte_core.common import telemetry # type: ignore # noqa
|
|
File without changes
|
|
@@ -0,0 +1,314 @@
|
|
|
1
|
+
from pydantic import BaseModel, Field
|
|
2
|
+
from typing_extensions import override
|
|
3
|
+
|
|
4
|
+
from notte_core.browser.dom_tree import DomNode
|
|
5
|
+
from notte_core.controller.actions import ActionRole, ActionStatus, BaseAction, BrowserActionId, InteractionAction
|
|
6
|
+
from notte_core.controller.actions import BrowserAction as _BrowserAction
|
|
7
|
+
from notte_core.credentials.types import ValueWithPlaceholder
|
|
8
|
+
from notte_core.errors.actions import InvalidActionError, MoreThanOneParameterActionError
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class ActionParameter(BaseModel):
    # Declaration of a single action parameter: a name, a free-form type
    # label, an optional default and an optional list of values.
    name: str
    type: str
    default: str | None = None
    values: list[str] = Field(default_factory=list)

    def description(self) -> str:
        """Return `name: type`, with ` = [v1, v2, ...]` appended when `values` is non-empty."""
        base = f"{self.name}: {self.type}"
        if not self.values:
            return base
        return base + f" = [{', '.join(self.values)}]"
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class ActionParameterValue(BaseModel):
    # A value bound to the parameter called `name`; the value is either a
    # plain string or a ValueWithPlaceholder.
    name: str
    value: str | ValueWithPlaceholder
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class CachedAction(BaseModel):
    # Record holding an action's status, description, category, optional
    # `code` string (required field, may be None) and declared parameters.
    status: ActionStatus
    description: str
    category: str
    code: str | None
    params: list[ActionParameter] = Field(default_factory=list)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
# generic action that can be parametrized
|
|
38
|
+
class PossibleAction(BaseModel):
    # Generic, parametrizable action. The first character of `id` encodes the
    # action's role (see `role` below).
    id: str
    description: str
    category: str
    params: list[ActionParameter] = Field(default_factory=list)

    def __post_init__(self) -> None:
        # NOTE(review): `__post_init__` is the dataclass hook; pydantic's
        # BaseModel documents `model_post_init` instead, so confirm whether
        # this validation is ever triggered automatically.
        self.check_params()

    @property
    def role(self, raise_error: bool = False) -> ActionRole:
        """Role derived from the first character of `id`.

        NOTE(review): this is a property, so attribute access never supplies
        `raise_error`; it keeps its default of False and unknown prefixes
        yield "other".

        Raises:
            NotImplementedError: For ids starting with "F" (image actions).
            InvalidActionError: For an unknown prefix when `raise_error` is true.
        """
        prefix = self.id[0]
        if prefix == "F":
            raise NotImplementedError("Image actions are not supported")
        roles_by_prefix: dict[str, ActionRole] = {
            "L": "link",
            "B": "button",
            "I": "input",
            "O": "option",
            "M": "misc",
            "S": "special",
        }
        if prefix in roles_by_prefix:
            return roles_by_prefix[prefix]
        if raise_error:
            raise InvalidActionError(
                self.id, f"First ID character must be one of {ActionRole} but got {self.id[0]}"
            )
        return "other"

    def check_params(self) -> None:
        """Raise MoreThanOneParameterActionError unless an "input" action declares exactly one parameter."""
        if self.role != "input":
            return
        if len(self.params) != 1:
            raise MoreThanOneParameterActionError(self.id, len(self.params))
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
class Action(BaseAction, PossibleAction):
    # A PossibleAction that also carries a status (defaulting to "valid").
    status: ActionStatus = "valid"
    params: list[ActionParameter] = Field(default_factory=list)

    def markdown(self) -> str:
        """Return the action's description unchanged."""
        return self.description

    def embedding_description(self) -> str:
        """Return the role and the description separated by a single space."""
        return f"{self.role} {self.description}"

    @override
    def execution_message(self) -> str:
        """Return the message reported after this action has been executed."""
        # TODO: think about a better message here
        # NOTE(review): "Sucessfully" is misspelled; left untouched because it is runtime text.
        return f"Sucessfully executed: '{self.description}'"
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
class ExecutableAction(Action, InteractionAction):
    """
    An action that can be executed by the proxy.
    """

    # description is not needed for the proxy
    category: str = "Executable Actions"
    description: str = "Executable action"
    params_values: list[ActionParameterValue] = Field(default_factory=list)
    node: DomNode | None = None

    @staticmethod
    def parse(
        action_id: str,
        params: dict[str, str] | str | None = None,
        enter: bool | None = None,
    ) -> "ExecutableAction":
        """Build an ExecutableAction from an id and optional parameter values.

        Args:
            action_id: Identifier stored as the action's `id`.
            params: Mapping of parameter name to value. A bare string is
                treated as a single parameter named "value"; None means no
                parameters.
            enter: Passed through as `press_enter`.

        Returns:
            An action with status "valid", description "ID only", an empty
            category, and one ActionParameter / ActionParameterValue per entry
            in `params`.
        """
        named_values: dict[str, str]
        if isinstance(params, str):
            named_values = {"value": params}
        elif params is None:
            named_values = {}
        else:
            named_values = params

        supplied_values = [
            ActionParameterValue(name=key, value=raw) for key, raw in named_values.items()
        ]
        # The declared type is simply the Python type name of the supplied value.
        declared_params = [
            ActionParameter(name=key, type=type(raw).__name__) for key, raw in named_values.items()
        ]
        # TODO: re-enable if needed
        # enter = enter if enter is not None else action_id.startswith("I")
        return ExecutableAction(
            id=action_id,
            description="ID only",
            category="",
            status="valid",
            params=declared_params,
            params_values=supplied_values,
            press_enter=enter,
        )
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
class BrowserAction(Action, _BrowserAction):
    """
    Browser actions are actions that are always available and are not related to the current page.

    GOTO: Go to a specific URL
    SCRAPE: Scrape data from the current page
    GO_BACK: Go to the previous page
    GO_FORWARD: Go to the next page
    RELOAD: Refresh the current page
    WAIT: Wait for a specific amount of time (in ms)
    COMPLETION: Terminate the current session
    PRESS_KEY: Press a specific key
    SCROLL_UP: Scroll up
    SCROLL_DOWN: Scroll down
    GOTO_NEW_TAB: Go to a new tab
    SWITCH_TAB: Switch to a specific tab
    """

    id: BrowserActionId  # type: ignore[type-assignment]
    description: str = "Special action"
    category: str = "Special Browser Actions"

    @staticmethod
    def is_special(action_id: str) -> bool:
        """Return True when `action_id` equals one of the BrowserActionId members."""
        return action_id in BrowserActionId.__members__.values()

    def __post_init__(self):
        # NOTE(review): `__post_init__` is the dataclass hook; pydantic's
        # BaseModel documents `model_post_init` instead, so confirm whether
        # this check is ever triggered automatically.
        if not BrowserAction.is_special(self.id):
            raise InvalidActionError(self.id, f"Special actions ID must be one of {BrowserActionId} but got {self.id}")

    @staticmethod
    def goto() -> "BrowserAction":
        """Build the GOTO action, which declares a single `url` string parameter."""
        return BrowserAction(
            id=BrowserActionId.GOTO,
            description="Go to a specific URL",
            category="Special Browser Actions",
            params=[
                ActionParameter(name="url", type="string", default=None),
            ],
        )

    @staticmethod
    def scrape() -> "BrowserAction":
        """Build the SCRAPE action (no parameters)."""
        return BrowserAction(
            id=BrowserActionId.SCRAPE,
            description="Scrape data from the current page",
            category="Special Browser Actions",
        )

    # @staticmethod
    # def screenshot() -> "BrowserAction":
    #     return BrowserAction(
    #         id=BrowserActionId.SCREENSHOT,
    #         description="Take a screenshot of the current page",
    #         category="Special Browser Actions",
    #     )

    @staticmethod
    def go_back() -> "BrowserAction":
        """Build the GO_BACK action (no parameters)."""
        return BrowserAction(
            id=BrowserActionId.GO_BACK,
            description="Go to the previous page",
            category="Special Browser Actions",
        )

    @staticmethod
    def go_forward() -> "BrowserAction":
        """Build the GO_FORWARD action (no parameters)."""
        return BrowserAction(
            id=BrowserActionId.GO_FORWARD,
            description="Go to the next page",
            category="Special Browser Actions",
        )

    @staticmethod
    def reload() -> "BrowserAction":
        """Build the RELOAD action (no parameters)."""
        return BrowserAction(
            id=BrowserActionId.RELOAD,
            description="Refresh the current page",
            category="Special Browser Actions",
        )

    @staticmethod
    def wait() -> "BrowserAction":
        """Build the WAIT action, which declares a single `time_ms` int parameter."""
        return BrowserAction(
            id=BrowserActionId.WAIT,
            description="Wait for a specific amount of time (in ms)",
            category="Special Browser Actions",
            params=[
                ActionParameter(name="time_ms", type="int", default=None),
            ],
        )

    @staticmethod
    def completion() -> "BrowserAction":
        """Build the COMPLETION action (no parameters)."""
        return BrowserAction(
            id=BrowserActionId.COMPLETION,
            description="Terminate the current session",
            category="Special Browser Actions",
        )

    @staticmethod
    def press_key() -> "BrowserAction":
        """Build the PRESS_KEY action, which declares a single `key` string parameter."""
        return BrowserAction(
            id=BrowserActionId.PRESS_KEY,
            description="Press a specific key",
            category="Special Browser Actions",
            params=[
                ActionParameter(name="key", type="string", default=None),
            ],
        )

    @staticmethod
    def scroll_up() -> "BrowserAction":
        """Build the SCROLL_UP action, which declares a single `amount` int parameter."""
        return BrowserAction(
            id=BrowserActionId.SCROLL_UP,
            description="Scroll up",
            category="Special Browser Actions",
            params=[
                ActionParameter(name="amount", type="int", default=None),
            ],
        )

    @staticmethod
    def scroll_down() -> "BrowserAction":
        """Build the SCROLL_DOWN action, which declares a single `amount` int parameter."""
        return BrowserAction(
            id=BrowserActionId.SCROLL_DOWN,
            description="Scroll down",
            category="Special Browser Actions",
            params=[
                ActionParameter(name="amount", type="int", default=None),
            ],
        )

    @staticmethod
    def goto_new_tab() -> "BrowserAction":
        """Build the GOTO_NEW_TAB action, which declares a single `url` string parameter."""
        return BrowserAction(
            id=BrowserActionId.GOTO_NEW_TAB,
            description="Go to a new tab",
            category="Special Browser Actions",
            params=[
                ActionParameter(name="url", type="string"),
            ],
        )

    @staticmethod
    def switch_tab() -> "BrowserAction":
        """Build the SWITCH_TAB action, which declares a single `tab_index` int parameter."""
        return BrowserAction(
            id=BrowserActionId.SWITCH_TAB,
            description="Switch to a specific tab",
            category="Special Browser Actions",
            params=[
                ActionParameter(name="tab_index", type="int"),
            ],
        )

    # NOTE: this method is named `list`, which shadows the builtin inside the
    # class body from here on; it is kept last so nothing below it is affected.
    @staticmethod
    def list() -> list["BrowserAction"]:
        """Return one freshly built instance of every browser action defined above."""
        return [
            BrowserAction.goto(),
            BrowserAction.scrape(),
            BrowserAction.go_back(),
            BrowserAction.go_forward(),
            BrowserAction.reload(),
            BrowserAction.wait(),
            BrowserAction.completion(),
            BrowserAction.press_key(),
            BrowserAction.scroll_up(),
            BrowserAction.scroll_down(),
            BrowserAction.goto_new_tab(),
            BrowserAction.switch_tab(),
            # BrowserAction.screenshot(),
        ]
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
from collections.abc import Sequence
|
|
2
|
+
|
|
3
|
+
from loguru import logger
|
|
4
|
+
from pydantic import BaseModel, Field
|
|
5
|
+
from typing_extensions import override
|
|
6
|
+
|
|
7
|
+
from notte_core.actions.base import Action, BrowserAction, PossibleAction
|
|
8
|
+
from notte_core.controller.actions import AllActionRole, AllActionStatus
|
|
9
|
+
from notte_core.controller.space import BaseActionSpace
|
|
10
|
+
from notte_core.errors.actions import InvalidActionError
|
|
11
|
+
from notte_core.errors.processing import InvalidInternalCheckError
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class PossibleActionSpace(BaseModel):
    # A textual description together with the sequence of PossibleAction
    # entries it covers.
    description: str
    actions: Sequence[PossibleAction]
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class ActionSpace(BaseActionSpace):
    raw_actions: Sequence[Action] = Field(description="List of available actions in the current state", exclude=True)

    def __post_init__(self) -> None:
        # NOTE(review): if BaseActionSpace is a pydantic BaseModel (not visible
        # here), `__post_init__` is not the hook pydantic documents
        # (`model_post_init`) — confirm that this runs at all.

        # filter out special actions
        nb_original_actions = len(self.raw_actions)
        self.raw_actions = [kept for kept in self.raw_actions if not BrowserAction.is_special(kept.id)]
        if nb_original_actions != len(self.raw_actions):
            logger.warning(
                (
                    "Special actions are not allowed in the action space. "
                    f"Removed {nb_original_actions - len(self.raw_actions)} actions."
                )
            )

        for action in self.raw_actions:
            # check 1: the id prefix must map to a known role
            if action.role == "other":
                raise InvalidActionError(
                    action.id,
                    f"actions listed in action space should have a valid role (L, B, I), got '{action.id[0]}' .",
                )
            # check 2: an empty description is rejected
            if not action.description:
                raise InvalidActionError(action.id, "actions listed in action space should have a description.")

    @override
    def actions(
        self,
        status: AllActionStatus = "valid",
        role: AllActionRole = "all",
        include_browser: bool = False,
    ) -> Sequence[Action]:
        """Return the raw actions matching `status` and `role`.

        "all" acts as a wildcard for either filter. When `include_browser` is
        true, the result of `BrowserAction.list()` is appended.
        """
        any_status = status == "all"
        any_role = role == "all"
        # Short-circuiting keeps attribute access identical to a per-case
        # filter: a wildcard filter never reads the corresponding attribute.
        selected = [
            candidate
            for candidate in self.raw_actions
            if (any_status or candidate.status == status) and (any_role or candidate.role == role)
        ]
        if not include_browser:
            return selected
        return selected + BrowserAction.list()

    @override
    def browser_actions(self) -> Sequence[BrowserAction]:
        """Return the result of `BrowserAction.list()`."""
        return BrowserAction.list()

    @override
    def markdown(self, status: AllActionStatus = "valid", include_browser: bool = True) -> str:
        """Render the selected actions as markdown, one `# category` section per category.

        Categories appear in first-seen order and sections are separated by a
        blank line. Inside a section, actions are sorted by id and written as
        `* id: description`, followed by ` (params)` when the action declares
        parameters.

        Raises:
            InvalidInternalCheckError: If an action has an empty category.
        """
        # Bucket the requested actions by category (dict preserves insertion order)
        by_category: dict[str, list[Action]] = {}
        for action in self.actions(status, include_browser=include_browser):
            if not action.category:
                # should not happen
                raise InvalidInternalCheckError(
                    check=f"action {action} has no category.",
                    url="unknown url",
                    dev_advice=(
                        "This should technically never happen due to post init checks in `notte.actions.space.py`."
                    ),
                )
            by_category.setdefault(action.category, []).append(action)

        # One text section per category: header first, then the id-sorted entries
        sections: list[str] = []
        for category, members in by_category.items():
            section_lines = [f"# {category}"]
            for action in sorted(members, key=lambda x: x.id):
                line = f"* {action.id}: {action.description}"
                if action.params:
                    line += f" ({action.params})"
                section_lines.append(line)
            sections.append("\n".join(section_lines))
        return "\n\n".join(sections)
|
|
File without changes
|