notte-sdk 0.0.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,247 @@
1
+ import os
2
+ from abc import ABC, abstractmethod
3
+ from collections.abc import Sequence
4
+ from typing import Any, ClassVar, Generic, Literal, Self, TypeVar
5
+
6
+ import requests
7
+ from loguru import logger
8
+ from pydantic import BaseModel
9
+
10
+ from notte_sdk.errors import AuthenticationError, NotteAPIError
11
+
12
+ # Covariant type variable for the pydantic model that an endpoint's response is validated into.
+ TResponse = TypeVar("TResponse", bound=BaseModel, covariant=True)
13
+
14
+
15
class NotteEndpoint(BaseModel, Generic[TResponse]):
    """
    Description of a single API endpoint.

    Attributes are documented inline; the endpoint is treated as immutable by the helpers below,
    which return modified copies instead of mutating the instance.
    """

    # Path of the endpoint, relative to the client's server URL / base endpoint path.
    path: str
    # Pydantic model class used to validate the decoded response.
    response: type[TResponse]
    # Optional request body model.
    request: BaseModel | None = None
    # HTTP verb used to call the endpoint.
    method: Literal["GET", "POST", "DELETE"]
    # Optional model dumped into the query parameters.
    params: BaseModel | None = None

    def with_request(self, request: BaseModel) -> Self:
        """
        Return a copy of the endpoint with ``request`` set.

        The copy is produced by ``model_copy(update=...)`` with its default ``deep=False``,
        i.e. it is a shallow copy: the other field values are shared with the original
        instance, and the ``request`` object is stored as given. The original instance is
        not modified.

        Args:
            request: A Pydantic model instance carrying the request data.

        Returns:
            A new endpoint instance whose ``request`` attribute is ``request``.
        """
        return self.model_copy(update={"request": request})

    def with_params(self, params: BaseModel) -> Self:
        """
        Return a copy of the endpoint with ``params`` set.

        Like ``with_request``, this is a shallow ``model_copy``; the original instance is
        not modified.

        Args:
            params: A Pydantic model instance containing the new parameters.

        Returns:
            A new endpoint instance whose ``params`` attribute is ``params``.
        """
        return self.model_copy(update={"params": params})
50
+
51
+
52
+ class BaseClient(ABC):
53
+ DEFAULT_NOTTE_API_URL: ClassVar[str] = "https://api.notte.cc"
54
+ DEFAULT_REQUEST_TIMEOUT_SECONDS: ClassVar[int] = 60
55
+
56
+ def __init__(
57
+ self,
58
+ base_endpoint_path: str | None,
59
+ api_key: str | None = None,
60
+ verbose: bool = False,
61
+ ):
62
+ """
63
+ Initialize a new API client instance.
64
+
65
+ Sets up the client by resolving an API key from the provided parameter or the
66
+ NOTTE_API_KEY environment variable. Selects the server URL (defaulting to a
67
+ preconfigured server if none is provided), initializes a mapping of endpoints
68
+ using the implemented 'endpoints' method, and stores an optional base endpoint
69
+ path for constructing request URLs.
70
+
71
+ Args:
72
+ base_endpoint_path: Optional base path to be prefixed to endpoint URLs.
73
+ api_key: Optional API key for authentication; if not supplied, retrieved from
74
+ the NOTTE_API_KEY environment variable.
75
+
76
+ Raises:
77
+ AuthenticationError: If an API key is neither provided nor available in the environment.
78
+ """
79
+ token = api_key or os.getenv("NOTTE_API_KEY")
80
+ if token is None:
81
+ raise AuthenticationError("NOTTE_API_KEY needs to be provided")
82
+ self.token: str = token
83
+ if os.getenv("NOTTE_API_URL") is not None and os.getenv("NOTTE_API_URL") != self.DEFAULT_NOTTE_API_URL:
84
+ logger.warning(f"Custom NOTTE_API_URL is set: {os.getenv('NOTTE_API_URL')}")
85
+ self.server_url: str = os.getenv("NOTTE_API_URL") or self.DEFAULT_NOTTE_API_URL
86
+ self._endpoints: dict[str, NotteEndpoint[BaseModel]] = {
87
+ endpoint.path: endpoint for endpoint in self.endpoints()
88
+ }
89
+ self.base_endpoint_path: str | None = base_endpoint_path
90
+ self.verbose: bool = verbose
91
+
92
+ @staticmethod
93
+ @abstractmethod
94
+ def endpoints() -> Sequence[NotteEndpoint[BaseModel]]:
95
+ """
96
+ Return API endpoints for the client.
97
+
98
+ This abstract method should be implemented by subclasses to supply the list of available
99
+ NotteEndpoint instances for the client.
100
+
101
+ Returns:
102
+ Sequence[NotteEndpoint[BaseModel]]: A list of endpoints for the client.
103
+ """
104
+ pass
105
+
106
+ def headers(self) -> dict[str, str]:
107
+ """
108
+ Return HTTP headers for authenticated API requests.
109
+
110
+ Constructs and returns a dictionary containing the 'Authorization' header,
111
+ which is formatted as a Bearer token using the API key stored in self.token.
112
+ """
113
+ return {"Authorization": f"Bearer {self.token}"}
114
+
115
+ def request_path(self, endpoint: NotteEndpoint[TResponse]) -> str:
116
+ """
117
+ Constructs the full request URL for the given API endpoint.
118
+
119
+ If a base endpoint path is defined, the URL is formed by concatenating the server URL,
120
+ the base endpoint path, and the endpoint's path. Otherwise, the endpoint's path is appended
121
+ directly to the server URL.
122
+ """
123
+ if self.base_endpoint_path is None:
124
+ return f"{self.server_url}/{endpoint.path}"
125
+ return f"{self.server_url}/{self.base_endpoint_path}/{endpoint.path}"
126
+
127
+ def _request(self, endpoint: NotteEndpoint[TResponse]) -> requests.Response:
128
+ """
129
+ Executes an HTTP request for the given API endpoint.
130
+
131
+ Constructs the full URL and headers from the endpoint's configuration and issues an HTTP
132
+ request using the specified method (GET, POST, or DELETE). For POST requests, a request model
133
+ must be provided; otherwise, a ValueError is raised. If the response status code is not 200 or
134
+ the JSON response contains an error detail, a NotteAPIError is raised.
135
+
136
+ Args:
137
+ endpoint: An API endpoint instance containing the HTTP method, path, optional request model,
138
+ and query parameters.
139
+
140
+ Returns:
141
+ The JSON-decoded response from the API.
142
+
143
+ Raises:
144
+ ValueError: If a POST request is attempted without a request model.
145
+ NotteAPIError: If the API response indicates a failure.
146
+ """
147
+ headers = self.headers()
148
+ url = self.request_path(endpoint)
149
+ params = endpoint.params.model_dump() if endpoint.params is not None else None
150
+ if self.verbose:
151
+ logger.info(f"Making `{endpoint.method}` request to `{endpoint.path} (i.e `{url}`) with params `{params}`.")
152
+ match endpoint.method:
153
+ case "GET":
154
+ response = requests.get(
155
+ url=url,
156
+ headers=headers,
157
+ params=params,
158
+ timeout=self.DEFAULT_REQUEST_TIMEOUT_SECONDS,
159
+ )
160
+ case "POST":
161
+ if endpoint.request is None:
162
+ raise ValueError("Request model is required for POST requests")
163
+ response = requests.post(
164
+ url=url,
165
+ headers=headers,
166
+ json=endpoint.request.model_dump(),
167
+ params=params,
168
+ timeout=self.DEFAULT_REQUEST_TIMEOUT_SECONDS,
169
+ )
170
+ case "DELETE":
171
+ response = requests.delete(
172
+ url=url,
173
+ headers=headers,
174
+ params=params,
175
+ timeout=self.DEFAULT_REQUEST_TIMEOUT_SECONDS,
176
+ )
177
+ if response.status_code != 200:
178
+ raise NotteAPIError(path=f"{self.base_endpoint_path}/{endpoint.path}", response=response)
179
+ response_dict: Any = response.json()
180
+ if "detail" in response_dict:
181
+ raise NotteAPIError(path=f"{self.base_endpoint_path}/{endpoint.path}", response=response)
182
+ return response_dict
183
+
184
+ def request(self, endpoint: NotteEndpoint[TResponse]) -> TResponse:
185
+ """
186
+ Requests the specified API endpoint and returns the validated response.
187
+
188
+ This method sends an HTTP request according to the endpoint configuration and
189
+ validates that the response is a dictionary. It then parses the response using the
190
+ endpoint's associated response model. If the response is not a dictionary, a
191
+ NotteAPIError is raised.
192
+
193
+ Args:
194
+ endpoint: The API endpoint configuration containing request details and the
195
+ expected response model.
196
+
197
+ Returns:
198
+ The validated response parsed using the endpoint's response model.
199
+
200
+ Raises:
201
+ NotteAPIError: If the API response is not a dictionary.
202
+ """
203
+ response: Any = self._request(endpoint)
204
+ if not isinstance(response, dict):
205
+ raise NotteAPIError(path=f"{self.base_endpoint_path}/{endpoint.path}", response=response)
206
+ return endpoint.response.model_validate(response)
207
+
208
+ def request_list(self, endpoint: NotteEndpoint[TResponse]) -> Sequence[TResponse]:
209
+ # Handle the case where TResponse is a list of BaseModel
210
+ """
211
+ Retrieves and validates a list of responses from the API.
212
+
213
+ This method sends a request using the provided endpoint and expects the response to be a list. Each item is validated
214
+ against the model defined in the endpoint. A NotteAPIError is raised if the response is not a list.
215
+
216
+ Parameters:
217
+ endpoint: The API endpoint containing the path and the expected response model.
218
+
219
+ Returns:
220
+ A list of validated response items.
221
+
222
+ Raises:
223
+ NotteAPIError: If the response is not a list.
224
+ """
225
+ response_list: Any = self._request(endpoint)
226
+ if not isinstance(response_list, list):
227
+ raise NotteAPIError(path=f"{self.base_endpoint_path}/{endpoint.path}", response=response_list)
228
+ return [endpoint.response.model_validate(item) for item in response_list] # pyright: ignore[reportUnknownVariableType]
229
+
230
+ def _request_file(
231
+ self, endpoint: NotteEndpoint[TResponse], file_type: str, output_file: str | None = None
232
+ ) -> bytes:
233
+ url = self.request_path(endpoint)
234
+ response = requests.get(
235
+ url=url,
236
+ headers=self.headers(),
237
+ timeout=self.DEFAULT_REQUEST_TIMEOUT_SECONDS,
238
+ )
239
+ if b"not found" in response.content:
240
+ raise ValueError("Replay is not available.")
241
+
242
+ if output_file is not None:
243
+ if not output_file.endswith(f".{file_type}"):
244
+ raise ValueError(f"Output file must have a .{file_type} extension.")
245
+ with open(output_file, "wb") as f:
246
+ _ = f.write(response.content)
247
+ return response.content
@@ -0,0 +1,215 @@
1
+ from collections.abc import Sequence
2
+ from typing import TypeVar, Unpack
3
+
4
+ from notte_core.actions.space import ActionSpace
5
+ from notte_core.browser.observation import Observation
6
+ from notte_core.controller.space import SpaceCategory
7
+ from notte_core.data.space import DataSpace
8
+ from pydantic import BaseModel
9
+ from typing_extensions import final, override
10
+
11
+ from notte_sdk.endpoints.base import BaseClient, NotteEndpoint
12
+ from notte_sdk.types import (
13
+ ObserveRequest,
14
+ ObserveRequestDict,
15
+ ObserveResponse,
16
+ ScrapeRequest,
17
+ ScrapeRequestDict,
18
+ ScrapeResponse,
19
+ SessionRequest,
20
+ StepRequest,
21
+ StepRequestDict,
22
+ )
23
+
24
+ # Type variable bounded by SessionRequest. NOTE(review): no use of it is visible in this module — confirm it is still needed.
+ TSessionRequestDict = TypeVar("TSessionRequestDict", bound=SessionRequest)
25
+
26
+
27
@final
class PageClient(BaseClient):
    """
    Client for the page operations (scrape, observe, step) of the Notte API.

    Each operation receives the target ``session_id`` explicitly and formats it into the
    endpoint path; no session identifier is stored on the client instance.
    """

    # Endpoint path templates, relative to the base endpoint path.
    PAGE_SCRAPE = "{session_id}/page/scrape"
    PAGE_OBSERVE = "{session_id}/page/observe"
    PAGE_STEP = "{session_id}/page/step"

    def __init__(
        self,
        api_key: str | None = None,
        verbose: bool = False,
    ):
        """
        Initialize the PageClient instance.

        Args:
            api_key: Optional API key used for authenticating API requests.
            verbose: Forwarded to the base client to enable request logging.
        """
        # TODO: change to page base endpoint when it's deployed
        super().__init__(base_endpoint_path="sessions", api_key=api_key, verbose=verbose)

    @staticmethod
    def _session_path(template: str, session_id: str | None) -> str:
        """Fill ``session_id`` into ``template``; return the raw template when it is None."""
        if session_id is None:
            return template
        return template.format(session_id=session_id)

    @staticmethod
    def page_scrape_endpoint(session_id: str | None = None) -> NotteEndpoint[ScrapeResponse]:
        """
        Create the endpoint for the scrape action.

        Args:
            session_id: Session to target; when None the path keeps its ``{session_id}`` placeholder.

        Returns:
            NotteEndpoint[ScrapeResponse]: An endpoint with the scrape path, the POST method
            and ``ScrapeResponse`` as response model.
        """
        path = PageClient._session_path(PageClient.PAGE_SCRAPE, session_id)
        return NotteEndpoint(path=path, response=ScrapeResponse, method="POST")

    @staticmethod
    def page_observe_endpoint(session_id: str | None = None) -> NotteEndpoint[ObserveResponse]:
        """
        Create the endpoint for the observe action.

        Args:
            session_id: Session to target; when None the path keeps its ``{session_id}`` placeholder.

        Returns:
            NotteEndpoint[ObserveResponse]: An endpoint with the observe path, the POST method
            and ``ObserveResponse`` as response model.
        """
        path = PageClient._session_path(PageClient.PAGE_OBSERVE, session_id)
        return NotteEndpoint(path=path, response=ObserveResponse, method="POST")

    @staticmethod
    def page_step_endpoint(session_id: str | None = None) -> NotteEndpoint[ObserveResponse]:
        """
        Create the endpoint for the step action.

        Args:
            session_id: Session to target; when None the path keeps its ``{session_id}`` placeholder.

        Returns:
            NotteEndpoint[ObserveResponse]: An endpoint with the step path, the POST method
            and ``ObserveResponse`` as response model.
        """
        path = PageClient._session_path(PageClient.PAGE_STEP, session_id)
        return NotteEndpoint(path=path, response=ObserveResponse, method="POST")

    @override
    @staticmethod
    def endpoints() -> Sequence[NotteEndpoint[BaseModel]]:
        """
        Return the scrape, observe and step endpoints with their unformatted path templates.
        """
        return [
            PageClient.page_scrape_endpoint(),
            PageClient.page_observe_endpoint(),
            PageClient.page_step_endpoint(),
        ]

    def scrape(self, session_id: str, **data: Unpack[ScrapeRequestDict]) -> DataSpace:
        """
        Scrape the page of a session via the Notte API.

        The keyword arguments are validated into a ``ScrapeRequest`` and posted to the scrape
        endpoint of ``session_id``. When the request carries a ``response_format`` and the
        response holds successful structured data, that data is re-validated with
        ``response_format`` so it matches the requested model.

        Args:
            session_id: Identifier of the session whose page is scraped.
            **data: Keyword arguments validated against ``ScrapeRequest``.

        Returns:
            The ``data`` attribute of the scrape response.
        """
        request = ScrapeRequest.model_validate(data)
        endpoint = PageClient.page_scrape_endpoint(session_id=session_id)
        response = self.request(endpoint.with_request(request))
        # Manually override the data.structured space to better match the response format
        response_format = request.response_format
        structured = response.data.structured
        if (
            response_format is not None
            and structured is not None
            and structured.success
            and structured.data is not None
        ):
            structured.data = response_format.model_validate(structured.data.model_dump())
        return response.data

    def observe(self, session_id: str, **data: Unpack[ObserveRequestDict]) -> Observation:
        """
        Observe the page of a session via the Notte API.

        Args:
            session_id: Identifier of the session whose page is observed.
            **data: Keyword arguments validated against ``ObserveRequest``.

        Returns:
            Observation: The observation built from the API response.
        """
        request = ObserveRequest.model_validate(data)
        endpoint = PageClient.page_observe_endpoint(session_id=session_id)
        obs_response = self.request(endpoint.with_request(request))
        return self._format_observe_response(obs_response)

    def step(self, session_id: str, **data: Unpack[StepRequestDict]) -> Observation:
        """
        Send a step action to a session and return the resulting observation.

        Args:
            session_id: Identifier of the session the step is executed in.
            **data: Keyword arguments validated against ``StepRequest``.

        Returns:
            Observation: The observation built from the API response.
        """
        request = StepRequest.model_validate(data)
        endpoint = PageClient.page_step_endpoint(session_id=session_id)
        obs_response = self.request(endpoint.with_request(request))
        return self._format_observe_response(obs_response)

    def _format_observe_response(self, response: ObserveResponse) -> Observation:
        """
        Convert an ``ObserveResponse`` into an ``Observation``.

        ``response.space`` is read unconditionally and converted into an ``ActionSpace``;
        ``response.data`` is converted into a ``DataSpace`` unless it is None, in which case
        the observation's ``data`` is None.

        Args:
            response: The observe/step response returned by the API.

        Returns:
            An Observation carrying the response metadata, screenshot, action space and data.
        """
        raw_space = response.space
        space = ActionSpace(
            description=raw_space.description,
            raw_actions=raw_space.actions,
            category=None if raw_space.category is None else SpaceCategory(raw_space.category),
        )
        raw_data = response.data
        data = (
            None
            if raw_data is None
            else DataSpace(
                markdown=raw_data.markdown,
                images=raw_data.images,
                structured=raw_data.structured,
            )
        )
        return Observation(
            metadata=response.metadata,
            screenshot=response.screenshot,
            space=space,
            data=data,
        )