openserp 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openserp/__init__.py +66 -0
- openserp/backend.py +42 -0
- openserp/client.py +534 -0
- openserp/errors.py +83 -0
- openserp/models.py +318 -0
- openserp/py.typed +1 -0
- openserp-0.1.0.dist-info/METADATA +213 -0
- openserp-0.1.0.dist-info/RECORD +9 -0
- openserp-0.1.0.dist-info/WHEEL +4 -0
openserp/__init__.py
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
from .client import AsyncOpenSERP, OpenSERP
|
|
2
|
+
from .errors import (
|
|
3
|
+
CaptchaError,
|
|
4
|
+
CloudOnlyError,
|
|
5
|
+
OssOnlyError,
|
|
6
|
+
RateLimitError,
|
|
7
|
+
SERPError,
|
|
8
|
+
TimeoutError,
|
|
9
|
+
)
|
|
10
|
+
from .models import (
|
|
11
|
+
Backend,
|
|
12
|
+
CircuitBreakerStatsResponse,
|
|
13
|
+
CloudAccount,
|
|
14
|
+
CreditInfo,
|
|
15
|
+
Engine,
|
|
16
|
+
EnginesCapabilities,
|
|
17
|
+
EnginesStatus,
|
|
18
|
+
HealthStatus,
|
|
19
|
+
ImageEnvelope,
|
|
20
|
+
ImageResult,
|
|
21
|
+
LastResponse,
|
|
22
|
+
MegaEnginesResponse,
|
|
23
|
+
MegaMode,
|
|
24
|
+
MegaSearchEnvelope,
|
|
25
|
+
Pricing,
|
|
26
|
+
ProxyStats,
|
|
27
|
+
ReadinessStatus,
|
|
28
|
+
ResponseFormat,
|
|
29
|
+
SearchEnvelope,
|
|
30
|
+
SearchResult,
|
|
31
|
+
StatsResponse,
|
|
32
|
+
)
|
|
33
|
+
|
|
34
|
+
__version__ = "0.1.0"
|
|
35
|
+
|
|
36
|
+
__all__ = [
|
|
37
|
+
"AsyncOpenSERP",
|
|
38
|
+
"Backend",
|
|
39
|
+
"CaptchaError",
|
|
40
|
+
"CircuitBreakerStatsResponse",
|
|
41
|
+
"CloudAccount",
|
|
42
|
+
"CloudOnlyError",
|
|
43
|
+
"CreditInfo",
|
|
44
|
+
"Engine",
|
|
45
|
+
"EnginesCapabilities",
|
|
46
|
+
"EnginesStatus",
|
|
47
|
+
"HealthStatus",
|
|
48
|
+
"ImageEnvelope",
|
|
49
|
+
"ImageResult",
|
|
50
|
+
"LastResponse",
|
|
51
|
+
"MegaEnginesResponse",
|
|
52
|
+
"MegaMode",
|
|
53
|
+
"MegaSearchEnvelope",
|
|
54
|
+
"OpenSERP",
|
|
55
|
+
"OssOnlyError",
|
|
56
|
+
"Pricing",
|
|
57
|
+
"ProxyStats",
|
|
58
|
+
"RateLimitError",
|
|
59
|
+
"ReadinessStatus",
|
|
60
|
+
"ResponseFormat",
|
|
61
|
+
"SERPError",
|
|
62
|
+
"SearchEnvelope",
|
|
63
|
+
"SearchResult",
|
|
64
|
+
"StatsResponse",
|
|
65
|
+
"TimeoutError",
|
|
66
|
+
]
|
openserp/backend.py
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from urllib.parse import urlparse
|
|
4
|
+
|
|
5
|
+
from .models import Backend
|
|
6
|
+
|
|
7
|
+
OSS_BASE_URL = "http://localhost:7000"
|
|
8
|
+
CLOUD_BASE_URL = "https://api.openserp.org/v1"
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def normalize_base_url(base_url: str) -> str:
    """Return *base_url* with every trailing ``/`` removed.

    Request paths in this package start with ``/``, so the base must not end
    with one.
    """
    trimmed = base_url.rstrip("/")
    return trimmed
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def resolve_base_url(api_key: str | None = None, base_url: str | None = None) -> str:
    """Pick the base URL a client should talk to.

    An explicit, non-empty *base_url* always wins (after trailing slashes are
    stripped). Otherwise a truthy *api_key* selects ``CLOUD_BASE_URL`` and
    its absence selects ``OSS_BASE_URL``.
    """
    if not base_url:
        return CLOUD_BASE_URL if api_key else OSS_BASE_URL
    return normalize_base_url(base_url)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def infer_backend(
    api_key: str | None = None,
    base_url: str | None = None,
    backend: Backend | None = None,
) -> Backend:
    """Decide whether the client targets the ``"cloud"`` or ``"oss"`` backend.

    Precedence, highest first:

    1. an explicit, truthy *backend*;
    2. a *base_url* whose hostname is ``api.openserp.org`` -> ``"cloud"``;
    3. a truthy *api_key* -> ``"cloud"``;
    4. otherwise ``"oss"``.
    """
    cloud_host = "api.openserp.org"

    if backend:
        return backend

    if base_url:
        try:
            targets_cloud = urlparse(base_url).hostname == cloud_host
        except ValueError:
            # The URL could not be parsed; fall back to a plain substring test.
            targets_cloud = cloud_host in base_url
        if targets_cloud:
            return "cloud"

    return "cloud" if api_key else "oss"
|
openserp/client.py
ADDED
|
@@ -0,0 +1,534 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from collections.abc import Callable, Mapping
|
|
4
|
+
from typing import Any, TypeVar, cast
|
|
5
|
+
|
|
6
|
+
import httpx
|
|
7
|
+
from pydantic import BaseModel
|
|
8
|
+
|
|
9
|
+
from .backend import infer_backend, resolve_base_url
|
|
10
|
+
from .errors import CloudOnlyError, OssOnlyError, TimeoutError, error_from_response
|
|
11
|
+
from .models import (
|
|
12
|
+
Backend,
|
|
13
|
+
CacheStats,
|
|
14
|
+
CircuitBreakerStatsResponse,
|
|
15
|
+
CloudAccount,
|
|
16
|
+
CreditInfo,
|
|
17
|
+
Engine,
|
|
18
|
+
EnginesCapabilities,
|
|
19
|
+
EnginesStatus,
|
|
20
|
+
HealthStatus,
|
|
21
|
+
ImageEnvelope,
|
|
22
|
+
LastResponse,
|
|
23
|
+
MegaEnginesResponse,
|
|
24
|
+
MegaSearchEnvelope,
|
|
25
|
+
Pricing,
|
|
26
|
+
ProxyStats,
|
|
27
|
+
ReadinessStatus,
|
|
28
|
+
ResponseFormat,
|
|
29
|
+
SearchEnvelope,
|
|
30
|
+
StatsResponse,
|
|
31
|
+
)
|
|
32
|
+
|
|
33
|
+
QueryValue = str | int | float | bool | list[str] | tuple[str, ...] | None
|
|
34
|
+
RetryHook = Callable[[Exception, int], bool]
|
|
35
|
+
ModelT = TypeVar("ModelT", bound=BaseModel)
|
|
36
|
+
|
|
37
|
+
_PROXY_HEADER_MAP: dict[str, str] = {
|
|
38
|
+
"use_proxy": "X-Use-Proxy",
|
|
39
|
+
"proxy_url": "X-Proxy-URL",
|
|
40
|
+
"proxy_country": "X-Proxy-Country",
|
|
41
|
+
"proxy_class": "X-Proxy-Class",
|
|
42
|
+
"proxy_provider": "X-Proxy-Provider",
|
|
43
|
+
"proxy_session_id": "X-Proxy-Session-ID",
|
|
44
|
+
"tenant": "X-Tenant",
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
class _BaseOpenSERP:
    """State and request/response helpers shared by the sync and async clients.

    This class performs no I/O itself. It resolves the base URL and backend,
    merges headers, splits search parameters into query string and headers,
    and turns an ``httpx.Response`` into a body or a raised error.
    """

    def __init__(
        self,
        *,
        api_key: str | None = None,
        base_url: str | None = None,
        backend: Backend | None = None,
        timeout: float = 30.0,
        headers: Mapping[str, str] | None = None,
        retry: RetryHook | None = None,
    ) -> None:
        """Store the client configuration.

        Args:
            api_key: Sent as ``Authorization: Bearer <api_key>`` when truthy.
            base_url: Explicit server URL; overrides the built-in defaults.
            backend: Explicit backend; overrides inference from the other
                arguments.
            timeout: Stored on the instance and reported by ``TimeoutError``.
            headers: Default headers, copied so later caller mutations do not
                leak into the client.
            retry: Hook called as ``retry(exc, attempt)`` after a failed
                request; a truthy result retries the request.
        """
        self.api_key = api_key
        self._base_url = resolve_base_url(api_key=api_key, base_url=base_url)
        self._backend = infer_backend(api_key=api_key, base_url=base_url, backend=backend)
        self.timeout = timeout
        self.headers = dict(headers or {})
        self.retry = retry
        # Metadata of the most recent HTTP response; None until one arrives.
        self.last_response: LastResponse | None = None

    @property
    def base_url(self) -> str:
        """Resolved base URL that request paths are appended to."""
        return self._base_url

    @property
    def backend(self) -> Backend:
        """Backend resolved at construction time."""
        return self._backend

    def _assert_cloud(self, method: str) -> None:
        """Raise ``CloudOnlyError`` unless the backend is ``"cloud"``."""
        if self.backend != "cloud":
            raise CloudOnlyError(method)

    def _assert_oss(self, method: str) -> None:
        """Raise ``OssOnlyError`` unless the backend is ``"oss"``."""
        if self.backend != "oss":
            raise OssOnlyError(method)

    def _merged_headers(self, headers: Mapping[str, str] | None = None) -> dict[str, str]:
        """Combine default headers, the bearer token and per-request headers.

        Later sources win: per-request *headers* override the generated
        ``Authorization`` header, which overrides the instance defaults.
        """
        merged = dict(self.headers)
        if self.api_key:
            merged["Authorization"] = f"Bearer {self.api_key}"
        if headers:
            merged.update(headers)
        return merged

    @staticmethod
    def _normalize_engines(engines: Any) -> Any:
        """Return *engines* in a form that encodes to a comma-separated list.

        A string is passed through untouched: calling ``list()`` on it would
        split it into single characters (``"google"`` -> ``g,o,o,g,l,e``).
        Any other iterable is materialized into a list.
        """
        if isinstance(engines, str):
            return engines
        return list(engines)

    def _build_search_request(
        self,
        params: Mapping[str, Any],
    ) -> tuple[dict[str, QueryValue], dict[str, str], ResponseFormat | None]:
        """Split search keyword arguments into query, headers and format.

        Returns:
            ``(query, headers, format)``. Parameters that map to proxy/tenant
            headers are moved out of the query. ``format`` is read from
            *params* and returned in addition to staying in the query.
        """
        query: dict[str, QueryValue] = {**params}
        engines = params.get("engines")
        if engines is not None:
            query["engines"] = self._normalize_engines(engines)
        clean_query, headers = _split_query_and_headers(query)
        return clean_query, headers, cast(ResponseFormat | None, params.get("format"))

    def _build_parse_request(
        self, format: ResponseFormat | None
    ) -> tuple[dict[str, QueryValue] | None, dict[str, str]]:
        """Build the query and headers for an HTML-parsing POST request.

        The query is ``None`` when no *format* was requested. The body is
        always declared as UTF-8 HTML.
        """
        query: dict[str, QueryValue] | None = {"format": format} if format else None
        return query, {"Content-Type": "text/html; charset=utf-8"}

    def _set_last_response(self, response: httpx.Response) -> None:
        """Record status, well-known ``x-*`` headers and all headers of *response*."""
        headers = {key.lower(): value for key, value in response.headers.items()}
        credits_used = _int_header(response.headers, "x-credits-used")
        credits_remaining = _int_header(response.headers, "x-credits-remaining")
        # Only attach credit info when at least one credit header was usable.
        credits = (
            CreditInfo(used=credits_used, remaining=credits_remaining)
            if credits_used is not None or credits_remaining is not None
            else None
        )
        self.last_response = LastResponse(
            status=response.status_code,
            request_id=response.headers.get("x-request-id"),
            credits=credits,
            engine_used=response.headers.get("x-engine-used"),
            fallback_engine=response.headers.get("x-fallback-engine"),
            cache=response.headers.get("x-cache"),
            proxy_mode=response.headers.get("x-proxy-mode"),
            proxy_tag=response.headers.get("x-proxy-tag"),
            proxy_used=response.headers.get("x-proxy-used"),
            network_bytes=_int_header(response.headers, "x-network-bytes"),
            browser_profile_id=response.headers.get("x-browser-profile-id"),
            headers=headers,
        )

    def _handle_response(self, response: httpx.Response, format: ResponseFormat | None) -> Any:
        """Record *response*, decode its body and raise on HTTP errors.

        ``last_response`` is updated before the error check, so it is also
        available after a failed request.

        Raises:
            SERPError: (or a subclass) when ``response.is_error`` is true.
        """
        self._set_last_response(response)
        body = _read_body(response, format)
        if response.is_error:
            raise error_from_response(
                response.status_code, body, response.headers.get("x-request-id")
            )
        return body
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
class OpenSERP(_BaseOpenSERP):
    """Blocking OpenSERP client that sends requests through an ``httpx.Client``.

    Can be used as a context manager; leaving the ``with`` block calls
    :meth:`close`.
    """

    def __init__(
        self,
        *,
        api_key: str | None = None,
        base_url: str | None = None,
        backend: Backend | None = None,
        timeout: float = 30.0,
        headers: Mapping[str, str] | None = None,
        retry: RetryHook | None = None,
        client: httpx.Client | None = None,
    ) -> None:
        """Configure the client.

        *client* lets the caller supply a ready ``httpx.Client``. When it is
        omitted, one is created with *timeout*. The remaining arguments are
        forwarded to the base class unchanged.
        """
        super().__init__(
            api_key=api_key,
            base_url=base_url,
            backend=backend,
            timeout=timeout,
            headers=headers,
            retry=retry,
        )
        # Only a client created here is closed by close(); a caller-supplied
        # one stays under the caller's control.
        self._owns_client = client is None
        self._client = client or httpx.Client(timeout=timeout)

    def close(self) -> None:
        """Close the underlying HTTP client if this instance created it."""
        if not self._owns_client:
            return
        self._client.close()

    def __enter__(self) -> OpenSERP:
        return self

    def __exit__(self, *_exc: object) -> None:
        self.close()

    # -- search endpoints ---------------------------------------------------
    # Each returns the validated envelope model, or the raw body as ``str``
    # when the decoded body is a string (e.g. a non-JSON ``format``).

    def search(self, *, engine: Engine, **params: Any) -> SearchEnvelope | str:
        """GET ``/{engine}/search`` with *params*."""
        q, hdrs, fmt = self._build_search_request(params)
        return self._get_model(SearchEnvelope, f"/{engine}/search", q, hdrs, fmt)

    def image(self, *, engine: Engine, **params: Any) -> ImageEnvelope | str:
        """GET ``/{engine}/image`` with *params*."""
        q, hdrs, fmt = self._build_search_request(params)
        return self._get_model(ImageEnvelope, f"/{engine}/image", q, hdrs, fmt)

    def mega_search(self, **params: Any) -> MegaSearchEnvelope | str:
        """GET ``/mega/search`` with *params*."""
        q, hdrs, fmt = self._build_search_request(params)
        return self._get_model(MegaSearchEnvelope, "/mega/search", q, hdrs, fmt)

    def fast_search(self, **params: Any) -> MegaSearchEnvelope | str:
        """:meth:`mega_search` with ``mode`` forced to ``"fast"``."""
        params["mode"] = "fast"
        return self.mega_search(**params)

    def any_search(self, **params: Any) -> MegaSearchEnvelope | str:
        """:meth:`mega_search` with ``mode`` forced to ``"any"``."""
        params["mode"] = "any"
        return self.mega_search(**params)

    def mega_image(self, **params: Any) -> ImageEnvelope | str:
        """GET ``/mega/image`` with *params*."""
        q, hdrs, fmt = self._build_search_request(params)
        return self._get_model(ImageEnvelope, "/mega/image", q, hdrs, fmt)

    def fast_image(self, **params: Any) -> ImageEnvelope | str:
        """:meth:`mega_image` with ``mode`` forced to ``"fast"``."""
        params["mode"] = "fast"
        return self.mega_image(**params)

    def any_image(self, **params: Any) -> ImageEnvelope | str:
        """:meth:`mega_image` with ``mode`` forced to ``"any"``."""
        params["mode"] = "any"
        return self.mega_image(**params)

    # -- OSS-only endpoints (raise OssOnlyError on any other backend) -------

    def parse_google(
        self, *, html: str, format: ResponseFormat | None = None
    ) -> SearchEnvelope | str:
        """POST *html* to ``/google/parse``."""
        self._assert_oss("parse_google")
        return self._parse("/google/parse", html, format)

    def parse_bing(
        self, *, html: str, format: ResponseFormat | None = None
    ) -> SearchEnvelope | str:
        """POST *html* to ``/bing/parse``."""
        self._assert_oss("parse_bing")
        return self._parse("/bing/parse", html, format)

    def health(self) -> HealthStatus:
        """GET ``/health``."""
        self._assert_oss("health")
        return self._get_json(HealthStatus, "/health")

    def ready(self) -> ReadinessStatus:
        """GET ``/ready``."""
        self._assert_oss("ready")
        return self._get_json(ReadinessStatus, "/ready")

    def stats(self) -> StatsResponse:
        """GET ``/stats``."""
        self._assert_oss("stats")
        return self._get_json(StatsResponse, "/stats")

    def cache_stats(self) -> CacheStats:
        """GET ``/stats/cache``."""
        self._assert_oss("cache_stats")
        return self._get_json(CacheStats, "/stats/cache")

    def proxy_stats(self) -> ProxyStats:
        """GET ``/stats/proxy``."""
        self._assert_oss("proxy_stats")
        return self._get_json(ProxyStats, "/stats/proxy")

    def circuit_breaker_stats(self) -> CircuitBreakerStatsResponse:
        """GET ``/stats/cb``."""
        self._assert_oss("circuit_breaker_stats")
        return self._get_json(CircuitBreakerStatsResponse, "/stats/cb")

    def engines(self) -> MegaEnginesResponse:
        """GET ``/mega/engines``."""
        self._assert_oss("engines")
        return self._get_json(MegaEnginesResponse, "/mega/engines")

    # -- cloud-only endpoints (raise CloudOnlyError on any other backend) ---

    def me(self) -> CloudAccount:
        """GET ``/me``."""
        self._assert_cloud("me")
        return self._get_json(CloudAccount, "/me")

    def pricing(self) -> Pricing:
        """GET ``/pricing``."""
        self._assert_cloud("pricing")
        return self._get_json(Pricing, "/pricing")

    def engines_status(self) -> EnginesStatus:
        """GET ``/engines/status``."""
        self._assert_cloud("engines_status")
        return self._get_json(EnginesStatus, "/engines/status")

    def engines_capabilities(self) -> EnginesCapabilities:
        """GET ``/engines/capabilities``."""
        self._assert_cloud("engines_capabilities")
        return self._get_json(EnginesCapabilities, "/engines/capabilities")

    # -- internals ----------------------------------------------------------

    def _get_json(self, model: type[ModelT], path: str) -> ModelT:
        """GET *path* and validate the decoded body against *model*."""
        return model.model_validate(self._send("GET", path))

    def _parse(self, path: str, html: str, format: ResponseFormat | None) -> SearchEnvelope | str:
        """POST *html* to *path*; return text as-is, anything else as an envelope."""
        query, headers = self._build_parse_request(format)
        payload = self._send(
            "POST", path, query=query, headers=headers, content=html, format=format
        )
        if isinstance(payload, str):
            return payload
        return SearchEnvelope.model_validate(payload)

    def _get_model(
        self,
        model: type[ModelT],
        path: str,
        query: Mapping[str, QueryValue] | None = None,
        headers: Mapping[str, str] | None = None,
        format: ResponseFormat | None = None,
    ) -> ModelT | str:
        """GET *path*; return text as-is, anything else validated against *model*."""
        payload = self._send("GET", path, query=query, headers=headers, format=format)
        if isinstance(payload, str):
            return payload
        return model.model_validate(payload)

    def _request_once(
        self,
        method: str,
        path: str,
        query: Mapping[str, QueryValue] | None,
        headers: Mapping[str, str] | None,
        content: str | None,
    ) -> httpx.Response:
        """Issue a single HTTP request, mapping httpx timeouts to ``TimeoutError``."""
        try:
            return self._client.request(
                method,
                f"{self.base_url}{path}",
                params=_encode_query(query),
                headers=self._merged_headers(headers),
                content=content,
            )
        except httpx.TimeoutException as exc:
            raise TimeoutError(self.timeout) from exc

    def _send(
        self,
        method: str,
        path: str,
        *,
        query: Mapping[str, QueryValue] | None = None,
        headers: Mapping[str, str] | None = None,
        format: ResponseFormat | None = None,
        content: str | None = None,
    ) -> Any:
        """Send a request and return the decoded body, retrying via the hook.

        After any exception (transport error, timeout, or error response)
        the ``retry`` hook, if set, is called with the exception and the
        zero-based count of retries already made. A truthy result repeats
        the request; otherwise the exception propagates. There is no
        built-in retry limit.
        """
        attempt = 0
        while True:
            try:
                response = self._request_once(method, path, query, headers, content)
                return self._handle_response(response, format)
            except Exception as exc:
                if self.retry and self.retry(exc, attempt):
                    attempt += 1
                    continue
                raise
|
|
307
|
+
|
|
308
|
+
|
|
309
|
+
class AsyncOpenSERP(_BaseOpenSERP):
    """Asyncio OpenSERP client that sends requests through an ``httpx.AsyncClient``.

    Can be used as an async context manager; leaving the ``async with``
    block awaits :meth:`close`.
    """

    def __init__(
        self,
        *,
        api_key: str | None = None,
        base_url: str | None = None,
        backend: Backend | None = None,
        timeout: float = 30.0,
        headers: Mapping[str, str] | None = None,
        retry: RetryHook | None = None,
        client: httpx.AsyncClient | None = None,
    ) -> None:
        """Configure the client.

        *client* lets the caller supply a ready ``httpx.AsyncClient``. When
        it is omitted, one is created with *timeout*. The remaining
        arguments are forwarded to the base class unchanged.
        """
        super().__init__(
            api_key=api_key,
            base_url=base_url,
            backend=backend,
            timeout=timeout,
            headers=headers,
            retry=retry,
        )
        # Only a client created here is closed by close(); a caller-supplied
        # one stays under the caller's control.
        self._owns_client = client is None
        self._client = client or httpx.AsyncClient(timeout=timeout)

    async def close(self) -> None:
        """Close the underlying HTTP client if this instance created it."""
        if not self._owns_client:
            return
        await self._client.aclose()

    async def __aenter__(self) -> AsyncOpenSERP:
        return self

    async def __aexit__(self, *_exc: object) -> None:
        await self.close()

    # -- search endpoints ---------------------------------------------------
    # Each returns the validated envelope model, or the raw body as ``str``
    # when the decoded body is a string (e.g. a non-JSON ``format``).

    async def search(self, *, engine: Engine, **params: Any) -> SearchEnvelope | str:
        """GET ``/{engine}/search`` with *params*."""
        q, hdrs, fmt = self._build_search_request(params)
        return await self._get_model(SearchEnvelope, f"/{engine}/search", q, hdrs, fmt)

    async def image(self, *, engine: Engine, **params: Any) -> ImageEnvelope | str:
        """GET ``/{engine}/image`` with *params*."""
        q, hdrs, fmt = self._build_search_request(params)
        return await self._get_model(ImageEnvelope, f"/{engine}/image", q, hdrs, fmt)

    async def mega_search(self, **params: Any) -> MegaSearchEnvelope | str:
        """GET ``/mega/search`` with *params*."""
        q, hdrs, fmt = self._build_search_request(params)
        return await self._get_model(MegaSearchEnvelope, "/mega/search", q, hdrs, fmt)

    async def fast_search(self, **params: Any) -> MegaSearchEnvelope | str:
        """:meth:`mega_search` with ``mode`` forced to ``"fast"``."""
        params["mode"] = "fast"
        return await self.mega_search(**params)

    async def any_search(self, **params: Any) -> MegaSearchEnvelope | str:
        """:meth:`mega_search` with ``mode`` forced to ``"any"``."""
        params["mode"] = "any"
        return await self.mega_search(**params)

    async def mega_image(self, **params: Any) -> ImageEnvelope | str:
        """GET ``/mega/image`` with *params*."""
        q, hdrs, fmt = self._build_search_request(params)
        return await self._get_model(ImageEnvelope, "/mega/image", q, hdrs, fmt)

    async def fast_image(self, **params: Any) -> ImageEnvelope | str:
        """:meth:`mega_image` with ``mode`` forced to ``"fast"``."""
        params["mode"] = "fast"
        return await self.mega_image(**params)

    async def any_image(self, **params: Any) -> ImageEnvelope | str:
        """:meth:`mega_image` with ``mode`` forced to ``"any"``."""
        params["mode"] = "any"
        return await self.mega_image(**params)

    # -- OSS-only endpoints (raise OssOnlyError on any other backend) -------

    async def parse_google(
        self, *, html: str, format: ResponseFormat | None = None
    ) -> SearchEnvelope | str:
        """POST *html* to ``/google/parse``."""
        self._assert_oss("parse_google")
        return await self._parse("/google/parse", html, format)

    async def parse_bing(
        self, *, html: str, format: ResponseFormat | None = None
    ) -> SearchEnvelope | str:
        """POST *html* to ``/bing/parse``."""
        self._assert_oss("parse_bing")
        return await self._parse("/bing/parse", html, format)

    async def health(self) -> HealthStatus:
        """GET ``/health``."""
        self._assert_oss("health")
        return await self._get_json(HealthStatus, "/health")

    async def ready(self) -> ReadinessStatus:
        """GET ``/ready``."""
        self._assert_oss("ready")
        return await self._get_json(ReadinessStatus, "/ready")

    async def stats(self) -> StatsResponse:
        """GET ``/stats``."""
        self._assert_oss("stats")
        return await self._get_json(StatsResponse, "/stats")

    async def cache_stats(self) -> CacheStats:
        """GET ``/stats/cache``."""
        self._assert_oss("cache_stats")
        return await self._get_json(CacheStats, "/stats/cache")

    async def proxy_stats(self) -> ProxyStats:
        """GET ``/stats/proxy``."""
        self._assert_oss("proxy_stats")
        return await self._get_json(ProxyStats, "/stats/proxy")

    async def circuit_breaker_stats(self) -> CircuitBreakerStatsResponse:
        """GET ``/stats/cb``."""
        self._assert_oss("circuit_breaker_stats")
        return await self._get_json(CircuitBreakerStatsResponse, "/stats/cb")

    async def engines(self) -> MegaEnginesResponse:
        """GET ``/mega/engines``."""
        self._assert_oss("engines")
        return await self._get_json(MegaEnginesResponse, "/mega/engines")

    # -- cloud-only endpoints (raise CloudOnlyError on any other backend) ---

    async def me(self) -> CloudAccount:
        """GET ``/me``."""
        self._assert_cloud("me")
        return await self._get_json(CloudAccount, "/me")

    async def pricing(self) -> Pricing:
        """GET ``/pricing``."""
        self._assert_cloud("pricing")
        return await self._get_json(Pricing, "/pricing")

    async def engines_status(self) -> EnginesStatus:
        """GET ``/engines/status``."""
        self._assert_cloud("engines_status")
        return await self._get_json(EnginesStatus, "/engines/status")

    async def engines_capabilities(self) -> EnginesCapabilities:
        """GET ``/engines/capabilities``."""
        self._assert_cloud("engines_capabilities")
        return await self._get_json(EnginesCapabilities, "/engines/capabilities")

    # -- internals ----------------------------------------------------------

    async def _get_json(self, model: type[ModelT], path: str) -> ModelT:
        """GET *path* and validate the decoded body against *model*."""
        payload = await self._send("GET", path)
        return model.model_validate(payload)

    async def _parse(
        self, path: str, html: str, format: ResponseFormat | None
    ) -> SearchEnvelope | str:
        """POST *html* to *path*; return text as-is, anything else as an envelope."""
        query, headers = self._build_parse_request(format)
        payload = await self._send(
            "POST", path, query=query, headers=headers, content=html, format=format
        )
        if isinstance(payload, str):
            return payload
        return SearchEnvelope.model_validate(payload)

    async def _get_model(
        self,
        model: type[ModelT],
        path: str,
        query: Mapping[str, QueryValue] | None = None,
        headers: Mapping[str, str] | None = None,
        format: ResponseFormat | None = None,
    ) -> ModelT | str:
        """GET *path*; return text as-is, anything else validated against *model*."""
        payload = await self._send("GET", path, query=query, headers=headers, format=format)
        if isinstance(payload, str):
            return payload
        return model.model_validate(payload)

    async def _request_once(
        self,
        method: str,
        path: str,
        query: Mapping[str, QueryValue] | None,
        headers: Mapping[str, str] | None,
        content: str | None,
    ) -> httpx.Response:
        """Issue a single HTTP request, mapping httpx timeouts to ``TimeoutError``."""
        try:
            return await self._client.request(
                method,
                f"{self.base_url}{path}",
                params=_encode_query(query),
                headers=self._merged_headers(headers),
                content=content,
            )
        except httpx.TimeoutException as exc:
            raise TimeoutError(self.timeout) from exc

    async def _send(
        self,
        method: str,
        path: str,
        *,
        query: Mapping[str, QueryValue] | None = None,
        headers: Mapping[str, str] | None = None,
        format: ResponseFormat | None = None,
        content: str | None = None,
    ) -> Any:
        """Send a request and return the decoded body, retrying via the hook.

        After any exception (transport error, timeout, or error response)
        the ``retry`` hook, if set, is called synchronously with the
        exception and the zero-based count of retries already made. A truthy
        result repeats the request; otherwise the exception propagates.
        There is no built-in retry limit.
        """
        attempt = 0
        while True:
            try:
                response = await self._request_once(method, path, query, headers, content)
                return self._handle_response(response, format)
            except Exception as exc:
                if self.retry and self.retry(exc, attempt):
                    attempt += 1
                    continue
                raise
|
|
478
|
+
|
|
479
|
+
|
|
480
|
+
def _split_query_and_headers(
    query: Mapping[str, QueryValue],
) -> tuple[dict[str, QueryValue], dict[str, str]]:
    """Separate header-bound parameters from real query parameters.

    Entries whose value is ``None`` are dropped. Keys listed in
    ``_PROXY_HEADER_MAP`` are encoded to strings and returned under their
    header name; every other key is returned unchanged in the query dict.
    """
    supplied = {name: raw for name, raw in query.items() if raw is not None}
    headers: dict[str, str] = {
        _PROXY_HEADER_MAP[name]: _encode_value(raw)
        for name, raw in supplied.items()
        if name in _PROXY_HEADER_MAP
    }
    clean_query: dict[str, QueryValue] = {
        name: raw for name, raw in supplied.items() if name not in _PROXY_HEADER_MAP
    }
    return clean_query, headers
|
|
494
|
+
|
|
495
|
+
|
|
496
|
+
def _encode_query(query: Mapping[str, QueryValue] | None) -> dict[str, str]:
    """Turn query values into strings, skipping entries that are ``None``.

    A missing or empty *query* yields an empty dict.
    """
    encoded: dict[str, str] = {}
    for name, raw in (query or {}).items():
        if raw is None:
            continue
        encoded[name] = _encode_value(raw)
    return encoded
|
|
500
|
+
|
|
501
|
+
|
|
502
|
+
def _encode_value(value: QueryValue) -> str:
    """Render one query/header value as a string.

    Booleans become lowercase ``"true"``/``"false"``, lists and tuples are
    joined with commas, and everything else goes through ``str()``.
    """
    # Booleans are handled first so they do not reach the generic str()
    # fallback, which would produce "True"/"False".
    if value is True:
        return "true"
    if value is False:
        return "false"
    if isinstance(value, (list, tuple)):
        return ",".join(map(str, value))
    return str(value)
|
|
508
|
+
|
|
509
|
+
|
|
510
|
+
def _read_body(response: httpx.Response, format: ResponseFormat | None = None) -> Any:
    """Decode the body of *response* according to the requested *format*.

    Returns:
        * ``None`` for HTTP 204, or for an empty non-JSON body;
        * the raw text when a non-JSON *format* was requested, except that
          an error response whose body is a JSON object is returned as a
          dict;
        * the parsed JSON when the content type is ``application/json`` or
          the text parses as JSON;
        * the raw text otherwise.
    """
    if response.status_code == 204:
        return None
    if format and format != "json":
        # The server may answer a markdown/text/ndjson request with a JSON
        # error object. Returning that as opaque text would hide its
        # error/message/reason fields from error_from_response, so parse it
        # when the response is an error.
        if response.is_error:
            try:
                parsed = response.json()
            except ValueError:
                parsed = None
            if isinstance(parsed, dict):
                return parsed
        return response.text
    content_type = response.headers.get("content-type", "")
    if "application/json" in content_type:
        return response.json()
    text = response.text
    if not text:
        return None
    # No JSON content type declared: try JSON anyway, fall back to text.
    try:
        return response.json()
    except ValueError:
        return text
|
|
525
|
+
|
|
526
|
+
|
|
527
|
+
def _int_header(headers: httpx.Headers, name: str) -> int | None:
    """Read header *name* as an integer.

    Returns ``None`` when the header is absent, empty, or not parseable by
    ``int()``.
    """
    raw = headers.get(name)
    if raw in (None, ""):
        return None
    try:
        parsed = int(raw)
    except ValueError:
        parsed = None
    return parsed
|
openserp/errors.py
ADDED
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class SERPError(Exception):
    """Base class for every error raised by this package.

    Besides the message, an instance carries optional context as plain
    attributes: ``status``, ``code``, ``reason``, ``request_id``, ``meta``
    and ``response``.
    """

    def __init__(
        self,
        message: str,
        *,
        status: int = 0,
        code: str | None = None,
        reason: str | None = None,
        request_id: str | None = None,
        meta: dict[str, Any] | None = None,
        response: Any = None,
    ) -> None:
        """Create the error.

        Args:
            message: Human-readable description; becomes ``str(error)``.
            status: HTTP status code; defaults to ``0``.
            code: Machine-readable error code, if any.
            reason: Additional explanation, if any.
            request_id: Identifier of the failed request, if known.
            meta: Extra structured details, if any.
            response: The decoded response body, if any.
        """
        super().__init__(message)
        # Where the error came from.
        self.status = status
        self.code = code
        self.reason = reason
        # Context for correlating and debugging the failure.
        self.request_id = request_id
        self.meta = meta
        self.response = response
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class RateLimitError(SERPError):
    """Raised for HTTP 429 responses or the ``rate_limited`` error code."""
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class CaptchaError(SERPError):
    """Raised when a response carries the ``captcha_detected`` error code."""
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class CloudOnlyError(SERPError):
    """Raised when a cloud-only method is called on a non-cloud client."""

    def __init__(self, method: str) -> None:
        """Build the message from the name of the offending *method*."""
        message = (
            f"{method} is only available against OpenSERP Cloud. "
            "Configure api_key/base_url for https://api.openserp.org/v1 "
            'or set backend="cloud".'
        )
        super().__init__(message)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class OssOnlyError(SERPError):
    """Raised when a self-hosted-only method is called on a non-OSS client."""

    def __init__(self, method: str) -> None:
        """Build the message from the name of the offending *method*."""
        message = (
            f"{method} is only available against a self-hosted OpenSERP server. "
            'Configure base_url for your OSS server or set backend="oss".'
        )
        super().__init__(message)
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class TimeoutError(SERPError):
    """Raised when a request exceeds the client timeout.

    Note: this class deliberately shares its name with the built-in
    ``TimeoutError`` but is unrelated to it; it derives from ``SERPError``.
    """

    def __init__(self, timeout: float) -> None:
        """Record the *timeout* (in seconds) in the message; code is ``request_timeout``."""
        message = f"OpenSERP request timed out after {timeout:g}s"
        super().__init__(message, code="request_timeout")
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def error_from_response(
    status: int,
    body: Any,
    request_id: str | None = None,
) -> SERPError:
    """Build the most specific ``SERPError`` for a failed response.

    Args:
        status: HTTP status code of the response.
        body: Decoded response body. Only a dict contributes ``error``,
            ``message``, ``reason``, ``request_id`` and ``meta``; any other
            type is kept solely as the ``response`` attribute.
        request_id: Fallback request id, used when the body has none.

    Returns:
        ``RateLimitError`` for status 429 or code ``rate_limited``,
        ``CaptchaError`` for code ``captcha_detected``, otherwise a plain
        ``SERPError``. The error is returned, not raised.
    """
    payload = body if isinstance(body, dict) else {}
    error_code = payload.get("error")

    if status == 429 or error_code == "rate_limited":
        error_cls = RateLimitError
    elif error_code == "captcha_detected":
        error_cls = CaptchaError
    else:
        error_cls = SERPError

    return error_cls(
        payload.get("message") or f"OpenSERP request failed with status {status}",
        status=status,
        code=error_code,
        reason=payload.get("reason"),
        request_id=payload.get("request_id") or request_id,
        meta=payload.get("meta"),
        response=body,
    )
|
openserp/models.py
ADDED
|
@@ -0,0 +1,318 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Any, Literal
|
|
4
|
+
|
|
5
|
+
from pydantic import BaseModel, ConfigDict, Field
|
|
6
|
+
|
|
7
|
+
Backend = Literal["oss", "cloud"]
|
|
8
|
+
Engine = Literal["google", "yandex", "baidu", "bing", "duck", "duckduckgo", "ecosia"]
|
|
9
|
+
MegaMode = Literal["balanced", "any", "fast"]
|
|
10
|
+
ResponseFormat = Literal["json", "markdown", "text", "ndjson"]
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class OpenSERPModel(BaseModel):
    """Common base for the package's response models.

    Undeclared fields are accepted (``extra="allow"``) rather than rejected.
    """

    model_config = ConfigDict(extra="allow")

    def to_dict(self) -> dict[str, Any]:
        """Return the model dumped in pydantic's ``"json"`` mode."""
        dumped = self.model_dump(mode="json")
        return dumped
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class CreditInfo(OpenSERPModel):
    """Credit counters; either value may be missing."""

    # Credits consumed.
    used: int | None = None
    # Credits still available.
    remaining: int | None = None
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class LastResponse(OpenSERPModel):
    """Metadata about an HTTP response: status, selected values and all headers.

    Only ``status`` is required; every other field defaults to ``None``
    (or an empty dict for ``headers``).
    """

    # HTTP status code.
    status: int
    request_id: str | None = None
    credits: CreditInfo | None = None
    engine_used: str | None = None
    fallback_engine: str | None = None
    cache: str | None = None
    proxy_mode: str | None = None
    proxy_tag: str | None = None
    proxy_used: str | None = None
    network_bytes: int | None = None
    browser_profile_id: str | None = None
    # Full header mapping of the response.
    headers: dict[str, str] = Field(default_factory=dict)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class QueryEcho(OpenSERPModel):
    """The ``query`` section of a response envelope; all fields optional."""

    text: str | None = None
    lang: str | None = None
    region: str | None = None
    # Defaults to an empty list when absent.
    engines_requested: list[str] = Field(default_factory=list)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
class EngineErrorDetail(OpenSERPModel):
    """One entry of ``ResponseMeta.engine_errors``.

    ``engine`` and ``error`` are required; ``message`` is optional.
    """

    engine: str
    error: str
    message: str | None = None
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
class ResponseMeta(OpenSERPModel):
    """The ``meta`` section of a response envelope; all fields optional."""

    request_id: str | None = None
    requested_at: str | None = None
    took_ms: int | None = None
    # Defaults to an empty list when absent.
    engines_failed: list[str] = Field(default_factory=list)
    # Unlike engines_failed, this stays None when absent.
    engine_errors: list[EngineErrorDetail] | None = None
    version: str | None = None
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
class Pagination(OpenSERPModel):
    """The ``pagination`` section of a response envelope; all fields optional."""

    page: int | None = None
    has_more: bool | None = None
    next_start: int | None = None
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
class Position(OpenSERPModel):
    """The ``position`` value of a ``SearchResult``."""

    absolute: int | None = None
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
class DomainInfo(OpenSERPModel):
    """The ``domain_info`` value of a ``SearchResult``; all fields optional."""

    tld: str | None = None
    sld: str | None = None
    category: str | None = None
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
class Classification(OpenSERPModel):
    """The ``classification`` value of a ``SearchResult``; all fields optional."""

    content_type: str | None = None
    source_hint: str | None = None
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
class SearchResult(OpenSERPModel):
    """A single web search result; every field is optional."""

    # Identity and ordering.
    id: str | None = None
    rank: int | None = None
    type: str | None = None
    # Display content.
    title: str | None = None
    url: str | None = None
    display_url: str | None = None
    snippet: str | None = None
    domain: str | None = None
    favicon: str | None = None
    # Nested detail objects and originating engine.
    position: Position | None = None
    engine: str | None = None
    domain_info: DomainInfo | None = None
    classification: Classification | None = None
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
class ImageData(OpenSERPModel):
    """Image payload of an ``ImageResult`` (its ``image`` field).

    ``ImageEnvelope._result_records`` flattens these fields into the record
    keys ``image_url``, ``thumbnail``, ``width`` and ``height``.
    """

    url: str | None = None
    thumbnail: str | None = None
    width: int | None = None  # presumably pixels -- confirm
    height: int | None = None  # presumably pixels -- confirm
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
class ImageSource(OpenSERPModel):
    """Source information of an ``ImageResult`` (its ``source`` field).

    ``ImageEnvelope._result_records`` flattens these fields into the record
    keys ``page_url`` and ``domain``.
    """

    page_url: str | None = None
    domain: str | None = None
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
class ImageResult(OpenSERPModel):
    """A single item of ``ImageEnvelope.results``.

    Every field is optional and defaults to ``None``.
    """

    id: str | None = None
    rank: int | None = None
    type: str | None = None
    title: str | None = None
    image: ImageData | None = None
    source: ImageSource | None = None
    engine: str | None = None
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
class ClusterOccurrence(OpenSERPModel):
    """One entry of ``Cluster.occurrences``.

    All fields are optional and default to ``None``.
    """

    engine: str | None = None
    rank: int | None = None
    # presumably matches a result's ``id`` -- verify against the server output
    result_id: str | None = None
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
class Cluster(OpenSERPModel):
    """One entry of ``MegaSearchEnvelope.clusters``.

    All scalar fields are optional; ``occurrences`` defaults to an empty list.
    NOTE(review): appears to group the same URL as seen by several engines --
    confirm against the server documentation.
    """

    id: str | None = None
    canonical_url: str | None = None
    domain: str | None = None
    title: str | None = None
    occurrences: list[ClusterOccurrence] = Field(default_factory=list)
    engines_count: int | None = None
    best_rank: int | None = None
    score: float | None = None
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
class Envelope(OpenSERPModel):
    """Base response wrapper shared by the search and image envelopes.

    Holds the optional ``query``, ``meta``, ``pagination`` and ``credits``
    sections. Subclasses add a ``results`` list and implement
    ``_result_records`` so that ``to_pandas`` can tabulate it.
    """

    query: QueryEcho | None = None
    meta: ResponseMeta | None = None
    pagination: Pagination | None = None
    credits: CreditInfo | None = None

    def to_pandas(self) -> Any:
        """Build a pandas ``DataFrame`` from this envelope's result records.

        pandas is imported lazily so it stays an optional dependency.

        Raises:
            ImportError: if pandas is not installed.
            NotImplementedError: if the subclass does not implement
                ``_result_records``.
        """
        try:
            import pandas  # type: ignore[import-untyped]
        except ImportError as missing:  # pragma: no cover
            raise ImportError("Install pandas support with: pip install openserp[pandas]") from missing

        rows = self._result_records()
        return pandas.DataFrame(rows)

    def _result_records(self) -> list[dict[str, Any]]:
        """Return one flat dict per result; must be provided by subclasses."""
        raise NotImplementedError()
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
class SearchEnvelope(Envelope):
    """Envelope whose ``results`` are ``SearchResult`` items."""

    results: list[SearchResult] = Field(default_factory=list)

    def _result_records(self) -> list[dict[str, Any]]:
        """Return one JSON-mode dict per result, with the position flattened.

        Each record is ``SearchResult.model_dump(mode="json")`` plus a
        ``position_absolute`` key. The key is always present (``None`` when
        the result has no position) so every record shares the same schema
        and the resulting DataFrame always has that column.
        """
        records: list[dict[str, Any]] = []
        for item in self.results:
            record = item.model_dump(mode="json")
            position = item.position
            # Emit the key unconditionally: a column that exists only when
            # some result carries a position breaks column selection
            # downstream.
            record["position_absolute"] = (
                position.absolute if position is not None else None
            )
            records.append(record)
        return records
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
class MegaSearchEnvelope(SearchEnvelope):
    """``SearchEnvelope`` extended with an optional ``clusters`` list.

    Inherits ``results`` and ``_result_records`` unchanged, so ``to_pandas``
    tabulates the results only; clusters are not included in the records.
    """

    clusters: list[Cluster] | None = None
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
class ImageEnvelope(Envelope):
    """Envelope whose ``results`` are ``ImageResult`` items."""

    results: list[ImageResult] = Field(default_factory=list)

    def _result_records(self) -> list[dict[str, Any]]:
        """Return one JSON-mode dict per result, with image/source flattened.

        Each record is ``ImageResult.model_dump(mode="json")`` plus the keys
        ``image_url``, ``thumbnail``, ``width``, ``height`` (from ``image``)
        and ``page_url``, ``domain`` (from ``source``). All six keys are
        always present (``None`` when the nested object is missing) so every
        record shares the same schema.
        """
        records: list[dict[str, Any]] = []
        for item in self.results:
            record = item.model_dump(mode="json")
            image = item.image
            source = item.source
            # Emit the flattened keys unconditionally: columns that exist
            # only when some result has image/source data break column
            # selection downstream.
            record["image_url"] = image.url if image is not None else None
            record["thumbnail"] = image.thumbnail if image is not None else None
            record["width"] = image.width if image is not None else None
            record["height"] = image.height if image is not None else None
            record["page_url"] = source.page_url if source is not None else None
            record["domain"] = source.domain if source is not None else None
            records.append(record)
        return records
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
class ErrorResponse(OpenSERPModel):
    """Error payload model.

    ``error`` and ``code`` are required; the remaining fields are optional.
    """

    error: str
    code: int  # NOTE(review): likely the HTTP status code -- confirm
    request_id: str | None = None
    message: str | None = None
    reason: str | None = None
    # Free-form mapping; unlike Envelope.meta this is not a ResponseMeta.
    meta: dict[str, Any] | None = None
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
class EngineHealth(OpenSERPModel):
    """One entry of ``HealthStatus.engines``; all three fields are required."""

    name: str
    initialized: bool
    status: str
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
class HealthStatus(OpenSERPModel):
    """Health report model.

    Only ``status`` is required. ``engines`` defaults to an empty list and
    ``system`` to an empty dict.
    """

    status: str
    uptime: str | None = None  # kept as a string; format not defined here
    engines: list[EngineHealth] = Field(default_factory=list)
    system: dict[str, Any] = Field(default_factory=dict)
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
class ReadinessStatus(OpenSERPModel):
    """Readiness report model with a single required ``status`` string."""

    status: str
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
class CacheStats(OpenSERPModel):
    """Cache statistics, used as ``StatsResponse.cache``.

    ``status`` is a required boolean; every counter is optional.
    """

    status: bool  # NOTE(review): presumably "cache enabled" -- confirm
    entries: int | None = None
    hits: int | None = None
    misses: int | None = None
    bypasses: int | None = None
    evictions: int | None = None
    ttl_seconds: int | None = None
    max_size: int | None = None
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
class ProxyStats(OpenSERPModel):
    """Proxy statistics, used as ``StatsResponse.proxy``.

    No field is required. ``tags`` and ``entries`` default to empty
    containers, while the other mapping fields default to ``None``. The
    nested mappings are left untyped (``dict[str, Any]``).
    """

    configured_count: int | None = None
    healthy_count: int | None = None
    unhealthy_count: int | None = None
    request_proxy_url_enabled: bool | None = None
    lanes: dict[str, Any] | None = None
    browser_processes: dict[str, Any] | None = None
    tags: dict[str, Any] = Field(default_factory=dict)
    entries: list[dict[str, Any]] = Field(default_factory=list)
    engines: dict[str, Any] | None = None
|
|
239
|
+
|
|
240
|
+
|
|
241
|
+
class CircuitBreakerStat(OpenSERPModel):
    """One entry of ``CircuitBreakerStatsResponse.circuit_breakers``.

    ``engine``, ``state``, ``failure_count`` and ``last_changed`` are
    required; ``retry_in`` and ``avg_response_ms`` are optional.
    """

    engine: str
    state: str
    failure_count: int
    last_changed: str  # kept as a plain string, not parsed
    retry_in: int | None = None  # unit not stated here -- TODO confirm
    avg_response_ms: int | None = None  # presumably milliseconds -- confirm
|
|
248
|
+
|
|
249
|
+
|
|
250
|
+
class CircuitBreakerStatsResponse(OpenSERPModel):
    """Response model holding a list of ``CircuitBreakerStat`` entries."""

    # Defaults to an empty list when the field is absent.
    circuit_breakers: list[CircuitBreakerStat] = Field(default_factory=list)
|
|
252
|
+
|
|
253
|
+
|
|
254
|
+
class StatsResponse(CircuitBreakerStatsResponse):
    """Stats response: inherited ``circuit_breakers`` plus cache and proxy stats.

    Both ``cache`` and ``proxy`` are required.
    """

    cache: CacheStats
    proxy: ProxyStats
|
|
257
|
+
|
|
258
|
+
|
|
259
|
+
class MegaEngineInfo(OpenSERPModel):
    """One entry of ``MegaEnginesResponse.engines``.

    ``name`` and ``initialized`` are required; ``circuit_state`` is optional.
    """

    name: str
    initialized: bool
    circuit_state: str | None = None
|
|
263
|
+
|
|
264
|
+
|
|
265
|
+
class MegaEnginesResponse(OpenSERPModel):
    """Response model listing ``MegaEngineInfo`` entries with a required ``total``."""

    engines: list[MegaEngineInfo] = Field(default_factory=list)
    # Required; the model does not check that it equals len(engines).
    total: int
|
|
268
|
+
|
|
269
|
+
|
|
270
|
+
class CloudAccount(OpenSERPModel):
    """Cloud account model; every field is optional and defaults to ``None``."""

    id: str | None = None
    email: str | None = None
    created_at: str | None = None  # kept as a plain string, not parsed
    credits_remaining: int | None = None
    plan: str | None = None
|
|
276
|
+
|
|
277
|
+
|
|
278
|
+
class Price(OpenSERPModel):
    """A price entry used by the fields of ``Pricing``.

    Both fields are optional and default to ``None``.
    """

    credits: int | None = None
    price_usd: float | None = None
|
|
281
|
+
|
|
282
|
+
|
|
283
|
+
class Pricing(OpenSERPModel):
    """Pricing model: a per-credit USD price plus one ``Price`` per operation.

    Every field is optional and defaults to ``None``.
    """

    credit_price_usd: float | None = None
    search: Price | None = None
    mega_search: Price | None = None
    image_search: Price | None = None
    any_search: Price | None = None
    fast_search: Price | None = None
    any_image: Price | None = None
    fast_image: Price | None = None
|
|
292
|
+
|
|
293
|
+
|
|
294
|
+
class EngineStatus(OpenSERPModel):
    """Value type of the ``EnginesStatus.engines`` mapping.

    Both fields are optional and default to ``None``.
    """

    status: str | None = None
    latency_ms: int | None = None  # presumably milliseconds -- confirm
|
|
296
|
+
latency_ms: int | None = None
|
|
297
|
+
|
|
298
|
+
|
|
299
|
+
class EnginesStatus(OpenSERPModel):
    """Engines status model: an ``overall`` string and a mapping of ``EngineStatus``.

    Both fields are optional and default to ``None``.
    """

    overall: str | None = None
    # presumably keyed by engine name -- verify against the API response
    engines: dict[str, EngineStatus] | None = None
|
|
302
|
+
|
|
303
|
+
|
|
304
|
+
class EngineCapability(OpenSERPModel):
    """Value type of the ``EnginesCapabilities.engines`` mapping.

    Four optional boolean flags, each defaulting to ``None`` (unknown or
    not reported) rather than ``False``.
    """

    web: bool | None = None
    image: bool | None = None
    fallback_web: bool | None = None
    fallback_image: bool | None = None
|
|
309
|
+
|
|
310
|
+
|
|
311
|
+
class ModeCapability(OpenSERPModel):
    """Value type of the ``EnginesCapabilities.modes`` mapping.

    Two optional boolean flags, each defaulting to ``None``.
    """

    web: bool | None = None
    image: bool | None = None
|
|
314
|
+
|
|
315
|
+
|
|
316
|
+
class EnginesCapabilities(OpenSERPModel):
    """Capabilities model with optional per-engine and per-mode mappings.

    Both mappings default to ``None``.
    """

    # presumably keyed by engine name -- verify against the API response
    engines: dict[str, EngineCapability] | None = None
    # presumably keyed by mode name -- verify against the API response
    modes: dict[str, ModeCapability] | None = None
|
openserp/py.typed
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1,213 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: openserp
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Python SDK for the OpenSERP self-hosted server and OpenSERP Cloud.
|
|
5
|
+
Project-URL: Homepage, https://openserp.org
|
|
6
|
+
Project-URL: Documentation, https://openserp.org/docs
|
|
7
|
+
Project-URL: Repository, https://github.com/karust/openserp
|
|
8
|
+
Project-URL: Issues, https://github.com/karust/openserp/issues
|
|
9
|
+
Author: OpenSERP
|
|
10
|
+
License-Expression: MIT
|
|
11
|
+
Keywords: ai-grounding,baidu,bing,duckduckgo,ecosia,google,openserp,search,seo,serp,yandex
|
|
12
|
+
Classifier: Development Status :: 3 - Alpha
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: Intended Audience :: Science/Research
|
|
15
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
16
|
+
Classifier: Programming Language :: Python :: 3
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
21
|
+
Classifier: Typing :: Typed
|
|
22
|
+
Requires-Python: >=3.10
|
|
23
|
+
Requires-Dist: httpx<1,>=0.27
|
|
24
|
+
Requires-Dist: pydantic<3,>=2.7
|
|
25
|
+
Provides-Extra: dev
|
|
26
|
+
Requires-Dist: build>=1.2; extra == 'dev'
|
|
27
|
+
Requires-Dist: mypy>=1.10; extra == 'dev'
|
|
28
|
+
Requires-Dist: pytest-asyncio>=0.23; extra == 'dev'
|
|
29
|
+
Requires-Dist: pytest>=8.2; extra == 'dev'
|
|
30
|
+
Requires-Dist: respx>=0.21; extra == 'dev'
|
|
31
|
+
Requires-Dist: ruff>=0.5; extra == 'dev'
|
|
32
|
+
Provides-Extra: pandas
|
|
33
|
+
Requires-Dist: pandas>=2.0; extra == 'pandas'
|
|
34
|
+
Description-Content-Type: text/markdown
|
|
35
|
+
|
|
36
|
+
# OpenSERP Python SDK
|
|
37
|
+
|
|
38
|
+
Alpha: API may change before 1.0.
|
|
39
|
+
|
|
40
|
+
Python SDK for the OpenSERP self-hosted server and OpenSERP Cloud. The same client works against both backends.
|
|
41
|
+
|
|
42
|
+
## Install
|
|
43
|
+
|
|
44
|
+
```bash
|
|
45
|
+
pip install openserp
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
For DataFrame export:
|
|
49
|
+
|
|
50
|
+
```bash
|
|
51
|
+
pip install "openserp[pandas]"
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
## OSS Mode
|
|
55
|
+
|
|
56
|
+
OSS mode is the default. Run the open source server locally, then point the SDK at it:
|
|
57
|
+
|
|
58
|
+
```python
|
|
59
|
+
from openserp import OpenSERP
|
|
60
|
+
|
|
61
|
+
client = OpenSERP(base_url="http://localhost:7000")
|
|
62
|
+
|
|
63
|
+
resp = client.search(
|
|
64
|
+
engine="google",
|
|
65
|
+
text="openserp",
|
|
66
|
+
limit=10,
|
|
67
|
+
region="US",
|
|
68
|
+
)
|
|
69
|
+
|
|
70
|
+
print(resp.results[0].title, resp.results[0].url)
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
If you omit every option, the client uses `http://localhost:7000`.
|
|
74
|
+
|
|
75
|
+
## Cloud Mode
|
|
76
|
+
|
|
77
|
+
Pass an API key and the client defaults to `https://api.openserp.org/v1`.
|
|
78
|
+
|
|
79
|
+
```python
|
|
80
|
+
import os
|
|
81
|
+
|
|
82
|
+
from openserp import OpenSERP
|
|
83
|
+
|
|
84
|
+
client = OpenSERP(api_key=os.environ["OPENSERP_API_KEY"])
|
|
85
|
+
resp = client.search(engine="google", text="openserp")
|
|
86
|
+
|
|
87
|
+
print(resp.results[0].title)
|
|
88
|
+
print(client.last_response.credits)
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
## Async
|
|
92
|
+
|
|
93
|
+
```python
|
|
94
|
+
import asyncio
|
|
95
|
+
import os
|
|
96
|
+
|
|
97
|
+
from openserp import AsyncOpenSERP
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
async def main() -> None:
|
|
101
|
+
async with AsyncOpenSERP(api_key=os.environ["OPENSERP_API_KEY"]) as client:
|
|
102
|
+
resp = await client.search(engine="google", text="openserp")
|
|
103
|
+
print(resp.results[0].title)
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
asyncio.run(main())
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
## Mega Search
|
|
110
|
+
|
|
111
|
+
```python
|
|
112
|
+
from openserp import OpenSERP
|
|
113
|
+
|
|
114
|
+
client = OpenSERP()
|
|
115
|
+
|
|
116
|
+
mega = client.mega_search(
|
|
117
|
+
text="openserp",
|
|
118
|
+
engines=["google", "bing", "yandex"],
|
|
119
|
+
mode="balanced",
|
|
120
|
+
)
|
|
121
|
+
|
|
122
|
+
df = mega.to_pandas()
|
|
123
|
+
print(df[["rank", "title", "url", "engine"]])
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
Convenience helpers are also available:
|
|
127
|
+
|
|
128
|
+
```python
|
|
129
|
+
client.fast_search(text="openserp", engines=["google", "bing"])
|
|
130
|
+
client.any_search(text="openserp", engines=["google", "bing"])
|
|
131
|
+
client.mega_image(text="golang logo", engines=["google", "bing"])
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
## AI / RAG
|
|
135
|
+
|
|
136
|
+
```python
|
|
137
|
+
from openserp import OpenSERP
|
|
138
|
+
|
|
139
|
+
client = OpenSERP()
|
|
140
|
+
resp = client.search(engine="google", text="latest postgres indexing guide", limit=5)
|
|
141
|
+
|
|
142
|
+
context = "\n\n".join(
|
|
143
|
+
f"{item.title}\n{item.url}\n{item.snippet}"
|
|
144
|
+
for item in resp.results
|
|
145
|
+
)
|
|
146
|
+
|
|
147
|
+
prompt = f"Use these web results as grounding:\n\n{context}\n\nSummarize the key points."
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
## SEO Keyword Tracker
|
|
151
|
+
|
|
152
|
+
```python
|
|
153
|
+
from openserp import OpenSERP
|
|
154
|
+
|
|
155
|
+
client = OpenSERP()
|
|
156
|
+
keywords = ["openserp", "serp api", "google search api"]
|
|
157
|
+
frames = []
|
|
158
|
+
|
|
159
|
+
for keyword in keywords:
|
|
160
|
+
resp = client.search(engine="google", text=keyword, region="US", limit=10)
|
|
161
|
+
frame = resp.to_pandas()
|
|
162
|
+
frame["keyword"] = keyword
|
|
163
|
+
frames.append(frame)
|
|
164
|
+
|
|
165
|
+
report = __import__("pandas").concat(frames, ignore_index=True)
|
|
166
|
+
report.to_csv("rank-report.csv", index=False)
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
## Async Batch
|
|
170
|
+
|
|
171
|
+
```python
|
|
172
|
+
import asyncio
|
|
173
|
+
|
|
174
|
+
from openserp import AsyncOpenSERP
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
async def main() -> None:
|
|
178
|
+
sem = asyncio.Semaphore(20)
|
|
179
|
+
queries = [f"keyword {i}" for i in range(500)]
|
|
180
|
+
|
|
181
|
+
async with AsyncOpenSERP() as client:
|
|
182
|
+
async def run(query: str):
|
|
183
|
+
async with sem:
|
|
184
|
+
return await client.search(engine="google", text=query, limit=10)
|
|
185
|
+
|
|
186
|
+
responses = await asyncio.gather(*(run(query) for query in queries))
|
|
187
|
+
print(len(responses))
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
asyncio.run(main())
|
|
191
|
+
```
|
|
192
|
+
|
|
193
|
+
## Endpoint Availability
|
|
194
|
+
|
|
195
|
+
Search endpoints work in both modes. Operational OSS endpoints such as `health()`, `stats()`, `parse_google()`, and `parse_bing()` require a self-hosted server and raise `OssOnlyError` in Cloud mode.
|
|
196
|
+
|
|
197
|
+
Cloud account endpoints such as `me()`, `pricing()`, `engines_status()`, and `engines_capabilities()` require Cloud mode and raise `CloudOnlyError` in OSS mode.
|
|
198
|
+
|
|
199
|
+
## Development
|
|
200
|
+
|
|
201
|
+
```bash
|
|
202
|
+
python -m pip install -e ".[dev,pandas]"
|
|
203
|
+
pytest
|
|
204
|
+
ruff check .
|
|
205
|
+
mypy src
|
|
206
|
+
python -m build
|
|
207
|
+
```
|
|
208
|
+
|
|
209
|
+
The project is scaffolded for `uv` too:
|
|
210
|
+
|
|
211
|
+
```bash
|
|
212
|
+
uv build
|
|
213
|
+
```
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
openserp/__init__.py,sha256=Le2lAfY38d2mLKIseiUILcQNBJbTkrtQEwaYHYtd5u8,1235
|
|
2
|
+
openserp/backend.py,sha256=I2sw_NzhamwJAyHBtMFA_h6v9Qg8WkLIdalojVyK2XU,951
|
|
3
|
+
openserp/client.py,sha256=gFocl1laNI9GuonoqknHLBA4Di7fDYg_revmKGlMXkg,19532
|
|
4
|
+
openserp/errors.py,sha256=38HxqEU1xjicr0oSOY0gWN7bS6C5C6IreNrnP3Dj4bg,2181
|
|
5
|
+
openserp/models.py,sha256=hY6vfNAov3kmJAWBjzi1GzuNrdbTnPZ5w9Rxui13npI,8511
|
|
6
|
+
openserp/py.typed,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
|
7
|
+
openserp-0.1.0.dist-info/METADATA,sha256=MCWk57fLJijWuVpeQ_UhHMmgYipwm9D6LHaaroMVcPk,5187
|
|
8
|
+
openserp-0.1.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
|
|
9
|
+
openserp-0.1.0.dist-info/RECORD,,
|