openserp 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
openserp/__init__.py ADDED
@@ -0,0 +1,66 @@
1
+ from .client import AsyncOpenSERP, OpenSERP
2
+ from .errors import (
3
+ CaptchaError,
4
+ CloudOnlyError,
5
+ OssOnlyError,
6
+ RateLimitError,
7
+ SERPError,
8
+ TimeoutError,
9
+ )
10
+ from .models import (
11
+ Backend,
12
+ CircuitBreakerStatsResponse,
13
+ CloudAccount,
14
+ CreditInfo,
15
+ Engine,
16
+ EnginesCapabilities,
17
+ EnginesStatus,
18
+ HealthStatus,
19
+ ImageEnvelope,
20
+ ImageResult,
21
+ LastResponse,
22
+ MegaEnginesResponse,
23
+ MegaMode,
24
+ MegaSearchEnvelope,
25
+ Pricing,
26
+ ProxyStats,
27
+ ReadinessStatus,
28
+ ResponseFormat,
29
+ SearchEnvelope,
30
+ SearchResult,
31
+ StatsResponse,
32
+ )
33
+
34
# Package version string; the same value appears as `Version` in the
# distribution metadata.
__version__ = "0.1.0"

# Names re-exported as the package's public API (clients, error types and
# response models), kept in sorted order.
__all__ = [
    "AsyncOpenSERP",
    "Backend",
    "CaptchaError",
    "CircuitBreakerStatsResponse",
    "CloudAccount",
    "CloudOnlyError",
    "CreditInfo",
    "Engine",
    "EnginesCapabilities",
    "EnginesStatus",
    "HealthStatus",
    "ImageEnvelope",
    "ImageResult",
    "LastResponse",
    "MegaEnginesResponse",
    "MegaMode",
    "MegaSearchEnvelope",
    "OpenSERP",
    "OssOnlyError",
    "Pricing",
    "ProxyStats",
    "RateLimitError",
    "ReadinessStatus",
    "ResponseFormat",
    "SERPError",
    "SearchEnvelope",
    "SearchResult",
    "StatsResponse",
    "TimeoutError",
]
openserp/backend.py ADDED
@@ -0,0 +1,42 @@
1
+ from __future__ import annotations
2
+
3
+ from urllib.parse import urlparse
4
+
5
+ from .models import Backend
6
+
7
# Fallback base URL when neither `base_url` nor `api_key` is supplied
# (see resolve_base_url).
OSS_BASE_URL = "http://localhost:7000"
# Base URL chosen when an `api_key` is supplied without an explicit `base_url`.
CLOUD_BASE_URL = "https://api.openserp.org/v1"
9
+
10
+
11
def normalize_base_url(base_url: str) -> str:
    """Return *base_url* with every trailing ``/`` removed.

    Request paths in the client start with ``/``, so the stored base URL must
    not end with one.
    """
    trimmed = base_url
    while trimmed.endswith("/"):
        trimmed = trimmed[:-1]
    return trimmed
13
+
14
+
15
def resolve_base_url(api_key: str | None = None, base_url: str | None = None) -> str:
    """Pick the base URL the client should talk to.

    An explicit, non-empty ``base_url`` always wins (after trailing slashes
    are stripped). Without one, a truthy ``api_key`` selects the cloud URL and
    anything else selects the local OSS default.
    """
    if not base_url:
        return CLOUD_BASE_URL if api_key else OSS_BASE_URL
    return normalize_base_url(base_url)
21
+
22
+
23
def infer_backend(
    api_key: str | None = None,
    base_url: str | None = None,
    backend: Backend | None = None,
) -> Backend:
    """Decide whether the client is talking to the OSS server or the cloud.

    Precedence:

    1. An explicit ``backend`` is returned unchanged.
    2. A ``base_url`` whose hostname is ``api.openserp.org`` means ``"cloud"``.
       If the URL cannot be parsed, a plain substring check is used instead.
    3. A truthy ``api_key`` means ``"cloud"``.
    4. Otherwise ``"oss"``.
    """
    if backend:
        return backend

    cloud_host = "api.openserp.org"
    if base_url:
        try:
            is_cloud = urlparse(base_url).hostname == cloud_host
        except ValueError:
            # urlparse rejects some malformed URLs; fall back to a substring test.
            is_cloud = cloud_host in base_url
        if is_cloud:
            return "cloud"

    return "cloud" if api_key else "oss"
openserp/client.py ADDED
@@ -0,0 +1,534 @@
1
+ from __future__ import annotations
2
+
3
+ from collections.abc import Callable, Mapping
4
+ from typing import Any, TypeVar, cast
5
+
6
+ import httpx
7
+ from pydantic import BaseModel
8
+
9
+ from .backend import infer_backend, resolve_base_url
10
+ from .errors import CloudOnlyError, OssOnlyError, TimeoutError, error_from_response
11
+ from .models import (
12
+ Backend,
13
+ CacheStats,
14
+ CircuitBreakerStatsResponse,
15
+ CloudAccount,
16
+ CreditInfo,
17
+ Engine,
18
+ EnginesCapabilities,
19
+ EnginesStatus,
20
+ HealthStatus,
21
+ ImageEnvelope,
22
+ LastResponse,
23
+ MegaEnginesResponse,
24
+ MegaSearchEnvelope,
25
+ Pricing,
26
+ ProxyStats,
27
+ ReadinessStatus,
28
+ ResponseFormat,
29
+ SearchEnvelope,
30
+ StatsResponse,
31
+ )
32
+
33
# Types accepted as a query-parameter value before encoding; lists/tuples are
# joined with commas and None entries are dropped (see _encode_value /
# _encode_query).
QueryValue = str | int | float | bool | list[str] | tuple[str, ...] | None
# Retry hook: called with the raised exception and the zero-based attempt
# counter; a truthy return value makes the client send the request again.
RetryHook = Callable[[Exception, int], bool]
# Type variable for "any pydantic model class" used by the generic fetch helpers.
ModelT = TypeVar("ModelT", bound=BaseModel)

# Search keyword arguments that are transmitted as HTTP request headers
# instead of query-string parameters (see _split_query_and_headers).
_PROXY_HEADER_MAP: dict[str, str] = {
    "use_proxy": "X-Use-Proxy",
    "proxy_url": "X-Proxy-URL",
    "proxy_country": "X-Proxy-Country",
    "proxy_class": "X-Proxy-Class",
    "proxy_provider": "X-Proxy-Provider",
    "proxy_session_id": "X-Proxy-Session-ID",
    "tenant": "X-Tenant",
}
46
+
47
+
48
class _BaseOpenSERP:
    """Transport-independent state and helpers shared by both clients.

    Stores the resolved base URL, the inferred backend, default headers, the
    optional retry hook and metadata about the most recent response. The
    subclasses own the actual ``httpx`` client and perform the I/O.
    """

    def __init__(
        self,
        *,
        api_key: str | None = None,
        base_url: str | None = None,
        backend: Backend | None = None,
        timeout: float = 30.0,
        headers: Mapping[str, str] | None = None,
        retry: RetryHook | None = None,
    ) -> None:
        """Store configuration and resolve base URL and backend.

        Args:
            api_key: Sent as ``Authorization: Bearer <api_key>`` when truthy.
            base_url: Explicit server URL; overrides the built-in defaults.
            backend: Explicit backend; overrides inference from URL/key.
            timeout: Stored on the instance and reported in ``TimeoutError``.
            headers: Default headers merged into every request.
            retry: Hook deciding whether a failed attempt is retried.
        """
        self.api_key = api_key
        self._base_url = resolve_base_url(api_key=api_key, base_url=base_url)
        self._backend = infer_backend(api_key=api_key, base_url=base_url, backend=backend)
        self.timeout = timeout
        # Copy so later mutation of the caller's mapping does not leak in.
        self.headers = dict(headers or {})
        self.retry = retry
        # Populated by _set_last_response after every received response.
        self.last_response: LastResponse | None = None

    @property
    def base_url(self) -> str:
        """Resolved base URL (read-only)."""
        return self._base_url

    @property
    def backend(self) -> Backend:
        """Resolved backend, ``"oss"`` or ``"cloud"`` (read-only)."""
        return self._backend

    def _assert_cloud(self, method: str) -> None:
        """Raise ``CloudOnlyError`` unless the backend is ``"cloud"``."""
        if self.backend != "cloud":
            raise CloudOnlyError(method)

    def _assert_oss(self, method: str) -> None:
        """Raise ``OssOnlyError`` unless the backend is ``"oss"``."""
        if self.backend != "oss":
            raise OssOnlyError(method)

    def _merged_headers(self, headers: Mapping[str, str] | None = None) -> dict[str, str]:
        """Combine default headers, the auth header and per-request headers.

        Later sources win: per-request ``headers`` override ``Authorization``,
        which overrides the instance defaults.
        """
        merged = dict(self.headers)
        if self.api_key:
            merged["Authorization"] = f"Bearer {self.api_key}"
        if headers:
            merged.update(headers)
        return merged

    def _build_search_request(
        self,
        params: Mapping[str, Any],
    ) -> tuple[dict[str, QueryValue], dict[str, str], ResponseFormat | None]:
        """Split search keyword arguments into query, headers and format.

        Returns:
            ``(query, headers, format)`` where ``query`` holds the non-None
            query parameters, ``headers`` the proxy/tenant headers, and
            ``format`` the requested response format (also left in ``query``).
        """
        query: dict[str, QueryValue] = {**params}
        engines = params.get("engines")
        # Materialise arbitrary iterables so they can be comma-joined later.
        # A plain string is already one value: list("google,bing") would
        # explode it into single characters.
        if engines is not None and not isinstance(engines, str):
            query["engines"] = list(engines)
        clean_query, headers = _split_query_and_headers(query)
        return clean_query, headers, cast(ResponseFormat | None, params.get("format"))

    def _build_parse_request(
        self, format: ResponseFormat | None
    ) -> tuple[dict[str, QueryValue] | None, dict[str, str]]:
        """Build the query and headers for an HTML-parse request.

        The query is ``None`` when no format is given; the body is always
        declared as UTF-8 HTML.
        """
        query: dict[str, QueryValue] | None = {"format": format} if format else None
        return query, {"Content-Type": "text/html; charset=utf-8"}

    def _set_last_response(self, response: httpx.Response) -> None:
        """Record status and ``x-*`` metadata headers of ``response``."""
        headers = {key.lower(): value for key, value in response.headers.items()}
        credits_used = _int_header(response.headers, "x-credits-used")
        credits_remaining = _int_header(response.headers, "x-credits-remaining")
        # Only attach credit info when at least one credit header parsed.
        credits = (
            CreditInfo(used=credits_used, remaining=credits_remaining)
            if credits_used is not None or credits_remaining is not None
            else None
        )
        self.last_response = LastResponse(
            status=response.status_code,
            request_id=response.headers.get("x-request-id"),
            credits=credits,
            engine_used=response.headers.get("x-engine-used"),
            fallback_engine=response.headers.get("x-fallback-engine"),
            cache=response.headers.get("x-cache"),
            proxy_mode=response.headers.get("x-proxy-mode"),
            proxy_tag=response.headers.get("x-proxy-tag"),
            proxy_used=response.headers.get("x-proxy-used"),
            network_bytes=_int_header(response.headers, "x-network-bytes"),
            browser_profile_id=response.headers.get("x-browser-profile-id"),
            headers=headers,
        )

    def _handle_response(self, response: httpx.Response, format: ResponseFormat | None) -> Any:
        """Record the response, decode its body and raise on HTTP errors.

        Returns:
            The decoded body as produced by ``_read_body``.

        Raises:
            SERPError: (or a subclass) when ``response.is_error`` is true.
        """
        self._set_last_response(response)
        body = _read_body(response, format)
        if response.is_error:
            raise error_from_response(
                response.status_code, body, response.headers.get("x-request-id")
            )
        return body
140
+
141
+
142
class OpenSERP(_BaseOpenSERP):
    """Synchronous OpenSERP client built on :class:`httpx.Client`.

    Can be used as a context manager; on exit the HTTP client is closed only
    when this instance created it.
    """

    def __init__(
        self,
        *,
        api_key: str | None = None,
        base_url: str | None = None,
        backend: Backend | None = None,
        timeout: float = 30.0,
        headers: Mapping[str, str] | None = None,
        retry: RetryHook | None = None,
        client: httpx.Client | None = None,
    ) -> None:
        """Configure the client.

        A caller-supplied ``client`` is used as-is and never closed by this
        object; otherwise an ``httpx.Client`` is created with ``timeout``.
        """
        super().__init__(
            api_key=api_key,
            base_url=base_url,
            backend=backend,
            timeout=timeout,
            headers=headers,
            retry=retry,
        )
        self._owns_client = client is None
        self._client = client or httpx.Client(timeout=timeout)

    def close(self) -> None:
        """Close the underlying HTTP client if this instance owns it."""
        if not self._owns_client:
            return
        self._client.close()

    def __enter__(self) -> OpenSERP:
        return self

    def __exit__(self, *_exc: object) -> None:
        self.close()

    # ---- search endpoints (available on both backends) -------------------

    def search(self, *, engine: Engine, **params: Any) -> SearchEnvelope | str:
        """GET ``/{engine}/search``; returns a model, or raw text if the body is a string."""
        return self._run_search(SearchEnvelope, f"/{engine}/search", params)

    def image(self, *, engine: Engine, **params: Any) -> ImageEnvelope | str:
        """GET ``/{engine}/image``; returns a model, or raw text if the body is a string."""
        return self._run_search(ImageEnvelope, f"/{engine}/image", params)

    def mega_search(self, **params: Any) -> MegaSearchEnvelope | str:
        """GET ``/mega/search`` with the given parameters."""
        return self._run_search(MegaSearchEnvelope, "/mega/search", params)

    def fast_search(self, **params: Any) -> MegaSearchEnvelope | str:
        """``mega_search`` with ``mode`` forced to ``"fast"``."""
        forced = dict(params)
        forced["mode"] = "fast"
        return self.mega_search(**forced)

    def any_search(self, **params: Any) -> MegaSearchEnvelope | str:
        """``mega_search`` with ``mode`` forced to ``"any"``."""
        forced = dict(params)
        forced["mode"] = "any"
        return self.mega_search(**forced)

    def mega_image(self, **params: Any) -> ImageEnvelope | str:
        """GET ``/mega/image`` with the given parameters."""
        return self._run_search(ImageEnvelope, "/mega/image", params)

    def fast_image(self, **params: Any) -> ImageEnvelope | str:
        """``mega_image`` with ``mode`` forced to ``"fast"``."""
        forced = dict(params)
        forced["mode"] = "fast"
        return self.mega_image(**forced)

    def any_image(self, **params: Any) -> ImageEnvelope | str:
        """``mega_image`` with ``mode`` forced to ``"any"``."""
        forced = dict(params)
        forced["mode"] = "any"
        return self.mega_image(**forced)

    # ---- OSS-only endpoints ----------------------------------------------

    def parse_google(
        self, *, html: str, format: ResponseFormat | None = None
    ) -> SearchEnvelope | str:
        """POST ``html`` to ``/google/parse`` (OSS backend only)."""
        self._assert_oss("parse_google")
        return self._parse("/google/parse", html, format)

    def parse_bing(
        self, *, html: str, format: ResponseFormat | None = None
    ) -> SearchEnvelope | str:
        """POST ``html`` to ``/bing/parse`` (OSS backend only)."""
        self._assert_oss("parse_bing")
        return self._parse("/bing/parse", html, format)

    def health(self) -> HealthStatus:
        """GET ``/health`` (OSS backend only)."""
        self._assert_oss("health")
        return self._get_json(HealthStatus, "/health")

    def ready(self) -> ReadinessStatus:
        """GET ``/ready`` (OSS backend only)."""
        self._assert_oss("ready")
        return self._get_json(ReadinessStatus, "/ready")

    def stats(self) -> StatsResponse:
        """GET ``/stats`` (OSS backend only)."""
        self._assert_oss("stats")
        return self._get_json(StatsResponse, "/stats")

    def cache_stats(self) -> CacheStats:
        """GET ``/stats/cache`` (OSS backend only)."""
        self._assert_oss("cache_stats")
        return self._get_json(CacheStats, "/stats/cache")

    def proxy_stats(self) -> ProxyStats:
        """GET ``/stats/proxy`` (OSS backend only)."""
        self._assert_oss("proxy_stats")
        return self._get_json(ProxyStats, "/stats/proxy")

    def circuit_breaker_stats(self) -> CircuitBreakerStatsResponse:
        """GET ``/stats/cb`` (OSS backend only)."""
        self._assert_oss("circuit_breaker_stats")
        return self._get_json(CircuitBreakerStatsResponse, "/stats/cb")

    def engines(self) -> MegaEnginesResponse:
        """GET ``/mega/engines`` (OSS backend only)."""
        self._assert_oss("engines")
        return self._get_json(MegaEnginesResponse, "/mega/engines")

    # ---- cloud-only endpoints --------------------------------------------

    def me(self) -> CloudAccount:
        """GET ``/me`` (cloud backend only)."""
        self._assert_cloud("me")
        return self._get_json(CloudAccount, "/me")

    def pricing(self) -> Pricing:
        """GET ``/pricing`` (cloud backend only)."""
        self._assert_cloud("pricing")
        return self._get_json(Pricing, "/pricing")

    def engines_status(self) -> EnginesStatus:
        """GET ``/engines/status`` (cloud backend only)."""
        self._assert_cloud("engines_status")
        return self._get_json(EnginesStatus, "/engines/status")

    def engines_capabilities(self) -> EnginesCapabilities:
        """GET ``/engines/capabilities`` (cloud backend only)."""
        self._assert_cloud("engines_capabilities")
        return self._get_json(EnginesCapabilities, "/engines/capabilities")

    # ---- internals -------------------------------------------------------

    def _run_search(
        self, model: type[ModelT], path: str, params: Mapping[str, Any]
    ) -> ModelT | str:
        """Translate search kwargs into a request and fetch ``path`` as ``model``."""
        query, extra_headers, fmt = self._build_search_request(params)
        return self._get_model(model, path, query, extra_headers, fmt)

    def _get_json(self, model: type[ModelT], path: str) -> ModelT:
        """GET ``path`` and validate the decoded body as ``model``."""
        payload = self._send("GET", path)
        return model.model_validate(payload)

    def _parse(self, path: str, html: str, format: ResponseFormat | None) -> SearchEnvelope | str:
        """POST raw HTML to ``path``; string bodies are returned unvalidated."""
        query, extra_headers = self._build_parse_request(format)
        payload = self._send(
            "POST", path, query=query, headers=extra_headers, content=html, format=format
        )
        if isinstance(payload, str):
            return payload
        return SearchEnvelope.model_validate(payload)

    def _get_model(
        self,
        model: type[ModelT],
        path: str,
        query: Mapping[str, QueryValue] | None = None,
        headers: Mapping[str, str] | None = None,
        format: ResponseFormat | None = None,
    ) -> ModelT | str:
        """GET ``path``; string bodies are returned as-is, others validated as ``model``."""
        payload = self._send("GET", path, query=query, headers=headers, format=format)
        if isinstance(payload, str):
            return payload
        return model.model_validate(payload)

    def _send(
        self,
        method: str,
        path: str,
        *,
        query: Mapping[str, QueryValue] | None = None,
        headers: Mapping[str, str] | None = None,
        format: ResponseFormat | None = None,
        content: str | None = None,
    ) -> Any:
        """Send a request, consulting the retry hook after each failure.

        Any exception from the attempt (transport error, timeout, or the
        ``SERPError`` built from an error response) is offered to
        ``self.retry`` together with the zero-based attempt index. The request
        is repeated while the hook returns a truthy value; otherwise the
        exception propagates.
        """
        attempt = 0
        while True:
            try:
                return self._request_once(method, path, query, headers, format, content)
            except Exception as exc:
                if not (self.retry and self.retry(exc, attempt)):
                    raise
            attempt += 1

    def _request_once(
        self,
        method: str,
        path: str,
        query: Mapping[str, QueryValue] | None,
        headers: Mapping[str, str] | None,
        format: ResponseFormat | None,
        content: str | None,
    ) -> Any:
        """Perform a single attempt and decode the response.

        Raises:
            TimeoutError: the package's own error, wrapping ``httpx.TimeoutException``.
        """
        try:
            response = self._client.request(
                method,
                f"{self.base_url}{path}",
                params=_encode_query(query),
                headers=self._merged_headers(headers),
                content=content,
            )
        except httpx.TimeoutException as exc:
            raise TimeoutError(self.timeout) from exc
        return self._handle_response(response, format)
307
+
308
+
309
class AsyncOpenSERP(_BaseOpenSERP):
    """Asynchronous OpenSERP client built on :class:`httpx.AsyncClient`.

    Can be used as an async context manager; on exit the HTTP client is closed
    only when this instance created it.
    """

    def __init__(
        self,
        *,
        api_key: str | None = None,
        base_url: str | None = None,
        backend: Backend | None = None,
        timeout: float = 30.0,
        headers: Mapping[str, str] | None = None,
        retry: RetryHook | None = None,
        client: httpx.AsyncClient | None = None,
    ) -> None:
        """Configure the client.

        A caller-supplied ``client`` is used as-is and never closed by this
        object; otherwise an ``httpx.AsyncClient`` is created with ``timeout``.
        """
        super().__init__(
            api_key=api_key,
            base_url=base_url,
            backend=backend,
            timeout=timeout,
            headers=headers,
            retry=retry,
        )
        self._owns_client = client is None
        self._client = client or httpx.AsyncClient(timeout=timeout)

    async def close(self) -> None:
        """Close the underlying HTTP client if this instance owns it."""
        if not self._owns_client:
            return
        await self._client.aclose()

    async def __aenter__(self) -> AsyncOpenSERP:
        return self

    async def __aexit__(self, *_exc: object) -> None:
        await self.close()

    # ---- search endpoints (available on both backends) -------------------

    async def search(self, *, engine: Engine, **params: Any) -> SearchEnvelope | str:
        """GET ``/{engine}/search``; returns a model, or raw text if the body is a string."""
        return await self._run_search(SearchEnvelope, f"/{engine}/search", params)

    async def image(self, *, engine: Engine, **params: Any) -> ImageEnvelope | str:
        """GET ``/{engine}/image``; returns a model, or raw text if the body is a string."""
        return await self._run_search(ImageEnvelope, f"/{engine}/image", params)

    async def mega_search(self, **params: Any) -> MegaSearchEnvelope | str:
        """GET ``/mega/search`` with the given parameters."""
        return await self._run_search(MegaSearchEnvelope, "/mega/search", params)

    async def fast_search(self, **params: Any) -> MegaSearchEnvelope | str:
        """``mega_search`` with ``mode`` forced to ``"fast"``."""
        forced = dict(params)
        forced["mode"] = "fast"
        return await self.mega_search(**forced)

    async def any_search(self, **params: Any) -> MegaSearchEnvelope | str:
        """``mega_search`` with ``mode`` forced to ``"any"``."""
        forced = dict(params)
        forced["mode"] = "any"
        return await self.mega_search(**forced)

    async def mega_image(self, **params: Any) -> ImageEnvelope | str:
        """GET ``/mega/image`` with the given parameters."""
        return await self._run_search(ImageEnvelope, "/mega/image", params)

    async def fast_image(self, **params: Any) -> ImageEnvelope | str:
        """``mega_image`` with ``mode`` forced to ``"fast"``."""
        forced = dict(params)
        forced["mode"] = "fast"
        return await self.mega_image(**forced)

    async def any_image(self, **params: Any) -> ImageEnvelope | str:
        """``mega_image`` with ``mode`` forced to ``"any"``."""
        forced = dict(params)
        forced["mode"] = "any"
        return await self.mega_image(**forced)

    # ---- OSS-only endpoints ----------------------------------------------

    async def parse_google(
        self, *, html: str, format: ResponseFormat | None = None
    ) -> SearchEnvelope | str:
        """POST ``html`` to ``/google/parse`` (OSS backend only)."""
        self._assert_oss("parse_google")
        return await self._parse("/google/parse", html, format)

    async def parse_bing(
        self, *, html: str, format: ResponseFormat | None = None
    ) -> SearchEnvelope | str:
        """POST ``html`` to ``/bing/parse`` (OSS backend only)."""
        self._assert_oss("parse_bing")
        return await self._parse("/bing/parse", html, format)

    async def health(self) -> HealthStatus:
        """GET ``/health`` (OSS backend only)."""
        self._assert_oss("health")
        return await self._get_json(HealthStatus, "/health")

    async def ready(self) -> ReadinessStatus:
        """GET ``/ready`` (OSS backend only)."""
        self._assert_oss("ready")
        return await self._get_json(ReadinessStatus, "/ready")

    async def stats(self) -> StatsResponse:
        """GET ``/stats`` (OSS backend only)."""
        self._assert_oss("stats")
        return await self._get_json(StatsResponse, "/stats")

    async def cache_stats(self) -> CacheStats:
        """GET ``/stats/cache`` (OSS backend only)."""
        self._assert_oss("cache_stats")
        return await self._get_json(CacheStats, "/stats/cache")

    async def proxy_stats(self) -> ProxyStats:
        """GET ``/stats/proxy`` (OSS backend only)."""
        self._assert_oss("proxy_stats")
        return await self._get_json(ProxyStats, "/stats/proxy")

    async def circuit_breaker_stats(self) -> CircuitBreakerStatsResponse:
        """GET ``/stats/cb`` (OSS backend only)."""
        self._assert_oss("circuit_breaker_stats")
        return await self._get_json(CircuitBreakerStatsResponse, "/stats/cb")

    async def engines(self) -> MegaEnginesResponse:
        """GET ``/mega/engines`` (OSS backend only)."""
        self._assert_oss("engines")
        return await self._get_json(MegaEnginesResponse, "/mega/engines")

    # ---- cloud-only endpoints --------------------------------------------

    async def me(self) -> CloudAccount:
        """GET ``/me`` (cloud backend only)."""
        self._assert_cloud("me")
        return await self._get_json(CloudAccount, "/me")

    async def pricing(self) -> Pricing:
        """GET ``/pricing`` (cloud backend only)."""
        self._assert_cloud("pricing")
        return await self._get_json(Pricing, "/pricing")

    async def engines_status(self) -> EnginesStatus:
        """GET ``/engines/status`` (cloud backend only)."""
        self._assert_cloud("engines_status")
        return await self._get_json(EnginesStatus, "/engines/status")

    async def engines_capabilities(self) -> EnginesCapabilities:
        """GET ``/engines/capabilities`` (cloud backend only)."""
        self._assert_cloud("engines_capabilities")
        return await self._get_json(EnginesCapabilities, "/engines/capabilities")

    # ---- internals -------------------------------------------------------

    async def _run_search(
        self, model: type[ModelT], path: str, params: Mapping[str, Any]
    ) -> ModelT | str:
        """Translate search kwargs into a request and fetch ``path`` as ``model``."""
        query, extra_headers, fmt = self._build_search_request(params)
        return await self._get_model(model, path, query, extra_headers, fmt)

    async def _get_json(self, model: type[ModelT], path: str) -> ModelT:
        """GET ``path`` and validate the decoded body as ``model``."""
        payload = await self._send("GET", path)
        return model.model_validate(payload)

    async def _parse(
        self, path: str, html: str, format: ResponseFormat | None
    ) -> SearchEnvelope | str:
        """POST raw HTML to ``path``; string bodies are returned unvalidated."""
        query, extra_headers = self._build_parse_request(format)
        payload = await self._send(
            "POST", path, query=query, headers=extra_headers, content=html, format=format
        )
        if isinstance(payload, str):
            return payload
        return SearchEnvelope.model_validate(payload)

    async def _get_model(
        self,
        model: type[ModelT],
        path: str,
        query: Mapping[str, QueryValue] | None = None,
        headers: Mapping[str, str] | None = None,
        format: ResponseFormat | None = None,
    ) -> ModelT | str:
        """GET ``path``; string bodies are returned as-is, others validated as ``model``."""
        payload = await self._send("GET", path, query=query, headers=headers, format=format)
        if isinstance(payload, str):
            return payload
        return model.model_validate(payload)

    async def _send(
        self,
        method: str,
        path: str,
        *,
        query: Mapping[str, QueryValue] | None = None,
        headers: Mapping[str, str] | None = None,
        format: ResponseFormat | None = None,
        content: str | None = None,
    ) -> Any:
        """Send a request, consulting the retry hook after each failure.

        Any exception from the attempt (transport error, timeout, or the
        ``SERPError`` built from an error response) is offered to
        ``self.retry`` together with the zero-based attempt index. The hook is
        called synchronously; the request is repeated while it returns a
        truthy value, otherwise the exception propagates.
        """
        attempt = 0
        while True:
            try:
                return await self._request_once(method, path, query, headers, format, content)
            except Exception as exc:
                if not (self.retry and self.retry(exc, attempt)):
                    raise
            attempt += 1

    async def _request_once(
        self,
        method: str,
        path: str,
        query: Mapping[str, QueryValue] | None,
        headers: Mapping[str, str] | None,
        format: ResponseFormat | None,
        content: str | None,
    ) -> Any:
        """Perform a single attempt and decode the response.

        Raises:
            TimeoutError: the package's own error, wrapping ``httpx.TimeoutException``.
        """
        try:
            response = await self._client.request(
                method,
                f"{self.base_url}{path}",
                params=_encode_query(query),
                headers=self._merged_headers(headers),
                content=content,
            )
        except httpx.TimeoutException as exc:
            raise TimeoutError(self.timeout) from exc
        return self._handle_response(response, format)
478
+
479
+
480
def _split_query_and_headers(
    query: Mapping[str, QueryValue],
) -> tuple[dict[str, QueryValue], dict[str, str]]:
    """Separate request parameters into query-string values and headers.

    Entries whose value is ``None`` are dropped. Keys listed in
    ``_PROXY_HEADER_MAP`` become headers (value encoded to a string); all
    other keys stay query parameters with their value untouched.
    """
    params_out: dict[str, QueryValue] = {}
    headers_out: dict[str, str] = {}
    for name, raw in query.items():
        if raw is None:
            continue
        if name in _PROXY_HEADER_MAP:
            headers_out[_PROXY_HEADER_MAP[name]] = _encode_value(raw)
            continue
        params_out[name] = raw
    return params_out, headers_out
494
+
495
+
496
def _encode_query(query: Mapping[str, QueryValue] | None) -> dict[str, str]:
    """Encode query values to strings, skipping ``None`` entries.

    A missing or empty mapping yields an empty dict.
    """
    encoded: dict[str, str] = {}
    for name, raw in (query or {}).items():
        if raw is not None:
            encoded[name] = _encode_value(raw)
    return encoded
500
+
501
+
502
def _encode_value(value: QueryValue) -> str:
    """Render one parameter value as the string sent on the wire.

    Lists and tuples are comma-joined, booleans become lowercase
    ``"true"``/``"false"``, and everything else goes through ``str()``.
    """
    if isinstance(value, (list, tuple)):
        return ",".join(map(str, value))
    if isinstance(value, bool):
        return str(value).lower()
    return str(value)
508
+
509
+
510
def _read_body(response: httpx.Response, format: ResponseFormat | None = None) -> Any:
    """Decode a response body into JSON data, text, or ``None``.

    Rules, in order:

    * HTTP 204 -> ``None``.
    * A successful response to a request with a non-JSON ``format`` -> the raw
      text. Error responses skip this shortcut so that a JSON error payload is
      still decoded and its ``error``/``message`` fields are not lost.
    * Empty body -> ``None``.
    * Otherwise the body is parsed as JSON; if that fails the raw text is
      returned. This also covers bodies labelled ``application/json`` that are
      malformed, which previously surfaced as a raw JSON decoding error.

    Args:
        response: The received HTTP response.
        format: The response format that was requested, if any.

    Returns:
        Parsed JSON (usually a ``dict``), a ``str``, or ``None``.
    """
    if response.status_code == 204:
        return None
    if format and format != "json" and not response.is_error:
        return response.text
    text = response.text
    if not text:
        return None
    try:
        return response.json()
    except ValueError:
        # json.JSONDecodeError is a ValueError: fall back to the raw text.
        return text
525
+
526
+
527
def _int_header(headers: httpx.Headers, name: str) -> int | None:
    """Read header ``name`` as an integer.

    Returns ``None`` when the header is absent, empty, or not a valid integer.
    """
    raw = headers.get(name)
    if raw in (None, ""):
        return None
    try:
        parsed = int(raw)
    except ValueError:
        parsed = None
    return parsed
openserp/errors.py ADDED
@@ -0,0 +1,83 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Any
4
+
5
+
6
class SERPError(Exception):
    """Base class for every error raised by this package.

    Attributes:
        status: HTTP status code, or ``0`` when no response is associated.
        code: Machine-readable error code, if any.
        reason: Additional reason string, if any.
        request_id: Request identifier, if any.
        meta: Extra structured details, if any.
        response: The decoded response body the error was built from, if any.
    """

    def __init__(
        self,
        message: str,
        *,
        status: int = 0,
        code: str | None = None,
        reason: str | None = None,
        request_id: str | None = None,
        meta: dict[str, Any] | None = None,
        response: Any = None,
    ) -> None:
        super().__init__(message)
        self.status, self.code, self.reason = status, code, reason
        self.request_id, self.meta, self.response = request_id, meta, response
25
+
26
+
27
class RateLimitError(SERPError):
    """Error built for HTTP 429 responses or the ``rate_limited`` error code."""
29
+
30
+
31
class CaptchaError(SERPError):
    """Error built for responses carrying the ``captcha_detected`` error code."""
33
+
34
+
35
class CloudOnlyError(SERPError):
    """Raised when a cloud-only method is called on a non-cloud client."""

    def __init__(self, method: str) -> None:
        """Build the message from the name of the offending ``method``."""
        message = (
            f"{method} is only available against OpenSERP Cloud. "
            "Configure api_key/base_url for https://api.openserp.org/v1 "
            'or set backend="cloud".'
        )
        super().__init__(message)
42
+
43
+
44
class OssOnlyError(SERPError):
    """Raised when an OSS-only method is called on a non-OSS client."""

    def __init__(self, method: str) -> None:
        """Build the message from the name of the offending ``method``."""
        message = (
            f"{method} is only available against a self-hosted OpenSERP server. "
            'Configure base_url for your OSS server or set backend="oss".'
        )
        super().__init__(message)
50
+
51
+
52
class TimeoutError(SERPError):
    """Raised when a request times out.

    Note: this class shadows the builtin ``TimeoutError`` inside this module
    and is not a subclass of it.
    """

    def __init__(self, timeout: float) -> None:
        """Build the error; ``timeout`` (seconds) is only used in the message."""
        message = f"OpenSERP request timed out after {timeout:g}s"
        super().__init__(message, code="request_timeout")
58
+
59
+
60
def error_from_response(
    status: int,
    body: Any,
    request_id: str | None = None,
) -> SERPError:
    """Build the exception that represents an error response.

    Args:
        status: HTTP status code.
        body: Decoded response body; only a ``dict`` contributes details.
        request_id: Fallback request id used when the body carries none.

    Returns:
        ``RateLimitError`` for status 429 or code ``rate_limited``,
        ``CaptchaError`` for code ``captcha_detected``, otherwise a plain
        ``SERPError``. The error is returned, not raised.
    """
    payload = body if isinstance(body, dict) else {}
    code = payload.get("error")

    error_cls: type[SERPError]
    if status == 429 or code == "rate_limited":
        error_cls = RateLimitError
    elif code == "captcha_detected":
        error_cls = CaptchaError
    else:
        error_cls = SERPError

    return error_cls(
        payload.get("message") or f"OpenSERP request failed with status {status}",
        status=status,
        code=code,
        reason=payload.get("reason"),
        request_id=payload.get("request_id") or request_id,
        meta=payload.get("meta"),
        response=body,
    )
openserp/models.py ADDED
@@ -0,0 +1,318 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Any, Literal
4
+
5
+ from pydantic import BaseModel, ConfigDict, Field
6
+
7
# Kind of server the client talks to: a self-hosted server ("oss") or
# OpenSERP Cloud ("cloud").
Backend = Literal["oss", "cloud"]
# Engine identifiers; the client interpolates them into per-engine request
# paths such as "/{engine}/search".
Engine = Literal["google", "yandex", "baidu", "bing", "duck", "duckduckgo", "ecosia"]
# Values of the mega-search `mode` parameter; the fast_*/any_* client helpers
# force "fast" and "any" respectively.
MegaMode = Literal["balanced", "any", "fast"]
# Values of the `format` request parameter; for anything other than "json" the
# client returns the response text instead of a parsed model.
ResponseFormat = Literal["json", "markdown", "text", "ndjson"]
11
+
12
+
13
class OpenSERPModel(BaseModel):
    """Common base for all response models.

    Unknown fields are kept (``extra="allow"``) rather than rejected.
    """

    model_config = ConfigDict(extra="allow")

    def to_dict(self) -> dict[str, Any]:
        """Return the model as a dict of JSON-compatible values."""
        as_json = self.model_dump(mode="json")
        return as_json
18
+
19
+
20
class CreditInfo(OpenSERPModel):
    """Credit accounting attached to a response.

    The client fills this from the ``x-credits-used`` and
    ``x-credits-remaining`` response headers; envelopes also expose it as
    their ``credits`` field.
    """

    used: int | None = None
    remaining: int | None = None
23
+
24
+
25
class LastResponse(OpenSERPModel):
    """Metadata about the most recent HTTP response seen by a client.

    Built by the client from the status code and the ``x-*`` response headers
    named in the comments below.
    """

    status: int  # HTTP status code
    request_id: str | None = None  # x-request-id
    credits: CreditInfo | None = None  # x-credits-used / x-credits-remaining
    engine_used: str | None = None  # x-engine-used
    fallback_engine: str | None = None  # x-fallback-engine
    cache: str | None = None  # x-cache
    proxy_mode: str | None = None  # x-proxy-mode
    proxy_tag: str | None = None  # x-proxy-tag
    proxy_used: str | None = None  # x-proxy-used
    network_bytes: int | None = None  # x-network-bytes, when it parses as an int
    browser_profile_id: str | None = None  # x-browser-profile-id
    # Every response header, with header names lower-cased.
    headers: dict[str, str] = Field(default_factory=dict)
38
+
39
+
40
class QueryEcho(OpenSERPModel):
    """The ``query`` section of a response envelope.

    NOTE(review): presumably the server's echo of the submitted query;
    confirm field meanings against the API documentation.
    """

    text: str | None = None
    lang: str | None = None
    region: str | None = None
    engines_requested: list[str] = Field(default_factory=list)
45
+
46
+
47
class EngineErrorDetail(OpenSERPModel):
    """One entry of ``ResponseMeta.engine_errors``.

    ``engine`` and ``error`` are required; ``message`` is optional.
    """

    engine: str
    error: str
    message: str | None = None
51
+
52
+
53
class ResponseMeta(OpenSERPModel):
    """The ``meta`` section of a response envelope.

    All fields are optional; ``engines_failed`` defaults to an empty list.
    """

    request_id: str | None = None
    requested_at: str | None = None  # kept as a string, not parsed into a datetime
    took_ms: int | None = None
    engines_failed: list[str] = Field(default_factory=list)
    engine_errors: list[EngineErrorDetail] | None = None
    version: str | None = None
60
+
61
+
62
class Pagination(OpenSERPModel):
    """The ``pagination`` section of a response envelope; all fields optional."""

    page: int | None = None
    has_more: bool | None = None
    next_start: int | None = None
66
+
67
+
68
class Position(OpenSERPModel):
    """Position data of a search result.

    ``absolute`` is flattened into the ``position_absolute`` column by
    ``SearchEnvelope._result_records``.
    """

    absolute: int | None = None
70
+
71
+
72
class DomainInfo(OpenSERPModel):
    """The ``domain_info`` section of a search result; all fields optional.

    NOTE(review): ``tld``/``sld`` presumably mean top-level and second-level
    domain; confirm against the API documentation.
    """

    tld: str | None = None
    sld: str | None = None
    category: str | None = None
76
+
77
+
78
class Classification(OpenSERPModel):
    """The ``classification`` section of a search result; all fields optional."""

    content_type: str | None = None
    source_hint: str | None = None
81
+
82
+
83
class SearchResult(OpenSERPModel):
    """One entry of ``SearchEnvelope.results``.

    Every field is optional, so partially populated results validate.
    """

    id: str | None = None
    rank: int | None = None
    type: str | None = None
    title: str | None = None
    url: str | None = None
    display_url: str | None = None
    snippet: str | None = None
    domain: str | None = None
    favicon: str | None = None
    position: Position | None = None
    engine: str | None = None
    domain_info: DomainInfo | None = None
    classification: Classification | None = None
97
+
98
+
99
class ImageData(OpenSERPModel):
    """The ``image`` section of an image result.

    ``ImageEnvelope._result_records`` flattens these fields into the
    ``image_url``, ``thumbnail``, ``width`` and ``height`` columns.
    """

    url: str | None = None
    thumbnail: str | None = None
    width: int | None = None
    height: int | None = None
104
+
105
+
106
class ImageSource(OpenSERPModel):
    """The ``source`` section of an image result.

    ``ImageEnvelope._result_records`` flattens these fields into the
    ``page_url`` and ``domain`` columns.
    """

    page_url: str | None = None
    domain: str | None = None
109
+
110
+
111
class ImageResult(OpenSERPModel):
    """One entry of ``ImageEnvelope.results``; every field is optional."""

    id: str | None = None
    rank: int | None = None
    type: str | None = None
    title: str | None = None
    image: ImageData | None = None
    source: ImageSource | None = None
    engine: str | None = None
119
+
120
+
121
class ClusterOccurrence(OpenSERPModel):
    """One entry of ``Cluster.occurrences``; every field is optional.

    NOTE(review): ``result_id`` presumably refers to a ``SearchResult.id``;
    confirm against the API documentation.
    """

    engine: str | None = None
    rank: int | None = None
    result_id: str | None = None
125
+
126
+
127
class Cluster(OpenSERPModel):
    """One entry of ``MegaSearchEnvelope.clusters``.

    All fields are optional; ``occurrences`` defaults to an empty list.
    """

    id: str | None = None
    canonical_url: str | None = None
    domain: str | None = None
    title: str | None = None
    occurrences: list[ClusterOccurrence] = Field(default_factory=list)
    engines_count: int | None = None
    best_rank: int | None = None
    score: float | None = None
136
+
137
+
138
class Envelope(OpenSERPModel):
    """Fields shared by all result envelopes, plus DataFrame export.

    Subclasses supply the ``results`` list and implement
    ``_result_records``.
    """

    query: QueryEcho | None = None
    meta: ResponseMeta | None = None
    pagination: Pagination | None = None
    credits: CreditInfo | None = None

    def to_pandas(self) -> Any:
        """Return the results as a ``pandas.DataFrame``, one row per result.

        Raises:
            ImportError: If pandas is not installed.
        """
        try:
            import pandas  # type: ignore[import-untyped]
        except ImportError as exc:  # pragma: no cover
            hint = "Install pandas support with: pip install openserp[pandas]"
            raise ImportError(hint) from exc

        rows = self._result_records()
        return pandas.DataFrame(rows)

    def _result_records(self) -> list[dict[str, Any]]:
        """Return one flat dict per result; must be overridden by subclasses."""
        raise NotImplementedError
154
+
155
+
156
class SearchEnvelope(Envelope):
    """Envelope whose ``results`` are web search results."""

    results: list[SearchResult] = Field(default_factory=list)

    def _result_records(self) -> list[dict[str, Any]]:
        """Dump each result to a dict, adding ``position_absolute`` when a position exists."""

        def flatten(result: SearchResult) -> dict[str, Any]:
            row = result.model_dump(mode="json")
            if result.position:
                row["position_absolute"] = result.position.absolute
            return row

        return [flatten(result) for result in self.results]
167
+
168
+
169
class MegaSearchEnvelope(SearchEnvelope):
    """Search envelope returned by the ``/mega/search`` endpoint.

    Adds an optional ``clusters`` list to the regular search envelope.
    """

    clusters: list[Cluster] | None = None
171
+
172
+
173
class ImageEnvelope(Envelope):
    """Envelope whose ``results`` are image search results."""

    results: list[ImageResult] = Field(default_factory=list)

    def _result_records(self) -> list[dict[str, Any]]:
        """Dump each result to a dict and flatten its nested image/source fields.

        Adds ``image_url``, ``thumbnail``, ``width`` and ``height`` when the
        result has image data, and ``page_url`` and ``domain`` when it has a
        source.
        """

        def flatten(result: ImageResult) -> dict[str, Any]:
            row = result.model_dump(mode="json")
            picture, origin = result.image, result.source
            if picture:
                row.update(
                    image_url=picture.url,
                    thumbnail=picture.thumbnail,
                    width=picture.width,
                    height=picture.height,
                )
            if origin:
                row.update(page_url=origin.page_url, domain=origin.domain)
            return row

        return [flatten(result) for result in self.results]
190
+
191
+
192
class ErrorResponse(OpenSERPModel):
    """Schema of an error response body.

    ``error`` and ``code`` are required. ``error_from_response`` reads the
    same keys (``error``, ``message``, ``reason``, ``request_id``, ``meta``)
    directly from the raw dict rather than through this model.
    """

    error: str
    code: int
    request_id: str | None = None
    message: str | None = None
    reason: str | None = None
    meta: dict[str, Any] | None = None
199
+
200
+
201
class EngineHealth(OpenSERPModel):
    """One entry of ``HealthStatus.engines``; all three fields are required."""

    name: str
    initialized: bool
    status: str
205
+
206
+
207
class HealthStatus(OpenSERPModel):
    """Response model for the ``/health`` endpoint; only ``status`` is required."""

    status: str
    uptime: str | None = None
    engines: list[EngineHealth] = Field(default_factory=list)
    system: dict[str, Any] = Field(default_factory=dict)  # free-form, not modelled further
212
+
213
+
214
class ReadinessStatus(OpenSERPModel):
    """Response model for the ``/ready`` endpoint; ``status`` is required."""

    status: str
216
+
217
+
218
class CacheStats(OpenSERPModel):
    """Response model for ``/stats/cache``; also the ``cache`` field of ``StatsResponse``.

    Only ``status`` is required.
    """

    status: bool  # NOTE(review): presumably "cache enabled"; confirm with the API docs
    entries: int | None = None
    hits: int | None = None
    misses: int | None = None
    bypasses: int | None = None
    evictions: int | None = None
    ttl_seconds: int | None = None
    max_size: int | None = None
227
+
228
+
229
class ProxyStats(OpenSERPModel):
    """Response model for ``/stats/proxy``; also the ``proxy`` field of ``StatsResponse``.

    Every field is optional; nested structures are kept as plain dicts and
    lists rather than modelled.
    """

    configured_count: int | None = None
    healthy_count: int | None = None
    unhealthy_count: int | None = None
    request_proxy_url_enabled: bool | None = None
    lanes: dict[str, Any] | None = None
    browser_processes: dict[str, Any] | None = None
    tags: dict[str, Any] = Field(default_factory=dict)
    entries: list[dict[str, Any]] = Field(default_factory=list)
    engines: dict[str, Any] | None = None
239
+
240
+
241
class CircuitBreakerStat(OpenSERPModel):
    """One entry of ``CircuitBreakerStatsResponse.circuit_breakers``.

    ``engine``, ``state``, ``failure_count`` and ``last_changed`` are required.
    """

    engine: str
    state: str
    failure_count: int
    last_changed: str  # kept as a string, not parsed into a datetime
    retry_in: int | None = None  # NOTE(review): unit not visible here; confirm
    avg_response_ms: int | None = None
248
+
249
+
250
class CircuitBreakerStatsResponse(OpenSERPModel):
    """Response model for the ``/stats/cb`` endpoint."""

    circuit_breakers: list[CircuitBreakerStat] = Field(default_factory=list)
252
+
253
+
254
class StatsResponse(CircuitBreakerStatsResponse):
    """Response model for the ``/stats`` endpoint.

    Adds required ``cache`` and ``proxy`` sections to the inherited
    ``circuit_breakers`` list.
    """

    cache: CacheStats
    proxy: ProxyStats
257
+
258
+
259
class MegaEngineInfo(OpenSERPModel):
    """One entry of ``MegaEnginesResponse.engines``.

    ``name`` and ``initialized`` are required.
    """

    name: str
    initialized: bool
    circuit_state: str | None = None
263
+
264
+
265
class MegaEnginesResponse(OpenSERPModel):
    """Response model for the ``/mega/engines`` endpoint; ``total`` is required."""

    engines: list[MegaEngineInfo] = Field(default_factory=list)
    total: int
268
+
269
+
270
class CloudAccount(OpenSERPModel):
    """Response model for the cloud ``/me`` endpoint; every field is optional."""

    id: str | None = None
    email: str | None = None
    created_at: str | None = None  # kept as a string, not parsed into a datetime
    credits_remaining: int | None = None
    plan: str | None = None
276
+
277
+
278
class Price(OpenSERPModel):
    """Price of one operation, as used by the fields of ``Pricing``."""

    credits: int | None = None
    price_usd: float | None = None
281
+
282
+
283
class Pricing(OpenSERPModel):
    """Response model for the cloud ``/pricing`` endpoint.

    Holds an optional credit price plus one optional ``Price`` per operation.
    """

    credit_price_usd: float | None = None
    search: Price | None = None
    mega_search: Price | None = None
    image_search: Price | None = None
    any_search: Price | None = None
    fast_search: Price | None = None
    any_image: Price | None = None
    fast_image: Price | None = None
292
+
293
+
294
class EngineStatus(OpenSERPModel):
    """Status entry for a single engine, as used by ``EnginesStatus.engines``."""

    status: str | None = None
    latency_ms: int | None = None
297
+
298
+
299
class EnginesStatus(OpenSERPModel):
    """Response model for the cloud ``/engines/status`` endpoint."""

    overall: str | None = None
    # NOTE(review): keys are presumably engine names; confirm with the API docs.
    engines: dict[str, EngineStatus] | None = None
302
+
303
+
304
class EngineCapability(OpenSERPModel):
    """Capability flags for one engine, as used by ``EnginesCapabilities.engines``."""

    web: bool | None = None
    image: bool | None = None
    fallback_web: bool | None = None
    fallback_image: bool | None = None
309
+
310
+
311
class ModeCapability(OpenSERPModel):
    """Capability flags for one mode, as used by ``EnginesCapabilities.modes``."""

    web: bool | None = None
    image: bool | None = None
314
+
315
+
316
class EnginesCapabilities(OpenSERPModel):
    """Response model for the cloud ``/engines/capabilities`` endpoint."""

    # NOTE(review): keys are presumably engine names / mode names; confirm
    # with the API docs.
    engines: dict[str, EngineCapability] | None = None
    modes: dict[str, ModeCapability] | None = None
openserp/py.typed ADDED
@@ -0,0 +1 @@
1
+
@@ -0,0 +1,213 @@
1
+ Metadata-Version: 2.4
2
+ Name: openserp
3
+ Version: 0.1.0
4
+ Summary: Python SDK for the OpenSERP self-hosted server and OpenSERP Cloud.
5
+ Project-URL: Homepage, https://openserp.org
6
+ Project-URL: Documentation, https://openserp.org/docs
7
+ Project-URL: Repository, https://github.com/karust/openserp
8
+ Project-URL: Issues, https://github.com/karust/openserp/issues
9
+ Author: OpenSERP
10
+ License-Expression: MIT
11
+ Keywords: ai-grounding,baidu,bing,duckduckgo,ecosia,google,openserp,search,seo,serp,yandex
12
+ Classifier: Development Status :: 3 - Alpha
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: Intended Audience :: Science/Research
15
+ Classifier: License :: OSI Approved :: MIT License
16
+ Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3.10
18
+ Classifier: Programming Language :: Python :: 3.11
19
+ Classifier: Programming Language :: Python :: 3.12
20
+ Classifier: Programming Language :: Python :: 3.13
21
+ Classifier: Typing :: Typed
22
+ Requires-Python: >=3.10
23
+ Requires-Dist: httpx<1,>=0.27
24
+ Requires-Dist: pydantic<3,>=2.7
25
+ Provides-Extra: dev
26
+ Requires-Dist: build>=1.2; extra == 'dev'
27
+ Requires-Dist: mypy>=1.10; extra == 'dev'
28
+ Requires-Dist: pytest-asyncio>=0.23; extra == 'dev'
29
+ Requires-Dist: pytest>=8.2; extra == 'dev'
30
+ Requires-Dist: respx>=0.21; extra == 'dev'
31
+ Requires-Dist: ruff>=0.5; extra == 'dev'
32
+ Provides-Extra: pandas
33
+ Requires-Dist: pandas>=2.0; extra == 'pandas'
34
+ Description-Content-Type: text/markdown
35
+
36
+ # OpenSERP Python SDK
37
+
38
+ Alpha: the API may change before version 1.0.
39
+
40
+ Python SDK for the OpenSERP self-hosted server and OpenSERP Cloud. The same client works against both backends.
41
+
42
+ ## Install
43
+
44
+ ```bash
45
+ pip install openserp
46
+ ```
47
+
48
+ For DataFrame export:
49
+
50
+ ```bash
51
+ pip install "openserp[pandas]"
52
+ ```
53
+
54
+ ## OSS Mode
55
+
56
+ OSS mode is the default. Run the open-source server locally, then point the SDK at it:
57
+
58
+ ```python
59
+ from openserp import OpenSERP
60
+
61
+ client = OpenSERP(base_url="http://localhost:7000")
62
+
63
+ resp = client.search(
64
+ engine="google",
65
+ text="openserp",
66
+ limit=10,
67
+ region="US",
68
+ )
69
+
70
+ print(resp.results[0].title, resp.results[0].url)
71
+ ```
72
+
73
+ If you omit every option, the client uses `http://localhost:7000`.
74
+
75
+ ## Cloud Mode
76
+
77
+ Pass an API key and the client defaults to `https://api.openserp.org/v1`.
78
+
79
+ ```python
80
+ import os
81
+
82
+ from openserp import OpenSERP
83
+
84
+ client = OpenSERP(api_key=os.environ["OPENSERP_API_KEY"])
85
+ resp = client.search(engine="google", text="openserp")
86
+
87
+ print(resp.results[0].title)
88
+ print(client.last_response.credits)
89
+ ```
90
+
91
+ ## Async
92
+
93
+ ```python
94
+ import asyncio
95
+ import os
96
+
97
+ from openserp import AsyncOpenSERP
98
+
99
+
100
+ async def main() -> None:
101
+ async with AsyncOpenSERP(api_key=os.environ["OPENSERP_API_KEY"]) as client:
102
+ resp = await client.search(engine="google", text="openserp")
103
+ print(resp.results[0].title)
104
+
105
+
106
+ asyncio.run(main())
107
+ ```
108
+
109
+ ## Mega Search
110
+
111
+ ```python
112
+ from openserp import OpenSERP
113
+
114
+ client = OpenSERP()
115
+
116
+ mega = client.mega_search(
117
+ text="openserp",
118
+ engines=["google", "bing", "yandex"],
119
+ mode="balanced",
120
+ )
121
+
122
+ df = mega.to_pandas()
123
+ print(df[["rank", "title", "url", "engine"]])
124
+ ```
125
+
126
+ Convenience helpers are also available:
127
+
128
+ ```python
129
+ client.fast_search(text="openserp", engines=["google", "bing"])
130
+ client.any_search(text="openserp", engines=["google", "bing"])
131
+ client.mega_image(text="golang logo", engines=["google", "bing"])
132
+ ```
133
+
134
+ ## AI / RAG
135
+
136
+ ```python
137
+ from openserp import OpenSERP
138
+
139
+ client = OpenSERP()
140
+ resp = client.search(engine="google", text="latest postgres indexing guide", limit=5)
141
+
142
+ context = "\n\n".join(
143
+ f"{item.title}\n{item.url}\n{item.snippet}"
144
+ for item in resp.results
145
+ )
146
+
147
+ prompt = f"Use these web results as grounding:\n\n{context}\n\nSummarize the key points."
148
+ ```
149
+
150
+ ## SEO Keyword Tracker
151
+
152
+ ```python
153
+ from openserp import OpenSERP
154
+
155
+ client = OpenSERP()
156
+ keywords = ["openserp", "serp api", "google search api"]
157
+ frames = []
158
+
159
+ for keyword in keywords:
160
+ resp = client.search(engine="google", text=keyword, region="US", limit=10)
161
+ frame = resp.to_pandas()
162
+ frame["keyword"] = keyword
163
+ frames.append(frame)
164
+
165
+ report = __import__("pandas").concat(frames, ignore_index=True)
166
+ report.to_csv("rank-report.csv", index=False)
167
+ ```
168
+
169
+ ## Async Batch
170
+
171
+ ```python
172
+ import asyncio
173
+
174
+ from openserp import AsyncOpenSERP
175
+
176
+
177
+ async def main() -> None:
178
+ sem = asyncio.Semaphore(20)
179
+ queries = [f"keyword {i}" for i in range(500)]
180
+
181
+ async with AsyncOpenSERP() as client:
182
+ async def run(query: str):
183
+ async with sem:
184
+ return await client.search(engine="google", text=query, limit=10)
185
+
186
+ responses = await asyncio.gather(*(run(query) for query in queries))
187
+ print(len(responses))
188
+
189
+
190
+ asyncio.run(main())
191
+ ```
192
+
193
+ ## Endpoint Availability
194
+
195
+ Search endpoints work in both modes. Operational OSS endpoints such as `health()`, `stats()`, `parse_google()`, and `parse_bing()` require a self-hosted server and raise `OssOnlyError` in Cloud mode.
196
+
197
+ Cloud account endpoints such as `me()`, `pricing()`, `engines_status()`, and `engines_capabilities()` require Cloud mode and raise `CloudOnlyError` in OSS mode.
198
+
199
+ ## Development
200
+
201
+ ```bash
202
+ python -m pip install -e ".[dev,pandas]"
203
+ pytest
204
+ ruff check .
205
+ mypy src
206
+ python -m build
207
+ ```
208
+
209
+ The project is scaffolded for `uv` too:
210
+
211
+ ```bash
212
+ uv build
213
+ ```
@@ -0,0 +1,9 @@
1
+ openserp/__init__.py,sha256=Le2lAfY38d2mLKIseiUILcQNBJbTkrtQEwaYHYtd5u8,1235
2
+ openserp/backend.py,sha256=I2sw_NzhamwJAyHBtMFA_h6v9Qg8WkLIdalojVyK2XU,951
3
+ openserp/client.py,sha256=gFocl1laNI9GuonoqknHLBA4Di7fDYg_revmKGlMXkg,19532
4
+ openserp/errors.py,sha256=38HxqEU1xjicr0oSOY0gWN7bS6C5C6IreNrnP3Dj4bg,2181
5
+ openserp/models.py,sha256=hY6vfNAov3kmJAWBjzi1GzuNrdbTnPZ5w9Rxui13npI,8511
6
+ openserp/py.typed,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
7
+ openserp-0.1.0.dist-info/METADATA,sha256=MCWk57fLJijWuVpeQ_UhHMmgYipwm9D6LHaaroMVcPk,5187
8
+ openserp-0.1.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
9
+ openserp-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.29.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any