caesar-search 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,31 @@
1
+ """caesar-search: official Python SDK for the Caesar search API."""
2
+
3
+ from . import models
4
+ from ._client import DEFAULT_BASE_URL, AsyncCaesar, Caesar
5
+ from ._exceptions import (
6
+ APIConnectionError,
7
+ APIStatusError,
8
+ APITimeoutError,
9
+ AuthenticationError,
10
+ CaesarError,
11
+ RateLimitError,
12
+ )
13
+ from ._version import __version__
14
+ from .models import DocumentResponse, FeedbackResponse, SearchResponse
15
+
16
+ __all__ = [
17
+ "DEFAULT_BASE_URL",
18
+ "APIConnectionError",
19
+ "APIStatusError",
20
+ "APITimeoutError",
21
+ "AsyncCaesar",
22
+ "AuthenticationError",
23
+ "Caesar",
24
+ "CaesarError",
25
+ "DocumentResponse",
26
+ "FeedbackResponse",
27
+ "RateLimitError",
28
+ "SearchResponse",
29
+ "__version__",
30
+ "models",
31
+ ]
@@ -0,0 +1,482 @@
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ import re
5
+ import time
6
+ from types import TracebackType
7
+ from typing import Any
8
+
9
+ import httpx
10
+
11
+ from ._exceptions import APIConnectionError, APITimeoutError, status_error_from_response
12
+ from ._version import __version__
13
+ from .models import DocumentResponse, FeedbackResponse, SearchResponse
14
+
15
# NOTE(review): the default host name contains "staging" — confirm this is the
# endpoint a published release should talk to. Callers can override it with
# ``base_url=`` or the ``CAESAR_BASE_URL`` environment variable.
DEFAULT_BASE_URL = "https://search-api-staging-779189860552.europe-west1.run.app"
# Default ``timeout=`` for the clients; handed to the httpx client the SDK creates.
DEFAULT_TIMEOUT = 30.0
# Default ``max_retries=``: retries allowed in addition to the first attempt.
DEFAULT_MAX_RETRIES = 3
# Backoff schedule: _BASE_DELAY * 2**attempt seconds, capped at _MAX_DELAY.
_BASE_DELAY = 0.5
_MAX_DELAY = 8.0
# Response statuses that are retried.
_RETRYABLE_STATUSES = frozenset({429, 500, 502, 503, 504})
# 8-4-4-4-12 hexadecimal UUID, case-insensitive; used to tell a doc_id from a URL.
_UUID_PATTERN = re.compile(r"^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$", re.IGNORECASE)
22
+
23
+
24
+ def _resolve_key(api_key: str | None) -> str | None:
25
+ return api_key or os.environ.get("CAESAR_API_KEY") or None
26
+
27
+
28
+ def _resolve_base_url(base_url: str | None) -> str:
29
+ return (base_url or os.environ.get("CAESAR_BASE_URL") or DEFAULT_BASE_URL).rstrip("/")
30
+
31
+
32
def _headers(api_key: str | None) -> dict[str, str]:
    """Build the headers sent with every request.

    ``Authorization`` is included only when a non-empty key is available, so
    keyless requests go out without it.
    """
    auth = {"Authorization": f"Bearer {api_key}"} if api_key else {}
    return {
        "Accept": "application/json",
        "X-Caesar-Client": f"python-sdk/{__version__}",
        **auth,
    }
40
+
41
+
42
def _seconds_until_http_date(value: str) -> float | None:
    """Return the seconds from now until the HTTP-date *value*, or ``None`` if it does not parse."""
    from datetime import datetime, timezone
    from email.utils import parsedate_to_datetime

    try:
        when = parsedate_to_datetime(value)
    except (TypeError, ValueError):
        return None
    if when.tzinfo is None:
        # A date without usable zone information is read as UTC.
        when = when.replace(tzinfo=timezone.utc)
    return (when - datetime.now(timezone.utc)).total_seconds()


def _retry_delay(attempt: int, retry_after: str | None) -> float:
    """Return how many seconds to wait before the next retry.

    A usable ``Retry-After`` value takes precedence over the backoff
    schedule. Both forms permitted by RFC 9110 are understood: a non-negative
    number of seconds, or an HTTP-date (the wait is the time remaining until
    that date). Anything else — unparseable text, NaN, a negative number, a
    date in the past — falls back to ``_BASE_DELAY * 2**attempt``. Every
    result is capped at ``_MAX_DELAY``.

    Args:
        attempt: Zero-based index of the attempt that just failed.
        retry_after: Raw ``Retry-After`` header value, or ``None``/empty.

    Returns:
        The delay in seconds, never more than ``_MAX_DELAY``.
    """
    if retry_after:
        try:
            seconds = float(retry_after)
        except ValueError:
            seconds = _seconds_until_http_date(retry_after)
        # NaN fails the comparison too, so it also falls through to backoff.
        if seconds is not None and seconds >= 0:
            return float(min(seconds, _MAX_DELAY))
    # Bound the exponent: 2**attempt is an arbitrary-precision int, and
    # multiplying a huge one by a float raises OverflowError.
    return float(min(_BASE_DELAY * (2 ** min(attempt, 32)), _MAX_DELAY))
51
+
52
+
53
+ def _search_body(
54
+ query: str,
55
+ *,
56
+ mode: str | None,
57
+ max_results: int | None,
58
+ objective: str | None,
59
+ session_id: str | None,
60
+ verbosity: str | None,
61
+ max_chars_total: int | None,
62
+ extra_body: dict[str, Any] | None,
63
+ ) -> dict[str, Any]:
64
+ body: dict[str, Any] = {"query": query, "client_model": "python-sdk"}
65
+ if mode is not None:
66
+ body["mode"] = mode
67
+ if max_results is not None:
68
+ body["max_results"] = max_results
69
+ if objective is not None:
70
+ body["objective"] = objective
71
+ if session_id is not None:
72
+ body["session_id"] = session_id
73
+ response_shape: dict[str, Any] = {}
74
+ if verbosity is not None:
75
+ response_shape["verbosity"] = verbosity
76
+ if max_chars_total is not None:
77
+ response_shape["budget"] = {"max_chars_total": max_chars_total}
78
+ if response_shape:
79
+ body["response"] = response_shape
80
+ if extra_body:
81
+ body.update(extra_body)
82
+ return body
83
+
84
+
85
+ def _read_body(
86
+ target: str | None,
87
+ *,
88
+ doc_id: str | None,
89
+ url: str | None,
90
+ query: str | None,
91
+ max_chars: int | None,
92
+ start_char: int | None,
93
+ include: list[str] | None,
94
+ extra_body: dict[str, Any] | None,
95
+ ) -> dict[str, Any]:
96
+ if target is not None:
97
+ if _UUID_PATTERN.match(target):
98
+ doc_id = doc_id or target
99
+ else:
100
+ url = url or target
101
+ if not doc_id and not url:
102
+ raise ValueError("provide a doc_id or a url")
103
+
104
+ content: dict[str, Any] = {
105
+ "selection": "query_relevant" if query else "full_document",
106
+ "format": "markdown",
107
+ }
108
+ if max_chars is not None:
109
+ content["max_chars"] = max_chars
110
+ if start_char:
111
+ # Continuation reads address the raw document text so offsets stay
112
+ # contiguous between calls.
113
+ content["selection"] = "full_document"
114
+ content["range"] = {"start_char": start_char}
115
+
116
+ body: dict[str, Any] = {
117
+ "include": include if include is not None else ["metadata", "content"],
118
+ "content": content,
119
+ }
120
+ if doc_id:
121
+ body["doc_id"] = doc_id
122
+ elif url:
123
+ body["canonical_url"] = url
124
+ if query:
125
+ body["query"] = query
126
+ if extra_body:
127
+ body.update(extra_body)
128
+ return body
129
+
130
+
131
+ def _feedback_body(
132
+ event_type: str,
133
+ *,
134
+ search_id: str | None,
135
+ doc_id: str | None,
136
+ passage_id: str | None,
137
+ query: str | None,
138
+ rank: int | None,
139
+ notes: str | None,
140
+ extra_body: dict[str, Any] | None,
141
+ ) -> dict[str, Any]:
142
+ body: dict[str, Any] = {
143
+ "event_type": event_type,
144
+ "agent_context": {"client_model": "python-sdk"},
145
+ }
146
+ if search_id is not None:
147
+ body["search_id"] = search_id
148
+ if doc_id is not None:
149
+ body["doc_id"] = doc_id
150
+ if passage_id is not None:
151
+ body["passage_id"] = passage_id
152
+ if query is not None:
153
+ body["query"] = query
154
+ if rank is not None:
155
+ body["rank"] = rank
156
+ if notes is not None:
157
+ body["notes"] = notes
158
+ if extra_body:
159
+ body.update(extra_body)
160
+ return body
161
+
162
+
163
class Caesar:
    """Synchronous client for the Caesar search API.

    Reads ``CAESAR_API_KEY`` and ``CAESAR_BASE_URL`` from the environment when
    not passed explicitly. Anonymous access works at a lower rate limit when
    the deployment allows it.

    Args:
        api_key: Key sent as a Bearer token; falls back to ``CAESAR_API_KEY``.
        base_url: API root; falls back to ``CAESAR_BASE_URL``, then
            ``DEFAULT_BASE_URL``.
        timeout: Timeout in seconds for the ``httpx.Client`` created here.
            Not applied when ``http_client`` is supplied.
        max_retries: Additional attempts allowed after a retryable status
            (429, 500, 502, 503, 504). ``0`` or a negative value disables
            retries.
        http_client: ``httpx.Client`` to use instead of creating one. Note
            that :meth:`close` closes it too.
    """

    def __init__(
        self,
        *,
        api_key: str | None = None,
        base_url: str | None = None,
        timeout: float = DEFAULT_TIMEOUT,
        max_retries: int = DEFAULT_MAX_RETRIES,
        http_client: httpx.Client | None = None,
    ) -> None:
        self._api_key = _resolve_key(api_key)
        self._base_url = _resolve_base_url(base_url)
        self._max_retries = max_retries
        # Identity test: a supplied client is always used, whatever its truthiness.
        self._client = http_client if http_client is not None else httpx.Client(timeout=timeout)
        self.with_raw_response = _RawResponses(self)

    # -- public surface -------------------------------------------------

    def search(
        self,
        query: str,
        *,
        mode: str | None = None,
        max_results: int | None = None,
        objective: str | None = None,
        session_id: str | None = None,
        verbosity: str | None = None,
        max_chars_total: int | None = None,
        extra_body: dict[str, Any] | None = None,
    ) -> SearchResponse:
        """Search the web. Returns ranked results with provenance handles.

        Options left as ``None`` are not sent. ``extra_body`` is merged into
        the request JSON last and may override any generated key.

        Raises:
            APITimeoutError: The request timed out.
            APIConnectionError: The request could not be completed.
            APIStatusError: The API answered with a non-success status.
        """
        body = _search_body(
            query,
            mode=mode,
            max_results=max_results,
            objective=objective,
            session_id=session_id,
            verbosity=verbosity,
            max_chars_total=max_chars_total,
            extra_body=extra_body,
        )
        return SearchResponse.model_validate(self._request("/v1/search", body).json())

    def read(
        self,
        target: str | None = None,
        *,
        doc_id: str | None = None,
        url: str | None = None,
        query: str | None = None,
        max_chars: int | None = None,
        start_char: int | None = None,
        include: list[str] | None = None,
        extra_body: dict[str, Any] | None = None,
    ) -> DocumentResponse:
        """Read a document as clean markdown by doc_id or URL.

        Truncated reads report ``content.start_char``/``char_count``; continue
        with ``start_char=start + count`` instead of retrying bigger.

        ``target`` may be either identifier: a UUID-shaped value is treated as
        a doc_id, anything else as a URL.

        Raises:
            ValueError: Neither a doc_id nor a URL was provided.
            APITimeoutError: The request timed out.
            APIConnectionError: The request could not be completed.
            APIStatusError: The API answered with a non-success status.
        """
        body = _read_body(
            target,
            doc_id=doc_id,
            url=url,
            query=query,
            max_chars=max_chars,
            start_char=start_char,
            include=include,
            extra_body=extra_body,
        )
        return DocumentResponse.model_validate(self._request("/v1/document", body).json())

    def feedback(
        self,
        event_type: str,
        *,
        search_id: str | None = None,
        doc_id: str | None = None,
        passage_id: str | None = None,
        query: str | None = None,
        rank: int | None = None,
        notes: str | None = None,
        extra_body: dict[str, Any] | None = None,
    ) -> FeedbackResponse:
        """Send a feedback event about a search result or document.

        Raises:
            APITimeoutError: The request timed out.
            APIConnectionError: The request could not be completed.
            APIStatusError: The API answered with a non-success status.
        """
        body = _feedback_body(
            event_type,
            search_id=search_id,
            doc_id=doc_id,
            passage_id=passage_id,
            query=query,
            rank=rank,
            notes=notes,
            extra_body=extra_body,
        )
        return FeedbackResponse.model_validate(self._request("/v1/feedback", body).json())

    # -- plumbing ---------------------------------------------------------

    def _request(self, path: str, body: dict[str, Any]) -> httpx.Response:
        """POST ``body`` as JSON to ``path`` and return the successful response.

        Retryable statuses are retried up to ``max_retries`` times, sleeping
        between attempts. Timeouts and other transport errors are raised
        immediately and never retried. At least one attempt is always made,
        so a negative ``max_retries`` behaves like ``0``.
        """
        url = f"{self._base_url}{path}"
        headers = _headers(self._api_key)
        attempt = 0
        while True:
            try:
                response = self._client.post(url, json=body, headers=headers)
            except httpx.TimeoutException as error:
                raise APITimeoutError(f"request timed out: {error}") from error
            except httpx.HTTPError as error:
                raise APIConnectionError(f"request failed: {error}") from error

            if response.is_success:
                return response
            if response.status_code not in _RETRYABLE_STATUSES or attempt >= self._max_retries:
                raise status_error_from_response(response)
            time.sleep(_retry_delay(attempt, response.headers.get("Retry-After")))
            attempt += 1

    def close(self) -> None:
        """Close the underlying HTTP client."""
        self._client.close()

    def __enter__(self) -> Caesar:
        return self

    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc: BaseException | None,
        tb: TracebackType | None,
    ) -> None:
        self.close()
306
+
307
+
308
+ class _RawResponses:
309
+ """Escape hatch: the same methods, returning the raw httpx.Response."""
310
+
311
+ def __init__(self, client: Caesar) -> None:
312
+ self._client = client
313
+
314
+ def search(self, query: str, **kwargs: Any) -> httpx.Response:
315
+ extra_body = kwargs.pop("extra_body", None)
316
+ body = _search_body(
317
+ query,
318
+ mode=kwargs.pop("mode", None),
319
+ max_results=kwargs.pop("max_results", None),
320
+ objective=kwargs.pop("objective", None),
321
+ session_id=kwargs.pop("session_id", None),
322
+ verbosity=kwargs.pop("verbosity", None),
323
+ max_chars_total=kwargs.pop("max_chars_total", None),
324
+ extra_body=extra_body,
325
+ )
326
+ return self._client._request("/v1/search", body)
327
+
328
+ def read(self, target: str | None = None, **kwargs: Any) -> httpx.Response:
329
+ body = _read_body(
330
+ target,
331
+ doc_id=kwargs.pop("doc_id", None),
332
+ url=kwargs.pop("url", None),
333
+ query=kwargs.pop("query", None),
334
+ max_chars=kwargs.pop("max_chars", None),
335
+ start_char=kwargs.pop("start_char", None),
336
+ include=kwargs.pop("include", None),
337
+ extra_body=kwargs.pop("extra_body", None),
338
+ )
339
+ return self._client._request("/v1/document", body)
340
+
341
+ def feedback(self, event_type: str, **kwargs: Any) -> httpx.Response:
342
+ body = _feedback_body(
343
+ event_type,
344
+ search_id=kwargs.pop("search_id", None),
345
+ doc_id=kwargs.pop("doc_id", None),
346
+ passage_id=kwargs.pop("passage_id", None),
347
+ query=kwargs.pop("query", None),
348
+ rank=kwargs.pop("rank", None),
349
+ notes=kwargs.pop("notes", None),
350
+ extra_body=kwargs.pop("extra_body", None),
351
+ )
352
+ return self._client._request("/v1/feedback", body)
353
+
354
+
355
class AsyncCaesar:
    """Asynchronous client for the Caesar search API. Mirrors :class:`Caesar`.

    Args:
        api_key: Key sent as a Bearer token; falls back to ``CAESAR_API_KEY``.
        base_url: API root; falls back to ``CAESAR_BASE_URL``, then
            ``DEFAULT_BASE_URL``.
        timeout: Timeout in seconds for the ``httpx.AsyncClient`` created
            here. Not applied when ``http_client`` is supplied.
        max_retries: Additional attempts allowed after a retryable status
            (429, 500, 502, 503, 504). ``0`` or a negative value disables
            retries.
        http_client: ``httpx.AsyncClient`` to use instead of creating one.
            Note that :meth:`aclose` closes it too.
    """

    def __init__(
        self,
        *,
        api_key: str | None = None,
        base_url: str | None = None,
        timeout: float = DEFAULT_TIMEOUT,
        max_retries: int = DEFAULT_MAX_RETRIES,
        http_client: httpx.AsyncClient | None = None,
    ) -> None:
        self._api_key = _resolve_key(api_key)
        self._base_url = _resolve_base_url(base_url)
        self._max_retries = max_retries
        # Identity test: a supplied client is always used, whatever its truthiness.
        self._client = http_client if http_client is not None else httpx.AsyncClient(timeout=timeout)
        self.with_raw_response = _AsyncRawResponses(self)

    async def search(
        self,
        query: str,
        *,
        mode: str | None = None,
        max_results: int | None = None,
        objective: str | None = None,
        session_id: str | None = None,
        verbosity: str | None = None,
        max_chars_total: int | None = None,
        extra_body: dict[str, Any] | None = None,
    ) -> SearchResponse:
        """Search the web. Returns ranked results with provenance handles.

        Raises:
            APITimeoutError: The request timed out.
            APIConnectionError: The request could not be completed.
            APIStatusError: The API answered with a non-success status.
        """
        body = _search_body(
            query,
            mode=mode,
            max_results=max_results,
            objective=objective,
            session_id=session_id,
            verbosity=verbosity,
            max_chars_total=max_chars_total,
            extra_body=extra_body,
        )
        return SearchResponse.model_validate((await self._request("/v1/search", body)).json())

    async def read(
        self,
        target: str | None = None,
        *,
        doc_id: str | None = None,
        url: str | None = None,
        query: str | None = None,
        max_chars: int | None = None,
        start_char: int | None = None,
        include: list[str] | None = None,
        extra_body: dict[str, Any] | None = None,
    ) -> DocumentResponse:
        """Read a document as markdown by doc_id or URL.

        ``target`` may be either identifier: a UUID-shaped value is treated as
        a doc_id, anything else as a URL. Pass ``start_char`` to continue a
        truncated read.

        Raises:
            ValueError: Neither a doc_id nor a URL was provided.
            APITimeoutError: The request timed out.
            APIConnectionError: The request could not be completed.
            APIStatusError: The API answered with a non-success status.
        """
        body = _read_body(
            target,
            doc_id=doc_id,
            url=url,
            query=query,
            max_chars=max_chars,
            start_char=start_char,
            include=include,
            extra_body=extra_body,
        )
        return DocumentResponse.model_validate((await self._request("/v1/document", body)).json())

    async def feedback(
        self,
        event_type: str,
        *,
        search_id: str | None = None,
        doc_id: str | None = None,
        passage_id: str | None = None,
        query: str | None = None,
        rank: int | None = None,
        notes: str | None = None,
        extra_body: dict[str, Any] | None = None,
    ) -> FeedbackResponse:
        """Send a feedback event about a search result or document.

        Raises:
            APITimeoutError: The request timed out.
            APIConnectionError: The request could not be completed.
            APIStatusError: The API answered with a non-success status.
        """
        body = _feedback_body(
            event_type,
            search_id=search_id,
            doc_id=doc_id,
            passage_id=passage_id,
            query=query,
            rank=rank,
            notes=notes,
            extra_body=extra_body,
        )
        return FeedbackResponse.model_validate((await self._request("/v1/feedback", body)).json())

    async def _request(self, path: str, body: dict[str, Any]) -> httpx.Response:
        """POST ``body`` as JSON to ``path`` and return the successful response.

        Retryable statuses are retried up to ``max_retries`` times, sleeping
        (without blocking the event loop) between attempts. Timeouts and other
        transport errors are raised immediately and never retried. At least
        one attempt is always made, so a negative ``max_retries`` behaves
        like ``0``.
        """
        import asyncio

        url = f"{self._base_url}{path}"
        headers = _headers(self._api_key)
        attempt = 0
        while True:
            try:
                response = await self._client.post(url, json=body, headers=headers)
            except httpx.TimeoutException as error:
                raise APITimeoutError(f"request timed out: {error}") from error
            except httpx.HTTPError as error:
                raise APIConnectionError(f"request failed: {error}") from error

            if response.is_success:
                return response
            if response.status_code not in _RETRYABLE_STATUSES or attempt >= self._max_retries:
                raise status_error_from_response(response)
            await asyncio.sleep(_retry_delay(attempt, response.headers.get("Retry-After")))
            attempt += 1

    async def aclose(self) -> None:
        """Close the underlying HTTP client."""
        await self._client.aclose()

    async def __aenter__(self) -> AsyncCaesar:
        return self

    async def __aexit__(
        self,
        exc_type: type[BaseException] | None,
        exc: BaseException | None,
        tb: TracebackType | None,
    ) -> None:
        await self.aclose()


class _AsyncRawResponses:
    """Escape hatch for :class:`AsyncCaesar`: the same methods, returning the raw httpx.Response."""

    def __init__(self, client: AsyncCaesar) -> None:
        self._client = client

    async def search(
        self,
        query: str,
        *,
        mode: str | None = None,
        max_results: int | None = None,
        objective: str | None = None,
        session_id: str | None = None,
        verbosity: str | None = None,
        max_chars_total: int | None = None,
        extra_body: dict[str, Any] | None = None,
    ) -> httpx.Response:
        """Run a search and return the unparsed response."""
        body = _search_body(
            query,
            mode=mode,
            max_results=max_results,
            objective=objective,
            session_id=session_id,
            verbosity=verbosity,
            max_chars_total=max_chars_total,
            extra_body=extra_body,
        )
        return await self._client._request("/v1/search", body)

    async def read(
        self,
        target: str | None = None,
        *,
        doc_id: str | None = None,
        url: str | None = None,
        query: str | None = None,
        max_chars: int | None = None,
        start_char: int | None = None,
        include: list[str] | None = None,
        extra_body: dict[str, Any] | None = None,
    ) -> httpx.Response:
        """Read a document and return the unparsed response."""
        body = _read_body(
            target,
            doc_id=doc_id,
            url=url,
            query=query,
            max_chars=max_chars,
            start_char=start_char,
            include=include,
            extra_body=extra_body,
        )
        return await self._client._request("/v1/document", body)

    async def feedback(
        self,
        event_type: str,
        *,
        search_id: str | None = None,
        doc_id: str | None = None,
        passage_id: str | None = None,
        query: str | None = None,
        rank: int | None = None,
        notes: str | None = None,
        extra_body: dict[str, Any] | None = None,
    ) -> httpx.Response:
        """Send a feedback event and return the unparsed response."""
        body = _feedback_body(
            event_type,
            search_id=search_id,
            doc_id=doc_id,
            passage_id=passage_id,
            query=query,
            rank=rank,
            notes=notes,
            extra_body=extra_body,
        )
        return await self._client._request("/v1/feedback", body)
@@ -0,0 +1,64 @@
1
+ from __future__ import annotations
2
+
3
+ import httpx
4
+
5
+
6
class CaesarError(Exception):
    """Base class for all caesar-search errors.

    Catch this to handle every exception the SDK raises itself.
    """
8
+
9
+
10
class APIConnectionError(CaesarError):
    """The API could not be reached.

    Raised for transport-level failures, i.e. when no HTTP response was
    obtained.
    """
12
+
13
+
14
class APITimeoutError(APIConnectionError):
    """The request timed out.

    A subclass of :class:`APIConnectionError`, so handlers for connection
    failures also catch timeouts.
    """
16
+
17
+
18
class APIStatusError(CaesarError):
    """The API returned a non-2xx response.

    The exception text is ``"<code>: <message>"``. All constructor arguments
    are keyword-only and stored unchanged as attributes of the same name:
    ``status_code``, ``code``, ``message``, ``request_id`` and ``response``.
    """

    def __init__(
        self, *, status_code: int, code: str, message: str, request_id: str | None, response: httpx.Response
    ) -> None:
        super().__init__(f"{code}: {message}")
        self.status_code = status_code
        self.code = code
        self.message = message
        self.request_id = request_id
        self.response = response
30
+
31
+
32
class AuthenticationError(APIStatusError):
    """Missing or invalid API key (HTTP 401/403)."""
34
+
35
+
36
class RateLimitError(APIStatusError):
    """Rate limit exceeded (HTTP 429)."""
38
+
39
+
40
def status_error_from_response(response: httpx.Response) -> APIStatusError:
    """Build the exception that represents a non-success HTTP response.

    ``code`` and ``message`` start as generic values derived from the status
    code and are replaced by ``error.code`` / ``error.message`` from a JSON
    body when those are present and non-empty. If ``error`` is a plain string
    it is used as the message. ``request_id`` is taken from the top-level
    ``request_id`` field whenever the body is a JSON object, independent of
    the shape of ``error``. Bodies that are not JSON, or not a JSON object,
    leave the generic values in place.

    Args:
        response: The response to describe.

    Returns:
        ``AuthenticationError`` for 401/403, ``RateLimitError`` for 429,
        otherwise ``APIStatusError``. The exception is returned, not raised.
    """
    status = response.status_code
    code = f"http_{status}"
    message = f"API request failed with status {status}"
    request_id: str | None = None

    # Only the parse itself is best-effort; field extraction below is
    # type-checked so one malformed field cannot discard the others.
    try:
        payload = response.json()
    except Exception:  # noqa: BLE001 - non-JSON error bodies fall back to defaults
        payload = None

    if isinstance(payload, dict):
        error = payload.get("error")
        if isinstance(error, dict):
            code = error.get("code") or code
            message = error.get("message") or message
        elif isinstance(error, str) and error:
            message = error
        request_id = payload.get("request_id")

    if status in (401, 403):
        error_class: type[APIStatusError] = AuthenticationError
    elif status == 429:
        error_class = RateLimitError
    else:
        error_class = APIStatusError
    return error_class(
        status_code=status,
        code=code,
        message=message,
        request_id=request_id,
        response=response,
    )
@@ -0,0 +1 @@
1
+ __version__ = "0.1.0"
@@ -0,0 +1,57 @@
1
+ """Generated pydantic models for the Caesar public API contract.
2
+
3
+ Everything here is generated from spec/openapi-public.json by
4
+ datamodel-code-generator — do not hand-edit `_models.py`.
5
+ """
6
+
7
+ from ._models import (
8
+ Access,
9
+ CaptureHistoryEntry,
10
+ ContentRange,
11
+ Document,
12
+ DocumentContent,
13
+ DocumentContentRequest,
14
+ DocumentProvenance,
15
+ DocumentResponse,
16
+ ErrorBody,
17
+ ErrorEnvelope,
18
+ FeedbackAgentContext,
19
+ FeedbackResponse,
20
+ Passage,
21
+ Ranking,
22
+ RateLimit,
23
+ ResponseBudget,
24
+ ResponseShape,
25
+ SearchResponse,
26
+ SearchResult,
27
+ SearchResultMetadata,
28
+ SearchScore,
29
+ Usage,
30
+ Warning,
31
+ )
32
+
33
+ __all__ = [
34
+ "Access",
35
+ "CaptureHistoryEntry",
36
+ "ContentRange",
37
+ "Document",
38
+ "DocumentContent",
39
+ "DocumentContentRequest",
40
+ "DocumentProvenance",
41
+ "DocumentResponse",
42
+ "ErrorBody",
43
+ "ErrorEnvelope",
44
+ "FeedbackAgentContext",
45
+ "FeedbackResponse",
46
+ "Passage",
47
+ "Ranking",
48
+ "RateLimit",
49
+ "ResponseBudget",
50
+ "ResponseShape",
51
+ "SearchResponse",
52
+ "SearchResult",
53
+ "SearchResultMetadata",
54
+ "SearchScore",
55
+ "Usage",
56
+ "Warning",
57
+ ]
@@ -0,0 +1,408 @@
1
+ # generated by datamodel-codegen:
2
+ # filename: openapi-public.json
3
+
4
+ from __future__ import annotations
5
+
6
+ from enum import Enum
7
+ from typing import Any
8
+
9
+ from pydantic import AnyUrl, BaseModel, ConfigDict, Field
10
+
11
+
12
+ class CaptureHistoryEntry(BaseModel):
13
+ model_config = ConfigDict(
14
+ extra="allow",
15
+ )
16
+ capture_id: str
17
+ capture_time: str
18
+ content_digest: str
19
+ content_format: str | None = None
20
+
21
+
22
+ class ContentRange(BaseModel):
23
+ model_config = ConfigDict(
24
+ extra="allow",
25
+ )
26
+ capture_id: str | None = None
27
+ """
28
+ Optional capture pin; a stale_range warning is returned when the latest capture differs.
29
+ """
30
+ max_chars: int | None = Field(None, ge=1)
31
+ """
32
+ Maximum characters for this range; overrides content.max_chars.
33
+ """
34
+ start_char: int | None = Field(None, ge=0)
35
+ """
36
+ Character offset to start content from.
37
+ """
38
+
39
+
40
+ class Document(BaseModel):
41
+ model_config = ConfigDict(
42
+ extra="allow",
43
+ )
44
+ canonical_url: str
45
+ content_digest: str | None = None
46
+ doc_id: str
47
+ first_seen_at: str
48
+ headings: list[str] | None = None
49
+ last_seen_at: str
50
+ latest_capture_id: str | None = None
51
+ meta_description: str | None = None
52
+ published_at: str | None = None
53
+ source_url: str
54
+ title: str | None = None
55
+
56
+
57
+ class DocumentContent(BaseModel):
58
+ model_config = ConfigDict(
59
+ extra="allow",
60
+ )
61
+ char_count: int
62
+ format: str
63
+ selection: str
64
+ start_char: int | None = None
65
+ text: str
66
+ truncated: bool
67
+
68
+
69
+ class Format(Enum):
70
+ """
71
+ Returned content format.
72
+ """
73
+
74
+ text = "text"
75
+ markdown = "markdown"
76
+
77
+
78
+ class Selection(Enum):
79
+ """
80
+ Content selection strategy.
81
+ """
82
+
83
+ none = "none"
84
+ query_relevant = "query_relevant"
85
+ top_passages = "top_passages"
86
+ passage_ids = "passage_ids"
87
+ full_document = "full_document"
88
+
89
+
90
+ class DocumentContentRequest(BaseModel):
91
+ model_config = ConfigDict(
92
+ extra="allow",
93
+ )
94
+ format: Format | None = "markdown"
95
+ """
96
+ Returned content format.
97
+ """
98
+ include_offsets: bool | None = None
99
+ """
100
+ Whether passage offsets should be included when available.
101
+ """
102
+ max_chars: int | None = Field(12000, ge=1)
103
+ """
104
+ Maximum content.text characters to return.
105
+ """
106
+ passage_ids: list[str] | None = None
107
+ """
108
+ Passage IDs to return when selection is passage_ids.
109
+ """
110
+ range: ContentRange | None = None
111
+ """
112
+ Continuation read: return content starting at a character offset of the same document.
113
+ """
114
+ selection: Selection | None = "query_relevant"
115
+ """
116
+ Content selection strategy.
117
+ """
118
+
119
+
120
+ class DocumentProvenance(BaseModel):
121
+ model_config = ConfigDict(
122
+ extra="allow",
123
+ )
124
+ capture_id: str
125
+ capture_time: str
126
+
127
+
128
+ class ErrorBody(BaseModel):
129
+ model_config = ConfigDict(
130
+ extra="allow",
131
+ )
132
+ code: str
133
+ """
134
+ Stable machine-readable error code.
135
+ """
136
+ details: dict[str, Any] | None = None
137
+ """
138
+ Optional structured error details.
139
+ """
140
+ message: str
141
+ """
142
+ Human-readable error message.
143
+ """
144
+
145
+
146
+ class Type(Enum):
147
+ """
148
+ Envelope discriminator.
149
+ """
150
+
151
+ error = "error"
152
+
153
+
154
+ class ErrorEnvelope(BaseModel):
155
+ model_config = ConfigDict(
156
+ extra="allow",
157
+ )
158
+ field_schema: AnyUrl | None = Field(
159
+ None,
160
+ alias="$schema",
161
+ examples=[
162
+ "https://search-api-staging-779189860552.europe-west1.run.app/ErrorEnvelope.json"
163
+ ],
164
+ )
165
+ """
166
+ A URL to the JSON Schema for this object.
167
+ """
168
+ error: ErrorBody
169
+ """
170
+ Error details.
171
+ """
172
+ request_id: str
173
+ """
174
+ Server request identifier.
175
+ """
176
+ type: Type
177
+ """
178
+ Envelope discriminator.
179
+ """
180
+
181
+
182
+ class FeedbackAgentContext(BaseModel):
183
+ model_config = ConfigDict(
184
+ extra="allow",
185
+ )
186
+ client_model: str | None = None
187
+ """
188
+ Calling model identifier.
189
+ """
190
+ task_type: str | None = None
191
+ """
192
+ Agent task type or evaluation bucket.
193
+ """
194
+
195
+
196
+ class Passage(BaseModel):
197
+ model_config = ConfigDict(
198
+ extra="allow",
199
+ )
200
+ char_end: int | None = None
201
+ char_start: int | None = None
202
+ doc_id: str
203
+ ordinal: int
204
+ passage_id: str
205
+ section_heading: str | None = None
206
+ section_path: list[str] | None = None
207
+ text: str
208
+
209
+
210
+ class Ranking(BaseModel):
211
+ model_config = ConfigDict(
212
+ extra="allow",
213
+ )
214
+ mode: str
215
+ ranker_version: str
216
+ score_scope: str
217
+
218
+
219
+ class RateLimit(BaseModel):
220
+ model_config = ConfigDict(
221
+ extra="allow",
222
+ )
223
+ limit_rps: int
224
+ remaining: int
225
+ reset_at: str
226
+
227
+
228
+ class OnExceed(Enum):
229
+ """
230
+ What to do when the budget binds: shed payload in the documented order, or fail with response_too_large.
231
+ """
232
+
233
+ shed = "shed"
234
+ error = "error"
235
+
236
+
237
+ class ResponseBudget(BaseModel):
238
+ model_config = ConfigDict(
239
+ extra="allow",
240
+ )
241
+ max_chars_total: int | None = Field(None, ge=1)
242
+ """
243
+ Maximum serialized response size in characters. Roughly 4 characters per token.
244
+ """
245
+ on_exceed: OnExceed | None = "shed"
246
+ """
247
+ What to do when the budget binds: shed payload in the documented order, or fail with response_too_large.
248
+ """
249
+
250
+
251
+ class Verbosity(Enum):
252
+ """
253
+ Field preset: ids_only (rank, doc_id, url, title), compact (adds snippet, score, key dates), standard (today's default), full (adds provenance).
254
+ """
255
+
256
+ ids_only = "ids_only"
257
+ compact = "compact"
258
+ standard = "standard"
259
+ full = "full"
260
+
261
+
262
+ class ResponseShape(BaseModel):
263
+ model_config = ConfigDict(
264
+ extra="allow",
265
+ )
266
+ budget: ResponseBudget | None = None
267
+ """
268
+ Total serialized response budget in characters with deterministic shedding.
269
+ """
270
+ verbosity: Verbosity | None = "standard"
271
+ """
272
+ Field preset: ids_only (rank, doc_id, url, title), compact (adds snippet, score, key dates), standard (today's default), full (adds provenance).
273
+ """
274
+
275
+
276
+ class SearchResultMetadata(BaseModel):
277
+ model_config = ConfigDict(
278
+ extra="allow",
279
+ )
280
+ content_digest: str | None = None
281
+ extracted_at: str | None = None
282
+ first_seen_at: str | None = None
283
+ last_crawled_at: str | None = None
284
+ last_seen_at: str | None = None
285
+ published_at: str | None = None
286
+
287
+
288
+ class SearchScore(BaseModel):
289
+ model_config = ConfigDict(
290
+ extra="allow",
291
+ )
292
+ value: float
293
+
294
+
295
+ class Usage(BaseModel):
296
+ model_config = ConfigDict(
297
+ extra="allow",
298
+ )
299
+ approx_tokens: int
300
+ bytes_returned: int
301
+ requests: int
302
+
303
+
304
+ class Warning(BaseModel):
305
+ model_config = ConfigDict(
306
+ extra="allow",
307
+ )
308
+ code: str
309
+ details: dict[str, Any] | None = None
310
+ message: str
311
+
312
+
313
+ class Access(BaseModel):
314
+ model_config = ConfigDict(
315
+ extra="allow",
316
+ )
317
+ rate_limit: RateLimit
318
+ tier: str
319
+
320
+
321
+ class DocumentResponse(BaseModel):
322
+ model_config = ConfigDict(
323
+ extra="allow",
324
+ )
325
+ field_schema: AnyUrl | None = Field(
326
+ None,
327
+ alias="$schema",
328
+ examples=[
329
+ "https://search-api-staging-779189860552.europe-west1.run.app/DocumentResponse.json"
330
+ ],
331
+ )
332
+ """
333
+ A URL to the JSON Schema for this object.
334
+ """
335
+ access: Access
336
+ capture_history: list[CaptureHistoryEntry] | None = None
337
+ content: DocumentContent | None = None
338
+ doc: Document
339
+ passages: list[Passage] | None = None
340
+ provenance: DocumentProvenance | None = None
341
+ request_id: str
342
+ session_id: str
343
+ usage: Usage | None = None
344
+ warnings: list[Warning] | None = None
345
+
346
+
347
+ class FeedbackResponse(BaseModel):
348
+ model_config = ConfigDict(
349
+ extra="allow",
350
+ )
351
+ field_schema: AnyUrl | None = Field(
352
+ None,
353
+ alias="$schema",
354
+ examples=[
355
+ "https://search-api-staging-779189860552.europe-west1.run.app/FeedbackResponse.json"
356
+ ],
357
+ )
358
+ """
359
+ A URL to the JSON Schema for this object.
360
+ """
361
+ accepted: bool
362
+ access: Access
363
+ feedback_id: str
364
+ request_id: str
365
+ session_id: str
366
+ usage: Usage | None = None
367
+
368
+
369
+ class SearchResult(BaseModel):
370
+ model_config = ConfigDict(
371
+ extra="allow",
372
+ )
373
+ canonical_url: str
374
+ description: str | None = None
375
+ doc_id: str
376
+ metadata: SearchResultMetadata | None = None
377
+ passages: list[Passage] | None = None
378
+ provenance: DocumentProvenance | None = None
379
+ rank: int
380
+ score: SearchScore | None = None
381
+ snippet: str | None = None
382
+ source_url: str | None = None
383
+ title: str | None = None
384
+
385
+
386
+ class SearchResponse(BaseModel):
387
+ model_config = ConfigDict(
388
+ extra="allow",
389
+ )
390
+ field_schema: AnyUrl | None = Field(
391
+ None,
392
+ alias="$schema",
393
+ examples=[
394
+ "https://search-api-staging-779189860552.europe-west1.run.app/SearchResponse.json"
395
+ ],
396
+ )
397
+ """
398
+ A URL to the JSON Schema for this object.
399
+ """
400
+ access: Access | None = None
401
+ ranking: Ranking | None = None
402
+ request_id: str
403
+ results: list[SearchResult] | None
404
+ search_id: str
405
+ session_id: str
406
+ truncated: bool | None = None
407
+ usage: Usage | None = None
408
+ warnings: list[Warning] | None = None
caesar_search/py.typed ADDED
File without changes
@@ -0,0 +1,57 @@
1
+ Metadata-Version: 2.4
2
+ Name: caesar-search
3
+ Version: 0.1.0
4
+ Summary: Official Python SDK for the Caesar search API — web search with provenance, built for agents.
5
+ Project-URL: Homepage, https://github.com/caesar-data/caesar-search-python
6
+ Project-URL: Repository, https://github.com/caesar-data/caesar-search-python
7
+ Project-URL: Issues, https://github.com/caesar-data/caesar-search-python/issues
8
+ Project-URL: Changelog, https://github.com/caesar-data/caesar-search-python/releases
9
+ Author: Caesar
10
+ License-Expression: MIT
11
+ License-File: LICENSE
12
+ Keywords: agents,caesar,search,web-search
13
+ Classifier: Development Status :: 4 - Beta
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3.10
17
+ Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Classifier: Programming Language :: Python :: 3.13
20
+ Classifier: Typing :: Typed
21
+ Requires-Python: >=3.10
22
+ Requires-Dist: httpx>=0.27
23
+ Requires-Dist: pydantic>=2.7
24
+ Description-Content-Type: text/markdown
25
+
26
+ # caesar-search (Python)
27
+
28
+ Official Python SDK for the Caesar search API — web search with provenance, built for agents.
29
+
30
+ ## Quickstart
31
+
32
+ ```python
33
+ # pip install caesar-search (or: uv add caesar-search)
34
+ from caesar_search import Caesar
35
+
36
+ client = Caesar() # reads CAESAR_API_KEY; anonymous tier works without a key
37
+ results = client.search("rust async runtime comparison", max_results=5)
38
+ doc = client.read(results.results[0].doc_id, query="which runtime is fastest")
39
+ client.feedback("result_helpful", search_id=results.search_id, doc_id=doc.doc.doc_id)
40
+ ```
41
+
42
+ ## Clients
43
+
44
+ - `Caesar` — synchronous; `AsyncCaesar` — same surface with `async`/`await`. Both support context managers.
45
+ - Methods: `search()`, `read()` (doc_id **or** URL; `start_char=` continues truncated reads), `feedback()`.
46
+ - Responses are typed pydantic v2 models generated from the public OpenAPI spec; provenance fields (`doc_id`, `search_id`, `capture_id`, canonical/source URLs, crawl dates) are preserved verbatim.
47
+ - `client.with_raw_response.search(...)` returns the raw `httpx.Response`.
48
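+ - Retries: HTTP 429, 500, 502, 503 and 504 are retried with capped exponential backoff honoring `Retry-After` (`max_retries=` to tune, `0` to disable). Timeouts and connection errors are raised immediately and are not retried.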
49
+ - Config: `api_key=` / `CAESAR_API_KEY`; `base_url=` / `CAESAR_BASE_URL`.
50
+
51
+ ## Errors
52
+
53
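+ All errors derive from `CaesarError`. Transport failures raise `APIConnectionError`, or its subclass `APITimeoutError` when the request timed out. Non-2xx responses raise `APIStatusError` (with `.status_code`, `.code`, `.message`, `.request_id`, `.response`), or its subclasses `AuthenticationError` (401/403) and `RateLimitError` (429).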
54
+
55
+ ## License
56
+
57
+ [MIT](LICENSE)
@@ -0,0 +1,11 @@
1
+ caesar_search/__init__.py,sha256=0bClWbimpuV1ll8UqHXs4lOsk45Rx9TXNU1OWEs3nBU,724
2
+ caesar_search/_client.py,sha256=9Z4Nu8VsoKOQ-bRC-8l-dew_j8mcl2aP8kk9z89MgUU,15727
3
+ caesar_search/_exceptions.py,sha256=R6GcXNnvWd0WJ992sx9gxeGVMijsIqwYZtbr7Q1mXiU,1816
4
+ caesar_search/_version.py,sha256=kUR5RAFc7HCeiqdlX36dZOHkUI5wI6V_43RpEcD8b-0,22
5
+ caesar_search/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
+ caesar_search/models/__init__.py,sha256=Oo1kVKi4VAEq1ERk5zjK4M-uyWmmo0TE7og0F3iARjE,1141
7
+ caesar_search/models/_models.py,sha256=F-n3d0Q1MUqOcAlCRIQm0_4icgl49g5MRS17UG8N-Lg,9115
8
+ caesar_search-0.1.0.dist-info/METADATA,sha256=lN6TAXUOIaKz_UA-XRjf6DuAO_pq7MnW5g9u8eQdR4s,2563
9
+ caesar_search-0.1.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
10
+ caesar_search-0.1.0.dist-info/licenses/LICENSE,sha256=O9WfHubv0fg_DEnziZs8MtsruiasOY_Ym5VbN168nU0,1063
11
+ caesar_search-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.30.1
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Caesar
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.