knowledge2 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (139) hide show
  1. knowledge2-0.4.0.dist-info/METADATA +556 -0
  2. knowledge2-0.4.0.dist-info/RECORD +139 -0
  3. knowledge2-0.4.0.dist-info/WHEEL +5 -0
  4. knowledge2-0.4.0.dist-info/top_level.txt +1 -0
  5. sdk/__init__.py +70 -0
  6. sdk/_async_base.py +525 -0
  7. sdk/_async_paging.py +57 -0
  8. sdk/_base.py +541 -0
  9. sdk/_logging.py +41 -0
  10. sdk/_paging.py +73 -0
  11. sdk/_preview.py +70 -0
  12. sdk/_raw_response.py +25 -0
  13. sdk/_request_options.py +51 -0
  14. sdk/_transport.py +144 -0
  15. sdk/_validation.py +25 -0
  16. sdk/_validation_response.py +36 -0
  17. sdk/_version.py +3 -0
  18. sdk/async_client.py +320 -0
  19. sdk/async_resources/__init__.py +45 -0
  20. sdk/async_resources/_mixin_base.py +42 -0
  21. sdk/async_resources/a2a.py +230 -0
  22. sdk/async_resources/agents.py +489 -0
  23. sdk/async_resources/audit.py +145 -0
  24. sdk/async_resources/auth.py +133 -0
  25. sdk/async_resources/console.py +409 -0
  26. sdk/async_resources/corpora.py +276 -0
  27. sdk/async_resources/deployments.py +106 -0
  28. sdk/async_resources/documents.py +592 -0
  29. sdk/async_resources/feeds.py +248 -0
  30. sdk/async_resources/indexes.py +208 -0
  31. sdk/async_resources/jobs.py +165 -0
  32. sdk/async_resources/metadata.py +48 -0
  33. sdk/async_resources/models.py +102 -0
  34. sdk/async_resources/onboarding.py +538 -0
  35. sdk/async_resources/orgs.py +37 -0
  36. sdk/async_resources/pipelines.py +523 -0
  37. sdk/async_resources/projects.py +90 -0
  38. sdk/async_resources/search.py +262 -0
  39. sdk/async_resources/training.py +357 -0
  40. sdk/async_resources/usage.py +91 -0
  41. sdk/client.py +417 -0
  42. sdk/config.py +182 -0
  43. sdk/errors.py +178 -0
  44. sdk/examples/auth_factory.py +34 -0
  45. sdk/examples/batch_operations.py +57 -0
  46. sdk/examples/document_upload.py +56 -0
  47. sdk/examples/e2e_lifecycle.py +213 -0
  48. sdk/examples/error_handling.py +61 -0
  49. sdk/examples/pagination.py +64 -0
  50. sdk/examples/quickstart.py +36 -0
  51. sdk/examples/request_options.py +44 -0
  52. sdk/examples/search.py +64 -0
  53. sdk/integrations/__init__.py +57 -0
  54. sdk/integrations/_client.py +101 -0
  55. sdk/integrations/langchain/__init__.py +6 -0
  56. sdk/integrations/langchain/retriever.py +166 -0
  57. sdk/integrations/langchain/tools.py +108 -0
  58. sdk/integrations/llamaindex/__init__.py +11 -0
  59. sdk/integrations/llamaindex/filters.py +78 -0
  60. sdk/integrations/llamaindex/retriever.py +162 -0
  61. sdk/integrations/llamaindex/tools.py +109 -0
  62. sdk/integrations/llamaindex/vector_store.py +320 -0
  63. sdk/models/__init__.py +18 -0
  64. sdk/models/_base.py +24 -0
  65. sdk/models/_registry.py +457 -0
  66. sdk/models/a2a.py +92 -0
  67. sdk/models/agents.py +109 -0
  68. sdk/models/audit.py +28 -0
  69. sdk/models/auth.py +49 -0
  70. sdk/models/chunks.py +20 -0
  71. sdk/models/common.py +14 -0
  72. sdk/models/console.py +103 -0
  73. sdk/models/corpora.py +48 -0
  74. sdk/models/deployments.py +13 -0
  75. sdk/models/documents.py +126 -0
  76. sdk/models/embeddings.py +24 -0
  77. sdk/models/evaluation.py +17 -0
  78. sdk/models/feedback.py +9 -0
  79. sdk/models/feeds.py +57 -0
  80. sdk/models/indexes.py +36 -0
  81. sdk/models/jobs.py +52 -0
  82. sdk/models/models.py +26 -0
  83. sdk/models/onboarding.py +323 -0
  84. sdk/models/orgs.py +11 -0
  85. sdk/models/pipelines.py +147 -0
  86. sdk/models/projects.py +19 -0
  87. sdk/models/search.py +149 -0
  88. sdk/models/training.py +57 -0
  89. sdk/models/usage.py +39 -0
  90. sdk/namespaces.py +386 -0
  91. sdk/py.typed +0 -0
  92. sdk/resources/__init__.py +45 -0
  93. sdk/resources/_mixin_base.py +40 -0
  94. sdk/resources/a2a.py +230 -0
  95. sdk/resources/agents.py +487 -0
  96. sdk/resources/audit.py +144 -0
  97. sdk/resources/auth.py +138 -0
  98. sdk/resources/console.py +411 -0
  99. sdk/resources/corpora.py +269 -0
  100. sdk/resources/deployments.py +105 -0
  101. sdk/resources/documents.py +597 -0
  102. sdk/resources/feeds.py +246 -0
  103. sdk/resources/indexes.py +210 -0
  104. sdk/resources/jobs.py +164 -0
  105. sdk/resources/metadata.py +53 -0
  106. sdk/resources/models.py +99 -0
  107. sdk/resources/onboarding.py +542 -0
  108. sdk/resources/orgs.py +35 -0
  109. sdk/resources/pipeline_builder.py +257 -0
  110. sdk/resources/pipelines.py +520 -0
  111. sdk/resources/projects.py +87 -0
  112. sdk/resources/search.py +277 -0
  113. sdk/resources/training.py +358 -0
  114. sdk/resources/usage.py +92 -0
  115. sdk/types/__init__.py +366 -0
  116. sdk/types/a2a.py +88 -0
  117. sdk/types/agents.py +133 -0
  118. sdk/types/audit.py +26 -0
  119. sdk/types/auth.py +45 -0
  120. sdk/types/chunks.py +18 -0
  121. sdk/types/common.py +10 -0
  122. sdk/types/console.py +99 -0
  123. sdk/types/corpora.py +42 -0
  124. sdk/types/deployments.py +11 -0
  125. sdk/types/documents.py +104 -0
  126. sdk/types/embeddings.py +22 -0
  127. sdk/types/evaluation.py +15 -0
  128. sdk/types/feedback.py +7 -0
  129. sdk/types/feeds.py +61 -0
  130. sdk/types/indexes.py +30 -0
  131. sdk/types/jobs.py +50 -0
  132. sdk/types/models.py +22 -0
  133. sdk/types/onboarding.py +395 -0
  134. sdk/types/orgs.py +9 -0
  135. sdk/types/pipelines.py +177 -0
  136. sdk/types/projects.py +14 -0
  137. sdk/types/search.py +116 -0
  138. sdk/types/training.py +55 -0
  139. sdk/types/usage.py +37 -0
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (79.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1 @@
1
+ sdk
sdk/__init__.py ADDED
@@ -0,0 +1,70 @@
1
+ """Knowledge2 Python SDK."""
2
+
3
+ from sdk._version import __version__
4
+
5
+ from ._async_paging import AsyncPager
6
+ from ._base import ClientLimits, ClientTimeouts
7
+ from ._logging import set_debug
8
+ from ._paging import Page, SyncPager
9
+ from ._raw_response import RawResponse
10
+ from ._request_options import RequestOptions
11
+ from .async_client import AsyncKnowledge2
12
+ from .client import Knowledge2, Knowledge2Validated
13
+ from .errors import (
14
+ APIConnectionError,
15
+ APIError,
16
+ APITimeoutError,
17
+ AuthenticationError,
18
+ BadRequestError,
19
+ ConfirmationRequiredError,
20
+ ConflictError,
21
+ Knowledge2Error,
22
+ NotFoundError,
23
+ PermissionDeniedError,
24
+ RateLimitError,
25
+ ServerError,
26
+ ValidationError,
27
+ )
28
+ from .resources.pipeline_builder import PipelineBuilder
29
+
30
+ # K2Config is lazily imported to keep pydantic-settings optional.
31
+ # Users who don't need K2Config never import pydantic-settings.
32
+
33
+
34
def __getattr__(name: str):
    """Resolve lazily exported package attributes (module-level ``__getattr__``).

    ``K2Config`` is the only name handled here. Its module is imported on
    first access rather than at package import time, so the dependency of
    ``.config`` is only loaded for users who actually ask for ``K2Config``.

    Args:
        name: Attribute name that normal module lookup failed to find.

    Returns:
        The ``K2Config`` class when ``name == "K2Config"``.

    Raises:
        AttributeError: For every other name.
    """
    if name != "K2Config":
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")

    # Deferred import: only executed when K2Config is actually requested.
    from .config import K2Config

    return K2Config
40
+
41
+
42
# Public API of the package. Every name is either imported at the top of this
# module or (for "K2Config") resolved lazily by the module-level __getattr__.
# Kept in plain sorted order so additions are easy to review.
__all__ = [
    "APIConnectionError",
    "APIError",
    "APITimeoutError",
    "AsyncKnowledge2",
    "AsyncPager",
    "AuthenticationError",
    "BadRequestError",
    "ClientLimits",
    "ClientTimeouts",
    "ConfirmationRequiredError",
    "ConflictError",
    "K2Config",
    "Knowledge2",
    "Knowledge2Error",
    "Knowledge2Validated",
    "NotFoundError",
    "Page",
    "PermissionDeniedError",
    "PipelineBuilder",
    "RateLimitError",
    "RawResponse",
    "RequestOptions",
    "ServerError",
    "SyncPager",
    "ValidationError",
    "__version__",
    "set_debug",
]
sdk/_async_base.py ADDED
@@ -0,0 +1,525 @@
1
+ """Async base HTTP client for the Knowledge2 SDK.
2
+
3
+ Provides :class:`AsyncBaseClient` which handles HTTP transport, automatic
4
+ retries with exponential backoff, error classification, pagination,
5
+ and debug logging — mirroring :class:`BaseClient` for async contexts.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import asyncio
11
+ import contextvars
12
+ import time
13
+ from collections.abc import Callable
14
+ from typing import TYPE_CHECKING, Any, cast
15
+
16
+ import httpx
17
+
18
+ from sdk._async_paging import AsyncPager
19
+ from sdk._base import ClientLimits, ClientTimeouts
20
+ from sdk._logging import _redact_headers, logger
21
+ from sdk._paging import Page
22
+ from sdk._raw_response import RawResponse
23
+ from sdk._transport import build_auth_headers, calculate_backoff, error_from_response
24
+ from sdk._validation_response import maybe_validate
25
+ from sdk._version import __version__
26
+ from sdk.errors import (
27
+ APIConnectionError,
28
+ APIError,
29
+ APITimeoutError,
30
+ Knowledge2Error,
31
+ RateLimitError,
32
+ )
33
+
34
+ if TYPE_CHECKING:
35
+ from sdk._request_options import RequestOptions
36
+
37
+ try: # Python 3.11+
38
+ from typing import Self
39
+ except ImportError: # pragma: no cover - Python < 3.11
40
+ from typing_extensions import Self
41
+
42
+
43
+ class AsyncBaseClient:
44
+ @staticmethod
45
+ def _normalize_base_url(base_url: str) -> str:
46
+ """Normalize and validate base URL input before constructing httpx.AsyncClient."""
47
+ normalized = base_url.strip().rstrip("/")
48
+ if not normalized:
49
+ raise ValueError("api_host must not be empty")
50
+
51
+ for idx, char in enumerate(normalized):
52
+ if ord(char) < 32 or ord(char) == 127:
53
+ escaped = repr(char).strip("'")
54
+ raise ValueError(
55
+ f"api_host contains invalid control character {escaped} at position {idx}"
56
+ )
57
+ return normalized
58
+
59
+ def __init__(
60
+ self,
61
+ base_url: str,
62
+ api_key: str | None,
63
+ *,
64
+ bearer_token: str | None = None,
65
+ bearer_token_factory: Callable[[], str] | None = None,
66
+ token_cache_ttl: float = 300.0,
67
+ admin_token: str | None = None,
68
+ headers: dict[str, str] | None = None,
69
+ user_agent: str | None = None,
70
+ timeout: float | ClientTimeouts | httpx.Timeout | None = None,
71
+ limits: ClientLimits | None = None,
72
+ max_retries: int = 2,
73
+ validate_responses: bool = False,
74
+ http_client: httpx.AsyncClient | None = None,
75
+ ) -> None:
76
+ if bearer_token and bearer_token_factory:
77
+ raise ValueError("Cannot specify both 'bearer_token' and 'bearer_token_factory'")
78
+
79
+ self.base_url = self._normalize_base_url(base_url)
80
+ self.api_key = api_key
81
+ self.bearer_token = bearer_token
82
+ self.admin_token = admin_token
83
+ self._bearer_token_factory = bearer_token_factory
84
+ self._token_cache_ttl = token_cache_ttl
85
+ self._cached_token: str | None = None
86
+ self._token_expires_at: float = 0.0
87
+ self._token_lock = asyncio.Lock()
88
+ self._default_headers = dict(headers or {})
89
+ self._user_agent = user_agent or f"k2-python-sdk/{__version__}"
90
+ self._max_retries = max_retries
91
+ self._backoff_factor = 0.5
92
+ self._backoff_max = 8.0
93
+ self._validate_responses = validate_responses
94
+ self._raw_response_flag: contextvars.ContextVar[bool] = contextvars.ContextVar(
95
+ "raw_response_flag", default=False
96
+ )
97
+
98
+ if http_client is not None:
99
+ # Caller-supplied client — SDK does NOT own it.
100
+ self._client = http_client
101
+ self._owns_http_client = False
102
+ if timeout is not None or limits is not None:
103
+ logger.warning(
104
+ "When a caller-supplied http_client is provided, the SDK-level "
105
+ "'timeout' and 'limits' parameters are ignored. Configure these "
106
+ "settings on your httpx.Client instance directly."
107
+ )
108
+ else:
109
+ # SDK constructs and owns the client.
110
+ self._owns_http_client = True
111
+
112
+ # Resolve ClientTimeouts -> httpx.Timeout
113
+ if isinstance(timeout, ClientTimeouts):
114
+ resolved_timeout: float | httpx.Timeout | None = httpx.Timeout(
115
+ connect=timeout.connect,
116
+ read=timeout.read,
117
+ write=timeout.write,
118
+ pool=timeout.pool,
119
+ )
120
+ else:
121
+ resolved_timeout = timeout
122
+
123
+ # Build httpx.AsyncClient with optional limits
124
+ client_kwargs: dict[str, Any] = {
125
+ "base_url": self.base_url,
126
+ "timeout": resolved_timeout,
127
+ }
128
+ if limits is not None:
129
+ client_kwargs["limits"] = httpx.Limits(
130
+ max_connections=limits.max_connections,
131
+ max_keepalive_connections=limits.max_keepalive_connections,
132
+ keepalive_expiry=limits.keepalive_expiry,
133
+ )
134
+
135
+ self._client = httpx.AsyncClient(**client_kwargs)
136
+
137
+ async def __aenter__(self) -> Self:
138
+ return self
139
+
140
+ async def __aexit__(self, exc_type: Any, exc: Any, tb: Any) -> None:
141
+ await self.close()
142
+
143
+ async def close(self) -> None:
144
+ """Close the underlying async HTTP client.
145
+
146
+ If the ``httpx.AsyncClient`` was supplied by the caller, this
147
+ method is a no-op — the caller retains ownership.
148
+ """
149
+ if self._owns_http_client:
150
+ await self._client.aclose()
151
+
152
+ # ------------------------------------------------------------------
153
+ # Response validation
154
+ # ------------------------------------------------------------------
155
+
156
+ def _maybe_validate(self, data: Any, model_name: str) -> Any:
157
+ """Validate response data through its Pydantic model if validation is enabled."""
158
+ return maybe_validate(
159
+ data, model_name, validate=self._validate_responses, raw_response_cls=RawResponse
160
+ )
161
+
162
+ # ------------------------------------------------------------------
163
+ # Token factory helpers
164
+ # ------------------------------------------------------------------
165
+
166
+ async def _resolve_bearer_token(self) -> str | None:
167
+ """Return the current bearer token, calling the factory if needed."""
168
+ if self.bearer_token:
169
+ return self.bearer_token
170
+ if self._bearer_token_factory is None:
171
+ return None
172
+
173
+ now = time.monotonic()
174
+ if self._cached_token is not None and now < self._token_expires_at:
175
+ return self._cached_token
176
+
177
+ async with self._token_lock:
178
+ # Double-check after acquiring lock
179
+ now = time.monotonic()
180
+ if self._cached_token is not None and now < self._token_expires_at:
181
+ return self._cached_token
182
+ # Factory might be sync — run in a thread to avoid blocking the loop
183
+ token = await asyncio.to_thread(self._bearer_token_factory)
184
+ self._cached_token = token
185
+ if self._token_cache_ttl > 0:
186
+ self._token_expires_at = now + self._token_cache_ttl
187
+ else:
188
+ # TTL=0 means no caching — expire immediately
189
+ self._token_expires_at = 0.0
190
+ return token
191
+
192
+ async def _clear_token_cache(self) -> None:
193
+ """Clear the cached bearer token (e.g. after a 401)."""
194
+ async with self._token_lock:
195
+ self._cached_token = None
196
+ self._token_expires_at = 0.0
197
+
198
+ # ------------------------------------------------------------------
199
+ # Header helpers
200
+ # ------------------------------------------------------------------
201
+
202
+ async def _headers(self, extra: dict[str, str] | None = None) -> dict[str, str]:
203
+ resolved_token = await self._resolve_bearer_token()
204
+ return build_auth_headers(
205
+ api_key=self.api_key,
206
+ bearer_token=resolved_token,
207
+ admin_token=self.admin_token,
208
+ user_agent=self._user_agent,
209
+ default_headers=self._default_headers,
210
+ extra=extra,
211
+ )
212
+
213
+ @staticmethod
214
+ def _idempotency_headers(idempotency_key: str | None) -> dict[str, str]:
215
+ if not idempotency_key:
216
+ return {}
217
+ return {"Idempotency-Key": idempotency_key}
218
+
219
+ # ------------------------------------------------------------------
220
+ # Retry helpers
221
+ # ------------------------------------------------------------------
222
+
223
+ def _backoff_delay(self, attempt: int, error: Knowledge2Error | None = None) -> float:
224
+ """Calculate backoff delay with jitter for retry attempt *attempt*."""
225
+ return calculate_backoff(
226
+ attempt,
227
+ error,
228
+ backoff_factor=self._backoff_factor,
229
+ backoff_max=self._backoff_max,
230
+ )
231
+
232
+ # ------------------------------------------------------------------
233
+ # Core request with retry
234
+ # ------------------------------------------------------------------
235
+
236
+ async def _request(
237
+ self,
238
+ method: str,
239
+ path: str,
240
+ *,
241
+ headers: dict[str, str] | None = None,
242
+ request_options: RequestOptions | None = None,
243
+ **kwargs: Any,
244
+ ) -> Any:
245
+ """Send an HTTP request with automatic retry on transient failures.
246
+
247
+ Args:
248
+ method: HTTP method.
249
+ path: API path.
250
+ headers: Extra headers for this request.
251
+ request_options: Per-call overrides for timeout, retries,
252
+ and passthrough headers.
253
+ **kwargs: Forwarded to ``httpx.AsyncClient.request()``.
254
+ """
255
+ # Resolve per-call overrides from RequestOptions
256
+ effective_retries = self._max_retries
257
+ if request_options is not None:
258
+ if request_options.max_retries is not None:
259
+ effective_retries = request_options.max_retries
260
+ if request_options.timeout is not None:
261
+ ct = request_options.timeout
262
+ kwargs["timeout"] = httpx.Timeout(
263
+ connect=ct.connect,
264
+ read=ct.read,
265
+ write=ct.write,
266
+ pool=ct.pool,
267
+ )
268
+ if request_options.passthrough_headers:
269
+ headers = {**(headers or {}), **request_options.passthrough_headers}
270
+
271
+ last_error: Knowledge2Error | None = None
272
+ return_raw = self._raw_response_flag.get(False)
273
+
274
+ for attempt in range(1 + effective_retries):
275
+ merged_headers = await self._headers(headers)
276
+ try:
277
+ logger.debug(
278
+ "%s %s (attempt %d/%d) headers=%s",
279
+ method,
280
+ path,
281
+ attempt + 1,
282
+ 1 + effective_retries,
283
+ _redact_headers(merged_headers),
284
+ )
285
+ response = await self._client.request(
286
+ method, path, headers=merged_headers, **kwargs
287
+ )
288
+ except asyncio.CancelledError:
289
+ raise
290
+ except httpx.ConnectError as exc:
291
+ last_error = APIConnectionError(f"Connection error: {exc}")
292
+ last_error.__cause__ = exc
293
+ if attempt < effective_retries:
294
+ delay = self._backoff_delay(attempt)
295
+ logger.debug(
296
+ "Retry %d/%d after %.2fs (connection error)",
297
+ attempt + 1,
298
+ effective_retries,
299
+ delay,
300
+ )
301
+ await asyncio.sleep(delay)
302
+ continue
303
+ raise last_error from exc
304
+ except httpx.TimeoutException as exc:
305
+ last_error = APITimeoutError(f"Request timed out: {exc}")
306
+ last_error.__cause__ = exc
307
+ if attempt < effective_retries:
308
+ delay = self._backoff_delay(attempt)
309
+ logger.debug(
310
+ "Retry %d/%d after %.2fs (timeout)",
311
+ attempt + 1,
312
+ effective_retries,
313
+ delay,
314
+ )
315
+ await asyncio.sleep(delay)
316
+ continue
317
+ raise last_error from exc
318
+ except httpx.HTTPError as exc:
319
+ last_error = APIConnectionError(f"Transport error: {exc}")
320
+ last_error.__cause__ = exc
321
+ if attempt < effective_retries:
322
+ delay = self._backoff_delay(attempt)
323
+ logger.debug(
324
+ "Retry %d/%d after %.2fs (transport error)",
325
+ attempt + 1,
326
+ effective_retries,
327
+ delay,
328
+ )
329
+ await asyncio.sleep(delay)
330
+ continue
331
+ raise last_error from exc
332
+
333
+ logger.debug(
334
+ "%s %s → %d",
335
+ method,
336
+ path,
337
+ response.status_code,
338
+ )
339
+
340
+ if response.is_error:
341
+ error = self._error_from_response(response)
342
+ # Clear cached factory token on 401 so the next attempt
343
+ # (or next call) fetches a fresh token.
344
+ if response.status_code == 401 and self._bearer_token_factory:
345
+ await self._clear_token_cache()
346
+ if error.retryable and attempt < effective_retries:
347
+ delay = self._backoff_delay(attempt, error)
348
+ logger.debug(
349
+ "Retry %d/%d after %.2fs (status %d)",
350
+ attempt + 1,
351
+ effective_retries,
352
+ delay,
353
+ response.status_code,
354
+ )
355
+ await asyncio.sleep(delay)
356
+ last_error = error
357
+ continue
358
+ if return_raw:
359
+ # Wrap HTTP errors into RawResponse instead of raising
360
+ # so callers can inspect status/headers/body.
361
+ if response.content:
362
+ try:
363
+ error_parsed = response.json()
364
+ except ValueError:
365
+ error_parsed = response.text if response.text else None
366
+ else:
367
+ error_parsed = None
368
+ return RawResponse(
369
+ status_code=response.status_code,
370
+ headers=dict(response.headers),
371
+ parsed=error_parsed,
372
+ )
373
+ raise error
374
+
375
+ # Success
376
+ if response.content:
377
+ try:
378
+ parsed = response.json()
379
+ except ValueError as exc:
380
+ raise APIConnectionError(
381
+ f"Expected JSON response but got {response.headers.get('content-type', 'unknown')}: {exc}"
382
+ ) from exc
383
+ else:
384
+ parsed = None
385
+ if return_raw:
386
+ return RawResponse(
387
+ status_code=response.status_code,
388
+ headers=dict(response.headers),
389
+ parsed=parsed,
390
+ )
391
+ return parsed
392
+
393
+ # All retries exhausted — should not normally reach here because
394
+ # the last iteration raises, but satisfies the type checker.
395
+ if last_error is not None: # pragma: no cover
396
+ raise last_error
397
+ return None # pragma: no cover
398
+
399
+ # ------------------------------------------------------------------
400
+ # Job polling
401
+ # ------------------------------------------------------------------
402
+
403
+ async def _wait_for_job(
404
+ self, job_id: str, *, poll_s: int = 5, timeout_s: float | None = None
405
+ ) -> dict[str, Any]:
406
+ # Temporarily disable raw response mode for internal polling calls
407
+ token = self._raw_response_flag.set(False)
408
+ try:
409
+ return await self._wait_for_job_inner(job_id, poll_s=poll_s, timeout_s=timeout_s)
410
+ finally:
411
+ self._raw_response_flag.reset(token)
412
+
413
+ async def _wait_for_job_inner(
414
+ self, job_id: str, *, poll_s: int = 5, timeout_s: float | None = None
415
+ ) -> dict[str, Any]:
416
+ start = time.monotonic()
417
+ while True:
418
+ job = await self._request("GET", f"/v1/jobs/{job_id}")
419
+ if not isinstance(job, dict):
420
+ raise RuntimeError(
421
+ f"Unexpected response polling job {job_id}: {type(job).__name__}"
422
+ )
423
+ status = job.get("status")
424
+ if status in {"succeeded", "failed", "canceled"}:
425
+ if status != "succeeded":
426
+ message = job.get("error_message") or f"Job {job_id} ended with status={status}"
427
+ raise RuntimeError(message)
428
+ return job
429
+ if timeout_s is not None and (time.monotonic() - start) > timeout_s:
430
+ raise TimeoutError(f"Timed out waiting for job {job_id}")
431
+ await asyncio.sleep(poll_s)
432
+
433
+ # ------------------------------------------------------------------
434
+ # Pagination
435
+ # ------------------------------------------------------------------
436
+
437
+ async def _list_page(
438
+ self,
439
+ method: str,
440
+ path: str,
441
+ *,
442
+ items_key: str,
443
+ params: dict[str, Any] | None = None,
444
+ limit: int = 100,
445
+ offset: int = 0,
446
+ ) -> Page[dict[str, Any]]:
447
+ """Fetch a single page and return a Page object with metadata."""
448
+ page_params = {**(params or {}), "limit": limit, "offset": offset}
449
+ data = await self._request(method, path, params=page_params)
450
+ response_meta: RawResponse[dict[str, Any] | list[Any] | None] | None = None
451
+ if isinstance(data, RawResponse):
452
+ response_meta = data
453
+ data = data.parsed
454
+
455
+ if isinstance(data, dict):
456
+ items = data.get(items_key, [])
457
+ total = data.get("total", len(items))
458
+ elif isinstance(data, list):
459
+ items = data
460
+ total = len(items)
461
+ else:
462
+ items = []
463
+ total = 0
464
+
465
+ page = Page(items=items, total=total, offset=offset, limit=limit)
466
+ if response_meta is not None:
467
+ return cast(
468
+ "Page[dict[str, Any]]",
469
+ RawResponse(
470
+ status_code=response_meta.status_code,
471
+ headers=response_meta.headers,
472
+ parsed=page,
473
+ ),
474
+ )
475
+ return page
476
+
477
+ def _paginate(
478
+ self,
479
+ method: str,
480
+ path: str,
481
+ *,
482
+ items_key: str,
483
+ params: dict[str, Any] | None = None,
484
+ limit: int = 100,
485
+ ) -> AsyncPager[dict[str, Any]]:
486
+ """Return an AsyncPager for lazy multi-page iteration.
487
+
488
+ Pages are fetched on demand — the next page is requested only
489
+ when the current page's items are exhausted.
490
+
491
+ Args:
492
+ method: HTTP method (usually ``"GET"``).
493
+ path: API path (e.g. ``"/v1/corpora"``).
494
+ items_key: JSON key that contains the list of items in the
495
+ response (e.g. ``"items"``).
496
+ params: Extra query parameters forwarded to each page
497
+ request.
498
+ limit: Page size (default 100).
499
+ """
500
+ base_params = dict(params or {})
501
+
502
+ async def fetch_page(offset: int, page_limit: int) -> tuple[list[dict[str, Any]], int]:
503
+ page_params = {**base_params, "limit": page_limit, "offset": offset}
504
+ data = await self._request(method, path, params=page_params)
505
+ if isinstance(data, dict):
506
+ items = data.get(items_key, [])
507
+ total = data.get("total", len(items))
508
+ elif isinstance(data, list):
509
+ items = data
510
+ total = len(items)
511
+ else:
512
+ items = []
513
+ total = 0
514
+ return items, total
515
+
516
+ return AsyncPager(fetch_page, limit=limit)
517
+
518
+ # ------------------------------------------------------------------
519
+ # Error classification
520
+ # ------------------------------------------------------------------
521
+
522
+ @staticmethod
523
+ def _error_from_response(response: httpx.Response) -> APIError:
524
+ """Parse an error response into the appropriate :class:`APIError` subclass."""
525
+ return error_from_response(response)
sdk/_async_paging.py ADDED
@@ -0,0 +1,57 @@
1
+ """Async pagination primitives for the Knowledge2 SDK."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from collections.abc import AsyncIterator, Awaitable, Callable
6
+ from typing import Generic, TypeVar
7
+
8
+ from sdk._paging import Page
9
+
10
+ T = TypeVar("T")
11
+
12
+
13
class AsyncPager(Generic[T]):
    """Lazy, stateful async paginator.

    Pages are pulled one at a time through the ``fetch_page`` callback. The
    pager can be consumed page by page (:meth:`next_page`,
    :meth:`iter_pages`) or item by item (``async for item in pager``).
    """

    def __init__(
        self,
        fetch_page: Callable[[int, int], Awaitable[tuple[list[T], int]]],
        *,
        limit: int = 100,
        offset: int = 0,
    ) -> None:
        """Store the fetch callback and the starting window.

        Args:
            fetch_page: Awaitable callback called as ``fetch_page(offset, limit)``
                and returning ``(items, total)``.
            limit: Page size requested on every fetch.
            offset: Offset of the first page.
        """
        self._fetch_page = fetch_page
        self._limit = limit
        self._offset = offset
        self._exhausted = False

    async def next_page(self) -> Page[T] | None:
        """Fetch and return the next page, or ``None`` once the pager is done."""
        if self._exhausted:
            return None

        start, size = self._offset, self._limit
        batch, total = await self._fetch_page(start, size)
        page = Page(items=batch, total=total, offset=start, limit=size)

        # Keep going only after a full page that did not reach a known total.
        # A total equal to the page length is treated as "total unknown", so
        # it does not end iteration by itself.
        has_more = len(batch) >= size and not (total > len(batch) and start + size >= total)
        if has_more:
            self._offset += size
        else:
            self._exhausted = True
        return page

    async def iter_pages(self) -> AsyncIterator[Page[T]]:
        """Yield pages until the pager is exhausted or a page comes back empty."""
        while (page := await self.next_page()) is not None:
            yield page
            if not page.items:
                # An empty page ends iteration even if next_page did not mark
                # the pager exhausted.
                return

    def __aiter__(self) -> AsyncIterator[T]:
        """Iterate over individual items across all pages."""
        return self._item_iterator()

    async def _item_iterator(self) -> AsyncIterator[T]:
        """Flatten :meth:`iter_pages` into a stream of items."""
        async for current_page in self.iter_pages():
            for entry in current_page.items:
                yield entry