sie-sdk 0.3.2__tar.gz → 0.3.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. {sie_sdk-0.3.2 → sie_sdk-0.3.4}/PKG-INFO +2 -2
  2. {sie_sdk-0.3.2 → sie_sdk-0.3.4}/pyproject.toml +2 -2
  3. {sie_sdk-0.3.2 → sie_sdk-0.3.4}/src/sie_sdk/__init__.py +2 -0
  4. {sie_sdk-0.3.2 → sie_sdk-0.3.4}/src/sie_sdk/client/__init__.py +2 -0
  5. {sie_sdk-0.3.2 → sie_sdk-0.3.4}/src/sie_sdk/client/_shared.py +38 -1
  6. {sie_sdk-0.3.2 → sie_sdk-0.3.4}/src/sie_sdk/client/async_.py +4 -0
  7. {sie_sdk-0.3.2 → sie_sdk-0.3.4}/src/sie_sdk/client/errors.py +27 -0
  8. {sie_sdk-0.3.2 → sie_sdk-0.3.4}/src/sie_sdk/client/sync.py +4 -0
  9. sie_sdk-0.3.4/tests/client/test_input_too_long.py +161 -0
  10. {sie_sdk-0.3.2 → sie_sdk-0.3.4}/.gitignore +0 -0
  11. {sie_sdk-0.3.2 → sie_sdk-0.3.4}/CONTRIBUTING.md +0 -0
  12. {sie_sdk-0.3.2 → sie_sdk-0.3.4}/LICENSE +0 -0
  13. {sie_sdk-0.3.2 → sie_sdk-0.3.4}/README.md +0 -0
  14. {sie_sdk-0.3.2 → sie_sdk-0.3.4}/src/sie_sdk/bundle_utils.py +0 -0
  15. {sie_sdk-0.3.2 → sie_sdk-0.3.4}/src/sie_sdk/cache.py +0 -0
  16. {sie_sdk-0.3.2 → sie_sdk-0.3.4}/src/sie_sdk/documents.py +0 -0
  17. {sie_sdk-0.3.2 → sie_sdk-0.3.4}/src/sie_sdk/encoding.py +0 -0
  18. {sie_sdk-0.3.2 → sie_sdk-0.3.4}/src/sie_sdk/exceptions.py +0 -0
  19. {sie_sdk-0.3.2 → sie_sdk-0.3.4}/src/sie_sdk/images.py +0 -0
  20. {sie_sdk-0.3.2 → sie_sdk-0.3.4}/src/sie_sdk/queue_types.py +0 -0
  21. {sie_sdk-0.3.2 → sie_sdk-0.3.4}/src/sie_sdk/scoring.py +0 -0
  22. {sie_sdk-0.3.2 → sie_sdk-0.3.4}/src/sie_sdk/storage.py +0 -0
  23. {sie_sdk-0.3.2 → sie_sdk-0.3.4}/src/sie_sdk/types.py +0 -0
  24. {sie_sdk-0.3.2 → sie_sdk-0.3.4}/tests/client/__init__.py +0 -0
  25. {sie_sdk-0.3.2 → sie_sdk-0.3.4}/tests/client/conftest.py +0 -0
  26. {sie_sdk-0.3.2 → sie_sdk-0.3.4}/tests/client/test_async.py +0 -0
  27. {sie_sdk-0.3.2 → sie_sdk-0.3.4}/tests/client/test_gateway_timeout_retry.py +0 -0
  28. {sie_sdk-0.3.2 → sie_sdk-0.3.4}/tests/client/test_model_load_failed.py +0 -0
  29. {sie_sdk-0.3.2 → sie_sdk-0.3.4}/tests/client/test_oom_retry.py +0 -0
  30. {sie_sdk-0.3.2 → sie_sdk-0.3.4}/tests/client/test_provisioning.py +0 -0
  31. {sie_sdk-0.3.2 → sie_sdk-0.3.4}/tests/client/test_sync.py +0 -0
  32. {sie_sdk-0.3.2 → sie_sdk-0.3.4}/tests/client/test_timeout.py +0 -0
  33. {sie_sdk-0.3.2 → sie_sdk-0.3.4}/tests/client/test_transport_error_retry.py +0 -0
  34. {sie_sdk-0.3.2 → sie_sdk-0.3.4}/tests/client/test_version_skew.py +0 -0
  35. {sie_sdk-0.3.2 → sie_sdk-0.3.4}/tests/test_cache.py +0 -0
  36. {sie_sdk-0.3.2 → sie_sdk-0.3.4}/tests/test_documents.py +0 -0
  37. {sie_sdk-0.3.2 → sie_sdk-0.3.4}/tests/test_exceptions.py +0 -0
  38. {sie_sdk-0.3.2 → sie_sdk-0.3.4}/tests/test_images.py +0 -0
  39. {sie_sdk-0.3.2 → sie_sdk-0.3.4}/tests/test_scoring.py +0 -0
  40. {sie_sdk-0.3.2 → sie_sdk-0.3.4}/tests/test_sdk_smoke.py +0 -0
  41. {sie_sdk-0.3.2 → sie_sdk-0.3.4}/tests/test_storage.py +0 -0
  42. {sie_sdk-0.3.2 → sie_sdk-0.3.4}/tests/test_storage_write.py +0 -0
@@ -1,10 +1,10 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sie-sdk
3
- Version: 0.3.2
3
+ Version: 0.3.4
4
4
  Summary: Search Inference Engine - Python SDK
5
5
  License: Apache-2.0
6
6
  License-File: LICENSE
7
- Requires-Python: <3.13,>=3.12
7
+ Requires-Python: >=3.12
8
8
  Requires-Dist: aiohttp<4,>=3.9
9
9
  Requires-Dist: httpx<1,>=0.28
10
10
  Requires-Dist: msgpack-numpy<1,>=0.4
@@ -1,8 +1,8 @@
1
1
  [project]
2
2
  name = "sie-sdk"
3
- version = "0.3.2"
3
+ version = "0.3.4"
4
4
  description = "Search Inference Engine - Python SDK"
5
- requires-python = ">=3.12,<3.13"
5
+ requires-python = ">=3.12"
6
6
  license = { text = "Apache-2.0" }
7
7
  dependencies = [
8
8
  # HTTP client (async)
@@ -12,6 +12,7 @@ For ColBERT/late interaction models, use the scoring module:
12
12
  """
13
13
 
14
14
  from sie_sdk.client import (
15
+ InputTooLongError,
15
16
  LoraLoadingError,
16
17
  ModelLoadFailedError,
17
18
  ModelLoadingError,
@@ -70,6 +71,7 @@ __all__ = [
70
71
  "Entity",
71
72
  "ExtractResult",
72
73
  "HealthResponse",
74
+ "InputTooLongError",
73
75
  "Item",
74
76
  "LoraLoadingError",
75
77
  "ModelInfo",
@@ -5,6 +5,7 @@ Re-exports all client classes and errors for backwards compatibility.
5
5
 
6
6
  from sie_sdk.client.async_ import SIEAsyncClient
7
7
  from sie_sdk.client.errors import (
8
+ InputTooLongError,
8
9
  LoraLoadingError,
9
10
  ModelLoadFailedError,
10
11
  ModelLoadingError,
@@ -18,6 +19,7 @@ from sie_sdk.client.errors import (
18
19
  from sie_sdk.client.sync import SIEClient
19
20
 
20
21
  __all__ = [
22
+ "InputTooLongError",
21
23
  "LoraLoadingError",
22
24
  "ModelLoadFailedError",
23
25
  "ModelLoadingError",
@@ -39,7 +39,7 @@ from sie_sdk.types import (
39
39
  SparseResult,
40
40
  )
41
41
 
42
- from .errors import ModelLoadFailedError, RequestError, ServerError
42
+ from .errors import InputTooLongError, ModelLoadFailedError, RequestError, ServerError
43
43
 
44
44
  # Content types
45
45
  MSGPACK_CONTENT_TYPE = "application/msgpack"
@@ -73,6 +73,12 @@ MODEL_LOADING_ERROR_CODE = "MODEL_LOADING" # Error code from server
73
73
  # instead of burning the MODEL_LOADING retry budget.
74
74
  MODEL_LOAD_FAILED_ERROR_CODE = "MODEL_LOAD_FAILED"
75
75
 
76
+ # Terminal client-side error: request input exceeds the model's maximum
77
+ # token capacity. Server returns HTTP 400 + this code; the SDK surfaces
78
+ # a typed ``InputTooLongError`` so callers can react without parsing
79
+ # error codes by hand.
80
+ INPUT_TOO_LONG_ERROR_CODE = "INPUT_TOO_LONG"
81
+
76
82
  # Resource-exhausted retry settings (server-side OOM recovery exhausted).
77
83
  # Default backoff sequence: 5 -> 10 -> 20 s (capped at 30s). Three attempts
78
84
  # is enough to cover the typical eviction + retry window without making
@@ -373,6 +379,33 @@ def raise_if_model_load_failed(response: _HttpResponse, model: str | None = None
373
379
  )
374
380
 
375
381
 
382
+ def raise_if_input_too_long(response: _HttpResponse, model: str | None = None) -> None:
383
+ """Raise :class:`InputTooLongError` if the response is 400 ``INPUT_TOO_LONG``.
384
+
385
+ Used by the extract path to surface token-budget overruns as a
386
+ typed exception (so callers can catch :class:`InputTooLongError`
387
+ specifically) instead of relying on a generic
388
+ :class:`RequestError` + string-matching the ``code``.
389
+
390
+ Args:
391
+ response: HTTP response to inspect.
392
+ model: Model name for inclusion in the raised error.
393
+
394
+ Raises:
395
+ InputTooLongError: If the response is a 400 carrying the
396
+ ``INPUT_TOO_LONG`` error code.
397
+ """
398
+ if response.status_code != HTTP_CLIENT_ERROR:
399
+ return
400
+ detail = get_error_detail(response)
401
+ if detail is None:
402
+ return
403
+ if detail.get("code") != INPUT_TOO_LONG_ERROR_CODE:
404
+ return
405
+ message = str(detail.get("message") or "Input exceeds the model's maximum token capacity")
406
+ raise InputTooLongError(message, model=model)
407
+
408
+
376
409
  def handle_error(response: _HttpResponse) -> None:
377
410
  """Handle error response from server.
378
411
 
@@ -409,6 +442,10 @@ def handle_error(response: _HttpResponse) -> None:
409
442
  # Fall back to raw text
410
443
  message = response.text or message
411
444
 
445
+ # Fallback dispatch — ``model`` is only attached by the helper-style
446
+ # short-circuit (``raise_if_input_too_long``) on the extract path.
447
+ if response.status_code == HTTP_CLIENT_ERROR and code == INPUT_TOO_LONG_ERROR_CODE:
448
+ raise InputTooLongError(message)
412
449
  if response.status_code >= HTTP_SERVER_ERROR:
413
450
  raise ServerError(message, code=code, status_code=response.status_code)
414
451
  raise RequestError(message, code=code, status_code=response.status_code)
@@ -86,6 +86,7 @@ from ._shared import (
86
86
  parse_extract_results,
87
87
  parse_gpu_param,
88
88
  parse_score_result,
89
+ raise_if_input_too_long,
89
90
  raise_if_model_load_failed,
90
91
  )
91
92
  from .errors import (
@@ -1788,6 +1789,9 @@ class SIEAsyncClient:
1788
1789
  # Short-circuit terminal load failures (sie-test#85).
1789
1790
  raise_if_model_load_failed(response, model=model)
1790
1791
 
1792
+ # Short-circuit token-budget overruns (#849).
1793
+ raise_if_input_too_long(response, model=model)
1794
+
1791
1795
  # Handle 503 with MODEL_LOADING - auto-retry
1792
1796
  if response.status_code == 503:
1793
1797
  from ._shared import get_error_code
@@ -181,6 +181,33 @@ class ModelLoadFailedError(ServerError):
181
181
  self.attempts = attempts
182
182
 
183
183
 
184
+ class InputTooLongError(RequestError):
185
+ """Error when the request input exceeds the model's maximum token capacity.
186
+
187
+ Raised when the server returns HTTP ``400 INPUT_TOO_LONG`` for an
188
+ extraction request. Distinct from generic ``RequestError`` so callers
189
+ can branch on token-budget failures specifically (e.g. truncate the
190
+ input client-side, switch to a longer-context model, or surface a
191
+ targeted error to the end user) without parsing the error code.
192
+
193
+ Subclass of :class:`RequestError` so existing 4xx handlers continue
194
+ to work; new code can catch :class:`InputTooLongError` for tailored
195
+ handling.
196
+
197
+ Attributes:
198
+ model: The model that was requested.
199
+ """
200
+
201
+ def __init__(
202
+ self,
203
+ message: str,
204
+ *,
205
+ model: str | None = None,
206
+ ) -> None:
207
+ super().__init__(message, code="INPUT_TOO_LONG", status_code=400)
208
+ self.model = model
209
+
210
+
184
211
  class ResourceExhaustedError(ServerError):
185
212
  """Error when the server has exhausted its OOM-recovery strategies.
186
213
 
@@ -94,6 +94,7 @@ from ._shared import (
94
94
  parse_extract_results,
95
95
  parse_gpu_param,
96
96
  parse_score_result,
97
+ raise_if_input_too_long,
97
98
  raise_if_model_load_failed,
98
99
  )
99
100
  from .errors import (
@@ -2106,6 +2107,9 @@ class SIEClient:
2106
2107
  # Short-circuit terminal load failures (sie-test#85).
2107
2108
  raise_if_model_load_failed(response, model=model)
2108
2109
 
2110
+ # Short-circuit token-budget overruns (#849).
2111
+ raise_if_input_too_long(response, model=model)
2112
+
2109
2113
  # Handle 503 with MODEL_LOADING - auto-retry
2110
2114
  if response.status_code == 503:
2111
2115
  from ._shared import get_error_code
@@ -0,0 +1,161 @@
1
+ """Tests for the ``InputTooLongError`` short-circuit (#849).
2
+
3
+ A 400 ``INPUT_TOO_LONG`` response on the extract path must:
4
+ - raise :class:`InputTooLongError` immediately on the first response
5
+ - carry ``code == "INPUT_TOO_LONG"`` and ``status_code == 400``
6
+ - expose ``model`` from caller context
7
+ - not be confused with generic :class:`RequestError` (so callers can
8
+ branch on token-budget failures specifically)
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import json
14
+ from unittest.mock import AsyncMock, MagicMock, patch
15
+
16
+ import pytest
17
+ from sie_sdk import SIEAsyncClient, SIEClient
18
+ from sie_sdk.client._shared import handle_error
19
+ from sie_sdk.client.async_ import _AioResponse
20
+ from sie_sdk.client.errors import InputTooLongError, RequestError
21
+
22
+
23
+ def _resp_input_too_long(message: str = "Input exceeds capacity (4096 tokens)") -> MagicMock:
24
+ resp = MagicMock()
25
+ resp.status_code = 400
26
+ resp.headers = {"content-type": "application/json"}
27
+ resp.json.return_value = {"detail": {"code": "INPUT_TOO_LONG", "message": message}}
28
+ return resp
29
+
30
+
31
+ def _resp_validation_error() -> MagicMock:
32
+ """Negative case: a different 400 that must NOT be classified as INPUT_TOO_LONG."""
33
+ resp = MagicMock()
34
+ resp.status_code = 400
35
+ resp.headers = {"content-type": "application/json"}
36
+ resp.json.return_value = {"detail": {"code": "VALIDATION_ERROR", "message": "bad input"}}
37
+ return resp
38
+
39
+
40
+ # ---------------------------------------------------------------------------
41
+ # Sync client
42
+ # ---------------------------------------------------------------------------
43
+
44
+
45
+ class TestSyncInputTooLong:
46
+ def test_extract_raises_immediately_on_first_response(self) -> None:
47
+ """No retries are attempted; the typed error surfaces on the first call."""
48
+ with (
49
+ patch("sie_sdk.client.sync.httpx.Client") as mock_client,
50
+ patch("sie_sdk.client.sync.time.sleep") as mock_sleep,
51
+ ):
52
+ mock_client.return_value.post = MagicMock(side_effect=[_resp_input_too_long("Too many tokens")])
53
+ client = SIEClient("http://localhost:8080")
54
+
55
+ with pytest.raises(InputTooLongError) as excinfo:
56
+ client.extract("gliclass-large", {"text": "hi"}, labels=["a", "b"])
57
+
58
+ assert excinfo.value.model == "gliclass-large"
59
+ assert excinfo.value.code == "INPUT_TOO_LONG"
60
+ assert excinfo.value.status_code == 400
61
+ assert str(excinfo.value) == "Too many tokens"
62
+ # Critical: no retry happened.
63
+ assert mock_client.return_value.post.call_count == 1
64
+ mock_sleep.assert_not_called()
65
+ client.close()
66
+
67
+ def test_is_request_error_subclass(self) -> None:
68
+ """Existing 4xx handlers (`except RequestError`) must still catch it."""
69
+ assert issubclass(InputTooLongError, RequestError)
70
+ assert not issubclass(RequestError, InputTooLongError)
71
+
72
+ def test_other_400_falls_through_to_request_error(self) -> None:
73
+ """A 400 with a different code must NOT become InputTooLongError."""
74
+ with patch("sie_sdk.client.sync.httpx.Client") as mock_client:
75
+ mock_client.return_value.post = MagicMock(side_effect=[_resp_validation_error()])
76
+ client = SIEClient("http://localhost:8080")
77
+
78
+ with pytest.raises(RequestError) as excinfo:
79
+ client.extract("gliclass-large", {"text": "hi"}, labels=["a"])
80
+
81
+ assert not isinstance(excinfo.value, InputTooLongError)
82
+ assert excinfo.value.code == "VALIDATION_ERROR"
83
+ assert excinfo.value.status_code == 400
84
+ client.close()
85
+
86
+
87
+ # ---------------------------------------------------------------------------
88
+ # Async client
89
+ # ---------------------------------------------------------------------------
90
+
91
+
92
+ def _aio_input_too_long(message: str = "Input exceeds capacity (4096 tokens)") -> object:
93
+ return _AioResponse(
94
+ 400,
95
+ json.dumps({"detail": {"code": "INPUT_TOO_LONG", "message": message}}).encode(),
96
+ {"content-type": "application/json"},
97
+ )
98
+
99
+
100
+ def _aio_validation_error() -> object:
101
+ return _AioResponse(
102
+ 400,
103
+ json.dumps({"detail": {"code": "VALIDATION_ERROR", "message": "bad input"}}).encode(),
104
+ {"content-type": "application/json"},
105
+ )
106
+
107
+
108
+ class TestAsyncInputTooLong:
109
+ @pytest.mark.asyncio
110
+ async def test_extract_raises_immediately(self) -> None:
111
+ with (
112
+ patch("sie_sdk.client.async_.aiohttp.ClientSession"),
113
+ patch("sie_sdk.client.async_.asyncio.sleep") as mock_sleep,
114
+ ):
115
+ client = SIEAsyncClient("http://localhost:8080")
116
+ client._post = AsyncMock(side_effect=[_aio_input_too_long()])
117
+
118
+ with pytest.raises(InputTooLongError) as excinfo:
119
+ await client.extract("gliclass-large", {"text": "hi"}, labels=["a"])
120
+
121
+ assert excinfo.value.model == "gliclass-large"
122
+ assert excinfo.value.code == "INPUT_TOO_LONG"
123
+ assert excinfo.value.status_code == 400
124
+ assert client._post.await_count == 1
125
+ mock_sleep.assert_not_called()
126
+ await client.close()
127
+
128
+ @pytest.mark.asyncio
129
+ async def test_other_400_falls_through_to_request_error(self) -> None:
130
+ with patch("sie_sdk.client.async_.aiohttp.ClientSession"):
131
+ client = SIEAsyncClient("http://localhost:8080")
132
+ client._post = AsyncMock(side_effect=[_aio_validation_error()])
133
+
134
+ with pytest.raises(RequestError) as excinfo:
135
+ await client.extract("gliclass-large", {"text": "hi"}, labels=["a"])
136
+
137
+ assert not isinstance(excinfo.value, InputTooLongError)
138
+ assert excinfo.value.code == "VALIDATION_ERROR"
139
+ await client.close()
140
+
141
+
142
+ # ---------------------------------------------------------------------------
143
+ # Direct ``handle_error`` dispatch (locks in the secondary fallthrough so
144
+ # reordering the conditions in ``_shared.handle_error`` cannot silently
145
+ # regress the typed dispatch).
146
+ # ---------------------------------------------------------------------------
147
+
148
+
149
+ class TestHandleErrorDispatch:
150
+ def test_dispatch_raises_input_too_long(self) -> None:
151
+ with pytest.raises(InputTooLongError) as excinfo:
152
+ handle_error(_resp_input_too_long("Too many tokens"))
153
+ assert excinfo.value.code == "INPUT_TOO_LONG"
154
+ assert excinfo.value.status_code == 400
155
+ assert str(excinfo.value) == "Too many tokens"
156
+
157
+ def test_dispatch_does_not_classify_other_400(self) -> None:
158
+ with pytest.raises(RequestError) as excinfo:
159
+ handle_error(_resp_validation_error())
160
+ assert not isinstance(excinfo.value, InputTooLongError)
161
+ assert excinfo.value.code == "VALIDATION_ERROR"
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes