PyPI - sie-sdk - Versions diffs - 0.3.2__tar.gz → 0.3.4__tar.gz - Mend

sie-sdk: 0.3.2 (tar.gz) → 0.3.4 (tar.gz)

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Files changed (42) hide show

{sie_sdk-0.3.2 → sie_sdk-0.3.4}/PKG-INFO RENAMED Viewed

@@ -1,10 +1,10 @@
 Metadata-Version: 2.4
 Name: sie-sdk
-Version: 0.3.2
+Version: 0.3.4
 Summary: Search Inference Engine - Python SDK
 License: Apache-2.0
 License-File: LICENSE
-Requires-Python: <3.13,>=3.12
+Requires-Python: >=3.12
 Requires-Dist: aiohttp<4,>=3.9
 Requires-Dist: httpx<1,>=0.28
 Requires-Dist: msgpack-numpy<1,>=0.4

{sie_sdk-0.3.2 → sie_sdk-0.3.4}/pyproject.toml RENAMED Viewed

@@ -1,8 +1,8 @@
 [project]
 name = "sie-sdk"
-version = "0.3.2"
+version = "0.3.4"
 description = "Search Inference Engine - Python SDK"
-requires-python = ">=3.12,<3.13"
+requires-python = ">=3.12"
 license = { text = "Apache-2.0" }
 dependencies = [
     # HTTP client (async)

{sie_sdk-0.3.2 → sie_sdk-0.3.4}/src/sie_sdk/__init__.py RENAMED Viewed

@@ -12,6 +12,7 @@ For ColBERT/late interaction models, use the scoring module:
 """
 from sie_sdk.client import (
+    InputTooLongError,
     LoraLoadingError,
     ModelLoadFailedError,
     ModelLoadingError,
@@ -70,6 +71,7 @@ __all__ = [
     "Entity",
     "ExtractResult",
     "HealthResponse",
+    "InputTooLongError",
     "Item",
     "LoraLoadingError",
     "ModelInfo",

{sie_sdk-0.3.2 → sie_sdk-0.3.4}/src/sie_sdk/client/__init__.py RENAMED Viewed

@@ -5,6 +5,7 @@ Re-exports all client classes and errors for backwards compatibility.
 from sie_sdk.client.async_ import SIEAsyncClient
 from sie_sdk.client.errors import (
+    InputTooLongError,
     LoraLoadingError,
     ModelLoadFailedError,
     ModelLoadingError,
@@ -18,6 +19,7 @@ from sie_sdk.client.errors import (
 from sie_sdk.client.sync import SIEClient
 __all__ = [
+    "InputTooLongError",
     "LoraLoadingError",
     "ModelLoadFailedError",
     "ModelLoadingError",

{sie_sdk-0.3.2 → sie_sdk-0.3.4}/src/sie_sdk/client/_shared.py RENAMED Viewed

@@ -39,7 +39,7 @@ from sie_sdk.types import (
     SparseResult,
 )
-from .errors import ModelLoadFailedError, RequestError, ServerError
+from .errors import InputTooLongError, ModelLoadFailedError, RequestError, ServerError
 # Content types
 MSGPACK_CONTENT_TYPE = "application/msgpack"
@@ -73,6 +73,12 @@ MODEL_LOADING_ERROR_CODE = "MODEL_LOADING"  # Error code from server
 # instead of burning the MODEL_LOADING retry budget.
 MODEL_LOAD_FAILED_ERROR_CODE = "MODEL_LOAD_FAILED"
+# Terminal client-side error: request input exceeds the model's maximum
+# token capacity. Server returns HTTP 400 + this code; the SDK surfaces
+# a typed ``InputTooLongError`` so callers can react without parsing
+# error codes by hand.
+INPUT_TOO_LONG_ERROR_CODE = "INPUT_TOO_LONG"
 # Resource-exhausted retry settings (server-side OOM recovery exhausted).
 # Default backoff sequence: 5 -> 10 -> 20 s (capped at 30s). Three attempts
 # is enough to cover the typical eviction + retry window without making
@@ -373,6 +379,33 @@ def raise_if_model_load_failed(response: _HttpResponse, model: str | None = None
     )
+def raise_if_input_too_long(response: _HttpResponse, model: str | None = None) -> None:
+    """Raise :class:`InputTooLongError` if the response is 400 ``INPUT_TOO_LONG``.
+    Used by the extract path to surface token-budget overruns as a
+    typed exception (so callers can catch :class:`InputTooLongError`
+    specifically) instead of relying on a generic
+    :class:`RequestError` + string-matching the ``code``.
+    Args:
+        response: HTTP response to inspect.
+        model: Model name for inclusion in the raised error.
+    Raises:
+        InputTooLongError: If the response is a 400 carrying the
+            ``INPUT_TOO_LONG`` error code.
+    """
+    if response.status_code != HTTP_CLIENT_ERROR:
+        return
+    detail = get_error_detail(response)
+    if detail is None:
+        return
+    if detail.get("code") != INPUT_TOO_LONG_ERROR_CODE:
+        return
+    message = str(detail.get("message") or "Input exceeds the model's maximum token capacity")
+    raise InputTooLongError(message, model=model)
 def handle_error(response: _HttpResponse) -> None:
     """Handle error response from server.
@@ -409,6 +442,10 @@ def handle_error(response: _HttpResponse) -> None:
         # Fall back to raw text
         message = response.text or message
+    # Fallback dispatch — ``model`` is only attached by the helper-style
+    # short-circuit (``raise_if_input_too_long``) on the extract path.
+    if response.status_code == HTTP_CLIENT_ERROR and code == INPUT_TOO_LONG_ERROR_CODE:
+        raise InputTooLongError(message)
     if response.status_code >= HTTP_SERVER_ERROR:
         raise ServerError(message, code=code, status_code=response.status_code)
     raise RequestError(message, code=code, status_code=response.status_code)

{sie_sdk-0.3.2 → sie_sdk-0.3.4}/src/sie_sdk/client/async_.py RENAMED Viewed

@@ -86,6 +86,7 @@ from ._shared import (
     parse_extract_results,
     parse_gpu_param,
     parse_score_result,
+    raise_if_input_too_long,
     raise_if_model_load_failed,
 )
 from .errors import (
@@ -1788,6 +1789,9 @@ class SIEAsyncClient:
             # Short-circuit terminal load failures (sie-test#85).
             raise_if_model_load_failed(response, model=model)
+            # Short-circuit token-budget overruns (#849).
+            raise_if_input_too_long(response, model=model)
             # Handle 503 with MODEL_LOADING - auto-retry
             if response.status_code == 503:
                 from ._shared import get_error_code

{sie_sdk-0.3.2 → sie_sdk-0.3.4}/src/sie_sdk/client/errors.py RENAMED Viewed

@@ -181,6 +181,33 @@ class ModelLoadFailedError(ServerError):
         self.attempts = attempts
+class InputTooLongError(RequestError):
+    """Error when the request input exceeds the model's maximum token capacity.
+    Raised when the server returns HTTP ``400 INPUT_TOO_LONG`` for an
+    extraction request. Distinct from generic ``RequestError`` so callers
+    can branch on token-budget failures specifically (e.g. truncate the
+    input client-side, switch to a longer-context model, or surface a
+    targeted error to the end user) without parsing the error code.
+    Subclass of :class:`RequestError` so existing 4xx handlers continue
+    to work; new code can catch :class:`InputTooLongError` for tailored
+    handling.
+    Attributes:
+        model: The model that was requested.
+    """
+    def __init__(
+        self,
+        message: str,
+        *,
+        model: str | None = None,
+    ) -> None:
+        super().__init__(message, code="INPUT_TOO_LONG", status_code=400)
+        self.model = model
 class ResourceExhaustedError(ServerError):
     """Error when the server has exhausted its OOM-recovery strategies.

{sie_sdk-0.3.2 → sie_sdk-0.3.4}/src/sie_sdk/client/sync.py RENAMED Viewed

@@ -94,6 +94,7 @@ from ._shared import (
     parse_extract_results,
     parse_gpu_param,
     parse_score_result,
+    raise_if_input_too_long,
     raise_if_model_load_failed,
 )
 from .errors import (
@@ -2106,6 +2107,9 @@ class SIEClient:
             # Short-circuit terminal load failures (sie-test#85).
             raise_if_model_load_failed(response, model=model)
+            # Short-circuit token-budget overruns (#849).
+            raise_if_input_too_long(response, model=model)
             # Handle 503 with MODEL_LOADING - auto-retry
             if response.status_code == 503:
                 from ._shared import get_error_code

sie_sdk-0.3.4/tests/client/test_input_too_long.py ADDED Viewed

@@ -0,0 +1,161 @@
+"""Tests for the ``InputTooLongError`` short-circuit (#849).
+A 400 ``INPUT_TOO_LONG`` response on the extract path must:
+- raise :class:`InputTooLongError` immediately on the first response
+- carry ``code == "INPUT_TOO_LONG"`` and ``status_code == 400``
+- expose ``model`` from caller context
+- not be confused with generic :class:`RequestError` (so callers can
+  branch on token-budget failures specifically)
+"""
+from __future__ import annotations
+import json
+from unittest.mock import AsyncMock, MagicMock, patch
+import pytest
+from sie_sdk import SIEAsyncClient, SIEClient
+from sie_sdk.client._shared import handle_error
+from sie_sdk.client.async_ import _AioResponse
+from sie_sdk.client.errors import InputTooLongError, RequestError
+def _resp_input_too_long(message: str = "Input exceeds capacity (4096 tokens)") -> MagicMock:
+    resp = MagicMock()
+    resp.status_code = 400
+    resp.headers = {"content-type": "application/json"}
+    resp.json.return_value = {"detail": {"code": "INPUT_TOO_LONG", "message": message}}
+    return resp
+def _resp_validation_error() -> MagicMock:
+    """Negative case: a different 400 that must NOT be classified as INPUT_TOO_LONG."""
+    resp = MagicMock()
+    resp.status_code = 400
+    resp.headers = {"content-type": "application/json"}
+    resp.json.return_value = {"detail": {"code": "VALIDATION_ERROR", "message": "bad input"}}
+    return resp
+# ---------------------------------------------------------------------------
+# Sync client
+# ---------------------------------------------------------------------------
+class TestSyncInputTooLong:
+    def test_extract_raises_immediately_on_first_response(self) -> None:
+        """No retries are attempted; the typed error surfaces on the first call."""
+        with (
+            patch("sie_sdk.client.sync.httpx.Client") as mock_client,
+            patch("sie_sdk.client.sync.time.sleep") as mock_sleep,
+        ):
+            mock_client.return_value.post = MagicMock(side_effect=[_resp_input_too_long("Too many tokens")])
+            client = SIEClient("http://localhost:8080")
+            with pytest.raises(InputTooLongError) as excinfo:
+                client.extract("gliclass-large", {"text": "hi"}, labels=["a", "b"])
+            assert excinfo.value.model == "gliclass-large"
+            assert excinfo.value.code == "INPUT_TOO_LONG"
+            assert excinfo.value.status_code == 400
+            assert str(excinfo.value) == "Too many tokens"
+            # Critical: no retry happened.
+            assert mock_client.return_value.post.call_count == 1
+            mock_sleep.assert_not_called()
+            client.close()
+    def test_is_request_error_subclass(self) -> None:
+        """Existing 4xx handlers (`except RequestError`) must still catch it."""
+        assert issubclass(InputTooLongError, RequestError)
+        assert not issubclass(RequestError, InputTooLongError)
+    def test_other_400_falls_through_to_request_error(self) -> None:
+        """A 400 with a different code must NOT become InputTooLongError."""
+        with patch("sie_sdk.client.sync.httpx.Client") as mock_client:
+            mock_client.return_value.post = MagicMock(side_effect=[_resp_validation_error()])
+            client = SIEClient("http://localhost:8080")
+            with pytest.raises(RequestError) as excinfo:
+                client.extract("gliclass-large", {"text": "hi"}, labels=["a"])
+            assert not isinstance(excinfo.value, InputTooLongError)
+            assert excinfo.value.code == "VALIDATION_ERROR"
+            assert excinfo.value.status_code == 400
+            client.close()
+# ---------------------------------------------------------------------------
+# Async client
+# ---------------------------------------------------------------------------
+def _aio_input_too_long(message: str = "Input exceeds capacity (4096 tokens)") -> object:
+    return _AioResponse(
+        400,
+        json.dumps({"detail": {"code": "INPUT_TOO_LONG", "message": message}}).encode(),
+        {"content-type": "application/json"},
+    )
+def _aio_validation_error() -> object:
+    return _AioResponse(
+        400,
+        json.dumps({"detail": {"code": "VALIDATION_ERROR", "message": "bad input"}}).encode(),
+        {"content-type": "application/json"},
+    )
+class TestAsyncInputTooLong:
+    @pytest.mark.asyncio
+    async def test_extract_raises_immediately(self) -> None:
+        with (
+            patch("sie_sdk.client.async_.aiohttp.ClientSession"),
+            patch("sie_sdk.client.async_.asyncio.sleep") as mock_sleep,
+        ):
+            client = SIEAsyncClient("http://localhost:8080")
+            client._post = AsyncMock(side_effect=[_aio_input_too_long()])
+            with pytest.raises(InputTooLongError) as excinfo:
+                await client.extract("gliclass-large", {"text": "hi"}, labels=["a"])
+            assert excinfo.value.model == "gliclass-large"
+            assert excinfo.value.code == "INPUT_TOO_LONG"
+            assert excinfo.value.status_code == 400
+            assert client._post.await_count == 1
+            mock_sleep.assert_not_called()
+            await client.close()
+    @pytest.mark.asyncio
+    async def test_other_400_falls_through_to_request_error(self) -> None:
+        with patch("sie_sdk.client.async_.aiohttp.ClientSession"):
+            client = SIEAsyncClient("http://localhost:8080")
+            client._post = AsyncMock(side_effect=[_aio_validation_error()])
+            with pytest.raises(RequestError) as excinfo:
+                await client.extract("gliclass-large", {"text": "hi"}, labels=["a"])
+            assert not isinstance(excinfo.value, InputTooLongError)
+            assert excinfo.value.code == "VALIDATION_ERROR"
+            await client.close()
+# ---------------------------------------------------------------------------
+# Direct ``handle_error`` dispatch (locks in the secondary fallthrough so
+# reordering the conditions in ``_shared.handle_error`` cannot silently
+# regress the typed dispatch).
+# ---------------------------------------------------------------------------
+class TestHandleErrorDispatch:
+    def test_dispatch_raises_input_too_long(self) -> None:
+        with pytest.raises(InputTooLongError) as excinfo:
+            handle_error(_resp_input_too_long("Too many tokens"))
+        assert excinfo.value.code == "INPUT_TOO_LONG"
+        assert excinfo.value.status_code == 400
+        assert str(excinfo.value) == "Too many tokens"
+    def test_dispatch_does_not_classify_other_400(self) -> None:
+        with pytest.raises(RequestError) as excinfo:
+            handle_error(_resp_validation_error())
+        assert not isinstance(excinfo.value, InputTooLongError)
+        assert excinfo.value.code == "VALIDATION_ERROR"