saia-python 0.4.1__tar.gz → 0.6.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. {saia_python-0.4.1/saia_python.egg-info → saia_python-0.6.0}/PKG-INFO +2 -4
  2. {saia_python-0.4.1 → saia_python-0.6.0}/README.md +1 -3
  3. {saia_python-0.4.1 → saia_python-0.6.0}/pyproject.toml +1 -1
  4. {saia_python-0.4.1 → saia_python-0.6.0}/saia_python/__init__.py +4 -1
  5. saia_python-0.6.0/saia_python/_http.py +243 -0
  6. {saia_python-0.4.1 → saia_python-0.6.0}/saia_python/_streaming.py +4 -1
  7. {saia_python-0.4.1 → saia_python-0.6.0}/saia_python/arcana.py +266 -69
  8. {saia_python-0.4.1 → saia_python-0.6.0}/saia_python/chat.py +11 -2
  9. {saia_python-0.4.1 → saia_python-0.6.0}/saia_python/client.py +35 -6
  10. saia_python-0.6.0/saia_python/documents.py +246 -0
  11. {saia_python-0.4.1 → saia_python-0.6.0}/saia_python/models.py +24 -9
  12. {saia_python-0.4.1 → saia_python-0.6.0}/saia_python/voice.py +27 -8
  13. {saia_python-0.4.1 → saia_python-0.6.0/saia_python.egg-info}/PKG-INFO +2 -4
  14. {saia_python-0.4.1 → saia_python-0.6.0}/saia_python.egg-info/SOURCES.txt +2 -0
  15. {saia_python-0.4.1 → saia_python-0.6.0}/tests/test_arcana.py +188 -1
  16. {saia_python-0.4.1 → saia_python-0.6.0}/tests/test_auth.py +3 -0
  17. {saia_python-0.4.1 → saia_python-0.6.0}/tests/test_chat.py +2 -0
  18. {saia_python-0.4.1 → saia_python-0.6.0}/tests/test_client.py +9 -0
  19. saia_python-0.6.0/tests/test_documents.py +115 -0
  20. {saia_python-0.4.1 → saia_python-0.6.0}/tests/test_models.py +16 -1
  21. saia_python-0.6.0/tests/test_transport_policy.py +272 -0
  22. {saia_python-0.4.1 → saia_python-0.6.0}/tests/test_voice.py +2 -0
  23. saia_python-0.4.1/saia_python/_http.py +0 -71
  24. saia_python-0.4.1/saia_python/documents.py +0 -145
  25. {saia_python-0.4.1 → saia_python-0.6.0}/LICENSE +0 -0
  26. {saia_python-0.4.1 → saia_python-0.6.0}/saia_python/_util.py +0 -0
  27. {saia_python-0.4.1 → saia_python-0.6.0}/saia_python/arcana_references.py +0 -0
  28. {saia_python-0.4.1 → saia_python-0.6.0}/saia_python/auth.py +0 -0
  29. {saia_python-0.4.1 → saia_python-0.6.0}/saia_python/exceptions.py +0 -0
  30. {saia_python-0.4.1 → saia_python-0.6.0}/saia_python/openai_compat.py +0 -0
  31. {saia_python-0.4.1 → saia_python-0.6.0}/saia_python/py.typed +0 -0
  32. {saia_python-0.4.1 → saia_python-0.6.0}/saia_python/rate_limits.py +0 -0
  33. {saia_python-0.4.1 → saia_python-0.6.0}/saia_python/responses.py +0 -0
  34. {saia_python-0.4.1 → saia_python-0.6.0}/saia_python.egg-info/dependency_links.txt +0 -0
  35. {saia_python-0.4.1 → saia_python-0.6.0}/saia_python.egg-info/requires.txt +0 -0
  36. {saia_python-0.4.1 → saia_python-0.6.0}/saia_python.egg-info/top_level.txt +0 -0
  37. {saia_python-0.4.1 → saia_python-0.6.0}/setup.cfg +0 -0
  38. {saia_python-0.4.1 → saia_python-0.6.0}/tests/test_arcana_references.py +0 -0
  39. {saia_python-0.4.1 → saia_python-0.6.0}/tests/test_exceptions.py +0 -0
  40. {saia_python-0.4.1 → saia_python-0.6.0}/tests/test_health_check.py +0 -0
  41. {saia_python-0.4.1 → saia_python-0.6.0}/tests/test_openai_compat.py +0 -0
  42. {saia_python-0.4.1 → saia_python-0.6.0}/tests/test_rate_limits.py +0 -0
  43. {saia_python-0.4.1 → saia_python-0.6.0}/tests/test_responses.py +0 -0
  44. {saia_python-0.4.1 → saia_python-0.6.0}/tests/test_setup_from_directory.py +0 -0
  45. {saia_python-0.4.1 → saia_python-0.6.0}/tests/test_streaming.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: saia-python
3
- Version: 0.4.1
3
+ Version: 0.6.0
4
4
  Summary: Python wrapper for the GWDG SAIA platform REST API
5
5
  Author: Friedrich Schwarz
6
6
  License-Expression: AGPL-3.0-only
@@ -57,9 +57,7 @@ Dynamic: license-file
57
57
  [![License: AGPL-3.0-only](https://img.shields.io/badge/license-AGPL--3.0--only-blue.svg)](https://github.com/fschwar4/saia_python/blob/main/LICENSE)
58
58
  [![Tests](https://github.com/fschwar4/saia_python/actions/workflows/tests.yml/badge.svg)](https://github.com/fschwar4/saia_python/actions/workflows/tests.yml)
59
59
  [![Docs](https://img.shields.io/badge/docs-online-blue.svg)](https://fschwar4.github.io/saia_python/)
60
- <!-- After enabling Zenodo (Settings → Integrations → GitHub) and cutting a release,
61
- paste the DOI badge Zenodo provides, e.g.:
62
- [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.XXXXXXX.svg)](https://doi.org/10.5281/zenodo.XXXXXXX) -->
60
+ [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.20480724.svg)](https://doi.org/10.5281/zenodo.20480724)
63
61
 
64
62
  A Python wrapper for the [GWDG SAIA (Scalable AI Accelerator) platform](https://docs.hpc.gwdg.de/services/ai-services/saia/index.html) REST API.
65
63
 
@@ -5,9 +5,7 @@
5
5
  [![License: AGPL-3.0-only](https://img.shields.io/badge/license-AGPL--3.0--only-blue.svg)](https://github.com/fschwar4/saia_python/blob/main/LICENSE)
6
6
  [![Tests](https://github.com/fschwar4/saia_python/actions/workflows/tests.yml/badge.svg)](https://github.com/fschwar4/saia_python/actions/workflows/tests.yml)
7
7
  [![Docs](https://img.shields.io/badge/docs-online-blue.svg)](https://fschwar4.github.io/saia_python/)
8
- <!-- After enabling Zenodo (Settings → Integrations → GitHub) and cutting a release,
9
- paste the DOI badge Zenodo provides, e.g.:
10
- [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.XXXXXXX.svg)](https://doi.org/10.5281/zenodo.XXXXXXX) -->
8
+ [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.20480724.svg)](https://doi.org/10.5281/zenodo.20480724)
11
9
 
12
10
  A Python wrapper for the [GWDG SAIA (Scalable AI Accelerator) platform](https://docs.hpc.gwdg.de/services/ai-services/saia/index.html) REST API.
13
11
 
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "saia-python"
7
- version = "0.4.1"
7
+ version = "0.6.0"
8
8
  description = "Python wrapper for the GWDG SAIA platform REST API"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.10"
@@ -17,6 +17,7 @@ from __future__ import annotations
17
17
  import concurrent.futures
18
18
  from importlib.metadata import PackageNotFoundError, version
19
19
 
20
+ from ._http import RetryPolicy
20
21
  from ._streaming import SSEStream
21
22
  from .arcana_references import (
22
23
  ArcanaReference,
@@ -36,7 +37,7 @@ from .auth import (
36
37
  resolve_base_url,
37
38
  )
38
39
  from .client import SAIAClient
39
- from .documents import ConversionResult
40
+ from .documents import ConversionImage, ConversionResult
40
41
  from .exceptions import APIError, AuthenticationError, RateLimitError, SAIAError
41
42
  from .openai_compat import create_openai_client
42
43
  from .rate_limits import RateLimitInfo, parse_rate_limits
@@ -55,6 +56,7 @@ __all__ = [
55
56
  "resolve_base_url",
56
57
  "DEFAULT_BASE_URL",
57
58
  "create_openai_client",
59
+ "RetryPolicy",
58
60
  # Auth
59
61
  "load_api_key",
60
62
  "load_arcana_ids",
@@ -92,6 +94,7 @@ __all__ = [
92
94
  "get_rate_limits",
93
95
  "convert_document",
94
96
  "ConversionResult",
97
+ "ConversionImage",
95
98
  ]
96
99
 
97
100
 
@@ -0,0 +1,243 @@
1
+ """Shared HTTP plumbing used by more than one service.
2
+
3
+ Kept in one place so the chat-completion request shape and the
4
+ background-thread ``Session`` helper each have a single implementation,
5
+ rather than being copied across :mod:`saia_python.chat`,
6
+ :mod:`saia_python.arcana`, and :mod:`saia_python.voice`.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import logging
12
+ import random
13
+ import time
14
+ from collections.abc import Callable
15
+ from dataclasses import dataclass
16
+
17
+ import requests
18
+
19
+ from ._streaming import SSEStream
20
+ from .exceptions import raise_for_status
21
+ from .rate_limits import RateLimitInfo, parse_rate_limits
22
+
23
+ log = logging.getLogger(__name__)
24
+
25
+ # Default ``(connect, read)`` timeout in seconds for ARCANA management
26
+ # ("control-plane") requests that do not pass their own. A plain
27
+ # :class:`requests.Session` has NO default timeout, so a request the server
28
+ # accepts but never answers — common while an arcana is locked mid-(re)index —
29
+ # blocks forever on the socket read. Long-running "data-plane" calls (chat
30
+ # completions, voice transcription, document conversion) deliberately do not
31
+ # inherit this cap, since they can legitimately run for minutes.
32
+ DEFAULT_TIMEOUT: tuple[float, float] = (10.0, 60.0)
33
+
34
+
35
@dataclass
class RetryPolicy:
    """Settings that govern how HTTP 429 (rate-limit) responses are retried.

    The policy is consulted at the session dispatch seam (:func:`execute`).
    It is enabled by default and, unless ``retry_mutations`` is set, only
    covers idempotent calls. See ``docs/proposals/rate-limit-handling.md``.

    Attributes:
        on_rate_limit: Master switch. With ``False`` a 429 is never retried
            and propagates as :class:`~saia_python.RateLimitError`.
        max_retries: Upper bound on reset-driven retries (the minute window).
        max_waiting_time: Longest single wait, in seconds (default 60). A
            reset further away than this fails fast instead of blocking.
        fallback_wait: Seconds to wait when the server gives no reset hint.
        fallback_max_retries: How often the blind fallback wait is attempted.
        jitter: ``(low, high)`` seconds added to every wait so concurrent
            workers do not retry in lockstep; ``(0, 0)`` turns it off.
        retry_mutations: When ``True``, non-idempotent calls are retried as
            well (off by default, since replaying a mutation is unsafe in
            general).
    """

    on_rate_limit: bool = True
    max_retries: int = 5
    max_waiting_time: float = 60.0
    fallback_wait: float = 31.0
    fallback_max_retries: int = 2
    jitter: tuple[float, float] = (0.0, 2.0)
    retry_mutations: bool = False

    def applies(self, idempotent: bool) -> bool:
        """Return whether a 429 on a call of this idempotency may be retried."""
        # Retrying switched off: nothing is eligible.
        if not self.on_rate_limit:
            return False
        # Idempotent calls always qualify; mutations only when opted in.
        return idempotent or self.retry_mutations
69
+
70
+
71
def coerce_retry(retry: RetryPolicy | bool | None) -> RetryPolicy:
    """Turn a loosely typed ``retry`` argument into a :class:`RetryPolicy`.

    ``False`` yields a policy with retrying switched off, an existing
    :class:`RetryPolicy` is passed through untouched, and anything else
    (``None`` / ``True``) yields the default policy (retry on).
    """
    if retry is False:
        return RetryPolicy(on_rate_limit=False)
    return retry if isinstance(retry, RetryPolicy) else RetryPolicy()
82
+
83
+
84
def resolve_retry(
    default: RetryPolicy, override: RetryPolicy | bool | None
) -> RetryPolicy:
    """Choose the policy for a single call.

    ``override=None`` means "no per-call preference" and returns the service
    ``default`` as-is; any other value is normalised through
    :func:`coerce_retry` and takes precedence.
    """
    if override is None:
        return default
    return coerce_retry(override)
90
+
91
+
92
def _jitter(policy: RetryPolicy) -> float:
    """Draw the extra delay for one wait from ``policy.jitter``.

    Returns a uniform sample from ``(low, high)`` when the range is
    non-empty, and ``low`` itself when ``high`` does not exceed ``low``.
    """
    lower, upper = policy.jitter
    if upper > lower:
        return random.uniform(lower, upper)
    return lower
95
+
96
+
97
def _plan(info: RateLimitInfo, policy: RetryPolicy, attempt: int) -> float | None:
    """Seconds to wait before the next attempt, or ``None`` to give up (→ raise).

    Honest about the single ``reset_seconds`` spanning four windows: we wait out
    the window we can time (the minute), but fail fast on a longer window whose
    reset is unknowable — we will not block for ~an hour inside a call.

    Args:
        info: Parsed rate-limit headers of the 429 response. Read attributes:
            ``remaining_hour``, ``remaining_day``, ``remaining_month`` and
            ``reset_seconds``.
        policy: The active retry policy.
        attempt: Number of retries already performed for this call.

    Returns:
        The base wait in seconds (jitter is added by the caller), or ``None``
        when the call must not be retried.
    """
    # A longer window is exhausted → its reset is not the (minute) reset_seconds.
    if any(
        getattr(info, f"remaining_{window}") == 0
        for window in ("hour", "day", "month")
    ):
        return None

    reset = info.reset_seconds
    if isinstance(reset, (int, float)) and reset > 0:
        if reset > policy.max_waiting_time or attempt >= policy.max_retries:
            return None
        # One extra second so the retry lands after the window has rolled over.
        return float(reset) + 1.0

    # No usable reset hint → conservative, bounded blind fallback. The blind
    # wait is held to the same ceiling as a reset-driven one: a policy whose
    # ``max_waiting_time`` is below ``fallback_wait`` fails fast rather than
    # blocking longer than the caller allowed.
    if policy.fallback_wait > policy.max_waiting_time:
        return None
    if attempt >= policy.fallback_max_retries:
        return None
    return policy.fallback_wait
115
+
116
+
117
def execute(
    session: requests.Session,
    method: str,
    url: str,
    *,
    policy: RetryPolicy,
    idempotent: bool,
    sleep: Callable[[float], object] = time.sleep,
    **kwargs,
) -> requests.Response:
    """Issue a request under a transport policy and return the response.

    Dispatches ``getattr(session, method.lower())(url, **kwargs)``. The rest of
    the package passes the lowercase verb; other casings (``"POST"``) are
    accepted and normalised. On HTTP 429 — when ``policy`` permits (enabled,
    and the call is idempotent or ``retry_mutations``) — it waits per
    :func:`_plan` and retries. It returns the **raw response** unchanged on
    success *or* on give-up, so the caller's
    :func:`~saia_python.exceptions.raise_for_status` still raises
    :class:`~saia_python.RateLimitError` when retry is off, the budget is spent,
    or the window must not be waited on.

    Only the status code and headers are inspected — never the body — so
    streaming and non-streaming requests behave identically and a streamed body
    is never consumed. The (possibly streamed) connection is released with
    ``close()`` before each wait.

    Args:
        session: Session whose verb method performs the request.
        method: HTTP verb naming the session method (case-insensitive).
        url: Target URL.
        policy: Retry policy deciding whether and how long to wait on a 429.
        idempotent: Whether replaying this call is safe.
        sleep: Injectable sleep hook (tests pass a recorder so they never
            block).
        **kwargs: Forwarded unchanged to the session method on every attempt.

    Returns:
        The last response received, whatever its status code.

    Note:
        A retry re-issues the request with the **same** ``kwargs``, so any file
        payload must be retry-safe (``bytes``, not a one-shot file handle). The
        file-upload callers (voice, documents) pass ``bytes``.
    """
    # Session verb helpers are lowercase attributes (``session.post`` …).
    send = getattr(session, method.lower())
    attempt = 0
    while True:
        resp = send(url, **kwargs)
        if resp.status_code != 429 or not policy.applies(idempotent):
            return resp
        wait = _plan(parse_rate_limits(resp.headers), policy, attempt)
        if wait is None:
            return resp
        resp.close()
        attempt += 1
        # A negative jitter bound must never yield a negative delay:
        # ``time.sleep`` raises ``ValueError`` for those.
        wait = max(0.0, wait + _jitter(policy))
        log.info("SAIA rate limit (429) — waiting %.1fs before retry %d", wait, attempt)
        sleep(wait)
161
+
162
+
163
def new_session_like(template: requests.Session) -> requests.Session:
    """Build a new :class:`requests.Session` carrying ``template``'s headers.

    Work done on a background thread must not share the caller's ``Session``:
    ``requests.Session`` is not guaranteed thread-safe, and sharing its
    connection pool across threads can corrupt in-flight requests. The
    non-blocking Voice path and the fire-and-forget ARCANA index trigger both
    obtain their own ``Session`` here so they never race the client's.
    """
    clone = requests.Session()
    # Copy the headers over; the connection pool stays private to the clone.
    clone.headers.update(template.headers)
    return clone
176
+
177
+
178
def post_chat_completion(
    session: requests.Session,
    url: str,
    body: dict,
    *,
    headers: dict | None = None,
    stream: bool = False,
    policy: RetryPolicy | None = None,
    sleep: Callable[[float], object] = time.sleep,
) -> dict | SSEStream:
    """POST a chat-completion request and normalise what comes back.

    Common code path for :meth:`ChatService.completions` and
    :meth:`ArcanaService.chat`: both target the same ``/chat/completions``
    endpoint and share stream/non-stream handling and rate-limit surfacing.
    Only the request ``body`` fields and auth ``headers`` differ, so the
    caller assembles those.

    Args:
        session: The :class:`requests.Session` used to send the request.
        url: The fully-qualified ``/chat/completions`` URL.
        body: The request JSON body, already assembled by the caller. It is
            not mutated; streaming sends a copy with ``"stream": True``.
        headers: Per-request headers, or ``None`` for none. When streaming,
            ``Accept: text/event-stream`` is added on top of them.
        stream: When ``True``, request SSE and return an :class:`SSEStream`.
        policy: Rate-limit :class:`RetryPolicy`; ``None`` uses the defaults.
            The request is dispatched as idempotent, so an initial 429 is
            retried per the policy — for streaming this happens before the
            stream is handed to the caller.
        sleep: Injectable sleep hook (tests pass a recorder so they never
            block).

    Returns:
        With ``stream=False``: the response dict plus a ``"_rate_limits"``
        key holding the parsed rate-limit headers as a dict. With
        ``stream=True``: an :class:`SSEStream` wrapping the response.
    """
    if policy is None:
        policy = RetryPolicy()

    # The two modes differ only in the keyword arguments sent on the wire.
    if stream:
        request_kwargs = {
            "json": {**body, "stream": True},
            "headers": {**(headers or {}), "Accept": "text/event-stream"},
            "stream": True,
        }
    else:
        request_kwargs = {"json": body, "headers": headers}

    resp = execute(
        session,
        "post",
        url,
        policy=policy,
        idempotent=True,
        sleep=sleep,
        **request_kwargs,
    )
    if stream:
        # No status check in this function for streams; the raw response is
        # wrapped as-is.
        return SSEStream(resp)

    raise_for_status(resp)
    payload = resp.json()
    payload["_rate_limits"] = parse_rate_limits(resp.headers).to_dict()
    return payload
@@ -27,7 +27,10 @@ def iter_sse(response: requests.Response) -> Generator[dict, None, None]:
27
27
  """
28
28
  raise_for_status(response)
29
29
  try:
30
- for line in response.iter_lines(decode_unicode=True):
30
+ for raw in response.iter_lines(decode_unicode=True):
31
+ # decode_unicode=True yields str at runtime, but the requests type
32
+ # stub still types iter_lines as bytes — normalize for both.
33
+ line = raw.decode("utf-8") if isinstance(raw, bytes) else raw
31
34
  if not line or not line.startswith("data:"):
32
35
  continue
33
36
  payload = line[len("data:") :].strip()