saia-python 0.5.0__tar.gz → 0.7.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. {saia_python-0.5.0/saia_python.egg-info → saia_python-0.7.0}/PKG-INFO +8 -1
  2. {saia_python-0.5.0 → saia_python-0.7.0}/README.md +2 -0
  3. {saia_python-0.5.0 → saia_python-0.7.0}/pyproject.toml +11 -1
  4. {saia_python-0.5.0 → saia_python-0.7.0}/saia_python/__init__.py +49 -1
  5. saia_python-0.7.0/saia_python/_http.py +243 -0
  6. {saia_python-0.5.0 → saia_python-0.7.0}/saia_python/arcana.py +150 -80
  7. {saia_python-0.5.0 → saia_python-0.7.0}/saia_python/chat.py +11 -2
  8. {saia_python-0.5.0 → saia_python-0.7.0}/saia_python/client.py +29 -4
  9. saia_python-0.7.0/saia_python/documents.py +246 -0
  10. saia_python-0.7.0/saia_python/tokenizer.py +1546 -0
  11. {saia_python-0.5.0 → saia_python-0.7.0}/saia_python/voice.py +27 -8
  12. {saia_python-0.5.0 → saia_python-0.7.0/saia_python.egg-info}/PKG-INFO +8 -1
  13. {saia_python-0.5.0 → saia_python-0.7.0}/saia_python.egg-info/SOURCES.txt +4 -0
  14. {saia_python-0.5.0 → saia_python-0.7.0}/saia_python.egg-info/requires.txt +6 -0
  15. {saia_python-0.5.0 → saia_python-0.7.0}/tests/test_arcana.py +51 -1
  16. {saia_python-0.5.0 → saia_python-0.7.0}/tests/test_auth.py +2 -0
  17. {saia_python-0.5.0 → saia_python-0.7.0}/tests/test_chat.py +2 -0
  18. saia_python-0.7.0/tests/test_documents.py +115 -0
  19. saia_python-0.7.0/tests/test_tokenizer.py +656 -0
  20. saia_python-0.7.0/tests/test_transport_policy.py +272 -0
  21. {saia_python-0.5.0 → saia_python-0.7.0}/tests/test_voice.py +2 -0
  22. saia_python-0.5.0/saia_python/_http.py +0 -80
  23. saia_python-0.5.0/saia_python/documents.py +0 -145
  24. {saia_python-0.5.0 → saia_python-0.7.0}/LICENSE +0 -0
  25. {saia_python-0.5.0 → saia_python-0.7.0}/saia_python/_streaming.py +0 -0
  26. {saia_python-0.5.0 → saia_python-0.7.0}/saia_python/_util.py +0 -0
  27. {saia_python-0.5.0 → saia_python-0.7.0}/saia_python/arcana_references.py +0 -0
  28. {saia_python-0.5.0 → saia_python-0.7.0}/saia_python/auth.py +0 -0
  29. {saia_python-0.5.0 → saia_python-0.7.0}/saia_python/exceptions.py +0 -0
  30. {saia_python-0.5.0 → saia_python-0.7.0}/saia_python/models.py +0 -0
  31. {saia_python-0.5.0 → saia_python-0.7.0}/saia_python/openai_compat.py +0 -0
  32. {saia_python-0.5.0 → saia_python-0.7.0}/saia_python/py.typed +0 -0
  33. {saia_python-0.5.0 → saia_python-0.7.0}/saia_python/rate_limits.py +0 -0
  34. {saia_python-0.5.0 → saia_python-0.7.0}/saia_python/responses.py +0 -0
  35. {saia_python-0.5.0 → saia_python-0.7.0}/saia_python.egg-info/dependency_links.txt +0 -0
  36. {saia_python-0.5.0 → saia_python-0.7.0}/saia_python.egg-info/top_level.txt +0 -0
  37. {saia_python-0.5.0 → saia_python-0.7.0}/setup.cfg +0 -0
  38. {saia_python-0.5.0 → saia_python-0.7.0}/tests/test_arcana_references.py +0 -0
  39. {saia_python-0.5.0 → saia_python-0.7.0}/tests/test_client.py +0 -0
  40. {saia_python-0.5.0 → saia_python-0.7.0}/tests/test_exceptions.py +0 -0
  41. {saia_python-0.5.0 → saia_python-0.7.0}/tests/test_health_check.py +0 -0
  42. {saia_python-0.5.0 → saia_python-0.7.0}/tests/test_models.py +0 -0
  43. {saia_python-0.5.0 → saia_python-0.7.0}/tests/test_openai_compat.py +0 -0
  44. {saia_python-0.5.0 → saia_python-0.7.0}/tests/test_rate_limits.py +0 -0
  45. {saia_python-0.5.0 → saia_python-0.7.0}/tests/test_responses.py +0 -0
  46. {saia_python-0.5.0 → saia_python-0.7.0}/tests/test_setup_from_directory.py +0 -0
  47. {saia_python-0.5.0 → saia_python-0.7.0}/tests/test_streaming.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: saia-python
3
- Version: 0.5.0
3
+ Version: 0.7.0
4
4
  Summary: Python wrapper for the GWDG SAIA platform REST API
5
5
  Author: Friedrich Schwarz
6
6
  License-Expression: AGPL-3.0-only
@@ -31,6 +31,11 @@ Requires-Dist: tqdm>=4.60
31
31
  Requires-Dist: tomlkit>=0.12
32
32
  Provides-Extra: openai
33
33
  Requires-Dist: openai>=1.0; extra == "openai"
34
+ Provides-Extra: tokenizer
35
+ Requires-Dist: transformers>=4.40; extra == "tokenizer"
36
+ Requires-Dist: huggingface-hub>=0.20; extra == "tokenizer"
37
+ Requires-Dist: tiktoken>=0.5; extra == "tokenizer"
38
+ Requires-Dist: sentencepiece>=0.1.99; extra == "tokenizer"
34
39
  Provides-Extra: test
35
40
  Requires-Dist: pytest>=7.0; extra == "test"
36
41
  Requires-Dist: pytest-cov>=4.0; extra == "test"
@@ -117,6 +122,7 @@ chat_completion(model="meta-llama-3.1-8b-instruct", messages=[...])
117
122
  | **ARCANA** | RAG — knowledge base management and retrieval-augmented chat | [ARCANA](https://docs.hpc.gwdg.de/services/ai-services/arcana/index.html) |
118
123
  | **Documents** | PDF/document conversion via Docling | [SAIA API](https://docs.hpc.gwdg.de/services/ai-services/saia/index.html) |
119
124
  | **Models** | List available models, probe tool-calling support | [SAIA API](https://docs.hpc.gwdg.de/services/ai-services/saia/index.html) |
125
+ | **Tokenizers** | Download model tokenizers; count chat-template tokens, special-token overhead, and subword fertility (opt-in `[tokenizer]` extra) | [Chat AI Models](https://docs.hpc.gwdg.de/services/ai-services/chat-ai/models/index.html) |
120
126
  | **Rate Limits** | Inspect current quota and usage | [SAIA API](https://docs.hpc.gwdg.de/services/ai-services/saia/index.html) |
121
127
 
122
128
  ## Repository Structure
@@ -130,6 +136,7 @@ saia-python/
130
136
  │ ├── voice.py # VoiceService — transcribe + translate
131
137
  │ ├── arcana.py # ArcanaService — RAG / knowledge bases
132
138
  │ ├── models.py # ModelsService — list available models
139
+ │ ├── tokenizer.py # Tokenizers — download, chat-template token counting
133
140
  │ ├── documents.py # DocumentService — Docling conversion
134
141
  │ ├── openai_compat.py # OpenAI SDK compatibility layer
135
142
  │ ├── auth.py # Credential and config discovery
@@ -65,6 +65,7 @@ chat_completion(model="meta-llama-3.1-8b-instruct", messages=[...])
65
65
  | **ARCANA** | RAG — knowledge base management and retrieval-augmented chat | [ARCANA](https://docs.hpc.gwdg.de/services/ai-services/arcana/index.html) |
66
66
  | **Documents** | PDF/document conversion via Docling | [SAIA API](https://docs.hpc.gwdg.de/services/ai-services/saia/index.html) |
67
67
  | **Models** | List available models, probe tool-calling support | [SAIA API](https://docs.hpc.gwdg.de/services/ai-services/saia/index.html) |
68
+ | **Tokenizers** | Download model tokenizers; count chat-template tokens, special-token overhead, and subword fertility (opt-in `[tokenizer]` extra) | [Chat AI Models](https://docs.hpc.gwdg.de/services/ai-services/chat-ai/models/index.html) |
68
69
  | **Rate Limits** | Inspect current quota and usage | [SAIA API](https://docs.hpc.gwdg.de/services/ai-services/saia/index.html) |
69
70
 
70
71
  ## Repository Structure
@@ -78,6 +79,7 @@ saia-python/
78
79
  │ ├── voice.py # VoiceService — transcribe + translate
79
80
  │ ├── arcana.py # ArcanaService — RAG / knowledge bases
80
81
  │ ├── models.py # ModelsService — list available models
82
+ │ ├── tokenizer.py # Tokenizers — download, chat-template token counting
81
83
  │ ├── documents.py # DocumentService — Docling conversion
82
84
  │ ├── openai_compat.py # OpenAI SDK compatibility layer
83
85
  │ ├── auth.py # Credential and config discovery
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "saia-python"
7
- version = "0.5.0"
7
+ version = "0.7.0"
8
8
  description = "Python wrapper for the GWDG SAIA platform REST API"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.10"
@@ -62,6 +62,16 @@ saia_python = ["py.typed"]
62
62
  openai = [
63
63
  "openai>=1.0",
64
64
  ]
65
+ tokenizer = [
66
+ # AutoTokenizer + chat-template (apply_chat_template) engine
67
+ "transformers>=4.40",
68
+ # downloads tokenizer files (snapshot_download) to the local cache
69
+ "huggingface-hub>=0.20",
70
+ # byte-pair encodings for the externally hosted OpenAI models
71
+ "tiktoken>=0.5",
72
+ # slow-tokenizer / SentencePiece backing for several of the models
73
+ "sentencepiece>=0.1.99",
74
+ ]
65
75
  test = [
66
76
  "pytest>=7.0",
67
77
  "pytest-cov>=4.0",
@@ -17,6 +17,7 @@ from __future__ import annotations
17
17
  import concurrent.futures
18
18
  from importlib.metadata import PackageNotFoundError, version
19
19
 
20
+ from ._http import RetryPolicy
20
21
  from ._streaming import SSEStream
21
22
  from .arcana_references import (
22
23
  ArcanaReference,
@@ -36,11 +37,34 @@ from .auth import (
36
37
  resolve_base_url,
37
38
  )
38
39
  from .client import SAIAClient
39
- from .documents import ConversionResult
40
+ from .documents import ConversionImage, ConversionResult
40
41
  from .exceptions import APIError, AuthenticationError, RateLimitError, SAIAError
41
42
  from .openai_compat import create_openai_client
42
43
  from .rate_limits import RateLimitInfo, parse_rate_limits
43
44
  from .responses import text_of
45
+ from .tokenizer import (
46
+ DEFAULT_TOKENIZER_DIR,
47
+ GWDG_MODEL_REPOS,
48
+ OPENAI_TIKTOKEN_ENCODINGS,
49
+ ChatTokenCount,
50
+ FileTokenCount,
51
+ GatedRepoAccessError,
52
+ TokenDistribution,
53
+ TokenizerService,
54
+ available_open_models,
55
+ chat_template_length,
56
+ chat_template_tokens,
57
+ count_tiktoken_tokens,
58
+ download_all_tokenizers,
59
+ download_tokenizer,
60
+ load_hf_token,
61
+ load_tokenizer,
62
+ repo_url,
63
+ resolve_repo,
64
+ special_token_overhead,
65
+ subword_fertility,
66
+ token_distribution,
67
+ )
44
68
 
45
69
  try:
46
70
  __version__ = version("saia-python")
@@ -55,6 +79,7 @@ __all__ = [
55
79
  "resolve_base_url",
56
80
  "DEFAULT_BASE_URL",
57
81
  "create_openai_client",
82
+ "RetryPolicy",
58
83
  # Auth
59
84
  "load_api_key",
60
85
  "load_arcana_ids",
@@ -79,6 +104,28 @@ __all__ = [
79
104
  "parse_arcana_references",
80
105
  "parse_reference_entries",
81
106
  "is_arcana_event",
107
+ # Tokenizers ([tokenizer] extra)
108
+ "GWDG_MODEL_REPOS",
109
+ "OPENAI_TIKTOKEN_ENCODINGS",
110
+ "DEFAULT_TOKENIZER_DIR",
111
+ "ChatTokenCount",
112
+ "FileTokenCount",
113
+ "TokenDistribution",
114
+ "TokenizerService",
115
+ "GatedRepoAccessError",
116
+ "available_open_models",
117
+ "resolve_repo",
118
+ "repo_url",
119
+ "load_hf_token",
120
+ "download_tokenizer",
121
+ "download_all_tokenizers",
122
+ "load_tokenizer",
123
+ "chat_template_tokens",
124
+ "chat_template_length",
125
+ "special_token_overhead",
126
+ "subword_fertility",
127
+ "count_tiktoken_tokens",
128
+ "token_distribution",
82
129
  # Functional API
83
130
  "list_models",
84
131
  "list_model_ids",
@@ -92,6 +139,7 @@ __all__ = [
92
139
  "get_rate_limits",
93
140
  "convert_document",
94
141
  "ConversionResult",
142
+ "ConversionImage",
95
143
  ]
96
144
 
97
145
 
@@ -0,0 +1,243 @@
1
+ """Shared HTTP plumbing used by more than one service.
2
+
3
+ Kept in one place so the chat-completion request shape and the
4
+ background-thread ``Session`` helper each have a single implementation,
5
+ rather than being copied across :mod:`saia_python.chat`,
6
+ :mod:`saia_python.arcana`, and :mod:`saia_python.voice`.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import logging
12
+ import random
13
+ import time
14
+ from collections.abc import Callable
15
+ from dataclasses import dataclass
16
+
17
+ import requests
18
+
19
+ from ._streaming import SSEStream
20
+ from .exceptions import raise_for_status
21
+ from .rate_limits import RateLimitInfo, parse_rate_limits
22
+
23
+ log = logging.getLogger(__name__)
24
+
25
+ # Default ``(connect, read)`` timeout in seconds for ARCANA management
26
+ # ("control-plane") requests that do not pass their own. A plain
27
+ # :class:`requests.Session` has NO default timeout, so a request the server
28
+ # accepts but never answers — common while an arcana is locked mid-(re)index —
29
+ # blocks forever on the socket read. Long-running "data-plane" calls (chat
30
+ # completions, voice transcription, document conversion) deliberately do not
31
+ # inherit this cap, since they can legitimately run for minutes.
32
+ DEFAULT_TIMEOUT: tuple[float, float] = (10.0, 60.0)
33
+
34
+
35
+ @dataclass
36
+ class RetryPolicy:
37
+ """Transport-layer policy for HTTP 429 (rate-limit) responses.
38
+
39
+ Applied at the session dispatch seam (:func:`execute`); ON by default and
40
+ scoped to idempotent calls. See ``docs/proposals/rate-limit-handling.md``.
41
+
42
+ Attributes:
43
+ on_rate_limit: Master switch. When ``False`` a 429 is never retried —
44
+ it propagates as :class:`~saia_python.RateLimitError`, i.e. today's
45
+ behavior.
46
+ max_retries: Maximum reset-driven retries (the minute window).
47
+ max_waiting_time: The longest a single wait may block, in seconds
48
+ (default 60). A reset further out than this fails fast (raises)
49
+ rather than blocking; settable per client.
50
+ fallback_wait: Seconds to wait when the server sends no reset hint.
51
+ fallback_max_retries: How many times the blind fallback is tried.
52
+ jitter: ``(low, high)`` seconds added to each wait to avoid a
53
+ thundering herd across concurrent workers; ``(0, 0)`` disables it.
54
+ retry_mutations: If ``True``, non-idempotent calls are retried too
55
+ (off by default — replaying a mutation is unsafe in general).
56
+ """
57
+
58
+ on_rate_limit: bool = True
59
+ max_retries: int = 5
60
+ max_waiting_time: float = 60.0
61
+ fallback_wait: float = 31.0
62
+ fallback_max_retries: int = 2
63
+ jitter: tuple[float, float] = (0.0, 2.0)
64
+ retry_mutations: bool = False
65
+
66
+ def applies(self, idempotent: bool) -> bool:
67
+ """Whether a 429 on a call with this idempotency is eligible for retry."""
68
+ return self.on_rate_limit and (idempotent or self.retry_mutations)
69
+
70
+
71
+ def coerce_retry(retry: RetryPolicy | bool | None) -> RetryPolicy:
72
+ """Normalise a ``retry`` argument into a :class:`RetryPolicy`.
73
+
74
+ A :class:`RetryPolicy` is returned unchanged; ``False`` disables retry;
75
+ ``None`` / ``True`` give the defaults (retry on).
76
+ """
77
+ if isinstance(retry, RetryPolicy):
78
+ return retry
79
+ if retry is False:
80
+ return RetryPolicy(on_rate_limit=False)
81
+ return RetryPolicy()
82
+
83
+
84
+ def resolve_retry(
85
+ default: RetryPolicy, override: RetryPolicy | bool | None
86
+ ) -> RetryPolicy:
87
+ """Pick the policy for one call: the per-call ``override`` when given, else
88
+ the service ``default``. ``None`` means "use the default"."""
89
+ return default if override is None else coerce_retry(override)
90
+
91
+
92
+ def _jitter(policy: RetryPolicy) -> float:
93
+ low, high = policy.jitter
94
+ return random.uniform(low, high) if high > low else low
95
+
96
+
97
+ def _plan(info: RateLimitInfo, policy: RetryPolicy, attempt: int) -> float | None:
98
+ """Seconds to wait before the next attempt, or ``None`` to give up (→ raise).
99
+
100
+ Honest about the single ``reset_seconds`` spanning four windows: we wait out
101
+ the window we can time (the minute), but fail fast on a longer window whose
102
+ reset is unknowable — we will not block for ~an hour inside a call.
103
+ """
104
+ # A longer window is exhausted → its reset is not the (minute) reset_seconds.
105
+ for window in ("hour", "day", "month"):
106
+ if getattr(info, f"remaining_{window}") == 0:
107
+ return None
108
+ reset = info.reset_seconds
109
+ if isinstance(reset, (int, float)) and reset > 0:
110
+ if reset > policy.max_waiting_time:
111
+ return None
112
+ return None if attempt >= policy.max_retries else float(reset) + 1.0
113
+ # No usable reset hint → conservative, bounded blind fallback.
114
+ return None if attempt >= policy.fallback_max_retries else policy.fallback_wait
115
+
116
+
117
+ def execute(
118
+ session: requests.Session,
119
+ method: str,
120
+ url: str,
121
+ *,
122
+ policy: RetryPolicy,
123
+ idempotent: bool,
124
+ sleep: Callable[[float], object] = time.sleep,
125
+ **kwargs,
126
+ ) -> requests.Response:
127
+ """Issue a request under a transport policy and return the response.
128
+
129
+ Dispatches ``getattr(session, method)(url, **kwargs)`` (``method`` is the
130
+ lowercase verb, matching the rest of the package). On HTTP 429 — when
131
+ ``policy`` permits (enabled, and the call is idempotent or
132
+ ``retry_mutations``) — it waits per :func:`_plan` and retries. It returns the
133
+ **raw response** unchanged on success *or* on give-up, so the caller's
134
+ :func:`~saia_python.exceptions.raise_for_status` still raises
135
+ :class:`~saia_python.RateLimitError` when retry is off, the budget is spent,
136
+ or the window must not be waited on.
137
+
138
+ Only the status code and headers are inspected — never the body — so
139
+ streaming and non-streaming requests behave identically and a streamed body
140
+ is never consumed. The (possibly streamed) connection is released with
141
+ ``close()`` before each wait.
142
+
143
+ Note:
144
+ A retry re-issues the request with the **same** ``kwargs``, so any file
145
+ payload must be retry-safe (``bytes``, not a one-shot file handle). The
146
+ file-upload callers (voice, documents) pass ``bytes``.
147
+ """
148
+ attempt = 0
149
+ while True:
150
+ resp = getattr(session, method)(url, **kwargs)
151
+ if resp.status_code != 429 or not policy.applies(idempotent):
152
+ return resp
153
+ wait = _plan(parse_rate_limits(resp.headers), policy, attempt)
154
+ if wait is None:
155
+ return resp
156
+ resp.close()
157
+ attempt += 1
158
+ wait += _jitter(policy)
159
+ log.info("SAIA rate limit (429) — waiting %.1fs before retry %d", wait, attempt)
160
+ sleep(wait)
161
+
162
+
163
+ def new_session_like(template: requests.Session) -> requests.Session:
164
+ """Return a fresh :class:`requests.Session` mirroring ``template``'s headers.
165
+
166
+ Background-thread work must not reuse the caller's ``Session`` —
167
+ ``requests.Session`` is not guaranteed thread-safe, and sharing its
168
+ connection pool across threads can corrupt in-flight requests. Both the
169
+ non-blocking Voice path and the fire-and-forget ARCANA index trigger spin
170
+ up their own ``Session`` through this helper so they never race the
171
+ client's.
172
+ """
173
+ session = requests.Session()
174
+ session.headers.update(template.headers)
175
+ return session
176
+
177
+
178
+ def post_chat_completion(
179
+ session: requests.Session,
180
+ url: str,
181
+ body: dict,
182
+ *,
183
+ headers: dict | None = None,
184
+ stream: bool = False,
185
+ policy: RetryPolicy | None = None,
186
+ sleep: Callable[[float], object] = time.sleep,
187
+ ) -> dict | SSEStream:
188
+ """POST a chat-completion request and normalise the response.
189
+
190
+ Shared by :meth:`ChatService.completions` and :meth:`ArcanaService.chat`:
191
+ both hit the same ``/chat/completions`` endpoint with identical
192
+ stream/non-stream handling and rate-limit surfacing — only the request
193
+ ``body`` fields and auth ``headers`` differ, so those stay with the caller.
194
+
195
+ Args:
196
+ session: The authenticated :class:`requests.Session`.
197
+ url: The fully-qualified ``/chat/completions`` URL.
198
+ body: The request JSON body (already assembled by the caller).
199
+ headers: Per-request headers. ``None`` uses the session defaults
200
+ (the Bearer auth + ``Accept: application/json``).
201
+ stream: When ``True``, request SSE and return an :class:`SSEStream`.
202
+ policy: Rate-limit :class:`RetryPolicy`; ``None`` uses the defaults
203
+ (retry on). Chat completions are idempotent, so an initial 429 is
204
+ retried per the policy — and for streaming the retry happens *before*
205
+ the stream is exposed (never mid-stream).
206
+ sleep: Injectable sleep hook (tests pass a recorder so they never block).
207
+
208
+ Returns:
209
+ When ``stream=False``: the response dict with an extra
210
+ ``"_rate_limits"`` key (a JSON-serializable dict). When ``stream=True``:
211
+ an :class:`SSEStream` whose ``rate_limits`` attribute holds the same dict.
212
+ """
213
+ policy = policy if policy is not None else RetryPolicy()
214
+ if stream:
215
+ stream_body = {**body, "stream": True}
216
+ stream_headers = {**(headers or {}), "Accept": "text/event-stream"}
217
+ resp = execute(
218
+ session,
219
+ "post",
220
+ url,
221
+ policy=policy,
222
+ idempotent=True,
223
+ sleep=sleep,
224
+ json=stream_body,
225
+ headers=stream_headers,
226
+ stream=True,
227
+ )
228
+ return SSEStream(resp)
229
+
230
+ resp = execute(
231
+ session,
232
+ "post",
233
+ url,
234
+ policy=policy,
235
+ idempotent=True,
236
+ sleep=sleep,
237
+ json=body,
238
+ headers=headers,
239
+ )
240
+ raise_for_status(resp)
241
+ result = resp.json()
242
+ result["_rate_limits"] = parse_rate_limits(resp.headers).to_dict()
243
+ return result