saia-python 0.6.0__tar.gz → 0.7.0__tar.gz

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (45)
  1. {saia_python-0.6.0/saia_python.egg-info → saia_python-0.7.0}/PKG-INFO +8 -1
  2. {saia_python-0.6.0 → saia_python-0.7.0}/README.md +2 -0
  3. {saia_python-0.6.0 → saia_python-0.7.0}/pyproject.toml +11 -1
  4. {saia_python-0.6.0 → saia_python-0.7.0}/saia_python/__init__.py +45 -0
  5. {saia_python-0.6.0 → saia_python-0.7.0}/saia_python/client.py +15 -0
  6. saia_python-0.7.0/saia_python/tokenizer.py +1546 -0
  7. {saia_python-0.6.0 → saia_python-0.7.0/saia_python.egg-info}/PKG-INFO +8 -1
  8. {saia_python-0.6.0 → saia_python-0.7.0}/saia_python.egg-info/SOURCES.txt +2 -0
  9. {saia_python-0.6.0 → saia_python-0.7.0}/saia_python.egg-info/requires.txt +6 -0
  10. saia_python-0.7.0/tests/test_tokenizer.py +656 -0
  11. {saia_python-0.6.0 → saia_python-0.7.0}/LICENSE +0 -0
  12. {saia_python-0.6.0 → saia_python-0.7.0}/saia_python/_http.py +0 -0
  13. {saia_python-0.6.0 → saia_python-0.7.0}/saia_python/_streaming.py +0 -0
  14. {saia_python-0.6.0 → saia_python-0.7.0}/saia_python/_util.py +0 -0
  15. {saia_python-0.6.0 → saia_python-0.7.0}/saia_python/arcana.py +0 -0
  16. {saia_python-0.6.0 → saia_python-0.7.0}/saia_python/arcana_references.py +0 -0
  17. {saia_python-0.6.0 → saia_python-0.7.0}/saia_python/auth.py +0 -0
  18. {saia_python-0.6.0 → saia_python-0.7.0}/saia_python/chat.py +0 -0
  19. {saia_python-0.6.0 → saia_python-0.7.0}/saia_python/documents.py +0 -0
  20. {saia_python-0.6.0 → saia_python-0.7.0}/saia_python/exceptions.py +0 -0
  21. {saia_python-0.6.0 → saia_python-0.7.0}/saia_python/models.py +0 -0
  22. {saia_python-0.6.0 → saia_python-0.7.0}/saia_python/openai_compat.py +0 -0
  23. {saia_python-0.6.0 → saia_python-0.7.0}/saia_python/py.typed +0 -0
  24. {saia_python-0.6.0 → saia_python-0.7.0}/saia_python/rate_limits.py +0 -0
  25. {saia_python-0.6.0 → saia_python-0.7.0}/saia_python/responses.py +0 -0
  26. {saia_python-0.6.0 → saia_python-0.7.0}/saia_python/voice.py +0 -0
  27. {saia_python-0.6.0 → saia_python-0.7.0}/saia_python.egg-info/dependency_links.txt +0 -0
  28. {saia_python-0.6.0 → saia_python-0.7.0}/saia_python.egg-info/top_level.txt +0 -0
  29. {saia_python-0.6.0 → saia_python-0.7.0}/setup.cfg +0 -0
  30. {saia_python-0.6.0 → saia_python-0.7.0}/tests/test_arcana.py +0 -0
  31. {saia_python-0.6.0 → saia_python-0.7.0}/tests/test_arcana_references.py +0 -0
  32. {saia_python-0.6.0 → saia_python-0.7.0}/tests/test_auth.py +0 -0
  33. {saia_python-0.6.0 → saia_python-0.7.0}/tests/test_chat.py +0 -0
  34. {saia_python-0.6.0 → saia_python-0.7.0}/tests/test_client.py +0 -0
  35. {saia_python-0.6.0 → saia_python-0.7.0}/tests/test_documents.py +0 -0
  36. {saia_python-0.6.0 → saia_python-0.7.0}/tests/test_exceptions.py +0 -0
  37. {saia_python-0.6.0 → saia_python-0.7.0}/tests/test_health_check.py +0 -0
  38. {saia_python-0.6.0 → saia_python-0.7.0}/tests/test_models.py +0 -0
  39. {saia_python-0.6.0 → saia_python-0.7.0}/tests/test_openai_compat.py +0 -0
  40. {saia_python-0.6.0 → saia_python-0.7.0}/tests/test_rate_limits.py +0 -0
  41. {saia_python-0.6.0 → saia_python-0.7.0}/tests/test_responses.py +0 -0
  42. {saia_python-0.6.0 → saia_python-0.7.0}/tests/test_setup_from_directory.py +0 -0
  43. {saia_python-0.6.0 → saia_python-0.7.0}/tests/test_streaming.py +0 -0
  44. {saia_python-0.6.0 → saia_python-0.7.0}/tests/test_transport_policy.py +0 -0
  45. {saia_python-0.6.0 → saia_python-0.7.0}/tests/test_voice.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: saia-python
3
- Version: 0.6.0
3
+ Version: 0.7.0
4
4
  Summary: Python wrapper for the GWDG SAIA platform REST API
5
5
  Author: Friedrich Schwarz
6
6
  License-Expression: AGPL-3.0-only
@@ -31,6 +31,11 @@ Requires-Dist: tqdm>=4.60
31
31
  Requires-Dist: tomlkit>=0.12
32
32
  Provides-Extra: openai
33
33
  Requires-Dist: openai>=1.0; extra == "openai"
34
+ Provides-Extra: tokenizer
35
+ Requires-Dist: transformers>=4.40; extra == "tokenizer"
36
+ Requires-Dist: huggingface-hub>=0.20; extra == "tokenizer"
37
+ Requires-Dist: tiktoken>=0.5; extra == "tokenizer"
38
+ Requires-Dist: sentencepiece>=0.1.99; extra == "tokenizer"
34
39
  Provides-Extra: test
35
40
  Requires-Dist: pytest>=7.0; extra == "test"
36
41
  Requires-Dist: pytest-cov>=4.0; extra == "test"
@@ -117,6 +122,7 @@ chat_completion(model="meta-llama-3.1-8b-instruct", messages=[...])
117
122
  | **ARCANA** | RAG — knowledge base management and retrieval-augmented chat | [ARCANA](https://docs.hpc.gwdg.de/services/ai-services/arcana/index.html) |
118
123
  | **Documents** | PDF/document conversion via Docling | [SAIA API](https://docs.hpc.gwdg.de/services/ai-services/saia/index.html) |
119
124
  | **Models** | List available models, probe tool-calling support | [SAIA API](https://docs.hpc.gwdg.de/services/ai-services/saia/index.html) |
125
+ | **Tokenizers** | Download model tokenizers; count chat-template tokens, special-token overhead, and subword fertility (opt-in `[tokenizer]` extra) | [Chat AI Models](https://docs.hpc.gwdg.de/services/ai-services/chat-ai/models/index.html) |
120
126
  | **Rate Limits** | Inspect current quota and usage | [SAIA API](https://docs.hpc.gwdg.de/services/ai-services/saia/index.html) |
121
127
 
122
128
  ## Repository Structure
@@ -130,6 +136,7 @@ saia-python/
130
136
  │ ├── voice.py # VoiceService — transcribe + translate
131
137
  │ ├── arcana.py # ArcanaService — RAG / knowledge bases
132
138
  │ ├── models.py # ModelsService — list available models
139
+ │ ├── tokenizer.py # Tokenizers — download, chat-template token counting
133
140
  │ ├── documents.py # DocumentService — Docling conversion
134
141
  │ ├── openai_compat.py # OpenAI SDK compatibility layer
135
142
  │ ├── auth.py # Credential and config discovery
@@ -65,6 +65,7 @@ chat_completion(model="meta-llama-3.1-8b-instruct", messages=[...])
65
65
  | **ARCANA** | RAG — knowledge base management and retrieval-augmented chat | [ARCANA](https://docs.hpc.gwdg.de/services/ai-services/arcana/index.html) |
66
66
  | **Documents** | PDF/document conversion via Docling | [SAIA API](https://docs.hpc.gwdg.de/services/ai-services/saia/index.html) |
67
67
  | **Models** | List available models, probe tool-calling support | [SAIA API](https://docs.hpc.gwdg.de/services/ai-services/saia/index.html) |
68
+ | **Tokenizers** | Download model tokenizers; count chat-template tokens, special-token overhead, and subword fertility (opt-in `[tokenizer]` extra) | [Chat AI Models](https://docs.hpc.gwdg.de/services/ai-services/chat-ai/models/index.html) |
68
69
  | **Rate Limits** | Inspect current quota and usage | [SAIA API](https://docs.hpc.gwdg.de/services/ai-services/saia/index.html) |
69
70
 
70
71
  ## Repository Structure
@@ -78,6 +79,7 @@ saia-python/
78
79
  │ ├── voice.py # VoiceService — transcribe + translate
79
80
  │ ├── arcana.py # ArcanaService — RAG / knowledge bases
80
81
  │ ├── models.py # ModelsService — list available models
82
+ │ ├── tokenizer.py # Tokenizers — download, chat-template token counting
81
83
  │ ├── documents.py # DocumentService — Docling conversion
82
84
  │ ├── openai_compat.py # OpenAI SDK compatibility layer
83
85
  │ ├── auth.py # Credential and config discovery
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "saia-python"
7
- version = "0.6.0"
7
+ version = "0.7.0"
8
8
  description = "Python wrapper for the GWDG SAIA platform REST API"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.10"
@@ -62,6 +62,16 @@ saia_python = ["py.typed"]
62
62
  openai = [
63
63
  "openai>=1.0",
64
64
  ]
65
+ tokenizer = [
66
+ # AutoTokenizer + chat-template (apply_chat_template) engine
67
+ "transformers>=4.40",
68
+ # downloads tokenizer files (snapshot_download) to the local cache
69
+ "huggingface-hub>=0.20",
70
+ # byte-pair encodings for the externally hosted OpenAI models
71
+ "tiktoken>=0.5",
72
+ # slow-tokenizer / SentencePiece backing for several of the models
73
+ "sentencepiece>=0.1.99",
74
+ ]
65
75
  test = [
66
76
  "pytest>=7.0",
67
77
  "pytest-cov>=4.0",
@@ -42,6 +42,29 @@ from .exceptions import APIError, AuthenticationError, RateLimitError, SAIAError
42
42
  from .openai_compat import create_openai_client
43
43
  from .rate_limits import RateLimitInfo, parse_rate_limits
44
44
  from .responses import text_of
45
+ from .tokenizer import (
46
+ DEFAULT_TOKENIZER_DIR,
47
+ GWDG_MODEL_REPOS,
48
+ OPENAI_TIKTOKEN_ENCODINGS,
49
+ ChatTokenCount,
50
+ FileTokenCount,
51
+ GatedRepoAccessError,
52
+ TokenDistribution,
53
+ TokenizerService,
54
+ available_open_models,
55
+ chat_template_length,
56
+ chat_template_tokens,
57
+ count_tiktoken_tokens,
58
+ download_all_tokenizers,
59
+ download_tokenizer,
60
+ load_hf_token,
61
+ load_tokenizer,
62
+ repo_url,
63
+ resolve_repo,
64
+ special_token_overhead,
65
+ subword_fertility,
66
+ token_distribution,
67
+ )
45
68
 
46
69
  try:
47
70
  __version__ = version("saia-python")
@@ -81,6 +104,28 @@ __all__ = [
81
104
  "parse_arcana_references",
82
105
  "parse_reference_entries",
83
106
  "is_arcana_event",
107
+ # Tokenizers ([tokenizer] extra)
108
+ "GWDG_MODEL_REPOS",
109
+ "OPENAI_TIKTOKEN_ENCODINGS",
110
+ "DEFAULT_TOKENIZER_DIR",
111
+ "ChatTokenCount",
112
+ "FileTokenCount",
113
+ "TokenDistribution",
114
+ "TokenizerService",
115
+ "GatedRepoAccessError",
116
+ "available_open_models",
117
+ "resolve_repo",
118
+ "repo_url",
119
+ "load_hf_token",
120
+ "download_tokenizer",
121
+ "download_all_tokenizers",
122
+ "load_tokenizer",
123
+ "chat_template_tokens",
124
+ "chat_template_length",
125
+ "special_token_overhead",
126
+ "subword_fertility",
127
+ "count_tiktoken_tokens",
128
+ "token_distribution",
84
129
  # Functional API
85
130
  "list_models",
86
131
  "list_model_ids",
@@ -12,6 +12,7 @@ from .documents import DocumentService
12
12
  from .exceptions import raise_for_status
13
13
  from .models import ModelsService
14
14
  from .rate_limits import RateLimitInfo, parse_rate_limits
15
+ from .tokenizer import TokenizerService
15
16
  from .voice import VoiceService
16
17
 
17
18
 
@@ -80,6 +81,7 @@ class SAIAClient:
80
81
  self._models: ModelsService | None = None
81
82
  self._arcana: ArcanaService | None = None
82
83
  self._documents: DocumentService | None = None
84
+ self._tokenizers: TokenizerService | None = None
83
85
  self._openai = None
84
86
  self._openai_async = None
85
87
 
@@ -106,6 +108,19 @@ class SAIAClient:
106
108
  )
107
109
  return self._models
108
110
 
111
+ @property
112
+ def tokenizers(self) -> TokenizerService:
113
+ """Tokenizer service for the open-weight models.
114
+
115
+ Loads model tokenizers, counts chat-template tokens, and annotates the
116
+ live model list with Hugging Face repositories. Requires the optional
117
+ ``[tokenizer]`` extra (``pip install saia-python[tokenizer]``) for the
118
+ download/load operations; the repository annotations work without it.
119
+ """
120
+ if self._tokenizers is None:
121
+ self._tokenizers = TokenizerService(self.models)
122
+ return self._tokenizers
123
+
109
124
  @property
110
125
  def arcana(self) -> ArcanaService:
111
126
  """ARCANA/RAG service."""