saia-python 0.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- saia_python/__init__.py +253 -0
- saia_python/_http.py +71 -0
- saia_python/_streaming.py +88 -0
- saia_python/_util.py +29 -0
- saia_python/arcana.py +1061 -0
- saia_python/arcana_references.py +182 -0
- saia_python/auth.py +515 -0
- saia_python/chat.py +72 -0
- saia_python/client.py +239 -0
- saia_python/documents.py +145 -0
- saia_python/exceptions.py +68 -0
- saia_python/models.py +146 -0
- saia_python/openai_compat.py +70 -0
- saia_python/py.typed +0 -0
- saia_python/rate_limits.py +84 -0
- saia_python/responses.py +70 -0
- saia_python/voice.py +175 -0
- saia_python-0.4.1.dist-info/METADATA +190 -0
- saia_python-0.4.1.dist-info/RECORD +22 -0
- saia_python-0.4.1.dist-info/WHEEL +5 -0
- saia_python-0.4.1.dist-info/licenses/LICENSE +661 -0
- saia_python-0.4.1.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,182 @@
|
|
|
1
|
+
"""Parse GWDG ARCANA reference blocks from chat-completion content.
|
|
2
|
+
|
|
3
|
+
When a request is routed through GWDG SAIA's ARCANA (RAG) gateway, the
|
|
4
|
+
gateway appends a verbose ``References:`` block to the assistant's reply —
|
|
5
|
+
one ``[RREFn] <filename>.md (<distance>)`` line per retrieved chunk,
|
|
6
|
+
followed by that chunk's body. This module turns that GWDG-specific wire
|
|
7
|
+
shape into structured data, leaving *rendering* and *filename
|
|
8
|
+
interpretation* to the caller: a filename's meaning depends on your own
|
|
9
|
+
corpus, and presentation depends on your own UI, so neither belongs here.
|
|
10
|
+
|
|
11
|
+
The module is **pure and dependency-free** (no HTTP, no I/O), so it is safe
|
|
12
|
+
to import into any environment — including an async server — without
|
|
13
|
+
pulling in a transport layer.
|
|
14
|
+
|
|
15
|
+
Example::
|
|
16
|
+
|
|
17
|
+
from saia_python import parse_arcana_references
|
|
18
|
+
|
|
19
|
+
parsed = parse_arcana_references(message_content)
|
|
20
|
+
if parsed.matched:
|
|
21
|
+
print(parsed.prose) # the answer, References block removed
|
|
22
|
+
for ref in parsed.references: # structured citations
|
|
23
|
+
print(ref.n, ref.filename, ref.distance)
|
|
24
|
+
"""
|
|
25
|
+
|
|
26
|
+
from __future__ import annotations
|
|
27
|
+
|
|
28
|
+
import re
|
|
29
|
+
from dataclasses import dataclass, field
|
|
30
|
+
|
|
31
|
+
# Names exported by ``from ... import *``: the two result dataclasses, the
# parsing helpers, the tool-call predicate, and the regex/length constants.
__all__ = [
    "ArcanaReference",
    "ParsedReferences",
    "parse_arcana_references",
    "parse_reference_entries",
    "is_arcana_event",
    "REFERENCES_MARKER_RE",
    "REFERENCES_ENTRY_RE",
    "REFERENCES_MARKER_MAX_LEN",
]
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
# The gateway appends the dump as a paragraph-isolated ``References:`` line
# immediately followed by the first ``[RREFn]`` entry. We anchor on the
# stable suffix ``\nReferences:\s*\n+\s*[RREF<digits>]`` and stay permissive on
# whatever precedes it (a horizontal rule, blank lines, both, or neither).
# Note the leading ``\n``: a ``References:`` line at offset 0 of the content
# (nothing before it) does not match this pattern.
REFERENCES_MARKER_RE = re.compile(r"\nReferences:\s*\n+\s*\[RREF\d+\]")

# Longest plausible wire form of the marker (e.g. ``\nReferences:\n\n\n[RREF999]``).
# Streaming consumers use this to size a lag buffer so the marker is still
# detected when it straddles SSE chunk boundaries.
REFERENCES_MARKER_MAX_LEN = 40

# One per-entry header line, e.g.::
#     [RREF1] onkopedia_dlbcl_5-1-1_immunchemotherapie__ID0E.md (0.319)
# Named groups: ``n`` (digits), ``filename`` (a whitespace-free token ending in
# ``.md``) and ``distance`` (digits and dots). ``re.MULTILINE`` makes ``^`` and
# ``$`` anchor on every line, so the header must start at column 0 of its line.
REFERENCES_ENTRY_RE = re.compile(
    r"^\[RREF(?P<n>\d+)\]\s+(?P<filename>\S+\.md)\s+\((?P<distance>[\d.]+)\)\s*$",
    re.MULTILINE,
)
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
@dataclass(frozen=True)
class ArcanaReference:
    """One citation header extracted from an ARCANA ``References:`` block.

    Instances are immutable value objects; two references compare equal when
    all three fields are equal.

    Attributes:
        n: Numeric part of the ``[RREFn]`` tag as written by the gateway.
        filename: Source filename of the retrieved chunk, exactly as it
            appears on the wire. Mapping it to a label or URL is left to the
            caller because that depends on the caller's corpus.
        distance: Retrieval distance taken from the parenthesised suffix, or
            ``None`` when that text is not a valid float.
    """

    n: int
    filename: str
    distance: float | None = None
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
@dataclass(frozen=True)
class ParsedReferences:
    """Outcome of splitting an assistant reply at the ``References:`` marker.

    Attributes:
        matched: ``True`` when a ``References:`` block was located.
        prose: Everything in front of the marker. When ``matched`` is
            ``False`` this is the untouched input. A horizontal-rule line the
            gateway may emit just before ``References:`` is *not* removed —
            trimming it is a presentation decision for the caller.
        references: The citations found in the block, de-duplicated and
            sorted by ``n``.
        references_block: The raw text from the marker to the end of the
            input; the empty string when ``matched`` is ``False``.
    """

    matched: bool
    prose: str
    # default_factory gives every instance its own list instead of a shared one
    references: list[ArcanaReference] = field(default_factory=list)
    references_block: str = ""
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def parse_reference_entries(
    block: str, *, dedupe: bool = True
) -> list[ArcanaReference]:
    """Extract every ``[RREFn] <filename>.md (<distance>)`` header in ``block``.

    Args:
        block: Any text that may contain reference header lines — normally
            the References block. Lines that do not match are skipped.
        dedupe: With ``True`` (the default) each filename is reported once,
            keeping the entry with the smallest ``n``, and the result is
            sorted by ``n``. The gateway emits one header per retrieved
            chunk, so the same document can show up repeatedly. With
            ``False`` all matches come back in the order they occur.

    Returns:
        A list of :class:`ArcanaReference`.
    """
    parsed: list[ArcanaReference] = []
    for hit in REFERENCES_ENTRY_RE.finditer(block):
        try:
            number = int(hit.group("n"))
        except (TypeError, ValueError):
            # No usable reference number: drop the line entirely.
            continue
        try:
            score: float | None = float(hit.group("distance"))
        except (TypeError, ValueError):
            # The pattern admits things like "1.2.3"; keep the entry, lose the score.
            score = None
        parsed.append(
            ArcanaReference(n=number, filename=hit.group("filename"), distance=score)
        )

    if dedupe:
        lowest: dict[str, ArcanaReference] = {}
        for entry in parsed:
            kept = lowest.get(entry.filename)
            if kept is not None and kept.n <= entry.n:
                continue
            lowest[entry.filename] = entry
        return sorted(lowest.values(), key=lambda r: r.n)

    return parsed
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def parse_arcana_references(content: str) -> ParsedReferences:
    """Separate an assistant reply into prose and parsed citations.

    The text is cut at the first GWDG ``References:`` marker. The part in
    front of the cut becomes ``prose``; the part from the cut to the end is
    kept verbatim as ``references_block`` and also parsed into
    :class:`ArcanaReference` entries. When the marker is absent nothing is
    altered: the whole input is returned as ``prose`` with ``matched=False``.

    Args:
        content: The assistant message content from an ARCANA-routed reply.

    Returns:
        A :class:`ParsedReferences`.
    """
    marker = REFERENCES_MARKER_RE.search(content)
    if marker is not None:
        # The match begins at the newline preceding "References:", so that
        # newline belongs to the block, not to the prose.
        split_at = marker.start()
        tail = content[split_at:]
        return ParsedReferences(
            matched=True,
            prose=content[:split_at],
            references=parse_reference_entries(tail),
            references_block=tail,
        )
    return ParsedReferences(matched=False, prose=content)
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
def is_arcana_event(tool_call: dict) -> bool:
    """Return ``True`` if ``tool_call`` is a GWDG ``arcana.event`` beacon.

    GWDG ARCANA streams retrieval-lifecycle markers (``accessing`` /
    ``done``) as ``tool_calls`` whose function name starts with
    ``arcana.event``. These are status signals, not real citations (the
    actual references are baked into the message content as ``[RREFn]``
    markers), so consumers typically filter them out before rendering.

    Malformed payloads never raise: if ``function`` is missing, ``None`` or
    not mapping-like, or if ``name`` is missing or not a string, the result
    is simply ``False``.

    Args:
        tool_call: A tool-call mapping; only ``tool_call["function"]["name"]``
            is inspected.

    Returns:
        ``True`` when the function name starts with ``arcana.event``.
    """
    function = tool_call.get("function")
    # Duck-type the nested object: anything without a callable ``.get`` (None,
    # a string, a list, ...) cannot carry a function name.
    lookup = getattr(function, "get", None)
    if not callable(lookup):
        return False
    name = lookup("name")
    return isinstance(name, str) and name.startswith("arcana.event")
|
saia_python/auth.py
ADDED
|
@@ -0,0 +1,515 @@
|
|
|
1
|
+
"""API key, ARCANA ID, and configuration discovery.
|
|
2
|
+
|
|
3
|
+
Configuration is split across two files:
|
|
4
|
+
|
|
5
|
+
- ``.env`` — secrets only (API key, optionally a single default ARCANA ID)
|
|
6
|
+
- ``config.toml`` — structured settings (username, base URL, ARCANA IDs, etc.)
|
|
7
|
+
|
|
8
|
+
Both files are searched in the current working directory, then the home directory.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
import os
|
|
14
|
+
import re
|
|
15
|
+
from pathlib import Path
|
|
16
|
+
|
|
17
|
+
import tomlkit
|
|
18
|
+
|
|
19
|
+
# Name of the environment variable / ``.env`` key that carries the API key.
_ENV_VAR = "SAIA_API_KEY"
# File name of the raw key file (the key alone on a line).
_KEY_FILE = ".saia_api"
# File name of the dotenv file looked up in the search directories.
_ENV_FILE = ".env"
# File name of the TOML settings file looked up in the search directories.
_CONFIG_FILE = "config.toml"

# Environment variable / ``.env`` key holding the username.
_USERNAME_VAR = "SAIA_USERNAME"
# Environment variable / ``.env`` key holding the single default ARCANA ID.
_ARCANA_ID_VAR = "SAIA_ARCANA_ID"
# Matches suffixed variants such as ``SAIA_ARCANA_ID_01``; group 1 is the suffix.
_ARCANA_ID_PATTERN = re.compile(r"^SAIA_ARCANA_ID_(\w+)$")

# The SAIA API base URL used when none is configured explicitly.
DEFAULT_BASE_URL = "https://chat-ai.academiccloud.de/v1"
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _search_dirs() -> list[Path]:
|
|
33
|
+
"""Return directories to search, evaluated at call time."""
|
|
34
|
+
return [Path.cwd(), Path.home()]
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
# ---------------------------------------------------------------------------
|
|
38
|
+
# .env parsing
|
|
39
|
+
# ---------------------------------------------------------------------------
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def _parse_dotenv(path: Path) -> dict[str, str]:
|
|
43
|
+
"""Parse a .env file into a dict of all KEY=value pairs."""
|
|
44
|
+
result: dict[str, str] = {}
|
|
45
|
+
try:
|
|
46
|
+
content = path.read_text(encoding="utf-8")
|
|
47
|
+
except (OSError, UnicodeDecodeError):
|
|
48
|
+
return result
|
|
49
|
+
for line in content.splitlines():
|
|
50
|
+
line = line.strip()
|
|
51
|
+
if not line or line.startswith("#") or "=" not in line:
|
|
52
|
+
continue
|
|
53
|
+
key, _, value = line.partition("=")
|
|
54
|
+
result[key.strip()] = value.strip().strip("\"'")
|
|
55
|
+
return result
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def _find_dotenv() -> dict[str, str]:
    """Parse the first ``.env`` that exists in the search directories.

    Only the first existing file is consulted; later directories are not
    merged in. Returns an empty dict when no ``.env`` exists.
    """
    candidates = (folder / _ENV_FILE for folder in _search_dirs())
    first_hit = next((path for path in candidates if path.exists()), None)
    return {} if first_hit is None else _parse_dotenv(first_hit)
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
# ---------------------------------------------------------------------------
|
|
68
|
+
# config.toml parsing
|
|
69
|
+
# ---------------------------------------------------------------------------
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def _load_toml(path: Path) -> tomlkit.TOMLDocument:
    """Parse ``path`` into a tomlkit document.

    tomlkit keeps comments and layout, so the document can be modified and
    written back without disturbing the user's formatting.

    Returns:
        The parsed document, or a new empty document when the file cannot be
        read (I/O error or invalid UTF-8).

    Raises:
        ValueError: If the file was read but is not valid TOML.
    """
    try:
        raw = path.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError):
        return tomlkit.document()

    try:
        document = tomlkit.parse(raw)
    except tomlkit.exceptions.ParseError as exc:
        message = (
            f"Invalid TOML in {path}: {exc}\nFix the syntax error or remove the file."
        )
        raise ValueError(message) from exc
    return document
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def _find_config() -> dict:
    """Load the first ``config.toml`` that exists in the search directories.

    Returns a plain empty dict when no such file exists. Propagates the
    ``ValueError`` raised by ``_load_toml`` for invalid TOML.
    """
    candidates = (folder / _CONFIG_FILE for folder in _search_dirs())
    first_hit = next((path for path in candidates if path.exists()), None)
    if first_hit is None:
        return {}
    return _load_toml(first_hit)
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def load_config() -> dict:
    """Return the contents of ``config.toml``.

    Public entry point over the internal lookup: the current working
    directory is checked first, then the home directory, and the first file
    found is used.

    Returns:
        The parsed TOML mapping, or an empty dict if no file was found.
    """
    settings = _find_config()
    return settings
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def _find_config_path() -> Path:
    """Locate ``config.toml``, falling back to a path in the working directory.

    Returns the first existing ``config.toml`` from the search directories.
    If none exists, returns ``<cwd>/config.toml`` (not created here) so that
    callers have somewhere to write a new file.
    """
    candidates = (folder / _CONFIG_FILE for folder in _search_dirs())
    existing = next((path for path in candidates if path.exists()), None)
    if existing is not None:
        return existing
    return Path.cwd() / _CONFIG_FILE
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def _write_toml(path: Path, data) -> None:
    """Serialise ``data`` with tomlkit and write it to ``path`` as UTF-8.

    When ``data`` is a tomlkit document, its comments and formatting are
    carried through by ``tomlkit.dumps``. The file is overwritten.
    """
    rendered = tomlkit.dumps(data)
    path.write_text(rendered, encoding="utf-8")
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def add_arcana_to_config(arcana_id: str, *, label: str | None = None) -> Path:
    """Record an ARCANA ID in ``config.toml``.

    The file is edited through tomlkit, so existing comments and layout are
    kept. Missing ``[saia]`` / ``[saia.arcana]`` tables are created on
    demand. With a ``label`` the ID is stored as
    ``[saia.arcana.labels] <label> = <id>`` (overwriting a previous value for
    that label); without one it is appended to the ``[saia.arcana] ids``
    array unless it is already listed there.

    Args:
        arcana_id: The full ``owner/name`` ARCANA ID.
        label: Optional label key (e.g. ``"project_a"``).

    Returns:
        The path to the updated config.toml.
    """

    def _child(parent, key, factory):
        # Fetch parent[key], creating it with factory() first when absent.
        if key not in parent:
            parent.add(key, factory())
        return parent[key]

    target = _find_config_path()
    document = _load_toml(target) if target.exists() else tomlkit.document()

    saia_table = _child(document, "saia", tomlkit.table)
    arcana_table = _child(saia_table, "arcana", tomlkit.table)

    if not label:
        known_ids = _child(arcana_table, "ids", tomlkit.array)
        if arcana_id not in known_ids:
            known_ids.append(arcana_id)
    else:
        _child(arcana_table, "labels", tomlkit.table)
        arcana_table["labels"][label] = arcana_id

    _write_toml(target, document)
    return target
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
def remove_arcana_from_config(arcana_id: str) -> Path:
    """Remove an ARCANA ID from ``config.toml``.

    Preserves existing comments and formatting. Removes the ID from
    ``[saia.arcana] ids`` (every occurrence, in case the array was edited by
    hand and lists it more than once), from ``[saia.arcana] default``, and
    from any matching entry in ``[saia.arcana.labels]``.

    If no ``config.toml`` exists nothing is written.

    Args:
        arcana_id: The full ``owner/name`` ARCANA ID to remove.

    Returns:
        The path to the updated config.toml.

    Raises:
        ValueError: If the existing file contains invalid TOML (raised by
            the loader).
    """
    path = _find_config_path()
    if not path.exists():
        return path

    doc = _load_toml(path)
    arcana = doc.get("saia", {}).get("arcana", {})

    # Remove from ids array. Only touch a real array: if ``ids`` was written
    # as a scalar string, ``in`` would be a substring test and ``.remove``
    # would not exist.
    ids = arcana.get("ids", [])
    if isinstance(ids, list):
        # ``remove`` drops one element per call; loop so duplicates go too.
        while arcana_id in ids:
            ids.remove(arcana_id)

    # Remove from default
    if arcana.get("default") == arcana_id:
        del arcana["default"]

    # Remove from labels. Collect the keys first: deleting while iterating
    # over ``items()`` is not safe.
    labels = arcana.get("labels", {})
    to_remove = [k for k, v in labels.items() if v == arcana_id]
    for k in to_remove:
        del labels[k]

    _write_toml(path, doc)
    return path
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
# ---------------------------------------------------------------------------
|
|
205
|
+
# API key loading
|
|
206
|
+
# ---------------------------------------------------------------------------
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
def load_api_key(path: str | Path | None = None) -> str:
    """Discover and return the SAIA API key.

    Resolution order:

    1. ``path`` argument — explicit file path (``.saia_api`` or ``.env`` format)
    2. ``SAIA_API_KEY`` environment variable
    3. ``.saia_api`` in the current working directory
    4. ``.saia_api`` in the home directory
    5. ``.env`` in the current working directory (looks for ``SAIA_API_KEY=...``)
    6. ``.env`` in the home directory

    Each ``.env`` is checked in turn: a ``.env`` in the working directory
    that has no ``SAIA_API_KEY`` entry does not hide one in the home
    directory.

    Args:
        path: Optional explicit path to a ``.saia_api`` or ``.env`` file.

    Returns:
        The API key string.

    Raises:
        FileNotFoundError: If ``path`` was given but does not exist.
        ValueError: If no API key could be found anywhere, or if a key file
            that was selected (the explicit ``path`` or the first existing
            ``.saia_api``) contains no key.
    """
    # 1. Explicit file path
    if path is not None:
        p = Path(path)
        if not p.exists():
            raise FileNotFoundError(f"API key file not found: {p}")
        return _read_file(p)

    # 2. Environment variable (a whitespace-only value counts as unset)
    key = os.environ.get(_ENV_VAR)
    if key and key.strip():
        return key.strip()

    search_dirs = _search_dirs()

    # 3 & 4. .saia_api in cwd / home — the first existing file is decisive
    for directory in search_dirs:
        candidate = directory / _KEY_FILE
        if candidate.exists():
            return _read_raw(candidate)

    # 5 & 6. .env in cwd / home — keep looking until one actually has the key
    for directory in search_dirs:
        candidate = directory / _ENV_FILE
        if not candidate.exists():
            continue
        value = _parse_dotenv(candidate).get(_ENV_VAR)
        if value:
            return value

    raise ValueError(
        f"No SAIA API key found. Provide it via:\n"
        f" - SAIAClient(api_key='...')\n"
        f" - Environment variable {_ENV_VAR!r}\n"
        f" - {_KEY_FILE!r} file containing the raw key\n"
        f" - {_ENV_FILE!r} file with {_ENV_VAR}=<key>"
    )
|
|
262
|
+
|
|
263
|
+
|
|
264
|
+
def _read_file(path: Path) -> str:
    """Extract an API key from an explicitly named file.

    Two layouts are understood. The file is first treated as a ``.env``
    file; if it holds a non-empty ``SAIA_API_KEY`` entry, that value is
    returned. Otherwise the file is treated as a raw ``.saia_api`` file and
    its first non-empty, non-comment line is the key.

    Trying the dotenv form first (rather than guessing from the presence of
    ``=``) means raw keys that themselves contain ``=``, such as base64
    padding, are still read correctly.

    Raises:
        ValueError: If the raw fallback finds no key line.
    """
    dotenv_value = _parse_dotenv(path).get(_ENV_VAR)
    return dotenv_value if dotenv_value else _read_raw(path)
|
|
281
|
+
|
|
282
|
+
|
|
283
|
+
def _read_raw(path: Path) -> str:
|
|
284
|
+
"""Read a .saia_api file — first non-empty, non-comment line."""
|
|
285
|
+
for line in path.read_text(encoding="utf-8").splitlines():
|
|
286
|
+
line = line.strip()
|
|
287
|
+
if line and not line.startswith("#"):
|
|
288
|
+
return line
|
|
289
|
+
raise ValueError(f"No API key found in {path}")
|
|
290
|
+
|
|
291
|
+
|
|
292
|
+
# ---------------------------------------------------------------------------
|
|
293
|
+
# ARCANA ID loading
|
|
294
|
+
# ---------------------------------------------------------------------------
|
|
295
|
+
|
|
296
|
+
|
|
297
|
+
def load_arcana_ids() -> dict[str, str]:
    """Collect ARCANA IDs from ``config.toml``, ``.env`` and the environment.

    Every source contributes its labelled IDs to one mapping. Separately, a
    single ``"default"`` entry is chosen from the first of these that yields
    a non-empty value:

    1. ``SAIA_ARCANA_ID`` (environment variable, else ``.env``)
    2. ``[saia.arcana] default`` in ``config.toml``
    3. The first element of ``[saia.arcana] ids`` in ``config.toml``
    4. The first ``SAIA_ARCANA_ID_XX`` variable encountered (iteration
       order of the merged ``.env`` + environment mapping, not sorted)

    Returns:
        A dict mapping label to ARCANA ID:

        - keys of ``[saia.arcana.labels]`` — the labelled IDs
        - ``"0"``, ``"1"``, ... — positions in the ``[saia.arcana] ids`` array
        - variable suffixes (e.g. ``"01"`` for ``SAIA_ARCANA_ID_01``)
        - ``"default"`` — the chosen default, when one exists

        IDs lacking an ``owner/`` prefix get the configured username
        prepended. The dict is empty if nothing is configured.

    Raises:
        ValueError: If an ID lacks an owner prefix and no username is
            configured, or if ``config.toml`` is invalid TOML.
    """
    dotenv = _find_dotenv()
    config = _find_config()
    arcana_cfg = config.get("saia", {}).get("arcana", {})

    # Real environment variables override same-named ``.env`` entries.
    env_merged = {**dotenv, **os.environ}

    found: dict[str, str] = {}

    # [saia.arcana.labels]: name -> id
    for name, raw in arcana_cfg.get("labels", {}).items():
        if isinstance(raw, str) and raw.strip():
            found[name] = raw.strip()

    # [saia.arcana] ids = [...]: keyed by array position
    from_ids: str | None = None
    id_array = arcana_cfg.get("ids", [])
    if isinstance(id_array, list):
        for position, raw in enumerate(id_array):
            if isinstance(raw, str) and raw.strip():
                found[str(position)] = raw.strip()
        if id_array and isinstance(id_array[0], str) and id_array[0].strip():
            from_ids = id_array[0].strip()

    # [saia.arcana] default = "..."
    from_toml: str | None = None
    explicit = arcana_cfg.get("default", "")
    if isinstance(explicit, str) and explicit.strip():
        from_toml = explicit.strip()

    # SAIA_ARCANA_ID_XX: keyed by suffix, in mapping order
    from_numbered: str | None = None
    for variable, raw in env_merged.items():
        hit = _ARCANA_ID_PATTERN.match(variable)
        if not hit or not raw.strip():
            continue
        found[hit.group(1)] = raw.strip()
        if from_numbered is None:
            from_numbered = raw.strip()

    # SAIA_ARCANA_ID: the single, highest-priority default
    from_env = env_merged.get(_ARCANA_ID_VAR, "").strip()

    # Each candidate is either a non-empty string or falsy, so an ``or``
    # chain picks the highest-priority one that is set.
    chosen = from_env or from_toml or from_ids or from_numbered
    if chosen:
        found["default"] = chosen

    return _resolve_owner_prefix(found, config, env_merged)
|
|
383
|
+
|
|
384
|
+
|
|
385
|
+
def _resolve_owner_prefix(
|
|
386
|
+
ids: dict[str, str], config: dict, env_merged: dict[str, str]
|
|
387
|
+
) -> dict[str, str]:
|
|
388
|
+
"""Prepend ``username/`` to ARCANA IDs that lack an owner prefix.
|
|
389
|
+
|
|
390
|
+
The chat endpoint requires the ``owner/name`` format. If an ID has no
|
|
391
|
+
``/``, the username is resolved from ``SAIA_USERNAME`` (env, .env) or
|
|
392
|
+
``[saia] username`` in config.toml.
|
|
393
|
+
|
|
394
|
+
Args:
|
|
395
|
+
ids: The collected ARCANA IDs.
|
|
396
|
+
config: The parsed ``config.toml``.
|
|
397
|
+
env_merged: The already-merged ``{**dotenv, **os.environ}`` mapping
|
|
398
|
+
(env vars take precedence over ``.env``). Reused here to avoid
|
|
399
|
+
re-reading ``.env`` from disk.
|
|
400
|
+
|
|
401
|
+
Raises:
|
|
402
|
+
ValueError: If an ID has no ``/`` and no username is configured.
|
|
403
|
+
"""
|
|
404
|
+
if not ids:
|
|
405
|
+
return ids
|
|
406
|
+
|
|
407
|
+
# Check if any ID needs a prefix
|
|
408
|
+
needs_prefix = any("/" not in v for v in ids.values())
|
|
409
|
+
if not needs_prefix:
|
|
410
|
+
return ids
|
|
411
|
+
|
|
412
|
+
# Resolve username: env var / .env first (already merged), then config.toml
|
|
413
|
+
username = (env_merged.get(_USERNAME_VAR) or "").strip()
|
|
414
|
+
if not username:
|
|
415
|
+
cfg_username = config.get("saia", {}).get("username", "")
|
|
416
|
+
username = cfg_username.strip() if isinstance(cfg_username, str) else ""
|
|
417
|
+
|
|
418
|
+
if not username:
|
|
419
|
+
missing = [f"{k}={v}" for k, v in ids.items() if "/" not in v]
|
|
420
|
+
raise ValueError(
|
|
421
|
+
f"ARCANA ID(s) without owner prefix require a username, "
|
|
422
|
+
f"but SAIA_USERNAME is not configured.\n"
|
|
423
|
+
f" IDs missing owner prefix: {', '.join(missing)}\n"
|
|
424
|
+
f" Set SAIA_USERNAME in .env or [saia] username in config.toml."
|
|
425
|
+
)
|
|
426
|
+
|
|
427
|
+
resolved = {}
|
|
428
|
+
for label, value in ids.items():
|
|
429
|
+
if "/" not in value:
|
|
430
|
+
resolved[label] = f"{username}/{value}"
|
|
431
|
+
else:
|
|
432
|
+
resolved[label] = value
|
|
433
|
+
return resolved
|
|
434
|
+
|
|
435
|
+
|
|
436
|
+
# ---------------------------------------------------------------------------
|
|
437
|
+
# Username loading
|
|
438
|
+
# ---------------------------------------------------------------------------
|
|
439
|
+
|
|
440
|
+
|
|
441
|
+
def load_username() -> str | None:
    """Look up the SAIA username.

    Sources are tried in this order, and the first non-blank value (after
    stripping whitespace) is returned:

    1. ``SAIA_USERNAME`` environment variable
    2. ``SAIA_USERNAME`` in ``.env``
    3. ``[saia] username`` in ``config.toml``

    Returns:
        The username string, or ``None`` if not configured.
    """
    from_environment = os.environ.get(_USERNAME_VAR, "").strip()
    if from_environment:
        return from_environment

    from_dotenv = _find_dotenv().get(_USERNAME_VAR, "").strip()
    if from_dotenv:
        return from_dotenv

    configured = _find_config().get("saia", {}).get("username", "")
    if isinstance(configured, str):
        cleaned = configured.strip()
        if cleaned:
            return cleaned

    return None
|
|
468
|
+
|
|
469
|
+
|
|
470
|
+
# ---------------------------------------------------------------------------
|
|
471
|
+
# Base URL and credential resolution
|
|
472
|
+
# ---------------------------------------------------------------------------
|
|
473
|
+
|
|
474
|
+
|
|
475
|
+
def resolve_base_url(explicit: str | None = None) -> str:
    """Determine which SAIA API base URL to use.

    Precedence: the ``explicit`` argument, then ``[saia] base_url`` from
    ``config.toml``, then ``DEFAULT_BASE_URL``.

    Args:
        explicit: A caller-supplied URL. Any non-``None`` value is used
            (even an empty string), with trailing slashes removed;
            ``config.toml`` is not consulted in that case.

    Returns:
        The resolved base URL string.
    """
    if explicit is None:
        configured = load_config().get("saia", {}).get("base_url", "")
        if isinstance(configured, str):
            cleaned = configured.strip()
            if cleaned:
                return cleaned.rstrip("/")
        return DEFAULT_BASE_URL
    return explicit.rstrip("/")
|
|
495
|
+
|
|
496
|
+
|
|
497
|
+
def resolve_credentials(
    api_key: str | None = None,
    base_url: str | None = None,
    key_file: str | None = None,
) -> tuple[str, str]:
    """Produce the ``(api_key, base_url)`` pair a client should use.

    This is the single resolution path behind both
    :class:`~saia_python.SAIAClient` and
    :func:`~saia_python.create_openai_client`, so the two behave the same.
    A non-``None`` ``api_key`` is used verbatim; otherwise the key is
    discovered with :func:`load_api_key`, which receives ``key_file``. The
    URL always goes through :func:`resolve_base_url`.

    Args:
        api_key: Explicit API key, or ``None`` to trigger discovery.
        base_url: Explicit base URL, or ``None`` to use config / default.
        key_file: Key-file path for discovery; ignored when ``api_key`` is
            given.

    Returns:
        An ``(api_key, base_url)`` tuple.
    """
    if api_key is None:
        key = load_api_key(key_file)
    else:
        key = api_key
    url = resolve_base_url(base_url)
    return key, url
|