saia-python 0.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- saia_python/__init__.py +253 -0
- saia_python/_http.py +71 -0
- saia_python/_streaming.py +88 -0
- saia_python/_util.py +29 -0
- saia_python/arcana.py +1061 -0
- saia_python/arcana_references.py +182 -0
- saia_python/auth.py +515 -0
- saia_python/chat.py +72 -0
- saia_python/client.py +239 -0
- saia_python/documents.py +145 -0
- saia_python/exceptions.py +68 -0
- saia_python/models.py +146 -0
- saia_python/openai_compat.py +70 -0
- saia_python/py.typed +0 -0
- saia_python/rate_limits.py +84 -0
- saia_python/responses.py +70 -0
- saia_python/voice.py +175 -0
- saia_python-0.4.1.dist-info/METADATA +190 -0
- saia_python-0.4.1.dist-info/RECORD +22 -0
- saia_python-0.4.1.dist-info/WHEEL +5 -0
- saia_python-0.4.1.dist-info/licenses/LICENSE +661 -0
- saia_python-0.4.1.dist-info/top_level.txt +1 -0
saia_python/arcana.py
ADDED
|
@@ -0,0 +1,1061 @@
|
|
|
1
|
+
"""ARCANA (RAG) service — manage knowledge bases and chat with context."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import uuid as _uuid
|
|
7
|
+
from collections.abc import Callable, Iterable
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import cast
|
|
10
|
+
from urllib.parse import quote
|
|
11
|
+
|
|
12
|
+
import requests
|
|
13
|
+
|
|
14
|
+
from ._http import new_session_like, post_chat_completion
|
|
15
|
+
from ._streaming import SSEStream
|
|
16
|
+
from ._util import progress_iter
|
|
17
|
+
from .exceptions import APIError, raise_for_status
|
|
18
|
+
|
|
19
|
+
_ARCANA_PATH = "/arcanas/api/v1"
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def extract_arcana_name(id_or_name: str) -> str:
    """Return the bare arcana name for a plain name or an ``owner/name`` ID.

    The chat endpoint addresses arcanas by their full ``owner/name`` ID,
    whereas the management endpoints expect only the ``name`` part. This
    helper lets callers pass either form.

    Args:
        id_or_name: ``"owner/name"`` or just ``"name"``.

    Returns:
        Everything after the first ``/``; the input itself when it
        contains no ``/``.
    """
    _owner, slash, remainder = id_or_name.partition("/")
    # `slash` is empty exactly when no "/" was present in the input.
    return remainder if slash else id_or_name
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _json_or_none(resp) -> dict | None:
|
|
42
|
+
"""Return ``resp.json()``, or ``None`` when the body is empty / not JSON.
|
|
43
|
+
|
|
44
|
+
Several ARCANA management endpoints (delete, upload, delete-index, …) may
|
|
45
|
+
answer with an empty or non-JSON body on success; this collapses the
|
|
46
|
+
repeated decode-or-``None`` ``try/except`` they all used.
|
|
47
|
+
"""
|
|
48
|
+
try:
|
|
49
|
+
return resp.json()
|
|
50
|
+
except (json.JSONDecodeError, ValueError):
|
|
51
|
+
return None
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
class ArcanaService:
|
|
55
|
+
"""Access the ARCANA/RAG endpoints.
|
|
56
|
+
|
|
57
|
+
The ARCANA API uses a different auth scheme (plain key, no ``Bearer`` prefix)
|
|
58
|
+
and a different URL path. This is handled automatically.
|
|
59
|
+
|
|
60
|
+
Args:
|
|
61
|
+
session: A :class:`requests.Session` (auth header will be overridden per-request).
|
|
62
|
+
base_url: The SAIA API base URL (e.g. ``https://chat-ai.academiccloud.de/v1``).
|
|
63
|
+
api_key: The raw API key (needed because ARCANA omits the ``Bearer`` prefix).
|
|
64
|
+
"""
|
|
65
|
+
|
|
66
|
+
def __init__(self, session: requests.Session, base_url: str, api_key: str):
    """Store the connection settings and derive the ARCANA endpoint root.

    Args:
        session: Session used for every ARCANA request.
        base_url: SAIA API base URL; the ARCANA path is appended to it.
        api_key: Raw API key, sent as the ``Authorization`` header value.
    """
    self._session, self._base_url, self._api_key = session, base_url, api_key
    # All ARCANA endpoints hang off <base_url> + _ARCANA_PATH.
    self._arcana_base = f"{base_url}{_ARCANA_PATH}"
|
|
71
|
+
|
|
72
|
+
def _headers(self, **extra) -> dict:
|
|
73
|
+
return {"Authorization": self._api_key, "Accept": "application/json", **extra}
|
|
74
|
+
|
|
75
|
+
@staticmethod
|
|
76
|
+
def _format_arcana_line(a: dict, *, with_owner: bool = False) -> str:
|
|
77
|
+
"""Format one arcana as a one-line summary (shared by the summary views)."""
|
|
78
|
+
idx = a.get("index_info") or {}
|
|
79
|
+
status = idx.get("index_status", "?")
|
|
80
|
+
if with_owner:
|
|
81
|
+
return (
|
|
82
|
+
f" {a['name']} "
|
|
83
|
+
f"(owner: {a.get('owner_user_name', '?')}, "
|
|
84
|
+
f"files: {a.get('file_count', '?')}, "
|
|
85
|
+
f"status: {status})"
|
|
86
|
+
)
|
|
87
|
+
return f" {a['name']} (files: {a.get('file_count', '?')}, status: {status})"
|
|
88
|
+
|
|
89
|
+
@staticmethod
|
|
90
|
+
def _glob_files(
|
|
91
|
+
directory: str | Path, pattern: str, *, recursive: bool
|
|
92
|
+
) -> list[Path]:
|
|
93
|
+
"""Return the sorted matching files in ``directory``.
|
|
94
|
+
|
|
95
|
+
Raises:
|
|
96
|
+
FileNotFoundError: If ``directory`` is not a directory, or nothing
|
|
97
|
+
matches ``pattern``.
|
|
98
|
+
"""
|
|
99
|
+
directory = Path(directory)
|
|
100
|
+
if not directory.is_dir():
|
|
101
|
+
raise FileNotFoundError(f"Directory not found: {directory}")
|
|
102
|
+
glob_method = directory.rglob if recursive else directory.glob
|
|
103
|
+
files = sorted(p for p in glob_method(pattern) if p.is_file())
|
|
104
|
+
if not files:
|
|
105
|
+
raise FileNotFoundError(f"No files matching '{pattern}' in {directory}")
|
|
106
|
+
return files
|
|
107
|
+
|
|
108
|
+
def _run_file_batch(
    self,
    files: list[Path],
    action,
    *,
    verb: str,
    desc: str,
    verbose: bool,
) -> list[dict]:
    """Apply ``action`` to every file and collect a per-file outcome.

    Common loop behind the batch upload/delete helpers. A failure of
    ``action`` for one file is recorded and does not stop the batch.

    Args:
        files: Paths to process.
        action: Callable invoked as ``action(path)`` for each file.
        verb: Past-tense status recorded on success (e.g. ``"uploaded"``).
        desc: Description passed to the progress iterator.
        verbose: Print one line per file plus a closing tally.

    Returns:
        One dict per file with ``"file"`` (file name), ``"status"``
        (``verb`` or ``"failed"``) and, on failure only, ``"error"``.
    """
    outcomes: list[dict] = []
    for path in progress_iter(files, desc=desc, unit="file"):
        record: dict = {"file": path.name}
        try:
            action(path)
        except Exception as exc:
            # Deliberately broad: one bad file must not abort the batch.
            record["status"] = "failed"
            record["error"] = str(exc)
        else:
            record["status"] = verb
        if verbose:
            tag = "FAILED" if record["status"] != verb else verb.capitalize()
            print(f" {path.name} {tag}")
        outcomes.append(record)

    if verbose:
        total = len(outcomes)
        ok_count = sum(1 for item in outcomes if item["status"] == verb)
        tally = f"{ok_count}/{total} files {verb}"
        if total - ok_count:
            tally += f" ({total - ok_count} failed)"
        print(tally)
    return outcomes
|
|
147
|
+
|
|
148
|
+
def version(self) -> str:
    """Fetch the ARCANA API version.

    Issues ``GET /arcanas/api/v1/version``.

    Returns:
        The ``version`` field of the response (e.g. ``"0.4.16"``), or an
        empty string when the field is absent.
    """
    endpoint = f"{self._arcana_base}/version"
    response = self._session.get(endpoint, headers=self._headers())
    raise_for_status(response)
    payload = response.json()
    return payload.get("version", "")
|
|
161
|
+
|
|
162
|
+
def heartbeat(self) -> bool:
    """Report whether the ARCANA service answers its heartbeat probe.

    Issues ``GET /arcanas/api/v1/heartbeat`` with a 10-second timeout.

    Returns:
        ``True`` only for a 204 response. Any other status, and any
        exception raised while probing, yields ``False`` — this method
        never raises.
    """
    try:
        endpoint = f"{self._arcana_base}/heartbeat"
        probe = self._session.get(endpoint, headers=self._headers(), timeout=10)
        alive = probe.status_code == 204
    except Exception:
        # Best-effort liveness check: every failure means "not alive".
        return False
    return alive
|
|
178
|
+
|
|
179
|
+
def user_info(self) -> dict:
    """Fetch the current user's profile and arcana statistics.

    Issues ``GET /user/me`` below the ARCANA base path.

    Returns:
        The decoded JSON response with the user's profile fields.
    """
    endpoint = f"{self._arcana_base}/user/me"
    response = self._session.get(endpoint, headers=self._headers())
    raise_for_status(response)
    return response.json()
|
|
194
|
+
|
|
195
|
+
def user_summary(self) -> str:
    """Build a human-readable overview of the account and its arcanas.

    Merges the output of :meth:`user_info` and :meth:`list`.

    Returns:
        A multi-line string: aligned profile fields, then one line per
        arcana (or ``(none)`` when there are no arcanas).
    """
    profile = self.user_info()
    arcana_list = self.list()
    width = 18  # label column width

    def row(label: str, value) -> str:
        return f"{label:<{width}}{value}"

    full_name = f"{profile.get('first_name', '')} {profile.get('surname', '')}"
    out = [
        row("Username", profile.get("username", "?")),
        row("Email", profile.get("email", "?")),
        # rstrip drops the padding/space left behind when names are missing.
        row("Name", full_name).rstrip(),
        row("Registered", profile.get("is_registered", "?")),
        row("Arcana count", profile.get("arcana_count", "?")),
        row("Total files", profile.get("file_count", "?")),
    ]
    if profile.get("created_at"):
        out.append(row("Member since", profile["created_at"]))

    out.append(f"\nArcanas ({len(arcana_list)}):")
    if arcana_list:
        out.extend(self._format_arcana_line(item) for item in arcana_list)
    else:
        out.append(" (none)")

    return "\n".join(out)
|
|
226
|
+
|
|
227
|
+
def create(
    self,
    name: str,
    *,
    append_uuid: bool = True,
    update_toml: bool = False,
    toml_label: str | None = None,
) -> dict:
    """Create a new arcana.

    Args:
        name: Name for the new arcana (1–100 characters).
        append_uuid: If ``True`` (default), append a UUID4 suffix to
            the name (e.g. ``MyArcana-a1b2c3d4-...``). This mirrors
            the behavior of the SAIA web UI and avoids name collisions.
        update_toml: If ``True``, add the new arcana to ``config.toml``
            after creation.
        toml_label: Label under ``[saia.arcana.labels]`` in config.toml.
            If omitted, the ID is appended to the ``ids`` array.

    Returns:
        A dict with ``"name"`` (the created arcana name, including any
        UUID suffix), ``"id"`` (the full ``owner/name`` ID, or just the
        name when the owner is unknown) and ``"message"`` (the decoded
        creation response, or ``None`` when it has no JSON body).
    """
    if append_uuid:
        name = f"{name}-{_uuid.uuid4()}"

    resp = self._session.post(
        f"{self._arcana_base}/arcana/",
        headers={**self._headers(), "Content-Type": "application/json"},
        json={"name": name},
    )
    raise_for_status(resp)

    # The create call itself does not yield the owner, so fetch the
    # arcana's details to build the full owner/name ID.
    details = self.get(name)
    owner = details.get("owner_user_name", "")
    full_id = f"{owner}/{name}" if owner else name

    # The arcana already exists at this point. Decode the body leniently
    # so an empty / non-JSON success response cannot raise and lose the
    # (possibly randomly suffixed) name and ID, or skip the config update.
    result = {"name": name, "id": full_id, "message": _json_or_none(resp)}

    if update_toml:
        from .auth import add_arcana_to_config

        add_arcana_to_config(full_id, label=toml_label)

    return result
|
|
275
|
+
|
|
276
|
+
def delete(
    self,
    name: str,
    *,
    update_toml: bool = False,
) -> dict | None:
    """Remove an arcana from the server.

    Both the plain name and the full ``owner/name`` ID are accepted.

    Args:
        name: The arcana name or full ``owner/name`` ID.
        update_toml: Also remove the arcana from ``config.toml`` once
            the server-side deletion succeeded.

    Returns:
        The decoded API response, or ``None`` for an empty/non-JSON body.
    """
    given_id = name
    bare_name = extract_arcana_name(name)

    target = f"{self._arcana_base}/arcana/{quote(bare_name, safe='')}"
    response = self._session.delete(target, headers=self._headers())
    raise_for_status(response)

    if update_toml:
        from .auth import remove_arcana_from_config

        # The config may hold either form, so try the ID as given and
        # then the bare name (once only, if the two are identical).
        for candidate in dict.fromkeys((given_id, bare_name)):
            remove_arcana_from_config(candidate)

    return _json_or_none(response)
|
|
313
|
+
|
|
314
|
+
def list(self) -> list[dict]:
    """Fetch every arcana available to the caller.

    Returns:
        The decoded JSON response: a list of arcana dicts.
    """
    endpoint = f"{self._arcana_base}/arcana/"
    response = self._session.get(endpoint, headers=self._headers())
    raise_for_status(response)
    return response.json()
|
|
326
|
+
|
|
327
|
+
def summary(self, *, arcana_ids: dict[str, str] | None = None) -> str:
    """Build a human-readable overview of configured and server-side arcanas.

    Args:
        arcana_ids: Label-to-ID mapping as produced by
            :func:`~saia_python.load_arcana_ids`. Loaded automatically
            when ``None``.

    Returns:
        A multi-line string: the configured IDs first (the ``"default"``
        entry gets its own ID/name lines), then the arcanas reported by
        :meth:`list`.
    """
    if arcana_ids is None:
        from .auth import load_arcana_ids

        arcana_ids = load_arcana_ids()

    out = []

    # Section 1: IDs from the local configuration.
    if not arcana_ids:
        out.append("No ARCANA IDs configured.")
    else:
        out.append(f"Configured ARCANA IDs ({len(arcana_ids)}):")
        default_id = arcana_ids.get("default", "")
        if default_id:
            default_name = extract_arcana_name(default_id)
            out.append(f" Default ID {default_id}")
            out.append(f" Default name {default_name}")
        out.extend(
            f" [{label}] {aid}"
            for label, aid in arcana_ids.items()
            if label != "default"
        )

    # Section 2: arcanas that exist on the server.
    on_server = self.list()
    out.append(f"\nAvailable on server ({len(on_server)}):")
    if on_server:
        out.extend(
            self._format_arcana_line(item, with_owner=True) for item in on_server
        )
    else:
        out.append(" (none)")

    return "\n".join(out)
|
|
373
|
+
|
|
374
|
+
def get(self, name: str) -> dict:
    """Fetch the details of one arcana.

    Both the plain name and the full ``owner/name`` ID are accepted; an
    owner prefix is stripped before the request is made.

    Args:
        name: The arcana name or full ``owner/name`` ID.

    Returns:
        The decoded JSON response with the arcana's details.
    """
    bare_name = extract_arcana_name(name)
    endpoint = f"{self._arcana_base}/arcana/{quote(bare_name, safe='')}"
    response = self._session.get(endpoint, headers=self._headers())
    raise_for_status(response)
    return response.json()
|
|
393
|
+
|
|
394
|
+
def info(
    self,
    name: str | None = None,
    *,
    data: dict | None = None,
    verbose: bool = False,
) -> str:
    """Render an arcana's details as aligned ``label value`` lines.

    Args:
        name: The arcana name or full ``owner/name`` ID. Optional when
            ``data`` is supplied.
        data: An arcana dict already obtained from :meth:`get`; passing
            it avoids a second API call.
        verbose: Also show the index error message (when one is set),
            the CLI version and the vector DB version.

    Returns:
        A human-readable multi-line string.

    Raises:
        ValueError: If neither ``name`` nor ``data`` is given.
    """
    if data is None:
        if name is None:
            raise ValueError("Either name or data must be provided")
        data = self.get(name)
    index = data.get("index_info") or {}

    def human_size(size: float) -> str:
        # Step through the units in 1024 increments; anything beyond GB
        # is reported in TB.
        value = size
        for unit in ("B", "KB", "MB", "GB"):
            if -1024 < value < 1024:
                return f"{value:.1f} {unit}"
            value = value / 1024
        return f"{value:.1f} TB"

    width = 18  # label column width

    def row(label: str, value) -> str:
        return f"{label:<{width}}{value}"

    files_cell = f"{data.get('file_count', '?')} ({human_size(data.get('size', 0))})"
    out = [
        row("Name", data.get("name", "?")),
        row("Owner", data.get("owner_user_name", "?")),
        row("Files", files_cell),
        row("Index status", index.get("index_status", "?")),
        row("Embeddings model", index.get("embeddings_model", "?")),
        row("Files indexed", index.get("total_files_indexed", "?")),
        row("Chunks indexed", index.get("total_chunks_indexed", "?")),
        row("Created", data.get("created_at", "?")),
        row("Updated", data.get("updated_at", "?")),
    ]

    if verbose:
        if index.get("error_msg") is not None:
            # Slot the error directly below the "Index status" row.
            out.insert(4, row("Error", index["error_msg"]))
        out.append(row("CLI version", index.get("cli_version", "?")))
        out.append(row("Vector DB version", index.get("vector_db_version", "?")))

    return "\n".join(out)
|
|
449
|
+
|
|
450
|
+
def upload(
    self, name: str, file_path: str | Path, *, overwrite: bool = False
) -> dict | None:
    """Send one local file to an arcana so it can be indexed.

    Supported formats: PDF, text, markdown. Both the plain arcana name
    and the full ``owner/name`` ID are accepted.

    Args:
        name: The arcana name or full ``owner/name`` ID.
        file_path: Path of the file to upload.
        overwrite: Replace an existing file of the same name (PUT to the
            file's own URL) instead of creating a new one (POST to the
            collection URL).

    Returns:
        The decoded API response, or ``None`` for an empty/non-JSON body.
    """
    arcana = extract_arcana_name(name)
    source = Path(file_path)
    files_url = f"{self._arcana_base}/arcana/{quote(arcana, safe='')}/files"

    with open(source, "rb") as handle:
        # PUT targets the named file; POST targets the collection.
        if overwrite:
            send = self._session.put
            target = f"{files_url}/{quote(source.name, safe='')}"
        else:
            send = self._session.post
            target = f"{files_url}/"
        response = send(
            target,
            headers=self._headers(),
            files={"file": (source.name, handle)},
        )
    raise_for_status(response)
    return _json_or_none(response)
|
|
485
|
+
|
|
486
|
+
def upload_directory(
    self,
    name: str,
    directory: str | Path,
    *,
    pattern: str = "*",
    recursive: bool = False,
    overwrite: bool = False,
    verbose: bool = False,
) -> list[dict]:
    """Upload every matching file of a local directory to an arcana.

    Args:
        name: The arcana name or full ``owner/name`` ID.
        directory: Directory holding the files to upload.
        pattern: Glob pattern selecting the files (default ``"*"``, i.e.
            all files); e.g. ``"*.pdf"`` for PDFs only.
        recursive: Also descend into subdirectories.
        overwrite: Replace files that already exist under the same name.
        verbose: Print the upload status of each file.

    Returns:
        One dict per file with ``"file"`` (file name only), ``"status"``
        (``"uploaded"`` or ``"failed"``) and, on failure only,
        ``"error"`` (the message).
    """

    def _upload_one(path: Path):
        return self.upload(name, path, overwrite=overwrite)

    matched = self._glob_files(directory, pattern, recursive=recursive)
    return self._run_file_batch(
        matched,
        _upload_one,
        verb="uploaded",
        desc="Uploading",
        verbose=verbose,
    )
|
|
521
|
+
|
|
522
|
+
def upload_files(
    self,
    name: str,
    paths: Iterable[str | Path],
    *,
    overwrite: bool = True,
    verbose: bool = False,
) -> list[dict]:
    """Upload exactly the files the caller lists to an arcana.

    Where :meth:`upload_directory` globs a directory, this method takes
    the selection as given — for instance the outcome of the caller's own
    changed-file or checksum comparison. It combines well with
    :meth:`list_files` (whose dicts carry per-file ``index_info``)
    beforehand and a single :meth:`generate_index` afterwards.

    Args:
        name: The arcana name or full ``owner/name`` ID.
        paths: The paths to upload.
        overwrite: ``True`` (default) replaces existing files (PUT);
            ``False`` creates new files (POST). The default is ``True``
            since callers typically have already determined that the
            files are new or changed.
        verbose: Print the upload status of each file.

    Returns:
        A list of ``{"file", "status", ["error"]}`` dicts, shaped like
        the result of :meth:`upload_directory`.
    """

    def _upload_one(path: Path):
        return self.upload(name, path, overwrite=overwrite)

    selected = [*map(Path, paths)]
    return self._run_file_batch(
        selected,
        _upload_one,
        verb="uploaded",
        desc="Uploading",
        verbose=verbose,
    )
|
|
560
|
+
|
|
561
|
+
def list_files(self, name: str) -> list[dict]:
    """Fetch the file listing of an arcana.

    Both the plain name and the full ``owner/name`` ID are accepted.

    Args:
        name: The arcana name or full ``owner/name`` ID.

    Returns:
        A list of file dicts (the API ``FileOutSchema``) with these keys:

        - ``name`` (str): file name, as expected by
          :meth:`download_file`, :meth:`delete_file` and ``overwrite``
          uploads.
        - ``size`` (int): size in bytes.
        - ``owner_user_name`` (str): the owning user.
        - ``created_at`` / ``updated_at`` (str): ISO-8601 timestamps.
        - ``index_info`` (dict | None): per-file index state; ``None``
          for a file that was never indexed. Otherwise it contains
          ``index_status`` (str, e.g. ``"INDEXED"``, ``"NOT_INDEXED"``,
          ``"ERROR"``) and ``chunks_indexed`` (int, the number of
          embedding chunks produced for the file).
        - ``related_files`` (list | None): nested entries of the same
          shape, present when the server groups derived files.

        ``index_info`` shows which files are already indexed without
        triggering any work. Indexing itself is started per arcana with
        :meth:`generate_index`; the ARCANA API offers no per-file index
        call.
    """
    bare_name = extract_arcana_name(name)
    endpoint = f"{self._arcana_base}/arcana/{quote(bare_name, safe='')}/files/"
    response = self._session.get(endpoint, headers=self._headers())
    raise_for_status(response)
    return response.json()
|
|
597
|
+
|
|
598
|
+
def delete_file(self, name: str, file_name: str) -> dict | None:
    """Remove one file from an arcana.

    Both the plain arcana name and the full ``owner/name`` ID are
    accepted.

    Args:
        name: The arcana name or full ``owner/name`` ID.
        file_name: Name of the file to remove, as reported by
            :meth:`list_files`.

    Returns:
        The decoded API response, or ``None`` for an empty/non-JSON body.
    """
    arcana_part = quote(extract_arcana_name(name), safe="")
    file_part = quote(file_name, safe="")
    target = f"{self._arcana_base}/arcana/{arcana_part}/files/{file_part}"
    response = self._session.delete(target, headers=self._headers())
    raise_for_status(response)
    return _json_or_none(response)
|
|
618
|
+
|
|
619
|
+
def download_file(self, name: str, file_name: str, output_path: str | Path) -> Path:
    """Download a file from an arcana to a local path.

    Accepts either the plain name or the full ``owner/name`` ID. The
    body is streamed to disk in 8 KiB chunks rather than being loaded
    into memory in one piece.

    Args:
        name: The arcana name or full ``owner/name`` ID.
        file_name: The name of the file to download (as returned
            by :meth:`list_files`).
        output_path: Local path to save the file to.

    Returns:
        The path the file was written to.
    """
    name = extract_arcana_name(name)
    resp = self._session.get(
        f"{self._arcana_base}/arcana/{quote(name, safe='')}/files/{quote(file_name, safe='')}/download",
        headers=self._headers(),
        stream=True,
    )
    raise_for_status(resp)
    output_path = Path(output_path)
    try:
        with open(output_path, "wb") as f:
            for chunk in resp.iter_content(chunk_size=8192):
                f.write(chunk)
    finally:
        # A streamed response keeps its pooled connection checked out
        # until the body is fully consumed or the response is closed.
        # Close explicitly so a failed write or interrupted transfer does
        # not leak the connection. Error responses are left untouched so
        # the raised exception can still read their body.
        resp.close()
    return output_path
|
|
645
|
+
|
|
646
|
+
def delete_directory(
    self,
    name: str,
    directory: str | Path,
    *,
    pattern: str = "*",
    recursive: bool = False,
    verbose: bool = False,
) -> list[dict]:
    """Delete from an arcana the files named like those in a local directory.

    The local files in ``directory`` matching ``pattern`` are collected,
    and for each one the arcana file with the same **name** is deleted.
    Handy for undoing a batch previously sent with
    :meth:`upload_directory`.

    Args:
        name: The arcana name or full ``owner/name`` ID.
        directory: Local directory whose file names are matched.
        pattern: Glob pattern selecting the files (default ``"*"``).
        recursive: Also descend into subdirectories.
        verbose: Print the deletion status of each file.

    Returns:
        One dict per file with ``"file"`` (file name only), ``"status"``
        (``"deleted"`` or ``"failed"``) and, on failure only,
        ``"error"``.
    """

    def _delete_one(path: Path):
        # Only the file name is sent; the local path is irrelevant remotely.
        return self.delete_file(name, path.name)

    matched = self._glob_files(directory, pattern, recursive=recursive)
    return self._run_file_batch(
        matched,
        _delete_one,
        verb="deleted",
        desc="Deleting",
        verbose=verbose,
    )
|
|
682
|
+
|
|
683
|
+
def sync_directory(
    self,
    name: str,
    directory: str | Path,
    *,
    select: Callable[[Path, dict | None], str],
    pattern: str = "*",
    recursive: bool = False,
    prune: bool = False,
    index: bool = True,
    index_wait: bool = True,
    verbose: bool = False,
) -> dict:
    """Sync a local directory into an arcana under caller-defined rules.

    The *policy* — which files to upload, replace, or skip — stays entirely
    outside the package: you supply a ``select`` callback that decides per
    file. This method only does the plumbing: glob the directory, fetch the
    remote listing, apply your decisions, and (optionally) trigger a single
    index pass. ARCANA stores no content hash, so any content-change
    detection (e.g. SHA-256 against your own manifest) belongs in
    ``select``.

    Local and remote files are paired by file name only.

    Args:
        name: The arcana name or full ``owner/name`` ID.
        directory: Local directory to sync from.
        select: Called once per local file as ``select(local_path, remote)``
            where ``local_path`` is a :class:`pathlib.Path` and ``remote``
            is the matching file dict from :meth:`list_files` (matched by
            name) or ``None`` if the file is not in the arcana yet. Must
            return ``"upload"`` (POST new), ``"replace"`` (PUT over an
            existing file), or ``"skip"``.
        pattern: Glob pattern for local files (default ``"*"``).
        recursive: If ``True``, recurse into subdirectories.
        prune: If ``True``, delete remote files that have no local
            counterpart by name. Defaults to ``False`` (never deletes
            implicitly).
        index: If ``True`` (default), call :meth:`generate_index` once after
            the sync — but only when something actually changed.
        index_wait: Forwarded to :meth:`generate_index` as ``wait``.
        verbose: If ``True``, print per-file actions (including failures)
            and a summary.

    Returns:
        A report ``dict`` with keys ``"uploaded"``, ``"replaced"``,
        ``"skipped"``, ``"deleted"`` (lists of file names), ``"failed"``
        (list of ``{"file", "error"}``) and ``"index"`` (the
        :meth:`generate_index` result, or ``None`` if indexing was skipped).

    Raises:
        ValueError: If ``select`` returns anything other than ``"upload"``,
            ``"replace"``, or ``"skip"``. All decisions are validated
            before the first upload is attempted.
    """
    local_files = self._glob_files(directory, pattern, recursive=recursive)
    # cast: `list_files` returns list[dict], but the `.list` method on this
    # class shadows the builtin in annotations (see the mypy override), so
    # mypy mis-types the return — cast restores `list` here.
    remote_files = cast(list, self.list_files(name))
    remote_by_name = {f["name"]: f for f in remote_files}

    # Pass 1 — ask the caller's policy what to do with each local file.
    # Planning everything first means an invalid answer aborts the sync
    # before anything has been changed on the server.
    valid = {"upload", "replace", "skip"}
    plan: list[tuple[Path, str]] = []
    for path in local_files:
        action = select(path, remote_by_name.get(path.name))
        if action not in valid:
            raise ValueError(
                f"select() must return one of {sorted(valid)}; "
                f"got {action!r} for {path.name}"
            )
        plan.append((path, action))

    report: dict = {
        "uploaded": [],
        "replaced": [],
        "skipped": [],
        "deleted": [],
        "failed": [],
        "index": None,
    }

    # Pass 2 — apply the plan. A failing file is recorded and the sync
    # carries on with the remaining files.
    for path, action in plan:
        if action == "skip":
            report["skipped"].append(path.name)
            continue
        overwrite = action == "replace"
        bucket = "replaced" if overwrite else "uploaded"
        try:
            self.upload(name, path, overwrite=overwrite)
            report[bucket].append(path.name)
            if verbose:
                print(f" {path.name} {bucket}")
        except Exception as e:
            report["failed"].append({"file": path.name, "error": str(e)})
            if verbose:
                print(f" {path.name} FAILED ({e})")

    if prune:
        local_names = {p.name for p in local_files}
        for remote_name in remote_by_name:
            if remote_name in local_names:
                continue
            try:
                self.delete_file(name, remote_name)
                report["deleted"].append(remote_name)
                if verbose:
                    print(f" {remote_name} deleted")
            except Exception as e:
                report["failed"].append({"file": remote_name, "error": str(e)})
                # Report failed deletions like failed uploads, so the
                # per-file log accounts for every entry in the final tally.
                if verbose:
                    print(f" {remote_name} FAILED ({e})")

    # Re-index only when the arcana's contents actually changed.
    if index and (report["uploaded"] or report["replaced"] or report["deleted"]):
        report["index"] = self.generate_index(name, wait=index_wait)

    if verbose:
        print(
            f"sync: {len(report['uploaded'])} uploaded, "
            f"{len(report['replaced'])} replaced, "
            f"{len(report['skipped'])} skipped, "
            f"{len(report['deleted'])} deleted, "
            f"{len(report['failed'])} failed"
        )
    return report
|
|
805
|
+
|
|
806
|
+
def generate_index(
    self,
    name: str,
    *,
    wait: bool = True,
    timeout: int = 600,
    poll_interval: int = 5,
) -> dict | None:
    """Trigger index generation for an arcana.

    By default this blocks until indexing reaches a terminal state
    (synchronous). For large arcanas the server may time out (504). Use
    ``wait=False`` to fire the request and return immediately, then poll
    with :meth:`info` to check the index status.

    Args:
        name: The arcana name or full ``owner/name`` ID.
        wait: If ``True`` (default), poll until indexing finishes.
            If ``False``, fire the request and return immediately.
        timeout: Maximum seconds to spend polling when ``wait=True``.
            Defaults to 600 (10 minutes).
        poll_interval: Seconds between status checks when ``wait=True``.
            Defaults to 5. The final sleep is shortened so that polling
            never runs past ``timeout``.

    Returns:
        ``None`` when ``wait=False``. When ``wait=True``, the arcana
        details dict (from :meth:`get`) as soon as its
        ``index_info.index_status`` is one of ``INDEXED``, ``ERROR`` or
        ``NOT_INDEXED`` -- callers should inspect that status rather
        than assume success.

    Raises:
        TimeoutError: If ``wait=True`` and no terminal status is seen
            within ``timeout`` seconds.
        APIError: If the trigger request fails with any HTTP status
            other than 504.
    """
    import threading
    import time

    resolved = extract_arcana_name(name)
    url = f"{self._arcana_base}/arcana/{quote(resolved, safe='')}/generate-index"

    if not wait:
        # Fire-and-forget: send the trigger on a background thread so we
        # return to the caller immediately. It uses its OWN Session -- the
        # caller polls via info()/get() on the shared client Session, and
        # requests.Session is not safe to use from two threads at once.
        def _fire():
            session = new_session_like(self._session)
            try:
                session.post(url, headers=self._headers(), timeout=600)
            except Exception:
                # Deliberate best-effort: nobody is waiting on this thread,
                # and indexing status is checked via info()/get().
                pass
            finally:
                session.close()

        threading.Thread(target=_fire, daemon=True).start()
        return None

    # Synchronous: fire the request, tolerate transport-level failures, then poll
    try:
        resp = self._session.post(url, headers=self._headers(), timeout=30)
        raise_for_status(resp)
    except (requests.exceptions.Timeout, requests.exceptions.ConnectionError):
        # The trigger almost certainly reached the server (the body
        # was written before the response was dropped). ARCANA
        # commonly holds the trigger connection while it builds the
        # embedding queue, then closes it without a response. The
        # arcana state machine is authoritative -- fall through to
        # the poll loop. Sanity-check with a GET first: if that
        # also fails at the transport level, the server is
        # genuinely down and the error propagates.
        self.get(name)
    except APIError as e:
        # 504 Gateway Timeout from nginx is the same shape: trigger
        # accepted, gateway gave up waiting for a response.
        if e.status_code != 504:
            raise

    # Poll until indexing finishes
    deadline = time.monotonic() + timeout
    terminal = {"INDEXED", "ERROR", "NOT_INDEXED"}
    status = ""

    while True:
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            break
        # Never sleep past the deadline: a full poll_interval here could
        # overshoot ``timeout`` by up to one interval.
        time.sleep(min(poll_interval, remaining))
        data = self.get(name)
        idx = data.get("index_info") or {}
        status = idx.get("index_status", "")
        if status in terminal:
            return data

    raise TimeoutError(
        f"Indexing did not complete within {timeout}s. "
        f"Last status: {status}. Check with client.arcana.info(...)."
    )
|
|
899
|
+
|
|
900
|
+
def delete_index(self, name: str) -> dict | None:
    """Remove the index that was built for an arcana.

    The arcana may be given as a bare name or as a full ``owner/name``
    ID; it is normalised with ``extract_arcana_name`` and URL-quoted
    before being placed in the request path.

    Args:
        name: The arcana name or full ``owner/name`` ID.

    Returns:
        The API response, or ``None`` if the response has no body.
    """
    arcana_segment = quote(extract_arcana_name(name), safe="")
    endpoint = f"{self._arcana_base}/arcana/{arcana_segment}/delete-index"
    response = self._session.delete(endpoint, headers=self._headers())
    # Surface HTTP failures before attempting to read a body.
    raise_for_status(response)
    return _json_or_none(response)
|
|
918
|
+
|
|
919
|
+
def setup_from_directory(
    self,
    name: str,
    source_dir: str | Path,
    *,
    pattern: str = "*.md",
    append_uuid: bool = True,
    update_toml: bool = False,
    toml_label: str | None = None,
    wait_for_index: bool = True,
    index_timeout: int = 600,
    verbose: bool = True,
) -> dict:
    """Create an arcana, fill it from a directory, and build its index.

    Chains :meth:`create`, :meth:`upload_directory` and
    :meth:`generate_index`. The upload and index steps are addressed
    with the ``"name"`` field of the :meth:`create` result rather than
    the ``name`` argument, so a UUID suffix added by ``create`` (when
    ``append_uuid=True``) is picked up automatically.

    Args:
        name: Display name for the new arcana (UUID suffix appended
            when ``append_uuid=True``).
        source_dir: Directory whose matching files are uploaded.
        pattern: Glob pattern forwarded to :meth:`upload_directory`.
            Defaults to ``"*.md"``.
        append_uuid: Forwarded to :meth:`create`.
        update_toml: Forwarded to :meth:`create`. If ``True``, the new
            arcana is added to ``config.toml`` after creation.
        toml_label: Forwarded to :meth:`create`; the label under
            ``[saia.arcana.labels]``. Ignored when ``update_toml`` is
            ``False``.
        wait_for_index: Forwarded to :meth:`generate_index` as ``wait``.
        index_timeout: Forwarded to :meth:`generate_index` as
            ``timeout`` (seconds). Defaults to 600.
        verbose: Forwarded to :meth:`upload_directory`.

    Returns:
        A dict with three keys, one per step: ``"arcana"`` (result of
        :meth:`create`), ``"uploads"`` (result of
        :meth:`upload_directory`) and ``"index"`` (result of
        :meth:`generate_index`).

    Example::

        result = client.arcana.setup_from_directory(
            "MyKB", "./markdown/",
            pattern="**/*.md",
            update_toml=True, toml_label="my_kb",
        )
        print(result["arcana"]["id"])   # owner/MyKB-<uuid>
        print(len(result["uploads"]))   # files uploaded
        print(result["index"])          # generate_index() result
    """
    outcome: dict = {}

    # Step 1 -- create the arcana; its reported name drives the later steps.
    outcome["arcana"] = self.create(
        name,
        append_uuid=append_uuid,
        update_toml=update_toml,
        toml_label=toml_label,
    )
    target = outcome["arcana"]["name"]

    # Step 2 -- upload every file in source_dir that matches the pattern.
    outcome["uploads"] = self.upload_directory(
        target,
        source_dir,
        pattern=pattern,
        verbose=verbose,
    )

    # Step 3 -- build the index over what was just uploaded.
    outcome["index"] = self.generate_index(
        target,
        wait=wait_for_index,
        timeout=index_timeout,
    )
    return outcome
|
|
1004
|
+
|
|
1005
|
+
def chat(
    self,
    model: str,
    messages: list[dict],
    arcana_id: str,
    *,
    temperature: float | None = None,
    max_tokens: int | None = None,
    stream: bool = False,
    **kwargs,
) -> dict | SSEStream:
    """Run a chat completion with retrieval context from an arcana.

    The request goes to the regular ``/chat/completions`` endpoint; the
    arcana is selected by adding an ``"arcana"`` entry (and
    ``"enable-tools": True``) to the request body.

    Args:
        model: Model identifier.
        messages: Chat messages.
        arcana_id: The arcana ID to use for retrieval.
        temperature: Added to the body as ``"temperature"`` only when
            not ``None``.
        max_tokens: Added to the body as ``"max_tokens"`` only when
            not ``None``.
        stream: Passed through to ``post_chat_completion``; selects the
            streaming return type described below.
        **kwargs: Additional body fields forwarded to the API. They are
            merged after the built-in fields, so a matching key
            replaces the built-in value.

    Returns:
        When ``stream=False``: the API response dict, with an extra
        ``"_rate_limits"`` key (a JSON-serializable dict; see
        :class:`~saia_python.RateLimitInfo`). When ``stream=True``: an
        ``SSEStream`` whose ``rate_limits`` attribute exposes the same dict.
    """
    payload: dict = {
        "model": model,
        "messages": messages,
        "enable-tools": True,
        "arcana": {"id": arcana_id},
    }
    payload.update(kwargs)

    # Optional sampling knobs are sent only when the caller set them.
    for field, value in (("temperature", temperature), ("max_tokens", max_tokens)):
        if value is not None:
            payload[field] = value

    request_headers = {
        "Authorization": f"Bearer {self._api_key}",
        "Accept": "application/json",
        "inference-service": "saia-openai-gateway",
    }
    endpoint = f"{self._base_url}/chat/completions"

    return post_chat_completion(
        self._session,
        endpoint,
        payload,
        headers=request_headers,
        stream=stream,
    )
|
|
1059
|
+
|
|
1060
|
+
def __repr__(self):
    """Return a debug representation showing the configured base URL."""
    base_url = self._base_url
    return f"ArcanaService(base_url={base_url!r})"
|