saia-python 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
saia_python/arcana.py ADDED
@@ -0,0 +1,1061 @@
1
+ """ARCANA (RAG) service — manage knowledge bases and chat with context."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import uuid as _uuid
7
+ from collections.abc import Callable, Iterable
8
+ from pathlib import Path
9
+ from typing import cast
10
+ from urllib.parse import quote
11
+
12
+ import requests
13
+
14
+ from ._http import new_session_like, post_chat_completion
15
+ from ._streaming import SSEStream
16
+ from ._util import progress_iter
17
+ from .exceptions import APIError, raise_for_status
18
+
19
+ _ARCANA_PATH = "/arcanas/api/v1"
20
+
21
+
22
def extract_arcana_name(id_or_name: str) -> str:
    """Return the bare arcana name from a full ID or a plain name.

    The ARCANA chat endpoint addresses an arcana as ``owner/name`` whereas
    the management endpoints (``get``, ``upload``) expect only ``name``.
    Either spelling may be passed here.

    Args:
        id_or_name: ``"owner/name"`` or simply ``"name"``.

    Returns:
        Everything after the first ``/``; the input itself when it
        contains no ``/``.
    """
    _owner, slash, remainder = id_or_name.partition("/")
    # An empty separator means no "/" was found at all.
    return remainder if slash else id_or_name
39
+
40
+
41
+ def _json_or_none(resp) -> dict | None:
42
+ """Return ``resp.json()``, or ``None`` when the body is empty / not JSON.
43
+
44
+ Several ARCANA management endpoints (delete, upload, delete-index, …) may
45
+ answer with an empty or non-JSON body on success; this collapses the
46
+ repeated decode-or-``None`` ``try/except`` they all used.
47
+ """
48
+ try:
49
+ return resp.json()
50
+ except (json.JSONDecodeError, ValueError):
51
+ return None
52
+
53
+
54
+ class ArcanaService:
55
+ """Access the ARCANA/RAG endpoints.
56
+
57
+ The ARCANA API uses a different auth scheme (plain key, no ``Bearer`` prefix)
58
+ and a different URL path. This is handled automatically.
59
+
60
+ Args:
61
+ session: A :class:`requests.Session` (auth header will be overridden per-request).
62
+ base_url: The SAIA API base URL (e.g. ``https://chat-ai.academiccloud.de/v1``).
63
+ api_key: The raw API key (needed because ARCANA omits the ``Bearer`` prefix).
64
+ """
65
+
66
def __init__(self, session: requests.Session, base_url: str, api_key: str):
    """Bind the service to a session, a base URL and the raw API key.

    Args:
        session: Session used for every ARCANA request.
        base_url: The SAIA API base URL; a trailing ``/`` is tolerated.
        api_key: The raw API key, sent as-is in ``Authorization``.
    """
    self._session = session
    self._base_url = base_url
    # Strip a trailing slash so the joined URL never contains "//arcanas".
    self._arcana_base = f"{base_url.rstrip('/')}{_ARCANA_PATH}"
    self._api_key = api_key
71
+
72
+ def _headers(self, **extra) -> dict:
73
+ return {"Authorization": self._api_key, "Accept": "application/json", **extra}
74
+
75
+ @staticmethod
76
+ def _format_arcana_line(a: dict, *, with_owner: bool = False) -> str:
77
+ """Format one arcana as a one-line summary (shared by the summary views)."""
78
+ idx = a.get("index_info") or {}
79
+ status = idx.get("index_status", "?")
80
+ if with_owner:
81
+ return (
82
+ f" {a['name']} "
83
+ f"(owner: {a.get('owner_user_name', '?')}, "
84
+ f"files: {a.get('file_count', '?')}, "
85
+ f"status: {status})"
86
+ )
87
+ return f" {a['name']} (files: {a.get('file_count', '?')}, status: {status})"
88
+
89
+ @staticmethod
90
+ def _glob_files(
91
+ directory: str | Path, pattern: str, *, recursive: bool
92
+ ) -> list[Path]:
93
+ """Return the sorted matching files in ``directory``.
94
+
95
+ Raises:
96
+ FileNotFoundError: If ``directory`` is not a directory, or nothing
97
+ matches ``pattern``.
98
+ """
99
+ directory = Path(directory)
100
+ if not directory.is_dir():
101
+ raise FileNotFoundError(f"Directory not found: {directory}")
102
+ glob_method = directory.rglob if recursive else directory.glob
103
+ files = sorted(p for p in glob_method(pattern) if p.is_file())
104
+ if not files:
105
+ raise FileNotFoundError(f"No files matching '{pattern}' in {directory}")
106
+ return files
107
+
108
def _run_file_batch(
    self,
    files: list[Path],
    action,
    *,
    verb: str,
    desc: str,
    verbose: bool,
) -> list[dict]:
    """Apply ``action`` to every file and collect a per-file outcome.

    This is the common loop behind the directory upload / delete helpers.
    Each file yields a ``{"file", "status", ["error"]}`` record; a failing
    ``action`` is recorded rather than propagated so the batch continues.

    Args:
        files: Paths to process, in order.
        action: Callable invoked as ``action(path)``.
        verb: Past-tense status recorded on success (e.g. ``"uploaded"``).
        desc: Label handed to the progress iterator.
        verbose: Print one line per file plus a final tally when ``True``.

    Returns:
        The list of per-file records.
    """
    outcomes: list[dict] = []
    for path in progress_iter(files, desc=desc, unit="file"):
        record: dict = {"file": path.name}
        try:
            action(path)
        except Exception as exc:
            # Best-effort batch: remember the failure and keep going.
            record["status"] = "failed"
            record["error"] = str(exc)
        else:
            record["status"] = verb
        if verbose:
            if record["status"] == verb:
                label = verb.capitalize()
            else:
                label = "FAILED"
            print(f" {path.name} {label}")
        outcomes.append(record)

    if verbose:
        total = len(outcomes)
        succeeded = len([rec for rec in outcomes if rec["status"] == verb])
        failed = total - succeeded
        summary = f"{succeeded}/{total} files {verb}"
        if failed:
            summary += f" ({failed} failed)"
        print(summary)
    return outcomes
147
+
148
def version(self) -> str:
    """Fetch the ARCANA API version.

    Issues ``GET /arcanas/api/v1/version``.

    Returns:
        The ``version`` field of the response (e.g. ``"0.4.16"``), or an
        empty string when the field is missing.
    """
    endpoint = f"{self._arcana_base}/version"
    response = self._session.get(endpoint, headers=self._headers())
    raise_for_status(response)
    payload = response.json()
    return payload.get("version", "")
161
+
162
+ def heartbeat(self) -> bool:
163
+ """Check whether the ARCANA service is alive.
164
+
165
+ Calls ``GET /arcanas/api/v1/heartbeat``. Returns ``True`` if the
166
+ service responds with 204, ``False`` otherwise (including transport
167
+ errors — it never raises).
168
+ """
169
+ try:
170
+ resp = self._session.get(
171
+ f"{self._arcana_base}/heartbeat",
172
+ headers=self._headers(),
173
+ timeout=10,
174
+ )
175
+ return resp.status_code == 204
176
+ except Exception:
177
+ return False
178
+
179
def user_info(self) -> dict:
    """Fetch the profile of the authenticated user.

    Issues ``GET /user/me`` below the ARCANA base path.

    Returns:
        The decoded JSON response with the user's profile fields.
    """
    endpoint = f"{self._arcana_base}/user/me"
    response = self._session.get(endpoint, headers=self._headers())
    raise_for_status(response)
    return response.json()
194
+
195
+ def user_summary(self) -> str:
196
+ """Return a formatted overview of the user's account and all arcanas.
197
+
198
+ Combines :meth:`user_info` and :meth:`list` into a single
199
+ human-readable string.
200
+
201
+ Returns:
202
+ A multi-line summary.
203
+ """
204
+ user = self.user_info()
205
+ arcanas = self.list()
206
+
207
+ _W = 18
208
+ lines = [
209
+ f"{'Username':<{_W}}{user.get('username', '?')}",
210
+ f"{'Email':<{_W}}{user.get('email', '?')}",
211
+ f"{'Name':<{_W}}{user.get('first_name', '')} {user.get('surname', '')}".rstrip(),
212
+ f"{'Registered':<{_W}}{user.get('is_registered', '?')}",
213
+ f"{'Arcana count':<{_W}}{user.get('arcana_count', '?')}",
214
+ f"{'Total files':<{_W}}{user.get('file_count', '?')}",
215
+ ]
216
+ if user.get("created_at"):
217
+ lines.append(f"{'Member since':<{_W}}{user['created_at']}")
218
+
219
+ lines.append(f"\nArcanas ({len(arcanas)}):")
220
+ for a in arcanas:
221
+ lines.append(self._format_arcana_line(a))
222
+ if not arcanas:
223
+ lines.append(" (none)")
224
+
225
+ return "\n".join(lines)
226
+
227
def create(
    self,
    name: str,
    *,
    append_uuid: bool = True,
    update_toml: bool = False,
    toml_label: str | None = None,
) -> dict:
    """Create a new arcana.

    Args:
        name: Name for the new arcana (1–100 characters).
        append_uuid: If ``True`` (default), append a UUID4 suffix to
            the name (e.g. ``MyArcana-a1b2c3d4-...``). This mirrors
            the behavior of the SAIA web UI and avoids name collisions.
        update_toml: If ``True``, add the new arcana to ``config.toml``
            after creation.
        toml_label: Label under ``[saia.arcana.labels]`` in config.toml.
            If omitted, the ID is appended to the ``ids`` array.

    Returns:
        A dict with ``"name"`` (the created name, including any UUID
        suffix), ``"id"`` (the full ``owner/name`` ID, or just the name
        when the owner is unknown) and ``"message"`` (the decoded
        creation response, or ``None`` when its body is empty or not
        JSON).
    """
    if append_uuid:
        name = f"{name}-{_uuid.uuid4()}"

    resp = self._session.post(
        f"{self._arcana_base}/arcana/",
        headers={**self._headers(), "Content-Type": "application/json"},
        json={"name": name},
    )
    raise_for_status(resp)
    # The arcana exists at this point; an empty or non-JSON success body
    # must not turn a successful creation into an exception.
    message = _json_or_none(resp)

    # The creation call is not relied on for the owner, so fetch the
    # details to build the full owner/name ID.
    details = self.get(name)
    owner = details.get("owner_user_name", "")
    full_id = f"{owner}/{name}" if owner else name

    result = {"name": name, "id": full_id, "message": message}

    if update_toml:
        from .auth import add_arcana_to_config

        add_arcana_to_config(full_id, label=toml_label)

    return result
275
+
276
def delete(
    self,
    name: str,
    *,
    update_toml: bool = False,
) -> dict | None:
    """Remove an arcana completely.

    Both the plain name and the full ``owner/name`` ID are accepted.

    Args:
        name: The arcana name or full ``owner/name`` ID.
        update_toml: If ``True``, also drop the arcana from
            ``config.toml`` once the server-side delete succeeded.

    Returns:
        The decoded API response, or ``None`` when it has no JSON body.
    """
    given_id = name
    bare_name = extract_arcana_name(given_id)
    target = f"{self._arcana_base}/arcana/{quote(bare_name, safe='')}"

    response = self._session.delete(target, headers=self._headers())
    raise_for_status(response)

    if update_toml:
        from .auth import remove_arcana_from_config

        # The config may hold either spelling; dict.fromkeys keeps the
        # order and collapses the pair when both are the same string.
        for candidate in dict.fromkeys((given_id, bare_name)):
            remove_arcana_from_config(candidate)

    return _json_or_none(response)
313
+
314
def list(self) -> list[dict]:
    """Fetch every arcana visible to the caller.

    Returns:
        The decoded JSON response — a list of arcana dicts.
    """
    collection_url = f"{self._arcana_base}/arcana/"
    response = self._session.get(collection_url, headers=self._headers())
    raise_for_status(response)
    return response.json()
326
+
327
+ def summary(self, *, arcana_ids: dict[str, str] | None = None) -> str:
328
+ """Return a formatted summary of configured and available arcanas.
329
+
330
+ Combines information from :func:`~saia_python.load_arcana_ids`
331
+ (configured IDs) and :meth:`list` (server-side arcanas) into a
332
+ single human-readable string.
333
+
334
+ Args:
335
+ arcana_ids: Optional pre-loaded dict from
336
+ :func:`~saia_python.load_arcana_ids`. If omitted, loaded
337
+ automatically.
338
+
339
+ Returns:
340
+ A multi-line summary string.
341
+ """
342
+ if arcana_ids is None:
343
+ from .auth import load_arcana_ids
344
+
345
+ arcana_ids = load_arcana_ids()
346
+
347
+ lines = []
348
+
349
+ # Configured IDs
350
+ if arcana_ids:
351
+ lines.append(f"Configured ARCANA IDs ({len(arcana_ids)}):")
352
+ default_id = arcana_ids.get("default", "")
353
+ if default_id:
354
+ name_part = extract_arcana_name(default_id)
355
+ lines.append(f" Default ID {default_id}")
356
+ lines.append(f" Default name {name_part}")
357
+ for label, aid in arcana_ids.items():
358
+ if label != "default":
359
+ lines.append(f" [{label}] {aid}")
360
+ else:
361
+ lines.append("No ARCANA IDs configured.")
362
+
363
+ # Server-side arcanas
364
+ arcanas = self.list()
365
+ lines.append(f"\nAvailable on server ({len(arcanas)}):")
366
+ if arcanas:
367
+ for a in arcanas:
368
+ lines.append(self._format_arcana_line(a, with_owner=True))
369
+ else:
370
+ lines.append(" (none)")
371
+
372
+ return "\n".join(lines)
373
+
374
def get(self, name: str) -> dict:
    """Fetch the details of one arcana.

    Both the plain name and the full ``owner/name`` ID are accepted; an
    owner prefix is removed before the request is made.

    Args:
        name: The arcana name or full ``owner/name`` ID.

    Returns:
        The decoded arcana details.
    """
    bare_name = extract_arcana_name(name)
    endpoint = f"{self._arcana_base}/arcana/{quote(bare_name, safe='')}"
    response = self._session.get(endpoint, headers=self._headers())
    raise_for_status(response)
    return response.json()
393
+
394
def info(
    self,
    name: str | None = None,
    *,
    data: dict | None = None,
    verbose: bool = False,
) -> str:
    """Return a formatted summary of an arcana's details.

    Args:
        name: The arcana name or full ``owner/name`` ID. Can be omitted
            if ``data`` is provided.
        data: Optional pre-fetched arcana dict (from :meth:`get`).
            Avoids a redundant API call when you already have the data.
        verbose: If ``True``, include additional fields (CLI version,
            vector DB version, error message if present).

    Returns:
        A human-readable multi-line string.

    Raises:
        ValueError: If neither ``name`` nor ``data`` is given.
    """
    if data is None:
        if name is None:
            raise ValueError("Either name or data must be provided")
        data = self.get(name)
    idx = data.get("index_info") or {}

    def _size_fmt(n: float) -> str:
        # Scale by 1024 until the value fits the unit; anything beyond
        # GB is reported in TB.
        for unit in ("B", "KB", "MB", "GB"):
            if abs(n) < 1024:
                return f"{n:.1f} {unit}"
            n /= 1024
        return f"{n:.1f} TB"

    # ``size`` can be present but null; treat that like a missing value
    # instead of handing ``None`` to the formatter.
    size = data.get("size") or 0

    _W = 18  # label column width
    lines = [
        f"{'Name':<{_W}}{data.get('name', '?')}",
        f"{'Owner':<{_W}}{data.get('owner_user_name', '?')}",
        f"{'Files':<{_W}}{data.get('file_count', '?')} ({_size_fmt(size)})",
        f"{'Index status':<{_W}}{idx.get('index_status', '?')}",
        f"{'Embeddings model':<{_W}}{idx.get('embeddings_model', '?')}",
        f"{'Files indexed':<{_W}}{idx.get('total_files_indexed', '?')}",
        f"{'Chunks indexed':<{_W}}{idx.get('total_chunks_indexed', '?')}",
        f"{'Created':<{_W}}{data.get('created_at', '?')}",
        f"{'Updated':<{_W}}{data.get('updated_at', '?')}",
    ]

    if verbose:
        if idx.get("error_msg") is not None:
            # Position 4 places the error right below "Index status".
            lines.insert(4, f"{'Error':<{_W}}{idx['error_msg']}")
        lines.append(f"{'CLI version':<{_W}}{idx.get('cli_version', '?')}")
        lines.append(
            f"{'Vector DB version':<{_W}}{idx.get('vector_db_version', '?')}"
        )

    return "\n".join(lines)
449
+
450
def upload(
    self, name: str, file_path: str | Path, *, overwrite: bool = False
) -> dict | None:
    """Send one file to an arcana so it can be indexed.

    Supported formats: PDF, text, markdown. Both the plain name and the
    full ``owner/name`` ID are accepted.

    Args:
        name: The arcana name or full ``owner/name`` ID.
        file_path: Path of the file to upload.
        overwrite: If ``True``, replace an existing file of the same name
            (PUT to the file's own URL); otherwise create it (POST to the
            collection URL).

    Returns:
        The decoded API response, or ``None`` when it has no JSON body.
    """
    arcana = extract_arcana_name(name)
    source = Path(file_path)
    files_url = f"{self._arcana_base}/arcana/{quote(arcana, safe='')}/files"

    with open(source, "rb") as handle:
        if overwrite:
            send = self._session.put
            target = f"{files_url}/{quote(source.name, safe='')}"
        else:
            send = self._session.post
            target = f"{files_url}/"
        response = send(
            target,
            headers=self._headers(),
            files={"file": (source.name, handle)},
        )

    raise_for_status(response)
    return _json_or_none(response)
485
+
486
+ def upload_directory(
487
+ self,
488
+ name: str,
489
+ directory: str | Path,
490
+ *,
491
+ pattern: str = "*",
492
+ recursive: bool = False,
493
+ overwrite: bool = False,
494
+ verbose: bool = False,
495
+ ) -> list[dict]:
496
+ """Upload all files in a directory to an arcana.
497
+
498
+ Args:
499
+ name: The arcana name or full ``owner/name`` ID.
500
+ directory: Path to the directory containing files to upload.
501
+ pattern: Glob pattern to filter files (default ``"*"`` — all files).
502
+ For example, ``"*.pdf"`` to upload only PDFs.
503
+ recursive: If ``True``, search subdirectories recursively
504
+ (uses ``**/<pattern>``).
505
+ overwrite: If ``True``, replace existing files with the same name.
506
+ verbose: If ``True``, print per-file upload status.
507
+
508
+ Returns:
509
+ A list of dicts with keys ``"file"`` (filename only),
510
+ ``"status"`` (``"uploaded"`` or ``"failed"``), and
511
+ ``"error"`` (message string, only present on failure).
512
+ """
513
+ files = self._glob_files(directory, pattern, recursive=recursive)
514
+ return self._run_file_batch(
515
+ files,
516
+ lambda fp: self.upload(name, fp, overwrite=overwrite),
517
+ verb="uploaded",
518
+ desc="Uploading",
519
+ verbose=verbose,
520
+ )
521
+
522
+ def upload_files(
523
+ self,
524
+ name: str,
525
+ paths: Iterable[str | Path],
526
+ *,
527
+ overwrite: bool = True,
528
+ verbose: bool = False,
529
+ ) -> list[dict]:
530
+ """Upload an explicit, caller-chosen list of files to an arcana.
531
+
532
+ Unlike :meth:`upload_directory` (which globs a whole directory), this
533
+ uploads exactly the ``paths`` you pass — so the *selection* of what to
534
+ (re)upload is entirely the caller's decision (e.g. the result of your
535
+ own changed-file / checksum comparison). Pair it with
536
+ :meth:`list_files` (whose dicts carry per-file ``index_info``) and a
537
+ single :meth:`generate_index` afterwards.
538
+
539
+ Args:
540
+ name: The arcana name or full ``owner/name`` ID.
541
+ paths: An iterable of paths to upload.
542
+ overwrite: If ``True`` (default), replace existing files (PUT); if
543
+ ``False``, create new files (POST). Defaults to ``True`` because
544
+ the typical caller has already decided these files are new or
545
+ changed.
546
+ verbose: If ``True``, print per-file upload status.
547
+
548
+ Returns:
549
+ A list of ``{"file", "status", ["error"]}`` dicts — the same shape
550
+ as :meth:`upload_directory`.
551
+ """
552
+ files = [Path(p) for p in paths]
553
+ return self._run_file_batch(
554
+ files,
555
+ lambda fp: self.upload(name, fp, overwrite=overwrite),
556
+ verb="uploaded",
557
+ desc="Uploading",
558
+ verbose=verbose,
559
+ )
560
+
561
def list_files(self, name: str) -> list[dict]:
    """Fetch the file listing of an arcana.

    Both the plain name and the full ``owner/name`` ID are accepted.

    Args:
        name: The arcana name or full ``owner/name`` ID.

    Returns:
        A list of file dicts (the API ``FileOutSchema``). Each entry has:

        - ``name`` (str): file name — usable with :meth:`download_file`,
          :meth:`delete_file`, or an ``overwrite`` upload.
        - ``size`` (int): size in bytes.
        - ``owner_user_name`` (str): the owning user.
        - ``created_at`` / ``updated_at`` (str): ISO-8601 timestamps.
        - ``index_info`` (dict | None): per-file index state, ``None``
          for a file that was never indexed. Otherwise it carries
          ``index_status`` (str — e.g. ``"INDEXED"``, ``"NOT_INDEXED"``,
          ``"ERROR"``) and ``chunks_indexed`` (int — embedding chunks
          produced for the file).
        - ``related_files`` (list | None): nested entries of the same
          shape, when the server groups derived files together.

        Thanks to the per-file ``index_info`` callers can tell which
        files are already indexed without extra work. Indexing itself is
        started per arcana through :meth:`generate_index`; the ARCANA API
        offers no per-file index call.
    """
    bare_name = extract_arcana_name(name)
    endpoint = f"{self._arcana_base}/arcana/{quote(bare_name, safe='')}/files/"
    response = self._session.get(endpoint, headers=self._headers())
    raise_for_status(response)
    return response.json()
597
+
598
def delete_file(self, name: str, file_name: str) -> dict | None:
    """Remove a single file from an arcana.

    Both the plain name and the full ``owner/name`` ID are accepted.

    Args:
        name: The arcana name or full ``owner/name`` ID.
        file_name: Name of the file to delete, as reported by
            :meth:`list_files`.

    Returns:
        The decoded API response, or ``None`` when it has no JSON body.
    """
    arcana_part = quote(extract_arcana_name(name), safe="")
    file_part = quote(file_name, safe="")
    endpoint = f"{self._arcana_base}/arcana/{arcana_part}/files/{file_part}"
    response = self._session.delete(endpoint, headers=self._headers())
    raise_for_status(response)
    return _json_or_none(response)
618
+
619
def download_file(self, name: str, file_name: str, output_path: str | Path) -> Path:
    """Download a file from an arcana to a local path.

    Accepts either the plain name or the full ``owner/name`` ID. The body
    is streamed to disk in 8 KiB chunks rather than held in memory.

    Args:
        name: The arcana name or full ``owner/name`` ID.
        file_name: The name of the file to download (as returned
            by :meth:`list_files`).
        output_path: Local path to save the file to.

    Returns:
        The path the file was written to.
    """
    name = extract_arcana_name(name)
    resp = self._session.get(
        f"{self._arcana_base}/arcana/{quote(name, safe='')}/files/{quote(file_name, safe='')}/download",
        headers=self._headers(),
        stream=True,
    )
    raise_for_status(resp)
    output_path = Path(output_path)
    try:
        with open(output_path, "wb") as f:
            for chunk in resp.iter_content(chunk_size=8192):
                f.write(chunk)
    finally:
        # With stream=True the connection stays checked out until the
        # body is fully read or the response is closed; close explicitly
        # so a failed open()/write() does not leave it dangling.
        resp.close()
    return output_path
645
+
646
+ def delete_directory(
647
+ self,
648
+ name: str,
649
+ directory: str | Path,
650
+ *,
651
+ pattern: str = "*",
652
+ recursive: bool = False,
653
+ verbose: bool = False,
654
+ ) -> list[dict]:
655
+ """Delete files from an arcana that match filenames in a local directory.
656
+
657
+ Finds all files in ``directory`` matching ``pattern``, then deletes
658
+ files with the same **name** from the arcana. Useful for removing
659
+ a batch of files that were previously uploaded with
660
+ :meth:`upload_directory`.
661
+
662
+ Args:
663
+ name: The arcana name or full ``owner/name`` ID.
664
+ directory: Local directory whose filenames to match.
665
+ pattern: Glob pattern to filter files (default ``"*"``).
666
+ recursive: If ``True``, search subdirectories recursively.
667
+ verbose: If ``True``, print per-file deletion status.
668
+
669
+ Returns:
670
+ A list of dicts with keys ``"file"`` (filename only),
671
+ ``"status"`` (``"deleted"`` or ``"failed"``), and
672
+ ``"error"`` (only present on failure).
673
+ """
674
+ files = self._glob_files(directory, pattern, recursive=recursive)
675
+ return self._run_file_batch(
676
+ files,
677
+ lambda fp: self.delete_file(name, fp.name),
678
+ verb="deleted",
679
+ desc="Deleting",
680
+ verbose=verbose,
681
+ )
682
+
683
+ def sync_directory(
684
+ self,
685
+ name: str,
686
+ directory: str | Path,
687
+ *,
688
+ select: Callable[[Path, dict | None], str],
689
+ pattern: str = "*",
690
+ recursive: bool = False,
691
+ prune: bool = False,
692
+ index: bool = True,
693
+ index_wait: bool = True,
694
+ verbose: bool = False,
695
+ ) -> dict:
696
+ """Sync a local directory into an arcana under caller-defined rules.
697
+
698
+ The *policy* — which files to upload, replace, or skip — stays entirely
699
+ outside the package: you supply a ``select`` callback that decides per
700
+ file. This method only does the plumbing: glob the directory, fetch the
701
+ remote listing, apply your decisions, and (optionally) trigger a single
702
+ index pass. ARCANA stores no content hash, so any content-change
703
+ detection (e.g. SHA-256 against your own manifest) belongs in
704
+ ``select``.
705
+
706
+ Args:
707
+ name: The arcana name or full ``owner/name`` ID.
708
+ directory: Local directory to sync from.
709
+ select: Called once per local file as ``select(local_path, remote)``
710
+ where ``local_path`` is a :class:`pathlib.Path` and ``remote``
711
+ is the matching file dict from :meth:`list_files` (matched by
712
+ name) or ``None`` if the file is not in the arcana yet. Must
713
+ return ``"upload"`` (POST new), ``"replace"`` (PUT over an
714
+ existing file), or ``"skip"``.
715
+ pattern: Glob pattern for local files (default ``"*"``).
716
+ recursive: If ``True``, recurse into subdirectories.
717
+ prune: If ``True``, delete remote files that have no local
718
+ counterpart by name. Defaults to ``False`` (never deletes
719
+ implicitly).
720
+ index: If ``True`` (default), call :meth:`generate_index` once after
721
+ the sync — but only when something actually changed.
722
+ index_wait: Forwarded to :meth:`generate_index` as ``wait``.
723
+ verbose: If ``True``, print per-file actions and a summary.
724
+
725
+ Returns:
726
+ A report ``dict`` with keys ``"uploaded"``, ``"replaced"``,
727
+ ``"skipped"``, ``"deleted"`` (lists of file names), ``"failed"``
728
+ (list of ``{"file", "error"}``) and ``"index"`` (the
729
+ :meth:`generate_index` result, or ``None`` if indexing was skipped).
730
+
731
+ Raises:
732
+ ValueError: If ``select`` returns anything other than ``"upload"``,
733
+ ``"replace"``, or ``"skip"``.
734
+ """
735
+ local_files = self._glob_files(directory, pattern, recursive=recursive)
736
+ # cast: `list_files` returns list[dict], but the `.list` method on this
737
+ # class shadows the builtin in annotations (see the mypy override), so
738
+ # mypy mis-types the return — cast restores `list` here.
739
+ remote_files = cast(list, self.list_files(name))
740
+ remote_by_name = {f["name"]: f for f in remote_files}
741
+
742
+ # Pass 1 — ask the caller's policy what to do with each local file.
743
+ valid = {"upload", "replace", "skip"}
744
+ plan: list[tuple[Path, str]] = []
745
+ for path in local_files:
746
+ action = select(path, remote_by_name.get(path.name))
747
+ if action not in valid:
748
+ raise ValueError(
749
+ f"select() must return one of {sorted(valid)}; "
750
+ f"got {action!r} for {path.name}"
751
+ )
752
+ plan.append((path, action))
753
+
754
+ report: dict = {
755
+ "uploaded": [],
756
+ "replaced": [],
757
+ "skipped": [],
758
+ "deleted": [],
759
+ "failed": [],
760
+ "index": None,
761
+ }
762
+
763
+ # Pass 2 — apply the plan.
764
+ for path, action in plan:
765
+ if action == "skip":
766
+ report["skipped"].append(path.name)
767
+ continue
768
+ overwrite = action == "replace"
769
+ bucket = "replaced" if overwrite else "uploaded"
770
+ try:
771
+ self.upload(name, path, overwrite=overwrite)
772
+ report[bucket].append(path.name)
773
+ if verbose:
774
+ print(f" {path.name} {bucket}")
775
+ except Exception as e:
776
+ report["failed"].append({"file": path.name, "error": str(e)})
777
+ if verbose:
778
+ print(f" {path.name} FAILED ({e})")
779
+
780
+ if prune:
781
+ local_names = {p.name for p in local_files}
782
+ for remote_name in remote_by_name:
783
+ if remote_name in local_names:
784
+ continue
785
+ try:
786
+ self.delete_file(name, remote_name)
787
+ report["deleted"].append(remote_name)
788
+ if verbose:
789
+ print(f" {remote_name} deleted")
790
+ except Exception as e:
791
+ report["failed"].append({"file": remote_name, "error": str(e)})
792
+
793
+ if index and (report["uploaded"] or report["replaced"] or report["deleted"]):
794
+ report["index"] = self.generate_index(name, wait=index_wait)
795
+
796
+ if verbose:
797
+ print(
798
+ f"sync: {len(report['uploaded'])} uploaded, "
799
+ f"{len(report['replaced'])} replaced, "
800
+ f"{len(report['skipped'])} skipped, "
801
+ f"{len(report['deleted'])} deleted, "
802
+ f"{len(report['failed'])} failed"
803
+ )
804
+ return report
805
+
806
def generate_index(
    self,
    name: str,
    *,
    wait: bool = True,
    timeout: int = 600,
    poll_interval: int = 5,
) -> dict | None:
    """Trigger index generation for an arcana.

    By default this blocks until the arcana reports a terminal index
    status (synchronous). For large arcanas the server may time out
    (504). Use ``wait=False`` to fire the request and return immediately,
    then poll with :meth:`info` to check the index status.

    Args:
        name: The arcana name or full ``owner/name`` ID.
        wait: If ``True`` (default), poll until indexing finishes.
            If ``False``, fire the request and return immediately.
        timeout: Maximum seconds to poll when ``wait=True``.
            Defaults to 600 (10 minutes).
        poll_interval: Seconds between status checks when ``wait=True``.
            Defaults to 5. The first check happens only after one full
            interval has elapsed.

    Returns:
        The arcana details dict (from :meth:`get`) when ``wait=True`` and
        a terminal status was observed — note that ``"ERROR"`` and
        ``"NOT_INDEXED"`` count as terminal as well as ``"INDEXED"``, so
        callers should inspect ``index_info``. ``None`` when
        ``wait=False``.

    Raises:
        TimeoutError: If ``wait=True`` and no terminal status is seen
            within ``timeout`` seconds.
        APIError: If the trigger request fails with any status other
            than 504.
    """
    import threading
    import time

    resolved = extract_arcana_name(name)
    url = f"{self._arcana_base}/arcana/{quote(resolved, safe='')}/generate-index"

    if not wait:
        # Fire-and-forget: send the trigger on a background thread so we
        # return to the caller immediately. It uses its OWN Session — the
        # caller polls via info()/get() on the shared client Session, and
        # requests.Session is not safe to use from two threads at once.
        def _fire():
            session = new_session_like(self._session)
            try:
                session.post(url, headers=self._headers(), timeout=600)
            except Exception:
                pass  # indexing status is checked via info()/get()
            finally:
                session.close()

        # Daemon thread: it will not keep the interpreter alive on exit.
        threading.Thread(target=_fire, daemon=True).start()
        return None

    # Synchronous: fire the request, tolerate transport-level failures, then poll
    try:
        resp = self._session.post(url, headers=self._headers(), timeout=30)
        raise_for_status(resp)
    except (requests.exceptions.Timeout, requests.exceptions.ConnectionError):
        # The trigger almost certainly reached the server (the body
        # was written before the response was dropped). ARCANA
        # commonly holds the trigger connection while it builds the
        # embedding queue, then closes it without a response. The
        # arcana state machine is authoritative — fall through to
        # the poll loop. Sanity-check with a GET first: if that
        # also fails at the transport level, the server is
        # genuinely down and the error propagates.
        self.get(name)
    except APIError as e:
        # 504 Gateway Timeout from nginx is the same shape: trigger
        # accepted, gateway gave up waiting for a response.
        if e.status_code != 504:
            raise

    # Poll until indexing finishes
    deadline = time.monotonic() + timeout
    terminal = {"INDEXED", "ERROR", "NOT_INDEXED"}
    status = ""  # last status seen; reported in the timeout message

    while time.monotonic() < deadline:
        # Sleep first, then check: the status is never read immediately
        # after the trigger.
        time.sleep(poll_interval)
        data = self.get(name)
        idx = data.get("index_info") or {}
        status = idx.get("index_status", "")
        if status in terminal:
            return data

    raise TimeoutError(
        f"Indexing did not complete within {timeout}s. "
        f"Last status: {status}. Check with client.arcana.info(...)."
    )
899
+
900
def delete_index(self, name: str) -> dict | None:
    """Drop the index of an arcana.

    Both the plain name and the full ``owner/name`` ID are accepted.

    Args:
        name: The arcana name or full ``owner/name`` ID.

    Returns:
        The decoded API response, or ``None`` when it has no JSON body.
    """
    bare_name = extract_arcana_name(name)
    endpoint = f"{self._arcana_base}/arcana/{quote(bare_name, safe='')}/delete-index"
    response = self._session.delete(endpoint, headers=self._headers())
    raise_for_status(response)
    return _json_or_none(response)
918
+
919
def setup_from_directory(
    self,
    name: str,
    source_dir: str | Path,
    *,
    pattern: str = "*.md",
    append_uuid: bool = True,
    update_toml: bool = False,
    toml_label: str | None = None,
    wait_for_index: bool = True,
    index_timeout: int = 600,
    verbose: bool = True,
) -> dict:
    """Create an arcana, upload a directory into it, and build its index.

    Runs :meth:`create`, :meth:`upload_directory`, and
    :meth:`generate_index` in that order. Upload and indexing target
    the name reported back by ``create`` (which carries the UUID suffix
    when ``append_uuid=True``), so callers never have to track the
    renamed arcana themselves.

    Args:
        name: Display name for the new arcana (UUID suffix appended
            when ``append_uuid=True``).
        source_dir: Directory whose matching files are uploaded to the
            new arcana.
        pattern: Glob pattern handed to :meth:`upload_directory`.
            Defaults to ``"*.md"``.
        append_uuid: Passed through to :meth:`create`. When ``True``
            (default), the UUID suffix avoids name collisions and
            mirrors the SAIA web UI behaviour.
        update_toml: Passed through to :meth:`create`. When ``True``,
            the new arcana is added to ``config.toml`` after creation.
        toml_label: Label under ``[saia.arcana.labels]``. Ignored when
            ``update_toml`` is ``False``.
        wait_for_index: Passed to :meth:`generate_index` as ``wait``.
            When ``True`` (default), block until the index reaches
            ``INDEXED`` (or fails / times out).
        index_timeout: Passed to :meth:`generate_index` as ``timeout``
            (seconds). Defaults to 600.
        verbose: Passed through to :meth:`upload_directory`. Controls
            the per-file progress bar.

    Returns:
        A dict with three keys, one per step: ``"arcana"`` (the result
        of :meth:`create`), ``"uploads"`` (the list returned by
        :meth:`upload_directory`), and ``"index"`` (whatever
        :meth:`generate_index` returned; this may be ``None`` when
        ``wait_for_index`` is ``False``).

    Example::

        result = client.arcana.setup_from_directory(
            "MyKB", "./markdown/",
            pattern="**/*.md",
            update_toml=True, toml_label="my_kb",
        )
        print(result["arcana"]["id"])   # owner/MyKB-<uuid>
        print(len(result["uploads"]))   # files uploaded
        print(result["index"])          # result of generate_index
    """
    outcome: dict = {}

    # Step 1: create the arcana; the server-side name may differ from
    # ``name`` (UUID suffix), so it is read back from the response.
    outcome["arcana"] = self.create(
        name,
        append_uuid=append_uuid,
        update_toml=update_toml,
        toml_label=toml_label,
    )
    created_name = outcome["arcana"]["name"]

    # Step 2: upload every matching file into the freshly created arcana.
    outcome["uploads"] = self.upload_directory(
        created_name,
        source_dir,
        pattern=pattern,
        verbose=verbose,
    )

    # Step 3: trigger (and optionally wait for) index generation.
    outcome["index"] = self.generate_index(
        created_name,
        wait=wait_for_index,
        timeout=index_timeout,
    )
    return outcome
1004
+
1005
def chat(
    self,
    model: str,
    messages: list[dict],
    arcana_id: str,
    *,
    temperature: float | None = None,
    max_tokens: int | None = None,
    stream: bool = False,
    **kwargs,
) -> dict | SSEStream:
    """Run a chat completion with retrieval context from an arcana.

    The request goes to the standard ``/chat/completions`` endpoint;
    the arcana reference and ``"enable-tools": True`` are added to the
    request body.

    Args:
        model: Model identifier.
        messages: Chat messages.
        arcana_id: The arcana ID to use for retrieval.
        temperature: Sampling temperature. Sent only when not ``None``.
        max_tokens: Completion token limit. Sent only when not ``None``.
        stream: If ``True``, return a generator yielding chunks.
        **kwargs: Additional parameters forwarded to the API. They are
            merged into the request body after the built-in fields, so
            a matching key (e.g. ``"arcana"``) replaces the built-in
            value.

    Returns:
        When ``stream=False``: the API response dict, with an extra
        ``"_rate_limits"`` key (a JSON-serializable dict; see
        :class:`~saia_python.RateLimitInfo`). When ``stream=True``: an
        ``SSEStream`` whose ``rate_limits`` attribute exposes the same dict.
    """
    payload = {
        "model": model,
        "messages": messages,
        "enable-tools": True,
        "arcana": {"id": arcana_id},
    }
    # Caller-supplied extras are applied on top of the built-in fields.
    payload.update(kwargs)

    # Optional sampling parameters are omitted entirely when unset so the
    # server-side defaults apply.
    for field, value in (("temperature", temperature), ("max_tokens", max_tokens)):
        if value is not None:
            payload[field] = value

    request_headers = {
        "Authorization": f"Bearer {self._api_key}",
        "Accept": "application/json",
        "inference-service": "saia-openai-gateway",
    }
    endpoint = f"{self._base_url}/chat/completions"

    return post_chat_completion(
        self._session,
        endpoint,
        payload,
        headers=request_headers,
        stream=stream,
    )
1059
+
1060
def __repr__(self):
    """Return a debug representation showing the configured base URL."""
    base_url = self._base_url
    return f"ArcanaService(base_url={base_url!r})"