projectdavid 1.33.14__tar.gz → 1.33.16__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of projectdavid might be problematic. Click here for more details.

Files changed (71) hide show
  1. {projectdavid-1.33.14 → projectdavid-1.33.16}/CHANGELOG.md +14 -0
  2. {projectdavid-1.33.14/src/projectdavid.egg-info → projectdavid-1.33.16}/PKG-INFO +1 -1
  3. {projectdavid-1.33.14 → projectdavid-1.33.16}/pyproject.toml +1 -1
  4. projectdavid-1.33.16/src/projectdavid/clients/vectors.py +813 -0
  5. {projectdavid-1.33.14 → projectdavid-1.33.16/src/projectdavid.egg-info}/PKG-INFO +1 -1
  6. {projectdavid-1.33.14 → projectdavid-1.33.16}/src/projectdavid.egg-info/SOURCES.txt +1 -0
  7. {projectdavid-1.33.14 → projectdavid-1.33.16}/LICENSE +0 -0
  8. {projectdavid-1.33.14 → projectdavid-1.33.16}/MANIFEST.in +0 -0
  9. {projectdavid-1.33.14 → projectdavid-1.33.16}/README.md +0 -0
  10. {projectdavid-1.33.14 → projectdavid-1.33.16}/docs/assistants.md +0 -0
  11. {projectdavid-1.33.14 → projectdavid-1.33.16}/docs/code_interpretation.md +0 -0
  12. {projectdavid-1.33.14 → projectdavid-1.33.16}/docs/database.md +0 -0
  13. {projectdavid-1.33.14 → projectdavid-1.33.16}/docs/database_assistant_example.md +0 -0
  14. {projectdavid-1.33.14 → projectdavid-1.33.16}/docs/docker_comtainers.md +0 -0
  15. {projectdavid-1.33.14 → projectdavid-1.33.16}/docs/file_search.md +0 -0
  16. {projectdavid-1.33.14 → projectdavid-1.33.16}/docs/files.md +0 -0
  17. {projectdavid-1.33.14 → projectdavid-1.33.16}/docs/function_call_definition.md +0 -0
  18. {projectdavid-1.33.14 → projectdavid-1.33.16}/docs/function_calls.md +0 -0
  19. {projectdavid-1.33.14 → projectdavid-1.33.16}/docs/handling_function_calls.md +0 -0
  20. {projectdavid-1.33.14 → projectdavid-1.33.16}/docs/inference.md +0 -0
  21. {projectdavid-1.33.14 → projectdavid-1.33.16}/docs/messages.md +0 -0
  22. {projectdavid-1.33.14 → projectdavid-1.33.16}/docs/runs.md +0 -0
  23. {projectdavid-1.33.14 → projectdavid-1.33.16}/docs/streams.md +0 -0
  24. {projectdavid-1.33.14 → projectdavid-1.33.16}/docs/threads.md +0 -0
  25. {projectdavid-1.33.14 → projectdavid-1.33.16}/docs/tools.md +0 -0
  26. {projectdavid-1.33.14 → projectdavid-1.33.16}/docs/users.md +0 -0
  27. {projectdavid-1.33.14 → projectdavid-1.33.16}/docs/vector_store.md +0 -0
  28. {projectdavid-1.33.14 → projectdavid-1.33.16}/docs/versioning.md +0 -0
  29. {projectdavid-1.33.14 → projectdavid-1.33.16}/setup.cfg +0 -0
  30. {projectdavid-1.33.14 → projectdavid-1.33.16}/src/projectdavid/__init__.py +0 -0
  31. {projectdavid-1.33.14 → projectdavid-1.33.16}/src/projectdavid/_version.py +0 -0
  32. {projectdavid-1.33.14 → projectdavid-1.33.16}/src/projectdavid/clients/actions_client.py +0 -0
  33. {projectdavid-1.33.14 → projectdavid-1.33.16}/src/projectdavid/clients/api_key_client.py +0 -0
  34. {projectdavid-1.33.14 → projectdavid-1.33.16}/src/projectdavid/clients/assistants_client.py +0 -0
  35. {projectdavid-1.33.14 → projectdavid-1.33.16}/src/projectdavid/clients/base_client.py +0 -0
  36. {projectdavid-1.33.14 → projectdavid-1.33.16}/src/projectdavid/clients/base_vector_store.py +0 -0
  37. {projectdavid-1.33.14 → projectdavid-1.33.16}/src/projectdavid/clients/event_handler.py +0 -0
  38. {projectdavid-1.33.14 → projectdavid-1.33.16}/src/projectdavid/clients/file_processor.py +0 -0
  39. {projectdavid-1.33.14 → projectdavid-1.33.16}/src/projectdavid/clients/file_search.py +0 -0
  40. {projectdavid-1.33.14 → projectdavid-1.33.16}/src/projectdavid/clients/files_client.py +0 -0
  41. {projectdavid-1.33.14 → projectdavid-1.33.16}/src/projectdavid/clients/inference_client.py +0 -0
  42. {projectdavid-1.33.14 → projectdavid-1.33.16}/src/projectdavid/clients/messages_client.py +0 -0
  43. {projectdavid-1.33.14 → projectdavid-1.33.16}/src/projectdavid/clients/runs.py +0 -0
  44. {projectdavid-1.33.14 → projectdavid-1.33.16}/src/projectdavid/clients/synchronous_inference_wrapper.py +0 -0
  45. {projectdavid-1.33.14 → projectdavid-1.33.16}/src/projectdavid/clients/threads_client.py +0 -0
  46. {projectdavid-1.33.14 → projectdavid-1.33.16}/src/projectdavid/clients/tools_client.py +0 -0
  47. {projectdavid-1.33.14 → projectdavid-1.33.16}/src/projectdavid/clients/users_client.py +0 -0
  48. {projectdavid-1.33.14 → projectdavid-1.33.16}/src/projectdavid/clients/vector_store_manager.py +0 -0
  49. {projectdavid-1.33.14 → projectdavid-1.33.16}/src/projectdavid/clients/vision-file_processor.py +0 -0
  50. /projectdavid-1.33.14/src/projectdavid/clients/vectors.py → /projectdavid-1.33.16/src/projectdavid/clients/vision_vectors.py +0 -0
  51. {projectdavid-1.33.14 → projectdavid-1.33.16}/src/projectdavid/constants/platform.py +0 -0
  52. {projectdavid-1.33.14 → projectdavid-1.33.16}/src/projectdavid/decorators.py +0 -0
  53. {projectdavid-1.33.14 → projectdavid-1.33.16}/src/projectdavid/entity.py +0 -0
  54. {projectdavid-1.33.14 → projectdavid-1.33.16}/src/projectdavid/events.py +0 -0
  55. {projectdavid-1.33.14 → projectdavid-1.33.16}/src/projectdavid/serializers.py +0 -0
  56. {projectdavid-1.33.14 → projectdavid-1.33.16}/src/projectdavid/services/logging_service.py +0 -0
  57. {projectdavid-1.33.14 → projectdavid-1.33.16}/src/projectdavid/synthesis/__init__.py +0 -0
  58. {projectdavid-1.33.14 → projectdavid-1.33.16}/src/projectdavid/synthesis/llm_synthesizer.py +0 -0
  59. {projectdavid-1.33.14 → projectdavid-1.33.16}/src/projectdavid/synthesis/prompt.py +0 -0
  60. {projectdavid-1.33.14 → projectdavid-1.33.16}/src/projectdavid/synthesis/reranker.py +0 -0
  61. {projectdavid-1.33.14 → projectdavid-1.33.16}/src/projectdavid/synthesis/retriever.py +0 -0
  62. {projectdavid-1.33.14 → projectdavid-1.33.16}/src/projectdavid/utils/__init__.py +0 -0
  63. {projectdavid-1.33.14 → projectdavid-1.33.16}/src/projectdavid/utils/function_call_suppressor.py +0 -0
  64. {projectdavid-1.33.14 → projectdavid-1.33.16}/src/projectdavid/utils/monitor_launcher.py +0 -0
  65. {projectdavid-1.33.14 → projectdavid-1.33.16}/src/projectdavid/utils/peek_gate.py +0 -0
  66. {projectdavid-1.33.14 → projectdavid-1.33.16}/src/projectdavid/utils/run_monitor.py +0 -0
  67. {projectdavid-1.33.14 → projectdavid-1.33.16}/src/projectdavid/utils/vector_search_formatter.py +0 -0
  68. {projectdavid-1.33.14 → projectdavid-1.33.16}/src/projectdavid.egg-info/dependency_links.txt +0 -0
  69. {projectdavid-1.33.14 → projectdavid-1.33.16}/src/projectdavid.egg-info/requires.txt +0 -0
  70. {projectdavid-1.33.14 → projectdavid-1.33.16}/src/projectdavid.egg-info/top_level.txt +0 -0
  71. {projectdavid-1.33.14 → projectdavid-1.33.16}/tests/test_clients.py +0 -0
@@ -1,3 +1,17 @@
1
+ ## [1.33.16](https://github.com/frankie336/projectdavid/compare/v1.33.15...v1.33.16) (2025-06-16)
2
+
3
+
4
+ ### Bug Fixes
5
+
6
+ * Back out from vision support - resource issue. Revisit in grand plan-3 ([14568e9](https://github.com/frankie336/projectdavid/commit/14568e97edef6e82fd93e3ee034fbf160d4a302b))
7
+
8
+ ## [1.33.15](https://github.com/frankie336/projectdavid/compare/v1.33.14...v1.33.15) (2025-06-16)
9
+
10
+
11
+ ### Bug Fixes
12
+
13
+ * Back out from vision support - resource issue. Revisit in grand plan-2 ([a735034](https://github.com/frankie336/projectdavid/commit/a735034879ce50ce1dc2a508ce304796105f5830))
14
+
1
15
  ## [1.33.14](https://github.com/frankie336/projectdavid/compare/v1.33.13...v1.33.14) (2025-06-16)
2
16
 
3
17
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: projectdavid
3
- Version: 1.33.14
3
+ Version: 1.33.16
4
4
  Summary: Python SDK for interacting with the Entities Assistant API.
5
5
  Author-email: Francis Neequaye Armah <francis.neequaye@projectdavid.co.uk>
6
6
  License: PolyForm Noncommercial License 1.0.0
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "projectdavid"
7
- version = "1.33.14"
7
+ version = "1.33.16"
8
8
  description = "Python SDK for interacting with the Entities Assistant API."
9
9
  readme = "README.md"
10
10
  authors = [
@@ -0,0 +1,813 @@
1
+ """projectdavid.clients.vectors
2
+ ----------------------------
3
+
4
+ Token-scoped HTTP client + local Qdrant helper for vector-store operations.
5
+ """
6
+
7
+ import asyncio
8
+ import os
9
+ import uuid
10
+ import warnings
11
+ from pathlib import Path
12
+ from typing import Any, Dict, List, Optional, Union
13
+
14
+ import httpx
15
+ from dotenv import load_dotenv
16
+ from projectdavid_common import UtilsInterface, ValidationInterface
17
+ from pydantic import BaseModel, Field
18
+
19
+ from projectdavid.clients.file_processor import FileProcessor
20
+ from projectdavid.clients.vector_store_manager import VectorStoreManager
21
+ from projectdavid.synthesis import reranker, retriever
22
+ from projectdavid.synthesis.llm_synthesizer import synthesize_envelope
23
+ from projectdavid.utils.vector_search_formatter import make_envelope
24
+
25
+ load_dotenv()
26
+ log = UtilsInterface.LoggingUtility()
27
+
28
+
29
def summarize_hits(query: str, hits: List[Dict[str, Any]]) -> str:
    """Build a short markdown summary naming the file behind each hit.

    Args:
        query: The search text, echoed in the heading line.
        hits: Hit dicts. Each must carry a numeric ``score``; ``meta_data``
            is a mapping that may contain ``file_name``.

    Returns:
        A heading line followed by one bullet line per hit.
    """
    lines = []
    for hit in hits:
        # Callers normalise absent metadata to {}, so 'file_name' may be
        # missing; fall back to a placeholder instead of raising KeyError.
        file_name = (hit.get("meta_data") or {}).get("file_name", "<unknown>")
        lines.append(f"• {file_name} (score {hit['score']:.2f})")
    return f"Top files for **{query}**:\n" + "\n".join(lines)
32
+
33
+
34
+ # --------------------------------------------------------------------------- #
35
+ # Exceptions
36
+ # --------------------------------------------------------------------------- #
37
class VectorStoreClientError(Exception):
    """Error raised by the vector-store client for client-side and API failures."""
39
+
40
+
41
+ # --------------------------------------------------------------------------- #
42
+ # Helper schema
43
+ # --------------------------------------------------------------------------- #
44
class VectorStoreFileUpdateStatusInput(BaseModel):
    # Request body used when patching the status of a vector-store file record.

    # Required field: the status to set.
    status: ValidationInterface.StatusEnum = Field(
        default=...,
        description="The new status for the file record.",
    )
    # Optional field: defaults to None.
    error_message: Optional[str] = Field(
        default=None,
        description="Error message if status is 'failed'.",
    )
51
+
52
+
53
+ # --------------------------------------------------------------------------- #
54
+ # Main client
55
+ # --------------------------------------------------------------------------- #
56
+ class VectorStoreClient:
57
+ """
58
+ Thin HTTP+Qdrant wrapper.
59
+
60
+ • All API requests scoped by X-API-Key.
61
+ • create_vector_store() no longer takes user_id; ownership from token.
62
+ """
63
+
64
+ # ------------------------------------------------------------------ #
65
+ # Construction / cleanup
66
+ # ------------------------------------------------------------------ #
67
def __init__(
    self,
    base_url: Optional[str] = None,
    api_key: Optional[str] = None,
    *,
    vector_store_host: str = "localhost",
    file_processor_kwargs: Optional[dict] = None,
):
    """Configure the HTTP client and the local vector-store helpers.

    Args:
        base_url: API root; falls back to the ``BASE_URL`` environment
            variable. Trailing slashes are stripped.
        api_key: Sent as ``X-API-Key``; falls back to ``API_KEY``.
        vector_store_host: Host passed to ``VectorStoreManager``.
        file_processor_kwargs: Keyword arguments forwarded to
            ``FileProcessor``.

    Raises:
        VectorStoreClientError: If no base URL could be resolved.
    """
    resolved_url = base_url or os.getenv("BASE_URL", "")
    self.base_url = resolved_url.rstrip("/")
    self.api_key = api_key or os.getenv("API_KEY")
    if not self.base_url:
        raise VectorStoreClientError("BASE_URL is required.")

    headers: Dict[str, str] = {"Content-Type": "application/json"}
    self._base_headers = headers
    if not self.api_key:
        log.warning("No API key — protected routes will fail.")
    else:
        headers["X-API-Key"] = self.api_key

    self._sync_api_client = httpx.Client(
        base_url=self.base_url,
        headers=self._base_headers,
        timeout=30.0,
    )

    # Local helpers: Qdrant manager, id generator, file processor.
    self.vector_manager = VectorStoreManager(vector_store_host=vector_store_host)
    self.identifier_service = UtilsInterface.IdentifierService()
    processor_options = file_processor_kwargs or {}
    self.file_processor = FileProcessor(**processor_options)

    log.info("VectorStoreClient → %s", self.base_url)
98
+
99
+ # Context support ------------------------------------------------------ #
100
def __enter__(self):
    """Enter a ``with`` block; the client itself is the context value."""
    client = self
    return client
102
+
103
def __exit__(self, *_exc):
    """Leave a ``with`` block by closing the client; exceptions are not suppressed."""
    self.close()
    return None
105
+
106
async def __aenter__(self):
    """Enter an ``async with`` block; the client itself is the context value."""
    client = self
    return client
108
+
109
async def __aexit__(self, *_exc):
    """Leave an ``async with`` block by awaiting ``aclose()``."""
    await self.aclose()
    return None
111
+
112
+ # Cleanup -------------------------------------------------------------- #
113
async def aclose(self):
    """Close the synchronous HTTP client from a worker thread."""
    close_sync_client = self._sync_api_client.close
    await asyncio.to_thread(close_sync_client)
115
+
116
def close(self):
    """Close the synchronous HTTP client.

    Safe to call with or without a running event loop. Inside a running
    loop a ``RuntimeWarning`` points the caller at ``aclose()``, and the
    client is still closed.
    """
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        # No loop is running in this thread: nothing to warn about.
        pass
    else:
        warnings.warn(
            "close() inside running loop — use `await aclose()`",
            RuntimeWarning,
        )
    # The underlying client is synchronous, so close it directly rather than
    # starting a throwaway event loop and worker thread to do the same thing.
    self._sync_api_client.close()
129
+
130
+ # Low-level HTTP helpers ---------------------------------------------- #
131
async def _parse_response(self, resp: httpx.Response) -> Any:
    """Turn an HTTP response into decoded JSON, or ``None`` for a 204.

    Raises:
        VectorStoreClientError: On an HTTP error status, or on any other
            failure while reading the response.
    """
    try:
        resp.raise_for_status()
        if resp.status_code == 204:
            return None
        return resp.json()
    except httpx.HTTPStatusError as exc:
        failed = exc.response
        log.error("API %d – %s", failed.status_code, failed.text)
        message = f"API {failed.status_code}: {failed.text}"
        raise VectorStoreClientError(message) from exc
    except Exception as exc:
        message = f"Invalid response: {resp.text}"
        raise VectorStoreClientError(message) from exc
142
+
143
async def _request(self, method: str, url: str, **kwargs) -> Any:
    """Send one API request, retrying transient failures with backoff.

    Timeouts, network errors and 5xx responses are retried up to three
    attempts in total, sleeping 1s then 2s between attempts.

    Args:
        method: HTTP verb.
        url: Path relative to ``self.base_url``.
        **kwargs: Forwarded to ``httpx.AsyncClient.request``.

    Returns:
        Whatever ``_parse_response`` returns for the final response.

    Raises:
        VectorStoreClientError: On a non-retryable failure, or once the
            attempts are exhausted.
    """
    retries = 3
    for attempt in range(1, retries + 1):
        can_retry = attempt < retries
        backoff = 2 ** (attempt - 1)
        try:
            async with httpx.AsyncClient(
                base_url=self.base_url,
                headers=self._base_headers,
                timeout=30.0,
            ) as client:
                resp = await client.request(method, url, **kwargs)
        except (httpx.TimeoutException, httpx.NetworkError) as exc:
            if not can_retry:
                raise VectorStoreClientError(str(exc)) from exc
            log.warning(
                "Retry %d/%d %s %s in %ds – %s",
                attempt,
                retries,
                method,
                url,
                backoff,
                exc,
            )
            await asyncio.sleep(backoff)
            continue

        # _parse_response converts HTTP status errors into
        # VectorStoreClientError, so a 5xx can never surface here as
        # httpx.HTTPStatusError. Check the status code directly, otherwise
        # server errors are never retried.
        if resp.status_code >= 500 and can_retry:
            log.warning(
                "Retry %d/%d %s %s in %ds – %s",
                attempt,
                retries,
                method,
                url,
                backoff,
                f"HTTP {resp.status_code}",
            )
            await asyncio.sleep(backoff)
            continue

        return await self._parse_response(resp)
    # Defensive: every path through the loop returns, raises or continues.
    raise VectorStoreClientError("Request failed after retries")
180
+
181
+ # Internal async ops -------------------------------------------------- #
182
async def _create_vs_async(
    self,
    name: str,
    vector_size: int,
    distance_metric: str,
    config: Optional[Dict[str, Any]],
) -> ValidationInterface.VectorStoreRead:
    """Create the local collection, then register the store with the API.

    A fresh id is generated and used both as the collection name and as
    the ``shared_id`` sent to the API.
    """
    shared_id = self.identifier_service.generate_vector_id()
    metric = distance_metric.upper()
    self.vector_manager.create_store(
        collection_name=shared_id,
        vector_size=vector_size,
        distance=metric,
    )

    body = dict(
        shared_id=shared_id,
        name=name,
        vector_size=vector_size,
        distance_metric=metric,
        config=config or {},
    )
    created = await self._request("POST", "/v1/vector-stores", json=body)
    return ValidationInterface.VectorStoreRead.model_validate(created)
205
+
206
async def _list_my_vs_async(self) -> List[ValidationInterface.VectorStoreRead]:
    """Fetch ``/v1/vector-stores`` and validate each returned record."""
    records = await self._request("GET", "/v1/vector-stores")
    validate = ValidationInterface.VectorStoreRead.model_validate if records else None
    return [validate(record) for record in records]
209
+
210
+ # ------------------------------------------------------------------ #
211
+ # NEW admin‑aware creation helper
212
+ # ------------------------------------------------------------------ #
213
async def _create_vs_for_user_async(
    self,
    owner_id: str,
    name: str,
    vector_size: int,
    distance_metric: str,
    config: Optional[Dict[str, Any]],
) -> ValidationInterface.VectorStoreRead:
    """Create a store on behalf of ``owner_id``.

    Same flow as the self-owned creation path, except that ``owner_id`` is
    sent as a query parameter on the POST.
    """
    shared_id = self.identifier_service.generate_vector_id()
    metric = distance_metric.upper()
    self.vector_manager.create_store(
        collection_name=shared_id,
        vector_size=vector_size,
        distance=metric,
    )
    body = dict(
        shared_id=shared_id,
        name=name,
        vector_size=vector_size,
        distance_metric=metric,
        config=config or {},
    )
    # owner_id travels as a query parameter, not in the JSON body.
    query = {"owner_id": owner_id}
    created = await self._request(
        "POST", "/v1/vector-stores", json=body, params=query
    )
    return ValidationInterface.VectorStoreRead.model_validate(created)
242
+
243
async def _add_file_async(
    self, vector_store_id: str, p: Path, meta: Optional[Dict[str, Any]]
) -> ValidationInterface.VectorStoreFileRead:
    """Process a file, store its chunks locally, and register it with the API.

    Args:
        vector_store_id: Target store; used as the local store name and in
            the API path.
        p: Path of the file to ingest.
        meta: Optional user metadata. It is copied, never modified.

    Returns:
        The file record returned by the API.
    """
    processed = await self.file_processor.process_file(p)
    texts, vectors = processed["chunks"], processed["vectors"]
    line_data = processed.get("line_data") or []

    # Copy the caller's metadata. The previous in-place update leaked
    # 'source' and 'file_name' into the caller's dict, and made the
    # meta_data sent to the API depend on whether `meta` was supplied.
    user_md: Dict[str, Any] = dict(meta or {})
    base_md = {**user_md, "source": str(p), "file_name": p.name}

    file_record_id = f"vsf_{uuid.uuid4()}"

    # One payload per chunk; per-chunk line data is merged in when the
    # processor supplied an entry for that index.
    chunk_md = []
    for i in range(len(texts)):
        payload = {**base_md, "chunk_index": i, "file_id": file_record_id}
        if i < len(line_data):
            payload.update(line_data[i])
        chunk_md.append(payload)

    self.vector_manager.add_to_store(
        store_name=vector_store_id,
        texts=texts,
        vectors=vectors,
        metadata=chunk_md,
    )

    resp = await self._request(
        "POST",
        f"/v1/vector-stores/{vector_store_id}/files",
        json={
            "file_id": file_record_id,
            "file_name": p.name,
            "file_path": str(p),
            "status": "completed",
            # User metadata only; name and path are already top-level fields.
            "meta_data": user_md,
        },
    )
    return ValidationInterface.VectorStoreFileRead.model_validate(resp)
286
+
287
async def _search_vs_async(
    self,
    vector_store_id: str,
    query_text: str,
    top_k: int,
    filters: Optional[Dict] = None,
    vector_store_host: Optional[str] = None,
) -> List[Dict[str, Any]]:
    """Encode ``query_text`` and query the store's collection.

    The encoder and the vector field are chosen from the store's
    ``vector_size``: 1024 uses ``encode_clip_text`` with the
    ``caption_vector`` field, anything else uses ``encode_text`` with the
    default field.
    """
    # Use the client's manager unless a host override was given.
    manager = self.vector_manager
    if vector_store_host:
        manager = VectorStoreManager(vector_store_host=vector_store_host)

    store = self.retrieve_vector_store_sync(vector_store_id)

    if store.vector_size == 1024:
        encoded = self.file_processor.encode_clip_text(query_text)
        field_name = "caption_vector"
    else:
        encoded = self.file_processor.encode_text(query_text)
        field_name = None

    return manager.query_store(
        store_name=store.collection_name,
        query_vector=encoded.tolist(),
        top_k=top_k,
        filters=filters,
        vector_field=field_name,
    )
320
+
321
async def _delete_vs_async(
    self, vector_store_id: str, permanent: bool
) -> Dict[str, Any]:
    """Delete the local store, then issue the API DELETE; return a summary."""
    qdrant_outcome = self.vector_manager.delete_store(vector_store_id)
    endpoint = f"/v1/vector-stores/{vector_store_id}"
    await self._request("DELETE", endpoint, params={"permanent": permanent})
    return dict(
        vector_store_id=vector_store_id,
        status="deleted",
        permanent=permanent,
        qdrant_result=qdrant_outcome,
    )
336
+
337
async def _delete_file_async(
    self, vector_store_id: str, file_path: str
) -> Dict[str, Any]:
    """Remove a file's data from the local store, then issue the API DELETE."""
    qdrant_outcome = self.vector_manager.delete_file_from_store(
        vector_store_id, file_path
    )
    endpoint = f"/v1/vector-stores/{vector_store_id}/files"
    await self._request("DELETE", endpoint, params={"file_path": file_path})
    return dict(
        vector_store_id=vector_store_id,
        file_path=file_path,
        status="deleted",
        qdrant_result=qdrant_outcome,
    )
352
+
353
async def _list_store_files_async(
    self, vector_store_id: str
) -> List[ValidationInterface.VectorStoreFileRead]:
    """Fetch the file records of a store and validate each one."""
    endpoint = f"/v1/vector-stores/{vector_store_id}/files"
    records = await self._request("GET", endpoint)
    files = []
    for record in records:
        files.append(ValidationInterface.VectorStoreFileRead.model_validate(record))
    return files
361
+
362
async def _update_file_status_async(
    self,
    vector_store_id: str,
    file_id: str,
    status: ValidationInterface.StatusEnum,
    error_message: Optional[str] = None,
) -> ValidationInterface.VectorStoreFileRead:
    """PATCH a file record's status; ``None`` fields are left out of the body."""
    update = VectorStoreFileUpdateStatusInput(
        status=status, error_message=error_message
    )
    body = update.model_dump(exclude_none=True)
    endpoint = f"/v1/vector-stores/{vector_store_id}/files/{file_id}"
    updated = await self._request("PATCH", endpoint, json=body)
    return ValidationInterface.VectorStoreFileRead.model_validate(updated)
378
+
379
async def _get_assistant_vs_async(
    self, assistant_id: str
) -> List[ValidationInterface.VectorStoreRead]:
    """Fetch the vector stores linked to an assistant and validate each one."""
    endpoint = f"/v1/assistants/{assistant_id}/vector-stores"
    records = await self._request("GET", endpoint)
    stores = []
    for record in records:
        stores.append(ValidationInterface.VectorStoreRead.model_validate(record))
    return stores
388
+
389
async def _attach_vs_async(self, vector_store_id: str, assistant_id: str) -> bool:
    """POST the attach endpoint; returns True when the request did not raise."""
    endpoint = (
        f"/v1/assistants/{assistant_id}/vector-stores/{vector_store_id}/attach"
    )
    await self._request("POST", endpoint)
    return True
395
+
396
async def _detach_vs_async(self, vector_store_id: str, assistant_id: str) -> bool:
    """DELETE the detach endpoint; returns True when the request did not raise."""
    endpoint = (
        f"/v1/assistants/{assistant_id}/vector-stores/{vector_store_id}/detach"
    )
    await self._request("DELETE", endpoint)
    return True
402
+
403
+ # Sync facade helpers ------------------------------------------------ #
404
def _run_sync(self, coro):
    """Run ``coro`` to completion on a fresh event loop and return its result.

    Args:
        coro: The coroutine object to execute.

    Raises:
        VectorStoreClientError: If an event loop is already running in this
            thread. The coroutine is closed first, so it is not left
            un-awaited.
    """
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        # get_running_loop() raises when no loop is running in this thread.
        loop_running = False
    else:
        loop_running = True

    if loop_running:
        # Close the coroutine before bailing out; otherwise it is never
        # awaited and Python emits a "never awaited" RuntimeWarning.
        coro.close()
        raise VectorStoreClientError("Sync call inside running loop")
    return asyncio.run(coro)
412
+
413
+ # ──────────────────────────────────────────────────────────────────
414
+ # Helpers (private)
415
+ # ──────────────────────────────────────────────────────────────────
416
@staticmethod
def _normalise_hits(raw_hits: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Return copies of the hits reshaped to one fixed set of keys.

    Each output dict has ``text``, ``score``, ``meta_data``, ``vector_id``
    and ``store_id``. ``meta_data`` is taken from the hit's ``meta_data``
    or ``metadata`` key, defaulting to an empty dict.
    """
    return [
        {
            "text": hit["text"],
            "score": hit["score"],
            "meta_data": hit.get("meta_data") or hit.get("metadata") or {},
            "vector_id": hit.get("vector_id"),
            "store_id": hit.get("store_id"),
        }
        for hit in raw_hits
    ]
436
+
437
+ # Public API ---------------------------------------------------------- #
438
def create_vector_store(
    self,
    name: str,
    *,
    vector_size: int = 384,
    distance_metric: str = "Cosine",
    config: Optional[Dict[str, Any]] = None,
) -> ValidationInterface.VectorStoreRead:
    """Synchronously create a store owned by the current API key."""
    pending = self._create_vs_async(name, vector_size, distance_metric, config)
    return self._run_sync(pending)
450
+
451
def create_vector_store_for_user(
    self,
    owner_id: str,
    name: str,
    *,
    vector_size: int = 384,
    distance_metric: str = "Cosine",
    config: Optional[Dict[str, Any]] = None,
) -> ValidationInterface.VectorStoreRead:
    """Synchronously create a store on behalf of ``owner_id``.

    Intended for admin API keys; per the original notes, the server
    rejects other callers with HTTP 403.
    """
    pending = self._create_vs_for_user_async(
        owner_id, name, vector_size, distance_metric, config
    )
    return self._run_sync(pending)
471
+
472
+ # ───────────────────────────────────────────────────────────────
473
+ # Convenience: ensure a per-user “file_search” store exists
474
+ # ───────────────────────────────────────────────────────────────
475
+ # unchanged … (get_or_create_file_search_store)
476
+
477
def list_my_vector_stores(self) -> List[ValidationInterface.VectorStoreRead]:
    """Synchronously list the stores visible to the current API key."""
    pending = self._list_my_vs_async()
    return self._run_sync(pending)
480
+
481
+ # ───────────────────────────────────────────────────────────────
482
+ # NEW: real per-user listing (admin-only)
483
+ # ───────────────────────────────────────────────────────────────
484
async def _list_vs_by_user_async(self, user_id: str):
    """Fetch the stores owned by ``user_id`` via the admin by-user endpoint."""
    records = await self._request(
        "GET",
        "/v1/vector-stores/admin/by-user",
        params={"owner_id": user_id},
    )
    stores = []
    for record in records:
        stores.append(ValidationInterface.VectorStoreRead.model_validate(record))
    return stores
491
+
492
def get_stores_by_user(
    self,
    _user_id: str,
) -> List[ValidationInterface.VectorStoreRead]:
    """Deprecated: list the stores owned by ``_user_id``.

    Emits a ``DeprecationWarning`` and then performs the by-user listing,
    so existing callers keep working.
    """
    message = (
        "`get_stores_by_user()` is deprecated; use `list_my_vector_stores()` or "
        "`VectorStoreClient(list_my_vector_stores)` with an impersonated key."
    )
    warnings.warn(message, DeprecationWarning, stacklevel=2)
    pending = self._list_vs_by_user_async(_user_id)
    return self._run_sync(pending)
507
+
508
+ # ───────────────────────────────────────────────────────────────
509
+ # Convenience: ensure a per-user “file_search” store exists
510
+ # ───────────────────────────────────────────────────────────────
511
def get_or_create_file_search_store(self, user_id: Optional[str] = None) -> str:
    """Return the id of the oldest store named ``file_search``, creating one if needed.

    Parameters
    ----------
    user_id : Optional[str]
        If None, operate on the stores of the current API key. Otherwise
        look up or create the store on behalf of ``user_id`` (admin-only
        endpoints).

    Returns
    -------
    str
        The vector-store id.
    """
    # 1. Fetch candidate stores.
    if user_id is None:
        stores = self.list_my_vector_stores()
    else:
        # Use the admin listing coroutine directly. Going through the
        # deprecated get_stores_by_user() wrapper would emit a
        # DeprecationWarning at callers of this non-deprecated method.
        stores = self._run_sync(self._list_vs_by_user_async(user_id))

    file_search_stores = [s for s in stores if s.name == "file_search"]

    if file_search_stores:
        # 2. Pick the earliest created_at so repeated calls stay stable;
        #    a missing created_at is treated as 0.
        chosen = min(
            file_search_stores,
            key=lambda s: (s.created_at or 0),
        )
        log.info(
            "Re-using existing 'file_search' store %s for user %s",
            chosen.id,
            user_id or "<self>",
        )
        return chosen.id

    # 3. Nothing found: create a fresh store.
    if user_id is None:
        new_store = self.create_vector_store(name="file_search")
    else:
        # Requires an admin API key.
        new_store = self.create_vector_store_for_user(
            owner_id=user_id,
            name="file_search",
        )

    log.info(
        "Created new 'file_search' store %s for user %s",
        new_store.id,
        user_id or "<self>",
    )
    return new_store.id
567
+
568
def add_file_to_vector_store(
    self,
    vector_store_id: str,
    file_path: Union[str, Path],
    user_metadata: Optional[Dict[str, Any]] = None,
) -> ValidationInterface.VectorStoreFileRead:
    """Synchronously ingest a file into a store.

    Raises:
        FileNotFoundError: If ``file_path`` is not an existing regular file.
    """
    p = Path(file_path)
    if p.is_file():
        pending = self._add_file_async(vector_store_id, p, user_metadata)
        return self._run_sync(pending)
    raise FileNotFoundError(f"File not found: {p}")
578
+
579
def delete_vector_store(
    self,
    vector_store_id: str,
    permanent: bool = False,
) -> Dict[str, Any]:
    """Synchronously delete a store; ``permanent`` is forwarded to the API."""
    pending = self._delete_vs_async(vector_store_id, permanent)
    return self._run_sync(pending)
585
+
586
def delete_file_from_vector_store(
    self,
    vector_store_id: str,
    file_path: str,
) -> Dict[str, Any]:
    """Synchronously delete one file, identified by path, from a store."""
    pending = self._delete_file_async(vector_store_id, file_path)
    return self._run_sync(pending)
592
+
593
def list_store_files(
    self,
    vector_store_id: str,
) -> List[ValidationInterface.VectorStoreFileRead]:
    """Synchronously list the file records of a store."""
    pending = self._list_store_files_async(vector_store_id)
    return self._run_sync(pending)
598
+
599
def update_vector_store_file_status(
    self,
    vector_store_id: str,
    file_id: str,
    status: ValidationInterface.StatusEnum,
    error_message: Optional[str] = None,
) -> ValidationInterface.VectorStoreFileRead:
    """Synchronously update the status of a file record."""
    pending = self._update_file_status_async(
        vector_store_id, file_id, status, error_message
    )
    return self._run_sync(pending)
611
+
612
def get_vector_stores_for_assistant(
    self,
    assistant_id: str,
) -> List[ValidationInterface.VectorStoreRead]:
    """Synchronously list the vector stores linked to an assistant."""
    pending = self._get_assistant_vs_async(assistant_id)
    return self._run_sync(pending)
617
+
618
def attach_vector_store_to_assistant(
    self,
    vector_store_id: str,
    assistant_id: str,
) -> bool:
    """Synchronously attach a store to an assistant."""
    pending = self._attach_vs_async(vector_store_id, assistant_id)
    return self._run_sync(pending)
624
+
625
def detach_vector_store_from_assistant(
    self,
    vector_store_id: str,
    assistant_id: str,
) -> bool:
    """Synchronously detach a store from an assistant."""
    pending = self._detach_vs_async(vector_store_id, assistant_id)
    return self._run_sync(pending)
631
+
632
def retrieve_vector_store_sync(
    self,
    vector_store_id: str,
) -> ValidationInterface.VectorStoreRead:
    """Fetch one store record using the synchronous HTTP client.

    An HTTP error status propagates from ``raise_for_status()`` as raised
    by the HTTP library; it is not wrapped.
    """
    endpoint = f"/v1/vector-stores/{vector_store_id}"
    response = self._sync_api_client.get(endpoint)
    response.raise_for_status()
    record = response.json()
    return ValidationInterface.VectorStoreRead.model_validate(record)
639
+
640
def vector_file_search_raw(
    self,
    vector_store_id: str,
    query_text: str,
    top_k: int = 5,
    filters: Optional[Dict] = None,
    vector_store_host: Optional[str] = None,
) -> List[Dict[str, Any]]:
    """Synchronously run a search and return the hits exactly as the store query produced them."""
    pending = self._search_vs_async(
        vector_store_id, query_text, top_k, filters, vector_store_host
    )
    return self._run_sync(pending)
653
+
654
+ # ─────────────────────────────────────────────────────────────────────────────
655
+ # MID‑LEVEL: envelope but **no** rerank / synthesis
656
+ # ─────────────────────────────────────────────────────────────────────────────
657
def simple_vector_file_search(
    self,
    vector_store_id: str,
    query_text: str,
    top_k: int = 5,
    filters: Optional[Dict] = None,
    vector_store_host: Optional[str] = None,
) -> Dict[str, Any]:
    """Run a semantic search and wrap the hits in an envelope, with no rerank or synthesis.

    Args:
        vector_store_id: The store ID to query.
        query_text: Natural-language search text.
        top_k: Maximum hits to retrieve.
        filters: Optional payload filter dict, passed through unchanged.
        vector_store_host: Optional override for the vector store host,
            forwarded to ``vector_file_search_raw``.

    Returns:
        dict: The envelope built by ``make_envelope`` from the query, the
        normalised hits and a templated summary text.
    """
    # 1. Raw hits from the underlying store query.
    raw_hits = self.vector_file_search_raw(
        vector_store_id=vector_store_id,
        query_text=query_text,
        top_k=top_k,
        filters=filters,
        vector_store_host=vector_store_host,
    )

    # 2. Reuse the shared normaliser instead of a second inline copy, so both
    #    search paths agree on the hit schema.
    hits = self._normalise_hits(raw_hits)

    # 3. Templated answer text listing the matching files.
    answer_text = summarize_hits(query_text, hits)

    # 4. Wrap everything into the envelope.
    return make_envelope(query_text, hits, answer_text)
705
+
706
+ # ────────────────────────────────────────────────────────────────
707
+ # End‑to‑end: retrieve → (rerank) → synthesize → envelope
708
+ # ────────────────────────────────────────────────────────────────
709
+ def attended_file_search(
710
+ self,
711
+ vector_store_id: str,
712
+ query_text: str,
713
+ k: int = 20,
714
+ vector_store_host: Optional[str] = None,
715
+ ) -> Dict[str, Any]:
716
+ """
717
+ Run a full file search with optional cross-encoder rerank and envelope synthesis.
718
+
719
+ Parameters
720
+ ----------
721
+ vector_store_id : str
722
+ The ID of the target vector store to query.
723
+ query_text : str
724
+ The natural-language search text.
725
+ k : int, optional
726
+ The maximum number of hits to retrieve (default is 20).
727
+ vector_store_host : Optional[str], optional
728
+ An optional override for the default vector store host.
729
+
730
+ Returns
731
+ -------
732
+ Dict[str, Any]
733
+ An OpenAI-style envelope containing the synthesized response.
734
+ """
735
+
736
+ # 1️⃣ Retrieve initial candidates (now with optional vector_store_host passthrough)
737
+ hits = retriever.retrieve(
738
+ self,
739
+ vector_store_id=vector_store_id,
740
+ query=query_text,
741
+ k=k,
742
+ vector_store_host=vector_store_host,
743
+ )
744
+
745
+ # 2️⃣ Optional cross-encoder / LLM rerank
746
+ hits = reranker.rerank(query_text, hits, top_k=min(len(hits), 10))
747
+
748
+ # 3️⃣ Normalize schema (guarantee 'meta_data')
749
+ hits = self._normalise_hits(hits)
750
+
751
+ # 4️⃣ Abstractive synthesis → OpenAI-style envelope
752
+ return synthesize_envelope(
753
+ query_text,
754
+ hits,
755
+ api_key=self.api_key, # Project-David key
756
+ base_url=self.base_url, # Same backend
757
+ provider_api_key=os.getenv("HYPERBOLIC_API_KEY"), # Hyperbolic key
758
+ )
759
+
760
+ # ────────────────────────────────────────────────────────────────
761
+ # Retrieval only: retrieve → (rerank) → normalise (no synthesis, no envelope)
762
+ # ────────────────────────────────────────────────────────────────
763
+ def unattended_file_search(
764
+ self,
765
+ vector_store_id: str,
766
+ query_text: str,
767
+ k: int = 20,
768
+ vector_store_host: Optional[str] = None,
769
+ ) -> Dict[str, Any]:
770
+ """
771
+ Perform a search over the file vector store and return normalized retrieval hits.
772
+
773
+ This method executes a bare search pipeline: it retrieves vector-based candidates
774
+ using semantic similarity, optionally applies reranking (e.g., cross-encoder or LLM-based),
775
+ and normalizes the result schema. It does not perform synthesis or construct an OpenAI-style envelope.
776
+
777
+ Use this when you want direct access to retrieved content for custom downstream handling,
778
+ logging, inspection, or separate orchestration logic.
779
+
780
+ Parameters
781
+ ----------
782
+ vector_store_id : str
783
+ The ID of the vector store to search within.
784
+ query_text : str
785
+ The user query in natural language.
786
+ k : int, optional
787
+ The number of top hits to retrieve (default is 20).
788
+ vector_store_host : Optional[str], optional
789
+ Optional override for the vector store host (e.g., when calling remote Qdrant).
790
+
791
+ Returns
792
+ -------
793
+ List[Dict[str, Any]]
794
+ A normalized list of retrieval results (each with metadata and score),
795
+ without abstraction, synthesis, or formatting.
796
+ """
797
+
798
+ # 1️⃣ Retrieve initial candidates (now with optional vector_store_host passthrough)
799
+ hits = retriever.retrieve(
800
+ self,
801
+ vector_store_id=vector_store_id,
802
+ query=query_text,
803
+ k=k,
804
+ vector_store_host=vector_store_host,
805
+ )
806
+
807
+ # 2️⃣ Optional cross-encoder / LLM rerank
808
+ hits = reranker.rerank(query_text, hits, top_k=min(len(hits), 10))
809
+
810
+ # 3️⃣ Normalize schema (guarantee 'meta_data')
811
+ hits = self._normalise_hits(hits)
812
+
813
+ return hits
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: projectdavid
3
- Version: 1.33.14
3
+ Version: 1.33.16
4
4
  Summary: Python SDK for interacting with the Entities Assistant API.
5
5
  Author-email: Francis Neequaye Armah <francis.neequaye@projectdavid.co.uk>
6
6
  License: PolyForm Noncommercial License 1.0.0
@@ -52,6 +52,7 @@ src/projectdavid/clients/users_client.py
52
52
  src/projectdavid/clients/vector_store_manager.py
53
53
  src/projectdavid/clients/vectors.py
54
54
  src/projectdavid/clients/vision-file_processor.py
55
+ src/projectdavid/clients/vision_vectors.py
55
56
  src/projectdavid/constants/platform.py
56
57
  src/projectdavid/services/logging_service.py
57
58
  src/projectdavid/synthesis/__init__.py
File without changes
File without changes
File without changes