projectdavid 1.33.14__py3-none-any.whl → 1.33.16__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of projectdavid might be problematic. Click here for more details.

@@ -13,14 +13,11 @@ from typing import Any, Dict, List, Optional, Union
13
13
 
14
14
  import httpx
15
15
  from dotenv import load_dotenv
16
- from PIL import Image
17
16
  from projectdavid_common import UtilsInterface, ValidationInterface
18
17
  from pydantic import BaseModel, Field
19
- from qdrant_client.http import models as qdrant
20
18
 
21
19
  from projectdavid.clients.file_processor import FileProcessor
22
20
  from projectdavid.clients.vector_store_manager import VectorStoreManager
23
- from projectdavid.decorators import experimental
24
21
  from projectdavid.synthesis import reranker, retriever
25
22
  from projectdavid.synthesis.llm_synthesizer import synthesize_envelope
26
23
  from projectdavid.utils.vector_search_formatter import make_envelope
@@ -73,9 +70,8 @@ class VectorStoreClient:
73
70
  api_key: Optional[str] = None,
74
71
  *,
75
72
  vector_store_host: str = "localhost",
76
- file_processor_kwargs: Optional[dict] = None,
73
+ file_processor_kwargs: Optional[dict] = None, # 🔶 add arg
77
74
  ):
78
-
79
75
  self.base_url = (base_url or os.getenv("BASE_URL", "")).rstrip("/")
80
76
  self.api_key = api_key or os.getenv("API_KEY")
81
77
  if not self.base_url:
@@ -96,18 +92,7 @@ class VectorStoreClient:
96
92
  self.identifier_service = UtilsInterface.IdentifierService()
97
93
 
98
94
  # 🔶 forward kwargs into the upgraded FileProcessor
99
-
100
- self.file_processor = FileProcessor(
101
- **(
102
- file_processor_kwargs
103
- or {
104
- "use_gpu": False,
105
- "use_detection": True,
106
- "use_geo": True,
107
- "use_ocr": True,
108
- }
109
- )
110
- )
95
+ self.file_processor = FileProcessor(**(file_processor_kwargs or {}))
111
96
 
112
97
  log.info("VectorStoreClient → %s", self.base_url)
113
98
 
@@ -200,15 +185,12 @@ class VectorStoreClient:
200
185
  vector_size: int,
201
186
  distance_metric: str,
202
187
  config: Optional[Dict[str, Any]],
203
- vectors_config: Optional[Dict[str, qdrant.VectorParams]] = None, # ← NEW
204
188
  ) -> ValidationInterface.VectorStoreRead:
205
189
  shared_id = self.identifier_service.generate_vector_id()
206
- # forward multi-vector schema if given
207
190
  self.vector_manager.create_store(
208
191
  collection_name=shared_id,
209
192
  vector_size=vector_size,
210
193
  distance=distance_metric.upper(),
211
- vectors_config=vectors_config,
212
194
  )
213
195
 
214
196
  payload = {
@@ -221,6 +203,10 @@ class VectorStoreClient:
221
203
  resp = await self._request("POST", "/v1/vector-stores", json=payload)
222
204
  return ValidationInterface.VectorStoreRead.model_validate(resp)
223
205
 
206
+ async def _list_my_vs_async(self) -> List[ValidationInterface.VectorStoreRead]:
207
+ resp = await self._request("GET", "/v1/vector-stores")
208
+ return [ValidationInterface.VectorStoreRead.model_validate(r) for r in resp]
209
+
224
210
  # ------------------------------------------------------------------ #
225
211
  # NEW admin‑aware creation helper
226
212
  # ------------------------------------------------------------------ #
@@ -231,17 +217,13 @@ class VectorStoreClient:
231
217
  vector_size: int,
232
218
  distance_metric: str,
233
219
  config: Optional[Dict[str, Any]],
234
- vectors_config: Optional[Dict[str, qdrant.VectorParams]] = None, # ← NEW
235
220
  ) -> ValidationInterface.VectorStoreRead:
236
221
  shared_id = self.identifier_service.generate_vector_id()
237
- # forward multi-vector schema if given
238
222
  self.vector_manager.create_store(
239
223
  collection_name=shared_id,
240
224
  vector_size=vector_size,
241
225
  distance=distance_metric.upper(),
242
- vectors_config=vectors_config,
243
226
  )
244
-
245
227
  payload = {
246
228
  "shared_id": shared_id,
247
229
  "name": name,
@@ -249,6 +231,7 @@ class VectorStoreClient:
249
231
  "distance_metric": distance_metric.upper(),
250
232
  "config": config or {},
251
233
  }
234
+ # pass owner_id as query‑param (backend enforces admin‑only)
252
235
  resp = await self._request(
253
236
  "POST",
254
237
  "/v1/vector-stores",
@@ -304,20 +287,12 @@ class VectorStoreClient:
304
287
  async def _search_vs_async(
305
288
  self,
306
289
  vector_store_id: str,
307
- query_text: Union[str, List[float]],
290
+ query_text: str,
308
291
  top_k: int,
309
292
  filters: Optional[Dict] = None,
310
293
  vector_store_host: Optional[str] = None,
311
- vector_field: Optional[str] = None, # allow caller override
312
294
  ) -> List[Dict[str, Any]]:
313
- """
314
- Internal: run ANN search against the specified vector field or auto-detect by store size.
315
295
 
316
- If `vector_field` is provided, it will be used directly. Otherwise:
317
- • 1024-D → caption_vector
318
- • 3-D → geo_vector
319
- • others → default vector (text)
320
- """
321
296
  # pick local vs. override host
322
297
  vector_manager = (
323
298
  VectorStoreManager(vector_store_host=vector_store_host)
@@ -325,36 +300,16 @@ class VectorStoreClient:
325
300
  else self.vector_manager
326
301
  )
327
302
 
328
- # fetch store info to inspect schema
329
303
  store = self.retrieve_vector_store_sync(vector_store_id)
330
304
 
331
- # determine the query vector and target field
332
- if vector_field is not None:
333
- # if caller passed a raw vector list, use it; otherwise treat as caption search
334
- if isinstance(query_text, list):
335
- vec = query_text
336
- else:
337
- vec = self.file_processor.encode_clip_text(query_text).tolist()
338
- else:
339
- # auto-detect based on stored vector dimensionality
340
- if store.vector_size == 1024:
341
- # image/caption space
342
- vec = self.file_processor.encode_clip_text(query_text).tolist()
343
- vector_field = "caption_vector"
344
- elif store.vector_size == 3:
345
- # geo space; query_text must be a raw 3-D list
346
- if not isinstance(query_text, list):
347
- raise VectorStoreClientError(
348
- "Geo search requires a 3-element vector; pass raw unit-sphere list"
349
- )
350
- vec = query_text
351
- vector_field = "geo_vector"
352
- else:
353
- # fallback to text embedding
354
- vec = self.file_processor.encode_text(query_text).tolist()
355
- vector_field = None # use default
356
-
357
- # perform the search on the selected vector column
305
+ # 🔶 choose encoder by vector_size
306
+ if store.vector_size == 1024: # images collection
307
+ vec = self.file_processor.encode_clip_text(query_text).tolist()
308
+ vector_field = "caption_vector" # field name in Qdrant
309
+ else: # 384-D text collection
310
+ vec = self.file_processor.encode_text(query_text).tolist()
311
+ vector_field = None # default field
312
+
358
313
  return vector_manager.query_store(
359
314
  store_name=store.collection_name,
360
315
  query_vector=vec,
@@ -487,110 +442,10 @@ class VectorStoreClient:
487
442
  vector_size: int = 384,
488
443
  distance_metric: str = "Cosine",
489
444
  config: Optional[Dict[str, Any]] = None,
490
- vectors_config: Optional[Dict[str, qdrant.VectorParams]] = None, # ← NEW
491
- ) -> ValidationInterface.VectorStoreRead:
492
- """
493
- Create a new store owned by this API key.
494
-
495
- If `vectors_config` is provided, it should map each vector
496
- field name to its Qdrant VectorParams (size + distance).
497
- """
498
- return self._run_sync(
499
- self._create_vs_async(
500
- name,
501
- vector_size,
502
- distance_metric,
503
- config,
504
- vectors_config,
505
- )
506
- )
507
-
508
- @experimental
509
- def create_vector_vision_store(
510
- self,
511
- name: str,
512
- *,
513
- vector_size: int = 384,
514
- distance_metric: str = "Cosine",
515
- config: Optional[Dict[str, Any]] = None,
516
- vectors_config: Optional[Dict[str, qdrant.VectorParams]] = None, # ← NEW
517
- ) -> ValidationInterface.VectorStoreRead:
518
-
519
- if not vectors_config:
520
- vectors_config = {
521
- # Raw visual embeddings (OpenCLIP ViT-H/14 → 1024-D)
522
- "image_vector": qdrant.VectorParams(
523
- size=1024, distance=qdrant.Distance.COSINE
524
- ),
525
- # Language embeddings of your BLIP-2 captions → 1024-D
526
- "caption_vector": qdrant.VectorParams(
527
- size=1024, distance=qdrant.Distance.COSINE
528
- ),
529
- # Object-region embeddings (YOLO crop + Sentence-BERT) → 1024-D
530
- "region_vector": qdrant.VectorParams(
531
- size=1024, distance=qdrant.Distance.COSINE
532
- ),
533
- # Geo-location unit vectors (RegioNet) → 3-D
534
- "geo_vector": qdrant.VectorParams(
535
- size=3, distance=qdrant.Distance.COSINE
536
- ),
537
- }
538
-
539
- return self._run_sync(
540
- self._create_vs_async(
541
- name,
542
- vector_size,
543
- distance_metric,
544
- config,
545
- vectors_config,
546
- )
547
- )
548
-
549
- @experimental
550
- def create_vector_vision_store_for_user(
551
- self,
552
- owner_id: str,
553
- name: str,
554
- *,
555
- vector_size: int = 384,
556
- distance_metric: str = "Cosine",
557
- config: Optional[Dict[str, Any]] = None,
558
- vectors_config: Optional[Dict[str, qdrant.VectorParams]] = None, # ← NEW
559
445
  ) -> ValidationInterface.VectorStoreRead:
560
- """
561
- Admin-only: create a store on behalf of another user.
562
- Pass `vectors_config` to define a multi-vector schema.
563
- """
564
- if not vectors_config:
565
-
566
- vectors_config = {
567
- # Raw visual embeddings (OpenCLIP ViT-H/14 → 1024-D)
568
- "image_vector": qdrant.VectorParams(
569
- size=1024, distance=qdrant.Distance.COSINE
570
- ),
571
- # Language embeddings of your BLIP-2 captions → 1024-D
572
- "caption_vector": qdrant.VectorParams(
573
- size=1024, distance=qdrant.Distance.COSINE
574
- ),
575
- # Object-region embeddings (YOLO crop + Sentence-BERT) → 1024-D
576
- "region_vector": qdrant.VectorParams(
577
- size=1024, distance=qdrant.Distance.COSINE
578
- ),
579
- # Geo-location unit vectors (RegioNet) → 3-D
580
- "geo_vector": qdrant.VectorParams(
581
- size=3, distance=qdrant.Distance.COSINE
582
- ),
583
- }
584
-
446
+ """Create a new store owned by *this* API key."""
585
447
  return self._run_sync(
586
- self._create_vs_for_user_async(
587
- owner_id,
588
- name,
589
- vector_size,
590
- distance_metric,
591
- config,
592
- vectors_config,
593
- )
448
+ self._create_vs_async(name, vector_size, distance_metric, config)
594
449
  )
595
450
 
596
451
  def create_vector_store_for_user(
@@ -601,20 +456,16 @@ class VectorStoreClient:
601
456
  vector_size: int = 384,
602
457
  distance_metric: str = "Cosine",
603
458
  config: Optional[Dict[str, Any]] = None,
604
- vectors_config: Optional[Dict[str, qdrant.VectorParams]] = None, # ← NEW
605
459
  ) -> ValidationInterface.VectorStoreRead:
606
460
  """
607
- Admin-only: create a store on behalf of another user.
608
- Pass `vectors_config` to define a multi-vector schema.
461
+ **Adminonly** helper → create a store on behalf of *owner_id*.
462
+
463
+ The caller’s API‑key must belong to an admin; otherwise the
464
+ request will be rejected by the server with HTTP 403.
609
465
  """
610
466
  return self._run_sync(
611
467
  self._create_vs_for_user_async(
612
- owner_id,
613
- name,
614
- vector_size,
615
- distance_metric,
616
- config,
617
- vectors_config,
468
+ owner_id, name, vector_size, distance_metric, config
618
469
  )
619
470
  )
620
471
 
@@ -793,16 +644,10 @@ class VectorStoreClient:
793
644
  top_k: int = 5,
794
645
  filters: Optional[Dict] = None,
795
646
  vector_store_host: Optional[str] = None,
796
- vector_field: Optional[str] = None, # ← NEW
797
647
  ) -> List[Dict[str, Any]]:
798
648
  return self._run_sync(
799
649
  self._search_vs_async(
800
- vector_store_id,
801
- query_text,
802
- top_k,
803
- filters,
804
- vector_store_host,
805
- vector_field,
650
+ vector_store_id, query_text, top_k, filters, vector_store_host
806
651
  )
807
652
  )
808
653
 
@@ -966,93 +811,3 @@ class VectorStoreClient:
966
811
  hits = self._normalise_hits(hits)
967
812
 
968
813
  return hits
969
-
970
- @experimental
971
- def image_similarity_search(
972
- self,
973
- vector_store_id: str,
974
- img: Image.Image,
975
- k: int = 10,
976
- vector_store_host: Optional[str] = None,
977
- ) -> List[Dict[str, Any]]:
978
- vec = self.file_processor.encode_image(img).tolist()
979
- return self.vector_file_search_raw(
980
- vector_store_id=vector_store_id,
981
- query_text=vec,
982
- top_k=k,
983
- filters=None,
984
- vector_store_host=vector_store_host,
985
- vector_field="image_vector",
986
- )
987
-
988
- @experimental
989
- def search_images(
990
- self,
991
- vector_store_id: str,
992
- query: Union[str, Image.Image, List[float]],
993
- *,
994
- modality: Optional[str] = None,
995
- k: int = 10,
996
- vector_store_host: Optional[str] = None,
997
- ) -> List[Dict[str, Any]]:
998
- """
999
- Unified image search across multiple modalities, with appropriate reranking:
1000
-
1001
- - If `query` is a str → caption search (reranked)
1002
- - If `query` is a PIL.Image.Image → visual search (no rerank)
1003
- - If `query` is a list[float] → raw vector search
1004
- - `modality` override: one of 'caption', 'image', 'region', 'geo'
1005
- """
1006
- # Map modality to (vector_field, encoder)
1007
- field_map = {
1008
- "caption": (
1009
- "caption_vector",
1010
- lambda q: self.file_processor.encode_clip_text(q).tolist(),
1011
- ),
1012
- "image": (
1013
- "image_vector",
1014
- lambda q: self.file_processor.encode_image(q).tolist(),
1015
- ),
1016
- "region": (
1017
- "region_vector",
1018
- lambda q: self.file_processor.encode_text(q).tolist(),
1019
- ),
1020
- "geo": ("geo_vector", lambda q: q), # assume q is raw 3-D vector
1021
- }
1022
-
1023
- # Auto-detect if not provided
1024
- if modality is None:
1025
- if isinstance(query, str):
1026
- modality = "caption"
1027
- elif isinstance(query, Image.Image):
1028
- modality = "image"
1029
- elif isinstance(query, list):
1030
- modality = "image"
1031
- else:
1032
- raise VectorStoreClientError(f"Unsupported query type: {type(query)}")
1033
-
1034
- modality = modality.lower()
1035
- if modality not in field_map:
1036
- raise VectorStoreClientError(f"Unknown modality '{modality}'")
1037
-
1038
- vector_field, encoder = field_map[modality]
1039
- vec = encoder(query)
1040
-
1041
- # 1️⃣ ANN search
1042
- hits = self.vector_file_search_raw(
1043
- vector_store_id=vector_store_id,
1044
- query_text=vec,
1045
- top_k=k,
1046
- filters=None,
1047
- vector_store_host=vector_store_host,
1048
- vector_field=vector_field,
1049
- )
1050
-
1051
- # 2️⃣ Rerank for text-based modalities
1052
- if modality in ("caption", "region"):
1053
- hits = reranker.rerank(
1054
- query if isinstance(query, str) else "", hits, top_k=min(len(hits), k)
1055
- )
1056
-
1057
- # 3️⃣ Normalize and return
1058
- return self._normalise_hits(hits)