projectdavid 1.33.14__py3-none-any.whl → 1.33.16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of projectdavid might be problematic. Click here for more details.
- projectdavid/clients/vectors.py +24 -269
- projectdavid/clients/vision_vectors.py +1058 -0
- {projectdavid-1.33.14.dist-info → projectdavid-1.33.16.dist-info}/METADATA +1 -1
- {projectdavid-1.33.14.dist-info → projectdavid-1.33.16.dist-info}/RECORD +7 -6
- {projectdavid-1.33.14.dist-info → projectdavid-1.33.16.dist-info}/WHEEL +0 -0
- {projectdavid-1.33.14.dist-info → projectdavid-1.33.16.dist-info}/licenses/LICENSE +0 -0
- {projectdavid-1.33.14.dist-info → projectdavid-1.33.16.dist-info}/top_level.txt +0 -0
projectdavid/clients/vectors.py
CHANGED
|
@@ -13,14 +13,11 @@ from typing import Any, Dict, List, Optional, Union
|
|
|
13
13
|
|
|
14
14
|
import httpx
|
|
15
15
|
from dotenv import load_dotenv
|
|
16
|
-
from PIL import Image
|
|
17
16
|
from projectdavid_common import UtilsInterface, ValidationInterface
|
|
18
17
|
from pydantic import BaseModel, Field
|
|
19
|
-
from qdrant_client.http import models as qdrant
|
|
20
18
|
|
|
21
19
|
from projectdavid.clients.file_processor import FileProcessor
|
|
22
20
|
from projectdavid.clients.vector_store_manager import VectorStoreManager
|
|
23
|
-
from projectdavid.decorators import experimental
|
|
24
21
|
from projectdavid.synthesis import reranker, retriever
|
|
25
22
|
from projectdavid.synthesis.llm_synthesizer import synthesize_envelope
|
|
26
23
|
from projectdavid.utils.vector_search_formatter import make_envelope
|
|
@@ -73,9 +70,8 @@ class VectorStoreClient:
|
|
|
73
70
|
api_key: Optional[str] = None,
|
|
74
71
|
*,
|
|
75
72
|
vector_store_host: str = "localhost",
|
|
76
|
-
file_processor_kwargs: Optional[dict] = None,
|
|
73
|
+
file_processor_kwargs: Optional[dict] = None, # 🔶 add arg
|
|
77
74
|
):
|
|
78
|
-
|
|
79
75
|
self.base_url = (base_url or os.getenv("BASE_URL", "")).rstrip("/")
|
|
80
76
|
self.api_key = api_key or os.getenv("API_KEY")
|
|
81
77
|
if not self.base_url:
|
|
@@ -96,18 +92,7 @@ class VectorStoreClient:
|
|
|
96
92
|
self.identifier_service = UtilsInterface.IdentifierService()
|
|
97
93
|
|
|
98
94
|
# 🔶 forward kwargs into the upgraded FileProcessor
|
|
99
|
-
|
|
100
|
-
self.file_processor = FileProcessor(
|
|
101
|
-
**(
|
|
102
|
-
file_processor_kwargs
|
|
103
|
-
or {
|
|
104
|
-
"use_gpu": False,
|
|
105
|
-
"use_detection": True,
|
|
106
|
-
"use_geo": True,
|
|
107
|
-
"use_ocr": True,
|
|
108
|
-
}
|
|
109
|
-
)
|
|
110
|
-
)
|
|
95
|
+
self.file_processor = FileProcessor(**(file_processor_kwargs or {}))
|
|
111
96
|
|
|
112
97
|
log.info("VectorStoreClient → %s", self.base_url)
|
|
113
98
|
|
|
@@ -200,15 +185,12 @@ class VectorStoreClient:
|
|
|
200
185
|
vector_size: int,
|
|
201
186
|
distance_metric: str,
|
|
202
187
|
config: Optional[Dict[str, Any]],
|
|
203
|
-
vectors_config: Optional[Dict[str, qdrant.VectorParams]] = None, # ← NEW
|
|
204
188
|
) -> ValidationInterface.VectorStoreRead:
|
|
205
189
|
shared_id = self.identifier_service.generate_vector_id()
|
|
206
|
-
# forward multi-vector schema if given
|
|
207
190
|
self.vector_manager.create_store(
|
|
208
191
|
collection_name=shared_id,
|
|
209
192
|
vector_size=vector_size,
|
|
210
193
|
distance=distance_metric.upper(),
|
|
211
|
-
vectors_config=vectors_config,
|
|
212
194
|
)
|
|
213
195
|
|
|
214
196
|
payload = {
|
|
@@ -221,6 +203,10 @@ class VectorStoreClient:
|
|
|
221
203
|
resp = await self._request("POST", "/v1/vector-stores", json=payload)
|
|
222
204
|
return ValidationInterface.VectorStoreRead.model_validate(resp)
|
|
223
205
|
|
|
206
|
+
async def _list_my_vs_async(self) -> List[ValidationInterface.VectorStoreRead]:
|
|
207
|
+
resp = await self._request("GET", "/v1/vector-stores")
|
|
208
|
+
return [ValidationInterface.VectorStoreRead.model_validate(r) for r in resp]
|
|
209
|
+
|
|
224
210
|
# ------------------------------------------------------------------ #
|
|
225
211
|
# NEW admin‑aware creation helper
|
|
226
212
|
# ------------------------------------------------------------------ #
|
|
@@ -231,17 +217,13 @@ class VectorStoreClient:
|
|
|
231
217
|
vector_size: int,
|
|
232
218
|
distance_metric: str,
|
|
233
219
|
config: Optional[Dict[str, Any]],
|
|
234
|
-
vectors_config: Optional[Dict[str, qdrant.VectorParams]] = None, # ← NEW
|
|
235
220
|
) -> ValidationInterface.VectorStoreRead:
|
|
236
221
|
shared_id = self.identifier_service.generate_vector_id()
|
|
237
|
-
# forward multi-vector schema if given
|
|
238
222
|
self.vector_manager.create_store(
|
|
239
223
|
collection_name=shared_id,
|
|
240
224
|
vector_size=vector_size,
|
|
241
225
|
distance=distance_metric.upper(),
|
|
242
|
-
vectors_config=vectors_config,
|
|
243
226
|
)
|
|
244
|
-
|
|
245
227
|
payload = {
|
|
246
228
|
"shared_id": shared_id,
|
|
247
229
|
"name": name,
|
|
@@ -249,6 +231,7 @@ class VectorStoreClient:
|
|
|
249
231
|
"distance_metric": distance_metric.upper(),
|
|
250
232
|
"config": config or {},
|
|
251
233
|
}
|
|
234
|
+
# pass owner_id as query‑param (backend enforces admin‑only)
|
|
252
235
|
resp = await self._request(
|
|
253
236
|
"POST",
|
|
254
237
|
"/v1/vector-stores",
|
|
@@ -304,20 +287,12 @@ class VectorStoreClient:
|
|
|
304
287
|
async def _search_vs_async(
|
|
305
288
|
self,
|
|
306
289
|
vector_store_id: str,
|
|
307
|
-
query_text:
|
|
290
|
+
query_text: str,
|
|
308
291
|
top_k: int,
|
|
309
292
|
filters: Optional[Dict] = None,
|
|
310
293
|
vector_store_host: Optional[str] = None,
|
|
311
|
-
vector_field: Optional[str] = None, # allow caller override
|
|
312
294
|
) -> List[Dict[str, Any]]:
|
|
313
|
-
"""
|
|
314
|
-
Internal: run ANN search against the specified vector field or auto-detect by store size.
|
|
315
295
|
|
|
316
|
-
If `vector_field` is provided, it will be used directly. Otherwise:
|
|
317
|
-
• 1024-D → caption_vector
|
|
318
|
-
• 3-D → geo_vector
|
|
319
|
-
• others → default vector (text)
|
|
320
|
-
"""
|
|
321
296
|
# pick local vs. override host
|
|
322
297
|
vector_manager = (
|
|
323
298
|
VectorStoreManager(vector_store_host=vector_store_host)
|
|
@@ -325,36 +300,16 @@ class VectorStoreClient:
|
|
|
325
300
|
else self.vector_manager
|
|
326
301
|
)
|
|
327
302
|
|
|
328
|
-
# fetch store info to inspect schema
|
|
329
303
|
store = self.retrieve_vector_store_sync(vector_store_id)
|
|
330
304
|
|
|
331
|
-
#
|
|
332
|
-
if
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
# auto-detect based on stored vector dimensionality
|
|
340
|
-
if store.vector_size == 1024:
|
|
341
|
-
# image/caption space
|
|
342
|
-
vec = self.file_processor.encode_clip_text(query_text).tolist()
|
|
343
|
-
vector_field = "caption_vector"
|
|
344
|
-
elif store.vector_size == 3:
|
|
345
|
-
# geo space; query_text must be a raw 3-D list
|
|
346
|
-
if not isinstance(query_text, list):
|
|
347
|
-
raise VectorStoreClientError(
|
|
348
|
-
"Geo search requires a 3-element vector; pass raw unit-sphere list"
|
|
349
|
-
)
|
|
350
|
-
vec = query_text
|
|
351
|
-
vector_field = "geo_vector"
|
|
352
|
-
else:
|
|
353
|
-
# fallback to text embedding
|
|
354
|
-
vec = self.file_processor.encode_text(query_text).tolist()
|
|
355
|
-
vector_field = None # use default
|
|
356
|
-
|
|
357
|
-
# perform the search on the selected vector column
|
|
305
|
+
# 🔶 choose encoder by vector_size
|
|
306
|
+
if store.vector_size == 1024: # images collection
|
|
307
|
+
vec = self.file_processor.encode_clip_text(query_text).tolist()
|
|
308
|
+
vector_field = "caption_vector" # field name in Qdrant
|
|
309
|
+
else: # 384-D text collection
|
|
310
|
+
vec = self.file_processor.encode_text(query_text).tolist()
|
|
311
|
+
vector_field = None # default field
|
|
312
|
+
|
|
358
313
|
return vector_manager.query_store(
|
|
359
314
|
store_name=store.collection_name,
|
|
360
315
|
query_vector=vec,
|
|
@@ -487,110 +442,10 @@ class VectorStoreClient:
|
|
|
487
442
|
vector_size: int = 384,
|
|
488
443
|
distance_metric: str = "Cosine",
|
|
489
444
|
config: Optional[Dict[str, Any]] = None,
|
|
490
|
-
vectors_config: Optional[Dict[str, qdrant.VectorParams]] = None, # ← NEW
|
|
491
|
-
) -> ValidationInterface.VectorStoreRead:
|
|
492
|
-
"""
|
|
493
|
-
Create a new store owned by this API key.
|
|
494
|
-
|
|
495
|
-
If `vectors_config` is provided, it should map each vector
|
|
496
|
-
field name to its Qdrant VectorParams (size + distance).
|
|
497
|
-
"""
|
|
498
|
-
return self._run_sync(
|
|
499
|
-
self._create_vs_async(
|
|
500
|
-
name,
|
|
501
|
-
vector_size,
|
|
502
|
-
distance_metric,
|
|
503
|
-
config,
|
|
504
|
-
vectors_config,
|
|
505
|
-
)
|
|
506
|
-
)
|
|
507
|
-
|
|
508
|
-
@experimental
|
|
509
|
-
def create_vector_vision_store(
|
|
510
|
-
self,
|
|
511
|
-
name: str,
|
|
512
|
-
*,
|
|
513
|
-
vector_size: int = 384,
|
|
514
|
-
distance_metric: str = "Cosine",
|
|
515
|
-
config: Optional[Dict[str, Any]] = None,
|
|
516
|
-
vectors_config: Optional[Dict[str, qdrant.VectorParams]] = None, # ← NEW
|
|
517
|
-
) -> ValidationInterface.VectorStoreRead:
|
|
518
|
-
|
|
519
|
-
if not vectors_config:
|
|
520
|
-
vectors_config = {
|
|
521
|
-
# Raw visual embeddings (OpenCLIP ViT-H/14 → 1024-D)
|
|
522
|
-
"image_vector": qdrant.VectorParams(
|
|
523
|
-
size=1024, distance=qdrant.Distance.COSINE
|
|
524
|
-
),
|
|
525
|
-
# Language embeddings of your BLIP-2 captions → 1024-D
|
|
526
|
-
"caption_vector": qdrant.VectorParams(
|
|
527
|
-
size=1024, distance=qdrant.Distance.COSINE
|
|
528
|
-
),
|
|
529
|
-
# Object-region embeddings (YOLO crop + Sentence-BERT) → 1024-D
|
|
530
|
-
"region_vector": qdrant.VectorParams(
|
|
531
|
-
size=1024, distance=qdrant.Distance.COSINE
|
|
532
|
-
),
|
|
533
|
-
# Geo-location unit vectors (RegioNet) → 3-D
|
|
534
|
-
"geo_vector": qdrant.VectorParams(
|
|
535
|
-
size=3, distance=qdrant.Distance.COSINE
|
|
536
|
-
),
|
|
537
|
-
}
|
|
538
|
-
|
|
539
|
-
return self._run_sync(
|
|
540
|
-
self._create_vs_async(
|
|
541
|
-
name,
|
|
542
|
-
vector_size,
|
|
543
|
-
distance_metric,
|
|
544
|
-
config,
|
|
545
|
-
vectors_config,
|
|
546
|
-
)
|
|
547
|
-
)
|
|
548
|
-
|
|
549
|
-
@experimental
|
|
550
|
-
def create_vector_vision_store_for_user(
|
|
551
|
-
self,
|
|
552
|
-
owner_id: str,
|
|
553
|
-
name: str,
|
|
554
|
-
*,
|
|
555
|
-
vector_size: int = 384,
|
|
556
|
-
distance_metric: str = "Cosine",
|
|
557
|
-
config: Optional[Dict[str, Any]] = None,
|
|
558
|
-
vectors_config: Optional[Dict[str, qdrant.VectorParams]] = None, # ← NEW
|
|
559
445
|
) -> ValidationInterface.VectorStoreRead:
|
|
560
|
-
"""
|
|
561
|
-
Admin-only: create a store on behalf of another user.
|
|
562
|
-
Pass `vectors_config` to define a multi-vector schema.
|
|
563
|
-
"""
|
|
564
|
-
if not vectors_config:
|
|
565
|
-
|
|
566
|
-
vectors_config = {
|
|
567
|
-
# Raw visual embeddings (OpenCLIP ViT-H/14 → 1024-D)
|
|
568
|
-
"image_vector": qdrant.VectorParams(
|
|
569
|
-
size=1024, distance=qdrant.Distance.COSINE
|
|
570
|
-
),
|
|
571
|
-
# Language embeddings of your BLIP-2 captions → 1024-D
|
|
572
|
-
"caption_vector": qdrant.VectorParams(
|
|
573
|
-
size=1024, distance=qdrant.Distance.COSINE
|
|
574
|
-
),
|
|
575
|
-
# Object-region embeddings (YOLO crop + Sentence-BERT) → 1024-D
|
|
576
|
-
"region_vector": qdrant.VectorParams(
|
|
577
|
-
size=1024, distance=qdrant.Distance.COSINE
|
|
578
|
-
),
|
|
579
|
-
# Geo-location unit vectors (RegioNet) → 3-D
|
|
580
|
-
"geo_vector": qdrant.VectorParams(
|
|
581
|
-
size=3, distance=qdrant.Distance.COSINE
|
|
582
|
-
),
|
|
583
|
-
}
|
|
584
|
-
|
|
446
|
+
"""Create a new store owned by *this* API key."""
|
|
585
447
|
return self._run_sync(
|
|
586
|
-
self.
|
|
587
|
-
owner_id,
|
|
588
|
-
name,
|
|
589
|
-
vector_size,
|
|
590
|
-
distance_metric,
|
|
591
|
-
config,
|
|
592
|
-
vectors_config,
|
|
593
|
-
)
|
|
448
|
+
self._create_vs_async(name, vector_size, distance_metric, config)
|
|
594
449
|
)
|
|
595
450
|
|
|
596
451
|
def create_vector_store_for_user(
|
|
@@ -601,20 +456,16 @@ class VectorStoreClient:
|
|
|
601
456
|
vector_size: int = 384,
|
|
602
457
|
distance_metric: str = "Cosine",
|
|
603
458
|
config: Optional[Dict[str, Any]] = None,
|
|
604
|
-
vectors_config: Optional[Dict[str, qdrant.VectorParams]] = None, # ← NEW
|
|
605
459
|
) -> ValidationInterface.VectorStoreRead:
|
|
606
460
|
"""
|
|
607
|
-
Admin
|
|
608
|
-
|
|
461
|
+
**Admin‑only** helper → create a store on behalf of *owner_id*.
|
|
462
|
+
|
|
463
|
+
The caller’s API‑key must belong to an admin; otherwise the
|
|
464
|
+
request will be rejected by the server with HTTP 403.
|
|
609
465
|
"""
|
|
610
466
|
return self._run_sync(
|
|
611
467
|
self._create_vs_for_user_async(
|
|
612
|
-
owner_id,
|
|
613
|
-
name,
|
|
614
|
-
vector_size,
|
|
615
|
-
distance_metric,
|
|
616
|
-
config,
|
|
617
|
-
vectors_config,
|
|
468
|
+
owner_id, name, vector_size, distance_metric, config
|
|
618
469
|
)
|
|
619
470
|
)
|
|
620
471
|
|
|
@@ -793,16 +644,10 @@ class VectorStoreClient:
|
|
|
793
644
|
top_k: int = 5,
|
|
794
645
|
filters: Optional[Dict] = None,
|
|
795
646
|
vector_store_host: Optional[str] = None,
|
|
796
|
-
vector_field: Optional[str] = None, # ← NEW
|
|
797
647
|
) -> List[Dict[str, Any]]:
|
|
798
648
|
return self._run_sync(
|
|
799
649
|
self._search_vs_async(
|
|
800
|
-
vector_store_id,
|
|
801
|
-
query_text,
|
|
802
|
-
top_k,
|
|
803
|
-
filters,
|
|
804
|
-
vector_store_host,
|
|
805
|
-
vector_field,
|
|
650
|
+
vector_store_id, query_text, top_k, filters, vector_store_host
|
|
806
651
|
)
|
|
807
652
|
)
|
|
808
653
|
|
|
@@ -966,93 +811,3 @@ class VectorStoreClient:
|
|
|
966
811
|
hits = self._normalise_hits(hits)
|
|
967
812
|
|
|
968
813
|
return hits
|
|
969
|
-
|
|
970
|
-
@experimental
|
|
971
|
-
def image_similarity_search(
|
|
972
|
-
self,
|
|
973
|
-
vector_store_id: str,
|
|
974
|
-
img: Image.Image,
|
|
975
|
-
k: int = 10,
|
|
976
|
-
vector_store_host: Optional[str] = None,
|
|
977
|
-
) -> List[Dict[str, Any]]:
|
|
978
|
-
vec = self.file_processor.encode_image(img).tolist()
|
|
979
|
-
return self.vector_file_search_raw(
|
|
980
|
-
vector_store_id=vector_store_id,
|
|
981
|
-
query_text=vec,
|
|
982
|
-
top_k=k,
|
|
983
|
-
filters=None,
|
|
984
|
-
vector_store_host=vector_store_host,
|
|
985
|
-
vector_field="image_vector",
|
|
986
|
-
)
|
|
987
|
-
|
|
988
|
-
@experimental
|
|
989
|
-
def search_images(
|
|
990
|
-
self,
|
|
991
|
-
vector_store_id: str,
|
|
992
|
-
query: Union[str, Image.Image, List[float]],
|
|
993
|
-
*,
|
|
994
|
-
modality: Optional[str] = None,
|
|
995
|
-
k: int = 10,
|
|
996
|
-
vector_store_host: Optional[str] = None,
|
|
997
|
-
) -> List[Dict[str, Any]]:
|
|
998
|
-
"""
|
|
999
|
-
Unified image search across multiple modalities, with appropriate reranking:
|
|
1000
|
-
|
|
1001
|
-
- If `query` is a str → caption search (reranked)
|
|
1002
|
-
- If `query` is a PIL.Image.Image → visual search (no rerank)
|
|
1003
|
-
- If `query` is a list[float] → raw vector search
|
|
1004
|
-
- `modality` override: one of 'caption', 'image', 'region', 'geo'
|
|
1005
|
-
"""
|
|
1006
|
-
# Map modality to (vector_field, encoder)
|
|
1007
|
-
field_map = {
|
|
1008
|
-
"caption": (
|
|
1009
|
-
"caption_vector",
|
|
1010
|
-
lambda q: self.file_processor.encode_clip_text(q).tolist(),
|
|
1011
|
-
),
|
|
1012
|
-
"image": (
|
|
1013
|
-
"image_vector",
|
|
1014
|
-
lambda q: self.file_processor.encode_image(q).tolist(),
|
|
1015
|
-
),
|
|
1016
|
-
"region": (
|
|
1017
|
-
"region_vector",
|
|
1018
|
-
lambda q: self.file_processor.encode_text(q).tolist(),
|
|
1019
|
-
),
|
|
1020
|
-
"geo": ("geo_vector", lambda q: q), # assume q is raw 3-D vector
|
|
1021
|
-
}
|
|
1022
|
-
|
|
1023
|
-
# Auto-detect if not provided
|
|
1024
|
-
if modality is None:
|
|
1025
|
-
if isinstance(query, str):
|
|
1026
|
-
modality = "caption"
|
|
1027
|
-
elif isinstance(query, Image.Image):
|
|
1028
|
-
modality = "image"
|
|
1029
|
-
elif isinstance(query, list):
|
|
1030
|
-
modality = "image"
|
|
1031
|
-
else:
|
|
1032
|
-
raise VectorStoreClientError(f"Unsupported query type: {type(query)}")
|
|
1033
|
-
|
|
1034
|
-
modality = modality.lower()
|
|
1035
|
-
if modality not in field_map:
|
|
1036
|
-
raise VectorStoreClientError(f"Unknown modality '{modality}'")
|
|
1037
|
-
|
|
1038
|
-
vector_field, encoder = field_map[modality]
|
|
1039
|
-
vec = encoder(query)
|
|
1040
|
-
|
|
1041
|
-
# 1️⃣ ANN search
|
|
1042
|
-
hits = self.vector_file_search_raw(
|
|
1043
|
-
vector_store_id=vector_store_id,
|
|
1044
|
-
query_text=vec,
|
|
1045
|
-
top_k=k,
|
|
1046
|
-
filters=None,
|
|
1047
|
-
vector_store_host=vector_store_host,
|
|
1048
|
-
vector_field=vector_field,
|
|
1049
|
-
)
|
|
1050
|
-
|
|
1051
|
-
# 2️⃣ Rerank for text-based modalities
|
|
1052
|
-
if modality in ("caption", "region"):
|
|
1053
|
-
hits = reranker.rerank(
|
|
1054
|
-
query if isinstance(query, str) else "", hits, top_k=min(len(hits), k)
|
|
1055
|
-
)
|
|
1056
|
-
|
|
1057
|
-
# 3️⃣ Normalize and return
|
|
1058
|
-
return self._normalise_hits(hits)
|