projectdavid 1.33.15__py3-none-any.whl → 1.33.16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of projectdavid might be problematic. Click here for more details.
- projectdavid/clients/vectors.py +22 -234
- {projectdavid-1.33.15.dist-info → projectdavid-1.33.16.dist-info}/METADATA +1 -1
- {projectdavid-1.33.15.dist-info → projectdavid-1.33.16.dist-info}/RECORD +6 -6
- {projectdavid-1.33.15.dist-info → projectdavid-1.33.16.dist-info}/WHEEL +0 -0
- {projectdavid-1.33.15.dist-info → projectdavid-1.33.16.dist-info}/licenses/LICENSE +0 -0
- {projectdavid-1.33.15.dist-info → projectdavid-1.33.16.dist-info}/top_level.txt +0 -0
projectdavid/clients/vectors.py
CHANGED
|
@@ -13,10 +13,8 @@ from typing import Any, Dict, List, Optional, Union
|
|
|
13
13
|
|
|
14
14
|
import httpx
|
|
15
15
|
from dotenv import load_dotenv
|
|
16
|
-
from PIL import Image
|
|
17
16
|
from projectdavid_common import UtilsInterface, ValidationInterface
|
|
18
17
|
from pydantic import BaseModel, Field
|
|
19
|
-
from qdrant_client.http import models as qdrant
|
|
20
18
|
|
|
21
19
|
from projectdavid.clients.file_processor import FileProcessor
|
|
22
20
|
from projectdavid.clients.vector_store_manager import VectorStoreManager
|
|
@@ -187,15 +185,12 @@ class VectorStoreClient:
|
|
|
187
185
|
vector_size: int,
|
|
188
186
|
distance_metric: str,
|
|
189
187
|
config: Optional[Dict[str, Any]],
|
|
190
|
-
vectors_config: Optional[Dict[str, qdrant.VectorParams]] = None, # ← NEW
|
|
191
188
|
) -> ValidationInterface.VectorStoreRead:
|
|
192
189
|
shared_id = self.identifier_service.generate_vector_id()
|
|
193
|
-
# forward multi-vector schema if given
|
|
194
190
|
self.vector_manager.create_store(
|
|
195
191
|
collection_name=shared_id,
|
|
196
192
|
vector_size=vector_size,
|
|
197
193
|
distance=distance_metric.upper(),
|
|
198
|
-
vectors_config=vectors_config,
|
|
199
194
|
)
|
|
200
195
|
|
|
201
196
|
payload = {
|
|
@@ -208,6 +203,10 @@ class VectorStoreClient:
|
|
|
208
203
|
resp = await self._request("POST", "/v1/vector-stores", json=payload)
|
|
209
204
|
return ValidationInterface.VectorStoreRead.model_validate(resp)
|
|
210
205
|
|
|
206
|
+
async def _list_my_vs_async(self) -> List[ValidationInterface.VectorStoreRead]:
|
|
207
|
+
resp = await self._request("GET", "/v1/vector-stores")
|
|
208
|
+
return [ValidationInterface.VectorStoreRead.model_validate(r) for r in resp]
|
|
209
|
+
|
|
211
210
|
# ------------------------------------------------------------------ #
|
|
212
211
|
# NEW admin‑aware creation helper
|
|
213
212
|
# ------------------------------------------------------------------ #
|
|
@@ -218,17 +217,13 @@ class VectorStoreClient:
|
|
|
218
217
|
vector_size: int,
|
|
219
218
|
distance_metric: str,
|
|
220
219
|
config: Optional[Dict[str, Any]],
|
|
221
|
-
vectors_config: Optional[Dict[str, qdrant.VectorParams]] = None, # ← NEW
|
|
222
220
|
) -> ValidationInterface.VectorStoreRead:
|
|
223
221
|
shared_id = self.identifier_service.generate_vector_id()
|
|
224
|
-
# forward multi-vector schema if given
|
|
225
222
|
self.vector_manager.create_store(
|
|
226
223
|
collection_name=shared_id,
|
|
227
224
|
vector_size=vector_size,
|
|
228
225
|
distance=distance_metric.upper(),
|
|
229
|
-
vectors_config=vectors_config,
|
|
230
226
|
)
|
|
231
|
-
|
|
232
227
|
payload = {
|
|
233
228
|
"shared_id": shared_id,
|
|
234
229
|
"name": name,
|
|
@@ -236,6 +231,7 @@ class VectorStoreClient:
|
|
|
236
231
|
"distance_metric": distance_metric.upper(),
|
|
237
232
|
"config": config or {},
|
|
238
233
|
}
|
|
234
|
+
# pass owner_id as query‑param (backend enforces admin‑only)
|
|
239
235
|
resp = await self._request(
|
|
240
236
|
"POST",
|
|
241
237
|
"/v1/vector-stores",
|
|
@@ -291,20 +287,12 @@ class VectorStoreClient:
|
|
|
291
287
|
async def _search_vs_async(
|
|
292
288
|
self,
|
|
293
289
|
vector_store_id: str,
|
|
294
|
-
query_text:
|
|
290
|
+
query_text: str,
|
|
295
291
|
top_k: int,
|
|
296
292
|
filters: Optional[Dict] = None,
|
|
297
293
|
vector_store_host: Optional[str] = None,
|
|
298
|
-
vector_field: Optional[str] = None, # allow caller override
|
|
299
294
|
) -> List[Dict[str, Any]]:
|
|
300
|
-
"""
|
|
301
|
-
Internal: run ANN search against the specified vector field or auto-detect by store size.
|
|
302
295
|
|
|
303
|
-
If `vector_field` is provided, it will be used directly. Otherwise:
|
|
304
|
-
• 1024-D → caption_vector
|
|
305
|
-
• 3-D → geo_vector
|
|
306
|
-
• others → default vector (text)
|
|
307
|
-
"""
|
|
308
296
|
# pick local vs. override host
|
|
309
297
|
vector_manager = (
|
|
310
298
|
VectorStoreManager(vector_store_host=vector_store_host)
|
|
@@ -312,36 +300,16 @@ class VectorStoreClient:
|
|
|
312
300
|
else self.vector_manager
|
|
313
301
|
)
|
|
314
302
|
|
|
315
|
-
# fetch store info to inspect schema
|
|
316
303
|
store = self.retrieve_vector_store_sync(vector_store_id)
|
|
317
304
|
|
|
318
|
-
#
|
|
319
|
-
if
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
# auto-detect based on stored vector dimensionality
|
|
327
|
-
if store.vector_size == 1024:
|
|
328
|
-
# image/caption space
|
|
329
|
-
vec = self.file_processor.encode_clip_text(query_text).tolist()
|
|
330
|
-
vector_field = "caption_vector"
|
|
331
|
-
elif store.vector_size == 3:
|
|
332
|
-
# geo space; query_text must be a raw 3-D list
|
|
333
|
-
if not isinstance(query_text, list):
|
|
334
|
-
raise VectorStoreClientError(
|
|
335
|
-
"Geo search requires a 3-element vector; pass raw unit-sphere list"
|
|
336
|
-
)
|
|
337
|
-
vec = query_text
|
|
338
|
-
vector_field = "geo_vector"
|
|
339
|
-
else:
|
|
340
|
-
# fallback to text embedding
|
|
341
|
-
vec = self.file_processor.encode_text(query_text).tolist()
|
|
342
|
-
vector_field = None # use default
|
|
343
|
-
|
|
344
|
-
# perform the search on the selected vector column
|
|
305
|
+
# 🔶 choose encoder by vector_size
|
|
306
|
+
if store.vector_size == 1024: # images collection
|
|
307
|
+
vec = self.file_processor.encode_clip_text(query_text).tolist()
|
|
308
|
+
vector_field = "caption_vector" # field name in Qdrant
|
|
309
|
+
else: # 384-D text collection
|
|
310
|
+
vec = self.file_processor.encode_text(query_text).tolist()
|
|
311
|
+
vector_field = None # default field
|
|
312
|
+
|
|
345
313
|
return vector_manager.query_store(
|
|
346
314
|
store_name=store.collection_name,
|
|
347
315
|
query_vector=vec,
|
|
@@ -474,92 +442,10 @@ class VectorStoreClient:
|
|
|
474
442
|
vector_size: int = 384,
|
|
475
443
|
distance_metric: str = "Cosine",
|
|
476
444
|
config: Optional[Dict[str, Any]] = None,
|
|
477
|
-
vectors_config: Optional[Dict[str, qdrant.VectorParams]] = None, # ← NEW
|
|
478
445
|
) -> ValidationInterface.VectorStoreRead:
|
|
479
|
-
"""
|
|
480
|
-
Create a new store owned by this API key.
|
|
481
|
-
|
|
482
|
-
If `vectors_config` is provided, it should map each vector
|
|
483
|
-
field name to its Qdrant VectorParams (size + distance).
|
|
484
|
-
"""
|
|
446
|
+
"""Create a new store owned by *this* API key."""
|
|
485
447
|
return self._run_sync(
|
|
486
|
-
self._create_vs_async(
|
|
487
|
-
name,
|
|
488
|
-
vector_size,
|
|
489
|
-
distance_metric,
|
|
490
|
-
config,
|
|
491
|
-
vectors_config,
|
|
492
|
-
)
|
|
493
|
-
)
|
|
494
|
-
|
|
495
|
-
def create_vector_vision_store(
|
|
496
|
-
self,
|
|
497
|
-
name: str = "vision",
|
|
498
|
-
):
|
|
499
|
-
|
|
500
|
-
vectors_config = {
|
|
501
|
-
# Raw visual embeddings (OpenCLIP ViT-H/14 → 1024-D)
|
|
502
|
-
"image_vector": qdrant.VectorParams(
|
|
503
|
-
size=1024, distance=qdrant.Distance.COSINE
|
|
504
|
-
),
|
|
505
|
-
# Language embeddings of your BLIP-2 captions → 1024-D
|
|
506
|
-
"caption_vector": qdrant.VectorParams(
|
|
507
|
-
size=1024, distance=qdrant.Distance.COSINE
|
|
508
|
-
),
|
|
509
|
-
# Object-region embeddings (YOLO crop + Sentence-BERT) → 1024-D
|
|
510
|
-
"region_vector": qdrant.VectorParams(
|
|
511
|
-
size=1024, distance=qdrant.Distance.COSINE
|
|
512
|
-
),
|
|
513
|
-
# Geo-location unit vectors (RegioNet) → 3-D
|
|
514
|
-
"geo_vector": qdrant.VectorParams(size=3, distance=qdrant.Distance.COSINE),
|
|
515
|
-
}
|
|
516
|
-
|
|
517
|
-
return self.create_vector_store(name=name, vectors_config=vectors_config)
|
|
518
|
-
|
|
519
|
-
def create_vector_vision_store_for_user(
|
|
520
|
-
self,
|
|
521
|
-
owner_id: str,
|
|
522
|
-
name: str,
|
|
523
|
-
*,
|
|
524
|
-
vector_size: int = 384,
|
|
525
|
-
distance_metric: str = "Cosine",
|
|
526
|
-
config: Optional[Dict[str, Any]] = None,
|
|
527
|
-
vectors_config: Optional[Dict[str, qdrant.VectorParams]] = None, # ← NEW
|
|
528
|
-
) -> ValidationInterface.VectorStoreRead:
|
|
529
|
-
"""
|
|
530
|
-
Admin-only: create a store on behalf of another user.
|
|
531
|
-
Pass `vectors_config` to define a multi-vector schema.
|
|
532
|
-
"""
|
|
533
|
-
if not vectors_config:
|
|
534
|
-
|
|
535
|
-
vectors_config = {
|
|
536
|
-
# Raw visual embeddings (OpenCLIP ViT-H/14 → 1024-D)
|
|
537
|
-
"image_vector": qdrant.VectorParams(
|
|
538
|
-
size=1024, distance=qdrant.Distance.COSINE
|
|
539
|
-
),
|
|
540
|
-
# Language embeddings of your BLIP-2 captions → 1024-D
|
|
541
|
-
"caption_vector": qdrant.VectorParams(
|
|
542
|
-
size=1024, distance=qdrant.Distance.COSINE
|
|
543
|
-
),
|
|
544
|
-
# Object-region embeddings (YOLO crop + Sentence-BERT) → 1024-D
|
|
545
|
-
"region_vector": qdrant.VectorParams(
|
|
546
|
-
size=1024, distance=qdrant.Distance.COSINE
|
|
547
|
-
),
|
|
548
|
-
# Geo-location unit vectors (RegioNet) → 3-D
|
|
549
|
-
"geo_vector": qdrant.VectorParams(
|
|
550
|
-
size=3, distance=qdrant.Distance.COSINE
|
|
551
|
-
),
|
|
552
|
-
}
|
|
553
|
-
|
|
554
|
-
return self._run_sync(
|
|
555
|
-
self._create_vs_for_user_async(
|
|
556
|
-
owner_id,
|
|
557
|
-
name,
|
|
558
|
-
vector_size,
|
|
559
|
-
distance_metric,
|
|
560
|
-
config,
|
|
561
|
-
vectors_config,
|
|
562
|
-
)
|
|
448
|
+
self._create_vs_async(name, vector_size, distance_metric, config)
|
|
563
449
|
)
|
|
564
450
|
|
|
565
451
|
def create_vector_store_for_user(
|
|
@@ -570,20 +456,16 @@ class VectorStoreClient:
|
|
|
570
456
|
vector_size: int = 384,
|
|
571
457
|
distance_metric: str = "Cosine",
|
|
572
458
|
config: Optional[Dict[str, Any]] = None,
|
|
573
|
-
vectors_config: Optional[Dict[str, qdrant.VectorParams]] = None, # ← NEW
|
|
574
459
|
) -> ValidationInterface.VectorStoreRead:
|
|
575
460
|
"""
|
|
576
|
-
Admin
|
|
577
|
-
|
|
461
|
+
**Admin‑only** helper → create a store on behalf of *owner_id*.
|
|
462
|
+
|
|
463
|
+
The caller’s API‑key must belong to an admin; otherwise the
|
|
464
|
+
request will be rejected by the server with HTTP 403.
|
|
578
465
|
"""
|
|
579
466
|
return self._run_sync(
|
|
580
467
|
self._create_vs_for_user_async(
|
|
581
|
-
owner_id,
|
|
582
|
-
name,
|
|
583
|
-
vector_size,
|
|
584
|
-
distance_metric,
|
|
585
|
-
config,
|
|
586
|
-
vectors_config,
|
|
468
|
+
owner_id, name, vector_size, distance_metric, config
|
|
587
469
|
)
|
|
588
470
|
)
|
|
589
471
|
|
|
@@ -762,16 +644,10 @@ class VectorStoreClient:
|
|
|
762
644
|
top_k: int = 5,
|
|
763
645
|
filters: Optional[Dict] = None,
|
|
764
646
|
vector_store_host: Optional[str] = None,
|
|
765
|
-
vector_field: Optional[str] = None, # ← NEW
|
|
766
647
|
) -> List[Dict[str, Any]]:
|
|
767
648
|
return self._run_sync(
|
|
768
649
|
self._search_vs_async(
|
|
769
|
-
vector_store_id,
|
|
770
|
-
query_text,
|
|
771
|
-
top_k,
|
|
772
|
-
filters,
|
|
773
|
-
vector_store_host,
|
|
774
|
-
vector_field,
|
|
650
|
+
vector_store_id, query_text, top_k, filters, vector_store_host
|
|
775
651
|
)
|
|
776
652
|
)
|
|
777
653
|
|
|
@@ -935,91 +811,3 @@ class VectorStoreClient:
|
|
|
935
811
|
hits = self._normalise_hits(hits)
|
|
936
812
|
|
|
937
813
|
return hits
|
|
938
|
-
|
|
939
|
-
def image_similarity_search(
|
|
940
|
-
self,
|
|
941
|
-
vector_store_id: str,
|
|
942
|
-
img: Image.Image,
|
|
943
|
-
k: int = 10,
|
|
944
|
-
vector_store_host: Optional[str] = None,
|
|
945
|
-
) -> List[Dict[str, Any]]:
|
|
946
|
-
vec = self.file_processor.encode_image(img).tolist()
|
|
947
|
-
return self.vector_file_search_raw(
|
|
948
|
-
vector_store_id=vector_store_id,
|
|
949
|
-
query_text=vec,
|
|
950
|
-
top_k=k,
|
|
951
|
-
filters=None,
|
|
952
|
-
vector_store_host=vector_store_host,
|
|
953
|
-
vector_field="image_vector",
|
|
954
|
-
)
|
|
955
|
-
|
|
956
|
-
def search_images(
|
|
957
|
-
self,
|
|
958
|
-
vector_store_id: str,
|
|
959
|
-
query: Union[str, Image.Image, List[float]],
|
|
960
|
-
*,
|
|
961
|
-
modality: Optional[str] = None,
|
|
962
|
-
k: int = 10,
|
|
963
|
-
vector_store_host: Optional[str] = None,
|
|
964
|
-
) -> List[Dict[str, Any]]:
|
|
965
|
-
"""
|
|
966
|
-
Unified image search across multiple modalities, with appropriate reranking:
|
|
967
|
-
|
|
968
|
-
- If `query` is a str → caption search (reranked)
|
|
969
|
-
- If `query` is a PIL.Image.Image → visual search (no rerank)
|
|
970
|
-
- If `query` is a list[float] → raw vector search
|
|
971
|
-
- `modality` override: one of 'caption', 'image', 'region', 'geo'
|
|
972
|
-
"""
|
|
973
|
-
# Map modality to (vector_field, encoder)
|
|
974
|
-
field_map = {
|
|
975
|
-
"caption": (
|
|
976
|
-
"caption_vector",
|
|
977
|
-
lambda q: self.file_processor.encode_clip_text(q).tolist(),
|
|
978
|
-
),
|
|
979
|
-
"image": (
|
|
980
|
-
"image_vector",
|
|
981
|
-
lambda q: self.file_processor.encode_image(q).tolist(),
|
|
982
|
-
),
|
|
983
|
-
"region": (
|
|
984
|
-
"region_vector",
|
|
985
|
-
lambda q: self.file_processor.encode_text(q).tolist(),
|
|
986
|
-
),
|
|
987
|
-
"geo": ("geo_vector", lambda q: q), # assume q is raw 3-D vector
|
|
988
|
-
}
|
|
989
|
-
|
|
990
|
-
# Auto-detect if not provided
|
|
991
|
-
if modality is None:
|
|
992
|
-
if isinstance(query, str):
|
|
993
|
-
modality = "caption"
|
|
994
|
-
elif isinstance(query, Image.Image):
|
|
995
|
-
modality = "image"
|
|
996
|
-
elif isinstance(query, list):
|
|
997
|
-
modality = "image"
|
|
998
|
-
else:
|
|
999
|
-
raise VectorStoreClientError(f"Unsupported query type: {type(query)}")
|
|
1000
|
-
|
|
1001
|
-
modality = modality.lower()
|
|
1002
|
-
if modality not in field_map:
|
|
1003
|
-
raise VectorStoreClientError(f"Unknown modality '{modality}'")
|
|
1004
|
-
|
|
1005
|
-
vector_field, encoder = field_map[modality]
|
|
1006
|
-
vec = encoder(query)
|
|
1007
|
-
|
|
1008
|
-
# 1️⃣ ANN search
|
|
1009
|
-
hits = self.vector_file_search_raw(
|
|
1010
|
-
vector_store_id=vector_store_id,
|
|
1011
|
-
query_text=vec,
|
|
1012
|
-
top_k=k,
|
|
1013
|
-
filters=None,
|
|
1014
|
-
vector_store_host=vector_store_host,
|
|
1015
|
-
vector_field=vector_field,
|
|
1016
|
-
)
|
|
1017
|
-
|
|
1018
|
-
# 2️⃣ Rerank for text-based modalities
|
|
1019
|
-
if modality in ("caption", "region"):
|
|
1020
|
-
hits = reranker.rerank(
|
|
1021
|
-
query if isinstance(query, str) else "", hits, top_k=min(len(hits), k)
|
|
1022
|
-
)
|
|
1023
|
-
|
|
1024
|
-
# 3️⃣ Normalize and return
|
|
1025
|
-
return self._normalise_hits(hits)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: projectdavid
|
|
3
|
-
Version: 1.33.15
|
|
3
|
+
Version: 1.33.16
|
|
4
4
|
Summary: Python SDK for interacting with the Entities Assistant API.
|
|
5
5
|
Author-email: Francis Neequaye Armah <francis.neequaye@projectdavid.co.uk>
|
|
6
6
|
License: PolyForm Noncommercial License 1.0.0
|
|
@@ -21,7 +21,7 @@ projectdavid/clients/threads_client.py,sha256=ekzU5w14zftmtmFkiec3NC90Of-_KVSUY1
|
|
|
21
21
|
projectdavid/clients/tools_client.py,sha256=GkCVOmwpAoPqVt6aYmH0G1HIFha3iEwR9IIf9teR0j8,11487
|
|
22
22
|
projectdavid/clients/users_client.py,sha256=eCuUb9qvyH1GUFhZu6TRL9zdoK-qzHSs8-Vmrk_0mmg,13729
|
|
23
23
|
projectdavid/clients/vector_store_manager.py,sha256=q-ZgRQVX_S3nMrKYhmvkVrDjDRzM3ZFzUF55HBGRTe8,12861
|
|
24
|
-
projectdavid/clients/vectors.py,sha256=
|
|
24
|
+
projectdavid/clients/vectors.py,sha256=ubkB4rzcfDSYHFwOpa99Y4nHSJan_0jKnzyCYFJ15ck,31665
|
|
25
25
|
projectdavid/clients/vision-file_processor.py,sha256=19ft9IUeY5x9_22vC4JqndiFlpDYyUn6z1ygv-EV2NE,16852
|
|
26
26
|
projectdavid/clients/vision_vectors.py,sha256=cysPVbUzW3byB82MTqG2X1Iz5ZAe82WTS1JfQcoqVhE,40229
|
|
27
27
|
projectdavid/constants/platform.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -37,8 +37,8 @@ projectdavid/utils/monitor_launcher.py,sha256=3YAgJdeuaUvq3JGvpA4ymqFsAnk29nH5q9
|
|
|
37
37
|
projectdavid/utils/peek_gate.py,sha256=5whMRnDOQjATRpThWDJkvY9ScXuJ7Sd_-9rvGgXeTAQ,2532
|
|
38
38
|
projectdavid/utils/run_monitor.py,sha256=F_WkqIP-qnWH-4llIbileWWLfRj2Q1Cg-ni23SR1rec,3786
|
|
39
39
|
projectdavid/utils/vector_search_formatter.py,sha256=YTe3HPGec26qGY7uxY8_GS8lc4QaN6aNXMzkl29nZpI,1735
|
|
40
|
-
projectdavid-1.33.15.dist-info/licenses/LICENSE,sha256=_8yjiEGttpS284BkfhXxfERqTRZW_tUaHiBB0GTJTMg,4563
|
|
41
|
-
projectdavid-1.33.
|
|
42
|
-
projectdavid-1.33.15.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
43
|
-
projectdavid-1.33.15.dist-info/top_level.txt,sha256=kil8GU4s7qYRfNnzGnFHhZnSNRSxgNG-J4HLgQMmMtw,13
|
|
44
|
-
projectdavid-1.33.15.dist-info/RECORD,,
|
|
40
|
+
projectdavid-1.33.16.dist-info/licenses/LICENSE,sha256=_8yjiEGttpS284BkfhXxfERqTRZW_tUaHiBB0GTJTMg,4563
|
|
41
|
+
projectdavid-1.33.16.dist-info/METADATA,sha256=Yw29rZgN6eU8PjU0QxKmeluLMzSk5ZCoJL4jo88J0IM,11555
|
|
42
|
+
projectdavid-1.33.16.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
43
|
+
projectdavid-1.33.16.dist-info/top_level.txt,sha256=kil8GU4s7qYRfNnzGnFHhZnSNRSxgNG-J4HLgQMmMtw,13
|
|
44
|
+
projectdavid-1.33.16.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|