hammad-python 0.0.30__py3-none-any.whl → 0.0.31__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ham/__init__.py +10 -0
- {hammad_python-0.0.30.dist-info → hammad_python-0.0.31.dist-info}/METADATA +6 -32
- hammad_python-0.0.31.dist-info/RECORD +6 -0
- hammad/__init__.py +0 -84
- hammad/_internal.py +0 -256
- hammad/_main.py +0 -226
- hammad/cache/__init__.py +0 -40
- hammad/cache/base_cache.py +0 -181
- hammad/cache/cache.py +0 -169
- hammad/cache/decorators.py +0 -261
- hammad/cache/file_cache.py +0 -80
- hammad/cache/ttl_cache.py +0 -74
- hammad/cli/__init__.py +0 -33
- hammad/cli/animations.py +0 -573
- hammad/cli/plugins.py +0 -867
- hammad/cli/styles/__init__.py +0 -55
- hammad/cli/styles/settings.py +0 -139
- hammad/cli/styles/types.py +0 -358
- hammad/cli/styles/utils.py +0 -634
- hammad/data/__init__.py +0 -90
- hammad/data/collections/__init__.py +0 -49
- hammad/data/collections/collection.py +0 -326
- hammad/data/collections/indexes/__init__.py +0 -37
- hammad/data/collections/indexes/qdrant/__init__.py +0 -1
- hammad/data/collections/indexes/qdrant/index.py +0 -723
- hammad/data/collections/indexes/qdrant/settings.py +0 -94
- hammad/data/collections/indexes/qdrant/utils.py +0 -210
- hammad/data/collections/indexes/tantivy/__init__.py +0 -1
- hammad/data/collections/indexes/tantivy/index.py +0 -426
- hammad/data/collections/indexes/tantivy/settings.py +0 -40
- hammad/data/collections/indexes/tantivy/utils.py +0 -176
- hammad/data/configurations/__init__.py +0 -35
- hammad/data/configurations/configuration.py +0 -564
- hammad/data/models/__init__.py +0 -50
- hammad/data/models/extensions/__init__.py +0 -4
- hammad/data/models/extensions/pydantic/__init__.py +0 -42
- hammad/data/models/extensions/pydantic/converters.py +0 -759
- hammad/data/models/fields.py +0 -546
- hammad/data/models/model.py +0 -1078
- hammad/data/models/utils.py +0 -280
- hammad/data/sql/__init__.py +0 -24
- hammad/data/sql/database.py +0 -576
- hammad/data/sql/types.py +0 -127
- hammad/data/types/__init__.py +0 -75
- hammad/data/types/file.py +0 -431
- hammad/data/types/multimodal/__init__.py +0 -36
- hammad/data/types/multimodal/audio.py +0 -200
- hammad/data/types/multimodal/image.py +0 -182
- hammad/data/types/text.py +0 -1308
- hammad/formatting/__init__.py +0 -33
- hammad/formatting/json/__init__.py +0 -27
- hammad/formatting/json/converters.py +0 -158
- hammad/formatting/text/__init__.py +0 -63
- hammad/formatting/text/converters.py +0 -723
- hammad/formatting/text/markdown.py +0 -131
- hammad/formatting/yaml/__init__.py +0 -26
- hammad/formatting/yaml/converters.py +0 -5
- hammad/genai/__init__.py +0 -217
- hammad/genai/a2a/__init__.py +0 -32
- hammad/genai/a2a/workers.py +0 -552
- hammad/genai/agents/__init__.py +0 -59
- hammad/genai/agents/agent.py +0 -1973
- hammad/genai/agents/run.py +0 -1024
- hammad/genai/agents/types/__init__.py +0 -42
- hammad/genai/agents/types/agent_context.py +0 -13
- hammad/genai/agents/types/agent_event.py +0 -128
- hammad/genai/agents/types/agent_hooks.py +0 -220
- hammad/genai/agents/types/agent_messages.py +0 -31
- hammad/genai/agents/types/agent_response.py +0 -125
- hammad/genai/agents/types/agent_stream.py +0 -327
- hammad/genai/graphs/__init__.py +0 -125
- hammad/genai/graphs/_utils.py +0 -190
- hammad/genai/graphs/base.py +0 -1828
- hammad/genai/graphs/plugins.py +0 -316
- hammad/genai/graphs/types.py +0 -638
- hammad/genai/models/__init__.py +0 -1
- hammad/genai/models/embeddings/__init__.py +0 -43
- hammad/genai/models/embeddings/model.py +0 -226
- hammad/genai/models/embeddings/run.py +0 -163
- hammad/genai/models/embeddings/types/__init__.py +0 -37
- hammad/genai/models/embeddings/types/embedding_model_name.py +0 -75
- hammad/genai/models/embeddings/types/embedding_model_response.py +0 -76
- hammad/genai/models/embeddings/types/embedding_model_run_params.py +0 -66
- hammad/genai/models/embeddings/types/embedding_model_settings.py +0 -47
- hammad/genai/models/language/__init__.py +0 -57
- hammad/genai/models/language/model.py +0 -1098
- hammad/genai/models/language/run.py +0 -878
- hammad/genai/models/language/types/__init__.py +0 -40
- hammad/genai/models/language/types/language_model_instructor_mode.py +0 -47
- hammad/genai/models/language/types/language_model_messages.py +0 -28
- hammad/genai/models/language/types/language_model_name.py +0 -239
- hammad/genai/models/language/types/language_model_request.py +0 -127
- hammad/genai/models/language/types/language_model_response.py +0 -217
- hammad/genai/models/language/types/language_model_response_chunk.py +0 -56
- hammad/genai/models/language/types/language_model_settings.py +0 -89
- hammad/genai/models/language/types/language_model_stream.py +0 -600
- hammad/genai/models/language/utils/__init__.py +0 -28
- hammad/genai/models/language/utils/requests.py +0 -421
- hammad/genai/models/language/utils/structured_outputs.py +0 -135
- hammad/genai/models/model_provider.py +0 -4
- hammad/genai/models/multimodal.py +0 -47
- hammad/genai/models/reranking.py +0 -26
- hammad/genai/types/__init__.py +0 -1
- hammad/genai/types/base.py +0 -215
- hammad/genai/types/history.py +0 -290
- hammad/genai/types/tools.py +0 -507
- hammad/logging/__init__.py +0 -35
- hammad/logging/decorators.py +0 -834
- hammad/logging/logger.py +0 -1018
- hammad/mcp/__init__.py +0 -53
- hammad/mcp/client/__init__.py +0 -35
- hammad/mcp/client/client.py +0 -624
- hammad/mcp/client/client_service.py +0 -400
- hammad/mcp/client/settings.py +0 -178
- hammad/mcp/servers/__init__.py +0 -26
- hammad/mcp/servers/launcher.py +0 -1161
- hammad/runtime/__init__.py +0 -32
- hammad/runtime/decorators.py +0 -142
- hammad/runtime/run.py +0 -299
- hammad/service/__init__.py +0 -49
- hammad/service/create.py +0 -527
- hammad/service/decorators.py +0 -283
- hammad/types.py +0 -288
- hammad/typing/__init__.py +0 -435
- hammad/web/__init__.py +0 -43
- hammad/web/http/__init__.py +0 -1
- hammad/web/http/client.py +0 -944
- hammad/web/models.py +0 -275
- hammad/web/openapi/__init__.py +0 -1
- hammad/web/openapi/client.py +0 -740
- hammad/web/search/__init__.py +0 -1
- hammad/web/search/client.py +0 -1023
- hammad/web/utils.py +0 -472
- hammad_python-0.0.30.dist-info/RECORD +0 -135
- {hammad → ham}/py.typed +0 -0
- {hammad_python-0.0.30.dist-info → hammad_python-0.0.31.dist-info}/WHEEL +0 -0
- {hammad_python-0.0.30.dist-info → hammad_python-0.0.31.dist-info}/licenses/LICENSE +0 -0
@@ -1,723 +0,0 @@
|
|
1
|
-
"""hammad.data.collections.indexes.qdrant.index"""
|
2
|
-
|
3
|
-
from datetime import datetime, timezone, timedelta
|
4
|
-
from typing import (
|
5
|
-
Any,
|
6
|
-
Callable,
|
7
|
-
Dict,
|
8
|
-
List,
|
9
|
-
Optional,
|
10
|
-
Type,
|
11
|
-
Union,
|
12
|
-
final,
|
13
|
-
TYPE_CHECKING,
|
14
|
-
Tuple,
|
15
|
-
NamedTuple,
|
16
|
-
)
|
17
|
-
|
18
|
-
if TYPE_CHECKING:
|
19
|
-
from .....genai.models.embeddings.types import EmbeddingModelName
|
20
|
-
# import uuid # Unused import
|
21
|
-
from pathlib import Path
|
22
|
-
import json
|
23
|
-
|
24
|
-
from ....sql.types import (
|
25
|
-
DatabaseItemType,
|
26
|
-
DatabaseItemFilters,
|
27
|
-
DatabaseItem,
|
28
|
-
)
|
29
|
-
from ....sql.database import Database
|
30
|
-
from . import utils
|
31
|
-
from .settings import (
|
32
|
-
QdrantCollectionIndexSettings,
|
33
|
-
QdrantCollectionIndexQuerySettings,
|
34
|
-
DistanceMetric,
|
35
|
-
)
|
36
|
-
|
37
|
-
|
38
|
-
class VectorSearchResult(NamedTuple):
    """A single hit returned by a vector similarity search.

    Pairs the stored database record with the similarity score the
    vector store assigned to it for the query.
    """

    # The database record that matched the query.
    item: "DatabaseItem[DatabaseItemType]"
    # Similarity score reported by the vector store (higher = more similar).
    score: float
|
43
|
-
|
44
|
-
|
45
|
-
__all__ = (
|
46
|
-
"QdrantCollectionIndex",
|
47
|
-
"VectorSearchResult",
|
48
|
-
)
|
49
|
-
|
50
|
-
|
51
|
-
@final
class QdrantCollectionIndex:
    """A vector collection index that uses Qdrant for vector storage
    and similarity search, with SQL Database as the primary storage backend.

    This collection index provides vector-based functionality for storing
    embeddings and performing semantic similarity searches while using
    the Database class for reliable data persistence.
    """

    def __init__(
        self,
        *,
        # All constructor parameters are keyword-only.
        name: str = "default",
        vector_size: Optional[int] = None,
        schema: Optional[Type[DatabaseItemType]] = None,
        ttl: Optional[int] = None,
        path: Optional[Path | str] = None,
        distance_metric: DistanceMetric = "dot",
        settings: Optional[QdrantCollectionIndexSettings] = None,
        query_settings: Optional[QdrantCollectionIndexQuerySettings] = None,
        embedding_model: Optional["EmbeddingModelName"] = None,
        embedding_dimensions: Optional[int] = None,
        embedding_api_key: Optional[str] = None,
        embedding_base_url: Optional[str] = None,
        rerank_model: Optional[str] = None,
        rerank_api_key: Optional[str] = None,
        rerank_base_url: Optional[str] = None,
    ) -> None:
        """
        Initialize a new QdrantCollectionIndex.

        Args:
            name: The name of the index.
            vector_size: Size/dimension of the vectors to store.
            schema: Optional schema type for validation.
            ttl: The time to live for items in this index.
            path: The path where the index will be stored.
            distance_metric: Distance metric for similarity search.
            settings: Settings for Qdrant configuration.
            query_settings: Settings for query behavior.
            embedding_model: The embedding model to use (e.g., 'openai/text-embedding-3-small').
            embedding_dimensions: Number of dimensions for embeddings.
            embedding_api_key: API key for the embedding service.
            embedding_base_url: Base URL for the embedding service.
            rerank_model: The rerank model to use (e.g., 'cohere/rerank-english-v3.0').
            rerank_api_key: API key for the rerank service.
            rerank_base_url: Base URL for the rerank service.
        """
        self.name = name
        self.vector_size = vector_size
        # Tracks whether the vector dimension is known yet; when it is not,
        # the Qdrant client is created lazily on the first embedding/vector
        # seen (see _determine_vector_size).
        self._vector_size_determined = vector_size is not None
        self.schema = schema
        self.ttl = ttl
        self.embedding_model = embedding_model
        self.embedding_dimensions = embedding_dimensions
        self.embedding_api_key = embedding_api_key
        self.embedding_base_url = embedding_base_url
        # Built lazily by _get_embedding_function on first use.
        self._embedding_function = None

        # Rerank model configuration
        self.rerank_model = rerank_model
        self.rerank_api_key = rerank_api_key
        self.rerank_base_url = rerank_base_url

        # Normalize a string path into a Path object.
        if path is not None and not isinstance(path, Path):
            path = Path(path)

        self.path = path

        # Create settings with vector_size and distance_metric
        if not settings:
            qdrant_path = None
            if self.path is not None:
                qdrant_path = str(self.path / f"{name}_qdrant")

            settings = QdrantCollectionIndexSettings(
                vector_size=vector_size or 768,  # Default fallback
                distance_metric=distance_metric,
                path=qdrant_path,
            )

        if not query_settings:
            query_settings = QdrantCollectionIndexQuerySettings()

        self.settings = settings
        self.query_settings = query_settings

        # Initialize SQL Database as primary storage backend
        database_path = None
        if self.path is not None:
            database_path = self.path / f"{name}.db"

        self._database = Database[DatabaseItemType](
            name=name,
            schema=schema,
            ttl=ttl,
            path=database_path,
            table_name=f"qdrant_{name}",
        )

        # Initialize Qdrant client (lazily to handle import errors gracefully)
        self._client = None
        self._client_wrapper = None
        # Only initialize if vector_size is determined
        if self._vector_size_determined:
            self._init_qdrant_client()
|
158
|
-
|
159
|
-
def _init_qdrant_client(self) -> None:
    """Set up the Qdrant client, wrapper, and backing collection.

    On a Qdrant-specific failure all client attributes are reset to
    ``None`` so the index degrades gracefully to SQL-only storage.
    """
    try:
        self._client = utils.create_qdrant_client(self.settings)
        self._client_wrapper = utils.QdrantClientWrapper(
            client=self._client, collection_name=self.name
        )
        # Ensure the target collection exists before any upserts happen.
        utils.create_collection_if_not_exists(
            self._client, self.name, self.settings
        )
    except utils.QdrantCollectionIndexError:
        # Qdrant is unavailable — only SQL storage will work from here on.
        self._client = None
        self._client_wrapper = None
|
176
|
-
|
177
|
-
def _get_embedding_function(self) -> Optional[Callable[[Any], List[float]]]:
|
178
|
-
"""Get or create embedding function from model configuration."""
|
179
|
-
if self._embedding_function is None and self.embedding_model:
|
180
|
-
from .....genai.models.embeddings.model import EmbeddingModel
|
181
|
-
|
182
|
-
model = EmbeddingModel(model=self.embedding_model)
|
183
|
-
|
184
|
-
def embedding_function(item: Any) -> List[float]:
|
185
|
-
response = model.run(
|
186
|
-
input=item,
|
187
|
-
dimensions=self.embedding_dimensions,
|
188
|
-
api_key=self.embedding_api_key,
|
189
|
-
api_base=self.embedding_base_url,
|
190
|
-
format=True,
|
191
|
-
)
|
192
|
-
if response.data and len(response.data) > 0:
|
193
|
-
return response.data[0].embedding
|
194
|
-
else:
|
195
|
-
raise utils.QdrantCollectionIndexError(
|
196
|
-
"Failed to generate embedding: empty response"
|
197
|
-
)
|
198
|
-
|
199
|
-
self._embedding_function = embedding_function
|
200
|
-
|
201
|
-
return self._embedding_function
|
202
|
-
|
203
|
-
def _rerank_results(
    self,
    query: str,
    results: List[Tuple[DatabaseItem[DatabaseItemType], float]],
    top_n: Optional[int] = None,
) -> List[Tuple[DatabaseItem[DatabaseItemType], float]]:
    """Re-order search results with the configured rerank model.

    Args:
        query: The original search query.
        results: List of (DatabaseItem, similarity_score) tuples.
        top_n: Number of top results to keep after reranking.

    Returns:
        Reranked (DatabaseItem, rerank_score) tuples. The input is
        returned unchanged when no rerank model is configured, the
        input is empty, or reranking fails for any reason.
    """
    if not self.rerank_model or not results:
        return results

    try:
        from .....genai.models.reranking import run_reranking_model

        # Reranking operates on text: JSON-encode dict items, stringify
        # everything else.
        documents = [
            json.dumps(record.item)
            if isinstance(record.item, dict)
            else str(record.item)
            for record, _ in results
        ]

        rerank_response = run_reranking_model(
            model=self.rerank_model,
            query=query,
            documents=documents,
            top_n=top_n or len(results),
            api_key=self.rerank_api_key,
            api_base=self.rerank_base_url,
        )

        # Rebuild the result list in relevance order, stamping the rerank
        # score onto each DatabaseItem as we go.
        reranked = []
        for hit in rerank_response.results:
            record = results[hit.index][0]
            record.score = hit.relevance_score
            reranked.append((record, hit.relevance_score))
        return reranked

    except Exception:
        # Best-effort: any failure falls back to the original ordering.
        return results
|
261
|
-
|
262
|
-
def _prepare_vector(self, item: Any) -> List[float]:
|
263
|
-
"""Prepare vector from item using embedding function or direct vector."""
|
264
|
-
embedding_function = self._get_embedding_function()
|
265
|
-
if embedding_function:
|
266
|
-
vector = embedding_function(item)
|
267
|
-
# Determine vector size from first embedding if not set
|
268
|
-
if not self._vector_size_determined:
|
269
|
-
self._determine_vector_size(len(vector))
|
270
|
-
return vector
|
271
|
-
elif isinstance(item, dict) and "vector" in item:
|
272
|
-
vector = item["vector"]
|
273
|
-
# Determine vector size from first vector if not set
|
274
|
-
if not self._vector_size_determined:
|
275
|
-
self._determine_vector_size(len(vector))
|
276
|
-
return utils.prepare_vector(vector, self.vector_size)
|
277
|
-
elif isinstance(item, (list, tuple)):
|
278
|
-
# Determine vector size from first vector if not set
|
279
|
-
if not self._vector_size_determined:
|
280
|
-
self._determine_vector_size(len(item))
|
281
|
-
return utils.prepare_vector(item, self.vector_size)
|
282
|
-
else:
|
283
|
-
raise utils.QdrantCollectionIndexError(
|
284
|
-
"Item must contain 'vector' key, be a vector itself, "
|
285
|
-
"or embedding_model must be provided"
|
286
|
-
)
|
287
|
-
|
288
|
-
def _determine_vector_size(self, size: int) -> None:
|
289
|
-
"""Determine and set vector size based on first embedding/vector."""
|
290
|
-
if not self._vector_size_determined:
|
291
|
-
self.vector_size = size
|
292
|
-
self._vector_size_determined = True
|
293
|
-
|
294
|
-
# Update settings with determined vector size
|
295
|
-
if self.settings:
|
296
|
-
self.settings.vector_size = size
|
297
|
-
|
298
|
-
# Initialize Qdrant client now that we have vector size
|
299
|
-
self._init_qdrant_client()
|
300
|
-
|
301
|
-
def _add_to_qdrant(
    self,
    item_id: str,
    vector: List[float],
    item: DatabaseItemType,
    filters: Optional[DatabaseItemFilters] = None,
) -> None:
    """Best-effort mirror of an item into the Qdrant vector store.

    Failures are swallowed on purpose: the item is already persisted in
    the SQL database, so vector indexing is treated as an optimization.
    """
    if not self._client:
        # No Qdrant backend available; SQL storage alone holds the item.
        return

    try:
        try:
            from qdrant_client.models import PointStruct
        except ImportError:
            raise ImportError(
                "Using Qdrant requires the `qdrant-client` package. Please install with: pip install 'hammad-python[genai]'"
            )

        # Payload holds the serialized item plus its creation timestamp;
        # each filter is surfaced as a top-level field so Qdrant can
        # filter on it directly.
        payload = {
            "item_data": json.dumps(utils.serialize(item)),
            "created_at": datetime.now(timezone.utc).isoformat(),
        }
        for key, value in (filters or {}).items():
            payload[key] = value

        self._client.upsert(
            collection_name=self.name,
            points=[PointStruct(id=item_id, vector=vector, payload=payload)],
        )

    except Exception:
        # Deliberate best-effort: data remains in the SQL database even
        # when vector indexing fails.
        pass
|
340
|
-
|
341
|
-
def add(
    self,
    item: DatabaseItemType,
    *,
    id: Optional[str] = None,
    filters: Optional[DatabaseItemFilters] = None,
    ttl: Optional[int] = None,
    vector: Optional[List[float]] = None,
) -> str:
    """Store an item, persisting to SQL first and mirroring to Qdrant.

    Args:
        item: The item to store.
        id: Optional ID (a UUID is generated when omitted).
        filters: Optional filters/metadata.
        ttl: Optional TTL in seconds.
        vector: Optional pre-computed vector; when omitted one is derived
            from the embedding configuration (or the item itself).

    Returns:
        The ID of the stored item.
    """
    # SQL is the source of truth — write there first.
    item_id = self._database.add(item=item, id=id, filters=filters, ttl=ttl)

    if vector is not None:
        prepared_vector = utils.prepare_vector(vector, self.vector_size)
    else:
        try:
            prepared_vector = self._prepare_vector(item)
        except utils.QdrantCollectionIndexError:
            # No vector could be derived; the item still lives in SQL.
            return item_id

    # Mirror into the vector store (best-effort; see _add_to_qdrant).
    self._add_to_qdrant(item_id, prepared_vector, item, filters)
    return item_id
|
385
|
-
|
386
|
-
def get(
    self,
    id: str,
    *,
    filters: Optional[DatabaseItemFilters] = None,
) -> Optional[DatabaseItem[DatabaseItemType]]:
    """Fetch a stored item by ID.

    Args:
        id: The item ID.
        filters: Optional filters the stored item must match.

    Returns:
        The database item, or ``None`` when no match exists.
    """
    # Point lookups never touch Qdrant — SQL is the source of truth.
    return self._database.get(id, filters=filters)
|
403
|
-
|
404
|
-
def _vector_search(
    self,
    query_vector: Union[List[float], Any],
    *,
    filters: Optional[DatabaseItemFilters] = None,
    limit: int = 10,
    score_threshold: Optional[float] = None,
    query_text: Optional[str] = None,
    enable_rerank: bool = True,
    return_scores: bool = False,
) -> Union[List[DatabaseItem[DatabaseItemType]], List[VectorSearchResult]]:
    """Run a similarity search against Qdrant and hydrate hits from SQL.

    Args:
        query_vector: Query vector for similarity search.
        filters: Optional filters to apply.
        limit: Maximum number of results.
        score_threshold: Minimum similarity score threshold.
        query_text: Original query text, required for reranking.
        enable_rerank: Rerank results when a rerank model is configured.
        return_scores: Wrap results as ``VectorSearchResult`` with scores.

    Returns:
        Matching database items sorted by similarity score (reranked when
        enabled), or ``VectorSearchResult`` objects when *return_scores*
        is true. Empty list when Qdrant is unavailable or the search fails.
    """
    if not self._client:
        # Without Qdrant there is nothing to search against.
        return []

    prepared = utils.prepare_vector(query_vector, self.vector_size)

    try:
        response = self._client.query_points(
            collection_name=self.name,
            query=prepared,
            query_filter=utils.build_qdrant_filter(filters),
            limit=limit,
            score_threshold=score_threshold,
            with_payload=True,
            with_vectors=False,
        )

        # Hydrate each Qdrant hit from the SQL backend, stamping the
        # similarity score onto the DatabaseItem.
        scored = []
        for point in response.points:
            record = self._database.get(str(point.id), filters=filters)
            if record:
                record.score = point.score
                scored.append((record, point.score))

        # Optionally rerank (requires the original query text).
        if enable_rerank and self.rerank_model and query_text:
            scored = self._rerank_results(
                query=query_text, results=scored, top_n=limit
            )

        if return_scores:
            return [VectorSearchResult(item=rec, score=s) for rec, s in scored]
        # Backward-compatible shape: items only, scores dropped.
        return [rec for rec, _ in scored]

    except Exception:
        # Any Qdrant failure degrades to "no results".
        return []
|
483
|
-
|
484
|
-
def query(
    self,
    query: Optional[str] = None,
    *,
    filters: Optional[DatabaseItemFilters] = None,
    limit: Optional[int] = None,
    vector: bool = False,
    rerank: bool = False,
    query_vector: Optional[List[float]] = None,
    return_scores: bool = False,
) -> Union[List[DatabaseItem[DatabaseItemType]], List[VectorSearchResult]]:
    """
    Query items from the collection.

    Args:
        query: Search query string.
        filters: Optional filters to apply.
        limit: Maximum number of results.
        vector: Whether to use vector search (requires embedding_model to be configured).
        rerank: Whether to use reranking (requires rerank_model to be configured).
        query_vector: Optional pre-computed query vector for similarity search.
        return_scores: Whether to return similarity scores with results (only applies to vector search).

    Returns:
        List of matching database items, or list of VectorSearchResult objects if return_scores is True.
    """
    effective_limit = limit or self.query_settings.limit

    # If explicit vector is provided, use it directly
    if query_vector is not None:
        return self._vector_search(
            query_vector=query_vector,
            filters=filters,
            limit=effective_limit,
            score_threshold=self.query_settings.score_threshold,
            query_text=query,
            enable_rerank=rerank,
            return_scores=return_scores,
        )

    # If vector=True, use vector search with embedding model
    if vector:
        if not query:
            raise ValueError("Query string is required when vector=True")

        embedding_function = self._get_embedding_function()
        if not embedding_function:
            raise ValueError("Embedding model not configured for vector search")

        try:
            query_vector = embedding_function(query)
            return self._vector_search(
                query_vector=query_vector,
                filters=filters,
                limit=effective_limit,
                score_threshold=self.query_settings.score_threshold,
                query_text=query,
                enable_rerank=rerank,
                return_scores=return_scores,
            )
        except Exception as e:
            # NOTE(review): this also re-wraps failures raised by
            # _vector_search itself, not only embedding errors — the
            # message may therefore be misleading; confirm intended.
            raise ValueError(f"Failed to generate embedding for query: {e}")

    # If rerank=True but vector=False, perform both standard and vector search, then rerank
    if rerank and query:
        if not self.rerank_model:
            raise ValueError("Rerank model not configured")

        # Get results from both database and vector search (if possible)
        # NOTE(review): `filters` is not forwarded to this database query —
        # confirm whether filtering was intended here.
        db_results = self._database.query(
            limit=effective_limit,
            order_by="created_at",
            ascending=False,
        )

        vector_results = []
        embedding_function = self._get_embedding_function()
        if embedding_function:
            try:
                query_vector = embedding_function(query)
                vector_results = self._vector_search(
                    query_vector=query_vector,
                    filters=filters,
                    limit=effective_limit,
                    score_threshold=self.query_settings.score_threshold,
                    query_text=query,
                    enable_rerank=False,  # We'll rerank combined results
                    return_scores=False,  # We handle scores separately in rerank mode
                )
            except Exception:
                # Best-effort: vector leg is optional in rerank mode.
                pass

        # Combine and deduplicate results (DB results first, keyed by id)
        combined_results = []
        seen_ids = set()

        for result in db_results + vector_results:
            if result.id not in seen_ids:
                combined_results.append((result, 0.0))  # Score placeholder
                seen_ids.add(result.id)

        # Apply reranking to combined results
        if combined_results:
            reranked_results = self._rerank_results(
                query=query, results=combined_results, top_n=effective_limit
            )
            # Scores are already set on the DatabaseItem objects by _rerank_results
            return [item for item, _ in reranked_results]

        return [item for item, _ in combined_results]

    # Default: fall back to database query
    # NOTE(review): `filters` is not forwarded here either — confirm intended.
    return self._database.query(
        limit=effective_limit,
        order_by="created_at",
        ascending=False,
    )
|
601
|
-
|
602
|
-
def delete(self, id: str) -> bool:
    """Remove an item by ID from SQL and, best-effort, from Qdrant.

    Args:
        id: The item ID.

    Returns:
        True when the item existed and was deleted, False otherwise.
    """
    removed = self._database.delete(id)

    if not removed or not self._client:
        return removed

    try:
        # Mirror the deletion into the vector store.
        self._client.delete(collection_name=self.name, points_selector=[id])
    except Exception:
        # Vector-side cleanup is best-effort; the SQL row is already gone.
        pass

    return removed
|
624
|
-
|
625
|
-
def count(
    self,
    filters: Optional[DatabaseItemFilters] = None,
) -> int:
    """Count stored items, preferring Qdrant's count when available.

    Args:
        filters: Optional filters to apply.

    Returns:
        Number of matching items.
    """
    if not self._client:
        # SQL-only path: express filters as a JSON-containment condition.
        from ....sql.types import QueryFilter, QueryCondition

        query_filter = None
        if filters:
            query_filter = QueryFilter(
                conditions=[
                    QueryCondition(
                        field="filters",
                        operator="contains",
                        value=json.dumps(filters),
                    )
                ]
            )
        return self._database.count(query_filter)

    try:
        info = self._client.count(
            collection_name=self.name,
            count_filter=utils.build_qdrant_filter(filters),
            exact=self.query_settings.exact,
        )
        return info.count
    except Exception:
        # Qdrant count failed — fall back to the (unfiltered) SQL count.
        return self._database.count()
|
665
|
-
|
666
|
-
def clear(self) -> int:
    """Clear all items from the index.

    Empties the SQL database and, when Qdrant is available, drops and
    recreates the vector collection so stale vectors cannot match
    future queries.

    Returns:
        Number of items deleted (as reported by the SQL database).
    """
    count = self._database.clear()

    if self._client:
        try:
            # BUG FIX: previously only `create_collection_if_not_exists`
            # was called, which is a no-op for an existing collection and
            # left all old vectors in place. Drop the collection first so
            # the vector store is genuinely emptied, then recreate it.
            self._client.delete_collection(collection_name=self.name)
            utils.create_collection_if_not_exists(
                self._client, self.name, self.settings
            )
        except Exception:
            # Best-effort: the SQL data is already cleared.
            pass

    return count
|
685
|
-
|
686
|
-
def get_vector(self, id: str) -> Optional[List[float]]:
    """Retrieve the stored vector for an item.

    Args:
        id: The item ID.

    Returns:
        The vector, or ``None`` when Qdrant is unavailable, the point is
        missing, or retrieval fails.
    """
    if not self._client:
        return None

    try:
        points = self._client.retrieve(
            collection_name=self.name,
            ids=[id],
            with_payload=False,
            with_vectors=True,
        )
        if not points:
            return None

        stored = points[0].vector
        if isinstance(stored, dict):
            # Named-vector collections store a mapping; return the first.
            return list(stored.values())[0] if stored else None
        return stored

    except Exception:
        return None
|
718
|
-
|
719
|
-
def __repr__(self) -> str:
|
720
|
-
"""String representation of the index."""
|
721
|
-
location = str(self.path) if self.path else "memory"
|
722
|
-
vector_available = "yes" if self._client else "no"
|
723
|
-
return f"<QdrantCollectionIndex name='{self.name}' location='{location}' vector_size={self.vector_size} qdrant_available={vector_available}>"
|