hammad-python 0.0.14__py3-none-any.whl → 0.0.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122)
  1. hammad/__init__.py +177 -0
  2. hammad/{performance/imports.py → _internal.py} +7 -1
  3. hammad/cache/__init__.py +1 -1
  4. hammad/cli/__init__.py +3 -1
  5. hammad/cli/_runner.py +265 -0
  6. hammad/cli/animations.py +1 -1
  7. hammad/cli/plugins.py +133 -78
  8. hammad/cli/styles/__init__.py +1 -1
  9. hammad/cli/styles/utils.py +149 -3
  10. hammad/data/__init__.py +56 -29
  11. hammad/data/collections/__init__.py +27 -17
  12. hammad/data/collections/collection.py +205 -383
  13. hammad/data/collections/indexes/__init__.py +37 -0
  14. hammad/data/collections/indexes/qdrant/__init__.py +1 -0
  15. hammad/data/collections/indexes/qdrant/index.py +735 -0
  16. hammad/data/collections/indexes/qdrant/settings.py +94 -0
  17. hammad/data/collections/indexes/qdrant/utils.py +220 -0
  18. hammad/data/collections/indexes/tantivy/__init__.py +1 -0
  19. hammad/data/collections/indexes/tantivy/index.py +428 -0
  20. hammad/data/collections/indexes/tantivy/settings.py +51 -0
  21. hammad/data/collections/indexes/tantivy/utils.py +200 -0
  22. hammad/data/configurations/__init__.py +2 -2
  23. hammad/data/configurations/configuration.py +2 -2
  24. hammad/data/models/__init__.py +20 -9
  25. hammad/data/models/extensions/__init__.py +4 -0
  26. hammad/data/models/{pydantic → extensions/pydantic}/__init__.py +6 -19
  27. hammad/data/models/{pydantic → extensions/pydantic}/converters.py +143 -16
  28. hammad/data/models/{base/fields.py → fields.py} +1 -1
  29. hammad/data/models/{base/model.py → model.py} +1 -1
  30. hammad/data/models/{base/utils.py → utils.py} +1 -1
  31. hammad/data/sql/__init__.py +23 -0
  32. hammad/data/sql/database.py +578 -0
  33. hammad/data/sql/types.py +141 -0
  34. hammad/data/types/__init__.py +1 -3
  35. hammad/data/types/file.py +3 -3
  36. hammad/data/types/multimodal/__init__.py +2 -2
  37. hammad/data/types/multimodal/audio.py +2 -2
  38. hammad/data/types/multimodal/image.py +2 -2
  39. hammad/formatting/__init__.py +9 -27
  40. hammad/formatting/json/__init__.py +8 -2
  41. hammad/formatting/json/converters.py +7 -1
  42. hammad/formatting/text/__init__.py +1 -1
  43. hammad/formatting/yaml/__init__.py +1 -1
  44. hammad/genai/__init__.py +78 -0
  45. hammad/genai/agents/__init__.py +1 -0
  46. hammad/genai/agents/types/__init__.py +35 -0
  47. hammad/genai/agents/types/history.py +277 -0
  48. hammad/genai/agents/types/tool.py +490 -0
  49. hammad/genai/embedding_models/__init__.py +41 -0
  50. hammad/{ai/embeddings/client/litellm_embeddings_client.py → genai/embedding_models/embedding_model.py} +47 -142
  51. hammad/genai/embedding_models/embedding_model_name.py +77 -0
  52. hammad/genai/embedding_models/embedding_model_request.py +65 -0
  53. hammad/{ai/embeddings/types.py → genai/embedding_models/embedding_model_response.py} +3 -3
  54. hammad/genai/embedding_models/run.py +161 -0
  55. hammad/genai/language_models/__init__.py +35 -0
  56. hammad/genai/language_models/_streaming.py +622 -0
  57. hammad/genai/language_models/_types.py +276 -0
  58. hammad/genai/language_models/_utils/__init__.py +31 -0
  59. hammad/genai/language_models/_utils/_completions.py +131 -0
  60. hammad/genai/language_models/_utils/_messages.py +89 -0
  61. hammad/genai/language_models/_utils/_requests.py +202 -0
  62. hammad/genai/language_models/_utils/_structured_outputs.py +124 -0
  63. hammad/genai/language_models/language_model.py +734 -0
  64. hammad/genai/language_models/language_model_request.py +135 -0
  65. hammad/genai/language_models/language_model_response.py +219 -0
  66. hammad/genai/language_models/language_model_response_chunk.py +53 -0
  67. hammad/genai/language_models/run.py +530 -0
  68. hammad/genai/multimodal_models.py +48 -0
  69. hammad/genai/rerank_models.py +26 -0
  70. hammad/logging/__init__.py +1 -1
  71. hammad/logging/decorators.py +1 -1
  72. hammad/logging/logger.py +2 -2
  73. hammad/mcp/__init__.py +1 -1
  74. hammad/mcp/client/__init__.py +35 -0
  75. hammad/mcp/client/client.py +105 -4
  76. hammad/mcp/client/client_service.py +10 -3
  77. hammad/mcp/servers/__init__.py +24 -0
  78. hammad/{performance/runtime → runtime}/__init__.py +2 -2
  79. hammad/{performance/runtime → runtime}/decorators.py +1 -1
  80. hammad/{performance/runtime → runtime}/run.py +1 -1
  81. hammad/service/__init__.py +1 -1
  82. hammad/service/create.py +3 -8
  83. hammad/service/decorators.py +8 -8
  84. hammad/typing/__init__.py +28 -0
  85. hammad/web/__init__.py +3 -3
  86. hammad/web/http/client.py +1 -1
  87. hammad/web/models.py +53 -21
  88. hammad/web/search/client.py +99 -52
  89. hammad/web/utils.py +13 -13
  90. hammad_python-0.0.16.dist-info/METADATA +191 -0
  91. hammad_python-0.0.16.dist-info/RECORD +110 -0
  92. hammad/ai/__init__.py +0 -1
  93. hammad/ai/_utils.py +0 -142
  94. hammad/ai/completions/__init__.py +0 -45
  95. hammad/ai/completions/client.py +0 -684
  96. hammad/ai/completions/create.py +0 -710
  97. hammad/ai/completions/settings.py +0 -100
  98. hammad/ai/completions/types.py +0 -792
  99. hammad/ai/completions/utils.py +0 -486
  100. hammad/ai/embeddings/__init__.py +0 -35
  101. hammad/ai/embeddings/client/__init__.py +0 -1
  102. hammad/ai/embeddings/client/base_embeddings_client.py +0 -26
  103. hammad/ai/embeddings/client/fastembed_text_embeddings_client.py +0 -200
  104. hammad/ai/embeddings/create.py +0 -159
  105. hammad/data/collections/base_collection.py +0 -58
  106. hammad/data/collections/searchable_collection.py +0 -556
  107. hammad/data/collections/vector_collection.py +0 -596
  108. hammad/data/databases/__init__.py +0 -21
  109. hammad/data/databases/database.py +0 -902
  110. hammad/data/models/base/__init__.py +0 -35
  111. hammad/data/models/pydantic/models/__init__.py +0 -28
  112. hammad/data/models/pydantic/models/arbitrary_model.py +0 -46
  113. hammad/data/models/pydantic/models/cacheable_model.py +0 -79
  114. hammad/data/models/pydantic/models/fast_model.py +0 -318
  115. hammad/data/models/pydantic/models/function_model.py +0 -176
  116. hammad/data/models/pydantic/models/subscriptable_model.py +0 -63
  117. hammad/performance/__init__.py +0 -36
  118. hammad/py.typed +0 -0
  119. hammad_python-0.0.14.dist-info/METADATA +0 -70
  120. hammad_python-0.0.14.dist-info/RECORD +0 -99
  121. {hammad_python-0.0.14.dist-info → hammad_python-0.0.16.dist-info}/WHEEL +0 -0
  122. {hammad_python-0.0.14.dist-info → hammad_python-0.0.16.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,735 @@
1
+ """hammad.data.collections.indexes.qdrant.index"""
2
+
3
+ from datetime import datetime, timezone, timedelta
4
+ from typing import (
5
+ Any,
6
+ Callable,
7
+ Dict,
8
+ List,
9
+ Optional,
10
+ Type,
11
+ Union,
12
+ final,
13
+ TYPE_CHECKING,
14
+ Tuple,
15
+ NamedTuple
16
+ )
17
+
18
+ if TYPE_CHECKING:
19
+ from .....genai.embedding_models.embedding_model_name import EmbeddingModelName
20
+ # import uuid # Unused import
21
+ from pathlib import Path
22
+ import json
23
+
24
+ from ....sql.types import (
25
+ DatabaseItemType,
26
+ DatabaseItemFilters,
27
+ DatabaseItem,
28
+ )
29
+ from ....sql.database import Database
30
+ from . import utils
31
+ from .settings import (
32
+ QdrantCollectionIndexSettings,
33
+ QdrantCollectionIndexQuerySettings,
34
+ DistanceMetric,
35
+ )
36
+
37
class VectorSearchResult(NamedTuple):
    """A single vector-search hit: a stored item paired with its score."""

    # The database record that matched the query.
    item: 'DatabaseItem[DatabaseItemType]'
    # The score attached to this hit (the Qdrant score, or the rerank
    # score when reranking replaced it).
    score: float
41
+
42
+
43
+ __all__ = (
44
+ "QdrantCollectionIndex",
45
+ "VectorSearchResult",
46
+ )
47
+
48
+
49
+ @final
50
+ class QdrantCollectionIndex:
51
+ """A vector collection index that uses Qdrant for vector storage
52
+ and similarity search, with SQL Database as the primary storage backend.
53
+
54
+ This collection index provides vector-based functionality for storing
55
+ embeddings and performing semantic similarity searches while using
56
+ the Database class for reliable data persistence.
57
+ """
58
+
59
def __init__(
    self,
    *,
    name: str = "default",
    vector_size: Optional[int] = None,
    schema: Optional[Type[DatabaseItemType]] = None,
    ttl: Optional[int] = None,
    path: Optional[Path | str] = None,
    distance_metric: DistanceMetric = "dot",
    settings: Optional[QdrantCollectionIndexSettings] = None,
    query_settings: Optional[QdrantCollectionIndexQuerySettings] = None,
    embedding_model: Optional["EmbeddingModelName"] = None,
    embedding_dimensions: Optional[int] = None,
    embedding_api_key: Optional[str] = None,
    embedding_base_url: Optional[str] = None,
    rerank_model: Optional[str] = None,
    rerank_api_key: Optional[str] = None,
    rerank_base_url: Optional[str] = None,
) -> None:
    """
    Set up the index: configuration, SQL storage and (optionally) Qdrant.

    Args:
        name: Index name; also used as the Qdrant collection name and to
            derive the SQL table name (``qdrant_<name>``).
        vector_size: Dimension of the stored vectors. When omitted, the
            Qdrant client is not created here; it is created once the
            size has been determined from the first vector.
        schema: Optional schema type passed to the SQL database.
        ttl: Time to live for items, passed to the SQL database.
        path: Base directory for on-disk storage. ``None`` means no
            paths are passed to the storage backends.
        distance_metric: Distance metric used when default settings are
            built.
        settings: Qdrant settings. When falsy, defaults are built from
            ``vector_size`` (falling back to 768), ``distance_metric``
            and ``path``.
        query_settings: Query settings. When falsy, defaults are used.
        embedding_model: Embedding model name
            (e.g., 'openai/text-embedding-3-small').
        embedding_dimensions: Number of dimensions requested from the
            embedding model.
        embedding_api_key: API key for the embedding service.
        embedding_base_url: Base URL for the embedding service.
        rerank_model: Rerank model name
            (e.g., 'cohere/rerank-english-v3.0').
        rerank_api_key: API key for the rerank service.
        rerank_base_url: Base URL for the rerank service.
    """
    # Identity and item-level options.
    self.name = name
    self.schema = schema
    self.ttl = ttl

    # Vector dimension; may be filled in later from the first vector.
    self.vector_size = vector_size
    self._vector_size_determined = vector_size is not None

    # Embedding configuration. The callable itself is built lazily.
    self.embedding_model = embedding_model
    self.embedding_dimensions = embedding_dimensions
    self.embedding_api_key = embedding_api_key
    self.embedding_base_url = embedding_base_url
    self._embedding_function = None

    # Rerank configuration.
    self.rerank_model = rerank_model
    self.rerank_api_key = rerank_api_key
    self.rerank_base_url = rerank_base_url

    # Normalise a string path to a Path; keep None as-is.
    if path is None or isinstance(path, Path):
        self.path = path
    else:
        self.path = Path(path)

    # Build default Qdrant settings when the caller supplied none.
    if not settings:
        settings = QdrantCollectionIndexSettings(
            vector_size=vector_size or 768,  # Default fallback
            distance_metric=distance_metric,
            path=(
                None
                if self.path is None
                else str(self.path / f"{name}_qdrant")
            ),
        )
    self.settings = settings

    if not query_settings:
        query_settings = QdrantCollectionIndexQuerySettings()
    self.query_settings = query_settings

    # The SQL database is the primary storage backend.
    self._database = Database[DatabaseItemType](
        name=name,
        schema=schema,
        ttl=ttl,
        path=None if self.path is None else self.path / f"{name}.db",
        table_name=f"qdrant_{name}"
    )

    # The Qdrant client is only created once the vector size is known.
    self._client = None
    self._client_wrapper = None
    if self._vector_size_determined:
        self._init_qdrant_client()
156
+
157
def _init_qdrant_client(self) -> None:
    """Create the Qdrant client, its wrapper and the backing collection.

    If any step raises ``utils.QdrantCollectionIndexError``, both
    ``_client`` and ``_client_wrapper`` are reset to ``None`` so the index
    keeps working with SQL storage only. Other exceptions propagate.
    """
    try:
        self._client = utils.create_qdrant_client(self.settings)
        self._client_wrapper = utils.QdrantClientWrapper(
            client=self._client, collection_name=self.name
        )
        # Make sure the collection exists before it is used.
        utils.create_collection_if_not_exists(
            self._client, self.name, self.settings
        )
    except utils.QdrantCollectionIndexError:
        # Qdrant not available - only SQL storage will work
        self._client = self._client_wrapper = None
177
+
178
+ def _get_embedding_function(self) -> Optional[Callable[[Any], List[float]]]:
179
+ """Get or create embedding function from model configuration."""
180
+ if self._embedding_function is None and self.embedding_model:
181
+ from .....genai.embedding_models.embedding_model import EmbeddingModel
182
+
183
+ model = EmbeddingModel(model=self.embedding_model)
184
+
185
+ def embedding_function(item: Any) -> List[float]:
186
+ response = model.run(
187
+ input=item,
188
+ dimensions=self.embedding_dimensions,
189
+ api_key=self.embedding_api_key,
190
+ api_base=self.embedding_base_url,
191
+ format=True
192
+ )
193
+ if response.data and len(response.data) > 0:
194
+ return response.data[0].embedding
195
+ else:
196
+ raise utils.QdrantCollectionIndexError(
197
+ "Failed to generate embedding: empty response"
198
+ )
199
+
200
+ self._embedding_function = embedding_function
201
+
202
+ return self._embedding_function
203
+
204
def _rerank_results(
    self,
    query: str,
    results: List[Tuple[DatabaseItem[DatabaseItemType], float]],
    top_n: Optional[int] = None
) -> List[Tuple[DatabaseItem[DatabaseItemType], float]]:
    """
    Reorder search results with the configured rerank model.

    Args:
        query: The original search query.
        results: List of (DatabaseItem, score) tuples to rerank.
        top_n: Number of results requested from the reranker; defaults to
            ``len(results)`` when falsy.

    Returns:
        A list of (DatabaseItem, rerank_score) tuples in the order the
        reranker returned them. ``results`` is returned unchanged when no
        rerank model is configured, when ``results`` is empty, or when
        anything in the reranking step raises.
    """
    if not self.rerank_model or not results:
        return results

    try:
        from .....genai.rerank_models import run_rerank_model

        # Each item is reranked by its text form: JSON for dicts,
        # ``str()`` for everything else.
        documents = [
            json.dumps(entry.item)
            if isinstance(entry.item, dict)
            else str(entry.item)
            for entry, _ in results
        ]

        response = run_rerank_model(
            model=self.rerank_model,
            query=query,
            documents=documents,
            top_n=top_n or len(results),
            api_key=self.rerank_api_key,
            api_base=self.rerank_base_url
        )

        reranked = []
        for hit in response.results:
            position, relevance = hit.index, hit.relevance_score
            entry = results[position][0]
            # The rerank score is also written onto the item itself.
            entry.score = relevance
            reranked.append((entry, relevance))
        return reranked

    except Exception:
        # If reranking fails, return original results
        return results
262
+
263
+ def _prepare_vector(self, item: Any) -> List[float]:
264
+ """Prepare vector from item using embedding function or direct vector."""
265
+ embedding_function = self._get_embedding_function()
266
+ if embedding_function:
267
+ vector = embedding_function(item)
268
+ # Determine vector size from first embedding if not set
269
+ if not self._vector_size_determined:
270
+ self._determine_vector_size(len(vector))
271
+ return vector
272
+ elif isinstance(item, dict) and "vector" in item:
273
+ vector = item["vector"]
274
+ # Determine vector size from first vector if not set
275
+ if not self._vector_size_determined:
276
+ self._determine_vector_size(len(vector))
277
+ return utils.prepare_vector(vector, self.vector_size)
278
+ elif isinstance(item, (list, tuple)):
279
+ # Determine vector size from first vector if not set
280
+ if not self._vector_size_determined:
281
+ self._determine_vector_size(len(item))
282
+ return utils.prepare_vector(item, self.vector_size)
283
+ else:
284
+ raise utils.QdrantCollectionIndexError(
285
+ "Item must contain 'vector' key, be a vector itself, "
286
+ "or embedding_model must be provided"
287
+ )
288
+
289
+ def _determine_vector_size(self, size: int) -> None:
290
+ """Determine and set vector size based on first embedding/vector."""
291
+ if not self._vector_size_determined:
292
+ self.vector_size = size
293
+ self._vector_size_determined = True
294
+
295
+ # Update settings with determined vector size
296
+ if self.settings:
297
+ self.settings.vector_size = size
298
+
299
+ # Initialize Qdrant client now that we have vector size
300
+ self._init_qdrant_client()
301
+
302
def _add_to_qdrant(
    self,
    item_id: str,
    vector: List[float],
    item: DatabaseItemType,
    filters: Optional[DatabaseItemFilters] = None,
) -> None:
    """Upsert one point into the Qdrant collection (best effort).

    Args:
        item_id: ID used for the Qdrant point.
        vector: The vector to store.
        item: The item; stored in the payload as JSON under ``item_data``.
        filters: Optional key/value pairs copied into the payload as
            top-level fields.

    Does nothing when no Qdrant client is available. Any exception raised
    while building or upserting the point is swallowed, including the
    ``ImportError`` raised below when ``qdrant-client`` is missing.
    """
    if not self._client:
        # Qdrant not available, skip vector indexing
        return

    try:
        try:
            from qdrant_client.models import PointStruct
        except ImportError:
            raise ImportError(
                "Using Qdrant requires the `qdrant-client` package. Please install with: pip install 'hammad-python[ai]'"
            )

        payload = {
            "item_data": json.dumps(utils.serialize(item)),
            "created_at": datetime.now(timezone.utc).isoformat(),
        }
        # Filter keys are written last, so they override the two keys
        # above on a name clash.
        if filters:
            payload.update(filters.items())

        self._client.upsert(
            collection_name=self.name,
            points=[PointStruct(id=item_id, vector=vector, payload=payload)]
        )

    except Exception:
        # Vector indexing failed, but data is still in SQL database
        pass
348
+
349
def add(
    self,
    item: DatabaseItemType,
    *,
    id: Optional[str] = None,
    filters: Optional[DatabaseItemFilters] = None,
    ttl: Optional[int] = None,
    vector: Optional[List[float]] = None,
) -> str:
    """
    Add an item to the index.

    The item is always written to the SQL database first. It is then
    indexed in Qdrant when a vector is available and a Qdrant client
    exists.

    Args:
        item: The item to store.
        id: Optional ID, passed through to the database.
        filters: Optional filters/metadata.
        ttl: Optional TTL, passed through to the database.
        vector: Optional pre-computed vector. When omitted, the vector is
            derived from ``item`` via ``_prepare_vector``.

    Returns:
        The ID of the stored item, as returned by the database.
    """
    # Add to SQL database first
    item_id = self._database.add(
        item=item,
        id=id,
        filters=filters,
        ttl=ttl,
    )

    if vector is None:
        try:
            prepared_vector = self._prepare_vector(item)
        except utils.QdrantCollectionIndexError:
            # Vector preparation failed, but item is still in database
            return item_id
    else:
        # A pre-computed vector must also fix the vector size on first
        # use, as _prepare_vector does. Otherwise an index created
        # without vector_size never initialises its Qdrant client and the
        # vector is silently dropped.
        if not self._vector_size_determined:
            self._determine_vector_size(len(vector))
        prepared_vector = utils.prepare_vector(vector, self.vector_size)

    # Add to Qdrant vector store (no-op when Qdrant is unavailable)
    self._add_to_qdrant(item_id, prepared_vector, item, filters)

    return item_id
393
+
394
def get(
    self,
    id: str,
    *,
    filters: Optional[DatabaseItemFilters] = None,
) -> Optional[DatabaseItem[DatabaseItemType]]:
    """
    Look up a single item by ID in the SQL database.

    Args:
        id: The item ID.
        filters: Optional filters forwarded to the database lookup.

    Returns:
        Whatever the database returns for this lookup: the database item,
        or None if not found.
    """
    stored = self._database.get(id, filters=filters)
    return stored
411
+
412
def _vector_search(
    self,
    query_vector: Union[List[float], Any],
    *,
    filters: Optional[DatabaseItemFilters] = None,
    limit: int = 10,
    score_threshold: Optional[float] = None,
    query_text: Optional[str] = None,
    enable_rerank: bool = True,
    return_scores: bool = False,
) -> Union[List[DatabaseItem[DatabaseItemType]], List[VectorSearchResult]]:
    """
    Run a similarity search in Qdrant and resolve hits via the database.

    Args:
        query_vector: Query vector for similarity search.
        filters: Optional filters, applied to both the Qdrant query and
            the per-hit database lookup.
        limit: Maximum number of results.
        score_threshold: Minimum similarity score threshold.
        query_text: Original query text; reranking needs it.
        enable_rerank: Whether to rerank when a rerank model is configured.
        return_scores: Whether to return scores with results.

    Returns:
        The matching database items in Qdrant's result order (or rerank
        order), or VectorSearchResult objects if ``return_scores`` is
        True. Returns an empty list when there is no Qdrant client or when
        the search raises.
    """
    if not self._client:
        # Qdrant not available, return empty results
        return []

    # Prepared outside the try block, so a failure here propagates.
    prepared_vector = utils.prepare_vector(query_vector, self.vector_size)

    try:
        point_filter = utils.build_qdrant_filter(filters)
        response = self._client.query_points(
            collection_name=self.name,
            query=prepared_vector,
            query_filter=point_filter,
            limit=limit,
            score_threshold=score_threshold,
            with_payload=True,
            with_vectors=False,
        )

        # Resolve each hit to its database record; hits with no falsy
        # lookup result are skipped.
        scored = []
        for point in response.points:
            record = self._database.get(str(point.id), filters=filters)
            if record:
                # The score is also written onto the record itself.
                record.score = point.score
                scored.append((record, point.score))

        if enable_rerank and self.rerank_model and query_text:
            scored = self._rerank_results(
                query=query_text,
                results=scored,
                top_n=limit
            )

        if return_scores:
            return [
                VectorSearchResult(item=record, score=value)
                for record, value in scored
            ]
        return [record for record, _ in scored]

    except Exception:
        # Vector search failed, return empty results
        return []
490
+
491
def query(
    self,
    query: Optional[str] = None,
    *,
    filters: Optional[DatabaseItemFilters] = None,
    limit: Optional[int] = None,
    vector: bool = False,
    rerank: bool = False,
    query_vector: Optional[List[float]] = None,
    return_scores: bool = False,
) -> Union[List[DatabaseItem[DatabaseItemType]], List[VectorSearchResult]]:
    """
    Query items from the collection.

    Modes, checked in this order:
      1. ``query_vector`` given: vector search with that vector.
      2. ``vector=True``: embed ``query`` and run a vector search.
      3. ``rerank=True`` with a ``query``: merge the most recent database
         items with vector-search hits (when an embedding function is
         available), then rerank the merged list.
      4. Otherwise: return the most recent database items.

    Args:
        query: Search query string.
        filters: Optional filters to apply to vector search.
        limit: Maximum number of results; falls back to
            ``query_settings.limit`` when falsy.
        vector: Whether to use vector search (requires embedding_model).
        rerank: Whether to use reranking (requires rerank_model).
        query_vector: Optional pre-computed query vector.
        return_scores: Whether to return similarity scores with results
            (only applies to vector search).

    Returns:
        List of matching database items, or list of VectorSearchResult
        objects if return_scores is True.

    Raises:
        ValueError: If ``vector=True`` without a query or an embedding
            model, if embedding/searching fails in that mode (the
            original exception is attached as ``__cause__``), or if
            ``rerank=True`` without a rerank model.
    """
    effective_limit = limit or self.query_settings.limit

    # If explicit vector is provided, use it directly
    if query_vector is not None:
        return self._vector_search(
            query_vector=query_vector,
            filters=filters,
            limit=effective_limit,
            score_threshold=self.query_settings.score_threshold,
            query_text=query,
            enable_rerank=rerank,
            return_scores=return_scores,
        )

    # If vector=True, use vector search with embedding model
    if vector:
        if not query:
            raise ValueError("Query string is required when vector=True")

        embedding_function = self._get_embedding_function()
        if not embedding_function:
            raise ValueError("Embedding model not configured for vector search")

        try:
            query_vector = embedding_function(query)
            return self._vector_search(
                query_vector=query_vector,
                filters=filters,
                limit=effective_limit,
                score_threshold=self.query_settings.score_threshold,
                query_text=query,
                enable_rerank=rerank,
                return_scores=return_scores,
            )
        except Exception as e:
            # Chain explicitly so the underlying failure is recorded as
            # the cause of the ValueError.
            raise ValueError(
                f"Failed to generate embedding for query: {e}"
            ) from e

    # If rerank=True but vector=False, combine database and vector
    # results, then rerank.
    if rerank and query:
        if not self.rerank_model:
            raise ValueError("Rerank model not configured")

        # NOTE(review): `filters` is not forwarded to the database query
        # here or in the default fallback below, only to vector search.
        # Confirm whether that is intended.
        db_results = self._database.query(
            limit=effective_limit,
            order_by="created_at",
            ascending=False,
        )

        vector_results = []
        embedding_function = self._get_embedding_function()
        if embedding_function:
            try:
                query_vector = embedding_function(query)
                vector_results = self._vector_search(
                    query_vector=query_vector,
                    filters=filters,
                    limit=effective_limit,
                    score_threshold=self.query_settings.score_threshold,
                    query_text=query,
                    enable_rerank=False,  # We'll rerank combined results
                    return_scores=False,  # Scores are set by the reranker
                )
            except Exception:
                # Vector search is optional in this mode.
                pass

        # Combine and deduplicate by item ID, database results first.
        # Unpacking works for any iterable, not only lists.
        combined_results = []
        seen_ids = set()
        for result in [*db_results, *vector_results]:
            if result.id not in seen_ids:
                combined_results.append((result, 0.0))  # Score placeholder
                seen_ids.add(result.id)

        if combined_results:
            reranked_results = self._rerank_results(
                query=query,
                results=combined_results,
                top_n=effective_limit
            )
            # Scores are set on the items themselves by _rerank_results
            return [item for item, _ in reranked_results]

        return [item for item, _ in combined_results]

    # Default: fall back to database query
    return self._database.query(
        limit=effective_limit,
        order_by="created_at",
        ascending=False,
    )
610
+
611
def delete(self, id: str) -> bool:
    """
    Remove an item from the database and, best effort, from Qdrant.

    Args:
        id: The item ID.

    Returns:
        The database's delete result: True if item was deleted, False if
        not found.
    """
    removed = self._database.delete(id)

    # Only touch Qdrant when the database removed something and a
    # client exists.
    if not (removed and self._client):
        return removed

    try:
        self._client.delete(
            collection_name=self.name,
            points_selector=[id]
        )
    except Exception:
        # Vector deletion failed, but item was removed from database
        pass

    return removed
636
+
637
def count(
    self,
    filters: Optional[DatabaseItemFilters] = None,
) -> int:
    """
    Count items matching the filters.

    Qdrant is used when a client is available. The SQL database is used
    when there is no client or when the Qdrant count raises. Both database
    paths apply ``filters`` in the same way.

    Args:
        filters: Optional filters to apply.

    Returns:
        Number of matching items.
    """

    def _database_count() -> int:
        # Count in the SQL database, applying `filters` when given.
        from ....sql.types import QueryFilter, QueryCondition

        query_filter = None
        if filters:
            conditions = [
                QueryCondition(
                    field="filters",
                    operator="contains",
                    value=json.dumps(filters),
                )
            ]
            query_filter = QueryFilter(conditions=conditions)
        return self._database.count(query_filter)

    if not self._client:
        return _database_count()

    try:
        # Use Qdrant count
        qdrant_filter = utils.build_qdrant_filter(filters)
        info = self._client.count(
            collection_name=self.name,
            count_filter=qdrant_filter,
            exact=self.query_settings.exact,
        )
        return info.count
    except Exception:
        # Fall back to the database count. The filters are kept, so the
        # result does not silently widen to "all items".
        return _database_count()
675
+
676
def clear(self) -> int:
    """
    Clear all items from the index.

    The SQL database is cleared first. When a Qdrant client exists, the
    collection is then dropped and recreated (best effort).

    Returns:
        Number of items deleted, as reported by the database.
    """
    count = self._database.clear()

    if self._client:
        try:
            # create_collection_if_not_exists alone would leave an
            # existing collection and its points in place, so drop the
            # collection first.
            self._client.delete_collection(collection_name=self.name)
            utils.create_collection_if_not_exists(
                self._client,
                self.name,
                self.settings
            )
        except Exception:
            # Clearing Qdrant failed, but the database was cleared.
            pass

    return count
697
+
698
def get_vector(self, id: str) -> Optional[List[float]]:
    """
    Fetch the stored vector for one item from Qdrant.

    Args:
        id: The item ID.

    Returns:
        The vector, or None when there is no Qdrant client, the point is
        not found, or the retrieval raises. For a named-vector mapping the
        first value is returned (None if the mapping is empty).
    """
    if not self._client:
        return None

    try:
        points = self._client.retrieve(
            collection_name=self.name,
            ids=[id],
            with_payload=False,
            with_vectors=True,
        )
        if not points:
            return None

        stored = points[0].vector
        if not isinstance(stored, dict):
            return stored
        # Handle named vectors if used
        return list(stored.values())[0] if stored else None

    except Exception:
        return None
730
+
731
+ def __repr__(self) -> str:
732
+ """String representation of the index."""
733
+ location = str(self.path) if self.path else "memory"
734
+ vector_available = "yes" if self._client else "no"
735
+ return f"<QdrantCollectionIndex name='{self.name}' location='{location}' vector_size={self.vector_size} qdrant_available={vector_available}>"