hammad-python 0.0.29__py3-none-any.whl → 0.0.31__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (137) hide show
  1. ham/__init__.py +10 -0
  2. {hammad_python-0.0.29.dist-info → hammad_python-0.0.31.dist-info}/METADATA +6 -32
  3. hammad_python-0.0.31.dist-info/RECORD +6 -0
  4. hammad/__init__.py +0 -84
  5. hammad/_internal.py +0 -256
  6. hammad/_main.py +0 -226
  7. hammad/cache/__init__.py +0 -40
  8. hammad/cache/base_cache.py +0 -181
  9. hammad/cache/cache.py +0 -169
  10. hammad/cache/decorators.py +0 -261
  11. hammad/cache/file_cache.py +0 -80
  12. hammad/cache/ttl_cache.py +0 -74
  13. hammad/cli/__init__.py +0 -33
  14. hammad/cli/animations.py +0 -573
  15. hammad/cli/plugins.py +0 -867
  16. hammad/cli/styles/__init__.py +0 -55
  17. hammad/cli/styles/settings.py +0 -139
  18. hammad/cli/styles/types.py +0 -358
  19. hammad/cli/styles/utils.py +0 -634
  20. hammad/data/__init__.py +0 -90
  21. hammad/data/collections/__init__.py +0 -49
  22. hammad/data/collections/collection.py +0 -326
  23. hammad/data/collections/indexes/__init__.py +0 -37
  24. hammad/data/collections/indexes/qdrant/__init__.py +0 -1
  25. hammad/data/collections/indexes/qdrant/index.py +0 -723
  26. hammad/data/collections/indexes/qdrant/settings.py +0 -94
  27. hammad/data/collections/indexes/qdrant/utils.py +0 -210
  28. hammad/data/collections/indexes/tantivy/__init__.py +0 -1
  29. hammad/data/collections/indexes/tantivy/index.py +0 -426
  30. hammad/data/collections/indexes/tantivy/settings.py +0 -40
  31. hammad/data/collections/indexes/tantivy/utils.py +0 -176
  32. hammad/data/configurations/__init__.py +0 -35
  33. hammad/data/configurations/configuration.py +0 -564
  34. hammad/data/models/__init__.py +0 -50
  35. hammad/data/models/extensions/__init__.py +0 -4
  36. hammad/data/models/extensions/pydantic/__init__.py +0 -42
  37. hammad/data/models/extensions/pydantic/converters.py +0 -759
  38. hammad/data/models/fields.py +0 -546
  39. hammad/data/models/model.py +0 -1078
  40. hammad/data/models/utils.py +0 -280
  41. hammad/data/sql/__init__.py +0 -24
  42. hammad/data/sql/database.py +0 -576
  43. hammad/data/sql/types.py +0 -127
  44. hammad/data/types/__init__.py +0 -75
  45. hammad/data/types/file.py +0 -431
  46. hammad/data/types/multimodal/__init__.py +0 -36
  47. hammad/data/types/multimodal/audio.py +0 -200
  48. hammad/data/types/multimodal/image.py +0 -182
  49. hammad/data/types/text.py +0 -1308
  50. hammad/formatting/__init__.py +0 -33
  51. hammad/formatting/json/__init__.py +0 -27
  52. hammad/formatting/json/converters.py +0 -158
  53. hammad/formatting/text/__init__.py +0 -63
  54. hammad/formatting/text/converters.py +0 -723
  55. hammad/formatting/text/markdown.py +0 -131
  56. hammad/formatting/yaml/__init__.py +0 -26
  57. hammad/formatting/yaml/converters.py +0 -5
  58. hammad/genai/__init__.py +0 -217
  59. hammad/genai/a2a/__init__.py +0 -32
  60. hammad/genai/a2a/workers.py +0 -552
  61. hammad/genai/agents/__init__.py +0 -59
  62. hammad/genai/agents/agent.py +0 -1973
  63. hammad/genai/agents/run.py +0 -1024
  64. hammad/genai/agents/types/__init__.py +0 -42
  65. hammad/genai/agents/types/agent_context.py +0 -13
  66. hammad/genai/agents/types/agent_event.py +0 -128
  67. hammad/genai/agents/types/agent_hooks.py +0 -220
  68. hammad/genai/agents/types/agent_messages.py +0 -31
  69. hammad/genai/agents/types/agent_response.py +0 -125
  70. hammad/genai/agents/types/agent_stream.py +0 -327
  71. hammad/genai/graphs/__init__.py +0 -125
  72. hammad/genai/graphs/_utils.py +0 -190
  73. hammad/genai/graphs/base.py +0 -1828
  74. hammad/genai/graphs/plugins.py +0 -316
  75. hammad/genai/graphs/types.py +0 -638
  76. hammad/genai/models/__init__.py +0 -1
  77. hammad/genai/models/embeddings/__init__.py +0 -43
  78. hammad/genai/models/embeddings/model.py +0 -226
  79. hammad/genai/models/embeddings/run.py +0 -163
  80. hammad/genai/models/embeddings/types/__init__.py +0 -37
  81. hammad/genai/models/embeddings/types/embedding_model_name.py +0 -75
  82. hammad/genai/models/embeddings/types/embedding_model_response.py +0 -76
  83. hammad/genai/models/embeddings/types/embedding_model_run_params.py +0 -66
  84. hammad/genai/models/embeddings/types/embedding_model_settings.py +0 -47
  85. hammad/genai/models/language/__init__.py +0 -57
  86. hammad/genai/models/language/model.py +0 -1098
  87. hammad/genai/models/language/run.py +0 -878
  88. hammad/genai/models/language/types/__init__.py +0 -40
  89. hammad/genai/models/language/types/language_model_instructor_mode.py +0 -47
  90. hammad/genai/models/language/types/language_model_messages.py +0 -28
  91. hammad/genai/models/language/types/language_model_name.py +0 -239
  92. hammad/genai/models/language/types/language_model_request.py +0 -127
  93. hammad/genai/models/language/types/language_model_response.py +0 -217
  94. hammad/genai/models/language/types/language_model_response_chunk.py +0 -56
  95. hammad/genai/models/language/types/language_model_settings.py +0 -89
  96. hammad/genai/models/language/types/language_model_stream.py +0 -600
  97. hammad/genai/models/language/utils/__init__.py +0 -28
  98. hammad/genai/models/language/utils/requests.py +0 -421
  99. hammad/genai/models/language/utils/structured_outputs.py +0 -135
  100. hammad/genai/models/model_provider.py +0 -4
  101. hammad/genai/models/multimodal.py +0 -47
  102. hammad/genai/models/reranking.py +0 -26
  103. hammad/genai/types/__init__.py +0 -1
  104. hammad/genai/types/base.py +0 -215
  105. hammad/genai/types/history.py +0 -290
  106. hammad/genai/types/tools.py +0 -507
  107. hammad/logging/__init__.py +0 -35
  108. hammad/logging/decorators.py +0 -834
  109. hammad/logging/logger.py +0 -1018
  110. hammad/mcp/__init__.py +0 -53
  111. hammad/mcp/client/__init__.py +0 -35
  112. hammad/mcp/client/client.py +0 -624
  113. hammad/mcp/client/client_service.py +0 -400
  114. hammad/mcp/client/settings.py +0 -178
  115. hammad/mcp/servers/__init__.py +0 -26
  116. hammad/mcp/servers/launcher.py +0 -1161
  117. hammad/runtime/__init__.py +0 -32
  118. hammad/runtime/decorators.py +0 -142
  119. hammad/runtime/run.py +0 -299
  120. hammad/service/__init__.py +0 -49
  121. hammad/service/create.py +0 -527
  122. hammad/service/decorators.py +0 -283
  123. hammad/types.py +0 -288
  124. hammad/typing/__init__.py +0 -435
  125. hammad/web/__init__.py +0 -43
  126. hammad/web/http/__init__.py +0 -1
  127. hammad/web/http/client.py +0 -944
  128. hammad/web/models.py +0 -275
  129. hammad/web/openapi/__init__.py +0 -1
  130. hammad/web/openapi/client.py +0 -740
  131. hammad/web/search/__init__.py +0 -1
  132. hammad/web/search/client.py +0 -1023
  133. hammad/web/utils.py +0 -472
  134. hammad_python-0.0.29.dist-info/RECORD +0 -135
  135. {hammad → ham}/py.typed +0 -0
  136. {hammad_python-0.0.29.dist-info → hammad_python-0.0.31.dist-info}/WHEEL +0 -0
  137. {hammad_python-0.0.29.dist-info → hammad_python-0.0.31.dist-info}/licenses/LICENSE +0 -0
@@ -1,723 +0,0 @@
1
- """hammad.data.collections.indexes.qdrant.index"""
2
-
3
- from datetime import datetime, timezone, timedelta
4
- from typing import (
5
- Any,
6
- Callable,
7
- Dict,
8
- List,
9
- Optional,
10
- Type,
11
- Union,
12
- final,
13
- TYPE_CHECKING,
14
- Tuple,
15
- NamedTuple,
16
- )
17
-
18
- if TYPE_CHECKING:
19
- from .....genai.models.embeddings.types import EmbeddingModelName
20
- # import uuid # Unused import
21
- from pathlib import Path
22
- import json
23
-
24
- from ....sql.types import (
25
- DatabaseItemType,
26
- DatabaseItemFilters,
27
- DatabaseItem,
28
- )
29
- from ....sql.database import Database
30
- from . import utils
31
- from .settings import (
32
- QdrantCollectionIndexSettings,
33
- QdrantCollectionIndexQuerySettings,
34
- DistanceMetric,
35
- )
36
-
37
-
38
class VectorSearchResult(NamedTuple):
    """A single vector-search hit: the stored item paired with its score."""

    # The database record that matched the query.
    item: "DatabaseItem[DatabaseItemType]"
    # The score attached to that record by the search (or by reranking).
    score: float
43
-
44
-
45
- __all__ = (
46
- "QdrantCollectionIndex",
47
- "VectorSearchResult",
48
- )
49
-
50
-
51
- @final
52
- class QdrantCollectionIndex:
53
- """A vector collection index that uses Qdrant for vector storage
54
- and similarity search, with SQL Database as the primary storage backend.
55
-
56
- This collection index provides vector-based functionality for storing
57
- embeddings and performing semantic similarity searches while using
58
- the Database class for reliable data persistence.
59
- """
60
-
61
def __init__(
    self,
    *,
    name: str = "default",
    vector_size: Optional[int] = None,
    schema: Optional[Type[DatabaseItemType]] = None,
    ttl: Optional[int] = None,
    path: Optional[Path | str] = None,
    distance_metric: DistanceMetric = "dot",
    settings: Optional[QdrantCollectionIndexSettings] = None,
    query_settings: Optional[QdrantCollectionIndexQuerySettings] = None,
    embedding_model: Optional["EmbeddingModelName"] = None,
    embedding_dimensions: Optional[int] = None,
    embedding_api_key: Optional[str] = None,
    embedding_base_url: Optional[str] = None,
    rerank_model: Optional[str] = None,
    rerank_api_key: Optional[str] = None,
    rerank_base_url: Optional[str] = None,
) -> None:
    """
    Set up the SQL-backed store and, when possible, the Qdrant client.

    Args:
        name: Index name; also used for the Qdrant collection, the
            database file name and the ``qdrant_{name}`` table name.
        vector_size: Dimension of stored vectors. When omitted, the Qdrant
            client is not created here; it is created once the size is
            learned from the first vector.
        schema: Optional schema type handed to the database.
        ttl: Time to live for items, handed to the database.
        path: Directory for on-disk storage; ``None`` means no path is
            passed to either backend.
        distance_metric: Distance metric used when default settings are built.
        settings: Qdrant settings. When falsy, defaults are built from
            ``vector_size`` (falling back to 768), ``distance_metric`` and
            ``path``.
        query_settings: Query settings; defaults are built when falsy.
        embedding_model: Embedding model name (e.g., 'openai/text-embedding-3-small').
        embedding_dimensions: Number of dimensions requested for embeddings.
        embedding_api_key: API key for the embedding service.
        embedding_base_url: Base URL for the embedding service.
        rerank_model: Rerank model name (e.g., 'cohere/rerank-english-v3.0').
        rerank_api_key: API key for the rerank service.
        rerank_base_url: Base URL for the rerank service.
    """
    # Identity and storage behaviour.
    self.name = name
    self.schema = schema
    self.ttl = ttl

    # The size counts as "determined" only when the caller supplied one.
    self.vector_size = vector_size
    self._vector_size_determined = vector_size is not None

    # Embedding configuration; the callable itself is built on first use.
    self.embedding_model = embedding_model
    self.embedding_dimensions = embedding_dimensions
    self.embedding_api_key = embedding_api_key
    self.embedding_base_url = embedding_base_url
    self._embedding_function = None

    # Rerank configuration.
    self.rerank_model = rerank_model
    self.rerank_api_key = rerank_api_key
    self.rerank_base_url = rerank_base_url

    # Normalise string paths to Path objects; keep None untouched.
    self.path = path if path is None or isinstance(path, Path) else Path(path)

    # Fall back to default Qdrant settings when none were supplied.
    self.settings = settings or QdrantCollectionIndexSettings(
        vector_size=vector_size or 768,  # Default fallback
        distance_metric=distance_metric,
        path=None if self.path is None else str(self.path / f"{name}_qdrant"),
    )
    self.query_settings = query_settings or QdrantCollectionIndexQuerySettings()

    # The SQL database is the primary storage backend.
    self._database = Database[DatabaseItemType](
        name=name,
        schema=schema,
        ttl=ttl,
        path=None if self.path is None else self.path / f"{name}.db",
        table_name=f"qdrant_{name}",
    )

    # The Qdrant client is only created once the vector size is known.
    self._client = None
    self._client_wrapper = None
    if self._vector_size_determined:
        self._init_qdrant_client()
158
-
159
def _init_qdrant_client(self) -> None:
    """Create the Qdrant client, its wrapper and the backing collection.

    If any step raises ``utils.QdrantCollectionIndexError`` the client and
    wrapper are both reset to ``None``; other methods treat a missing
    client as "vector features unavailable".
    """
    try:
        client = utils.create_qdrant_client(self.settings)
        self._client = client
        self._client_wrapper = utils.QdrantClientWrapper(
            client=client, collection_name=self.name
        )

        # Make sure the collection exists before anything is written to it.
        utils.create_collection_if_not_exists(client, self.name, self.settings)
    except utils.QdrantCollectionIndexError:
        # Qdrant not available - only SQL storage will work
        self._client = self._client_wrapper = None
176
-
177
- def _get_embedding_function(self) -> Optional[Callable[[Any], List[float]]]:
178
- """Get or create embedding function from model configuration."""
179
- if self._embedding_function is None and self.embedding_model:
180
- from .....genai.models.embeddings.model import EmbeddingModel
181
-
182
- model = EmbeddingModel(model=self.embedding_model)
183
-
184
- def embedding_function(item: Any) -> List[float]:
185
- response = model.run(
186
- input=item,
187
- dimensions=self.embedding_dimensions,
188
- api_key=self.embedding_api_key,
189
- api_base=self.embedding_base_url,
190
- format=True,
191
- )
192
- if response.data and len(response.data) > 0:
193
- return response.data[0].embedding
194
- else:
195
- raise utils.QdrantCollectionIndexError(
196
- "Failed to generate embedding: empty response"
197
- )
198
-
199
- self._embedding_function = embedding_function
200
-
201
- return self._embedding_function
202
-
203
def _rerank_results(
    self,
    query: str,
    results: List[Tuple[DatabaseItem[DatabaseItemType], float]],
    top_n: Optional[int] = None,
) -> List[Tuple[DatabaseItem[DatabaseItemType], float]]:
    """
    Rerank search results using the configured rerank model.

    Reranking is best-effort: if anything fails, the original ``results``
    list is returned unchanged.

    Args:
        query: The original search query.
        results: List of (DatabaseItem, similarity_score) tuples.
        top_n: Number of top results to request from the rerank model;
            defaults to ``len(results)``.

    Returns:
        Reranked list of (DatabaseItem, rerank_score) tuples, or the
        original ``results`` when no rerank model is configured, when
        ``results`` is empty, or when reranking fails.
    """
    if not self.rerank_model or not results:
        return results

    try:
        from .....genai.models.reranking import run_reranking_model

        # The rerank model receives one text per item: dicts as JSON,
        # everything else through str().
        documents = [
            json.dumps(db_item.item)
            if isinstance(db_item.item, dict)
            else str(db_item.item)
            for db_item, _ in results
        ]

        rerank_response = run_reranking_model(
            model=self.rerank_model,
            query=query,
            documents=documents,
            top_n=top_n or len(results),
            api_key=self.rerank_api_key,
            api_base=self.rerank_base_url,
        )

        # Build the full reordered list before touching any item, so a bad
        # index cannot leave some items carrying rerank scores while the
        # caller gets the original list back.
        reranked_results = [
            (results[rerank_result.index][0], rerank_result.relevance_score)
            for rerank_result in rerank_response.results
        ]

        # Only now update the score on the DatabaseItem objects themselves.
        for db_item, rerank_score in reranked_results:
            db_item.score = rerank_score

        return reranked_results

    except Exception:
        # If reranking fails, return original results
        return results
261
-
262
- def _prepare_vector(self, item: Any) -> List[float]:
263
- """Prepare vector from item using embedding function or direct vector."""
264
- embedding_function = self._get_embedding_function()
265
- if embedding_function:
266
- vector = embedding_function(item)
267
- # Determine vector size from first embedding if not set
268
- if not self._vector_size_determined:
269
- self._determine_vector_size(len(vector))
270
- return vector
271
- elif isinstance(item, dict) and "vector" in item:
272
- vector = item["vector"]
273
- # Determine vector size from first vector if not set
274
- if not self._vector_size_determined:
275
- self._determine_vector_size(len(vector))
276
- return utils.prepare_vector(vector, self.vector_size)
277
- elif isinstance(item, (list, tuple)):
278
- # Determine vector size from first vector if not set
279
- if not self._vector_size_determined:
280
- self._determine_vector_size(len(item))
281
- return utils.prepare_vector(item, self.vector_size)
282
- else:
283
- raise utils.QdrantCollectionIndexError(
284
- "Item must contain 'vector' key, be a vector itself, "
285
- "or embedding_model must be provided"
286
- )
287
-
288
- def _determine_vector_size(self, size: int) -> None:
289
- """Determine and set vector size based on first embedding/vector."""
290
- if not self._vector_size_determined:
291
- self.vector_size = size
292
- self._vector_size_determined = True
293
-
294
- # Update settings with determined vector size
295
- if self.settings:
296
- self.settings.vector_size = size
297
-
298
- # Initialize Qdrant client now that we have vector size
299
- self._init_qdrant_client()
300
-
301
def _add_to_qdrant(
    self,
    item_id: str,
    vector: List[float],
    item: DatabaseItemType,
    filters: Optional[DatabaseItemFilters] = None,
) -> None:
    """Upsert one point into the Qdrant collection, best-effort.

    Does nothing when no client is available. Any exception raised while
    building or upserting the point is swallowed.

    Args:
        item_id: ID used for the Qdrant point.
        vector: Vector stored for the point.
        item: Item whose serialized form is stored in the payload.
        filters: Optional metadata copied into the payload as top-level keys.
    """
    client = self._client
    if not client:
        # Qdrant not available, skip vector indexing
        return

    try:
        try:
            from qdrant_client.models import PointStruct
        except ImportError:
            raise ImportError(
                "Using Qdrant requires the `qdrant-client` package. Please install with: pip install 'hammad-python[genai]'"
            )

        # Payload carries the serialized item plus a UTC creation timestamp.
        payload = {
            "item_data": json.dumps(utils.serialize(item)),
            "created_at": datetime.now(timezone.utc).isoformat(),
        }

        # Filters become top-level payload fields.
        if filters:
            payload.update(filters.items())

        client.upsert(
            collection_name=self.name,
            points=[PointStruct(id=item_id, vector=vector, payload=payload)],
        )
    except Exception:
        # Vector indexing failed, but data is still in SQL database
        pass
340
-
341
def add(
    self,
    item: DatabaseItemType,
    *,
    id: Optional[str] = None,
    filters: Optional[DatabaseItemFilters] = None,
    ttl: Optional[int] = None,
    vector: Optional[List[float]] = None,
) -> str:
    """
    Add an item to the index.

    The item is always written to the SQL database first; vector indexing
    in Qdrant is attempted afterwards.

    Args:
        item: The item to store.
        id: Optional ID, passed through to the database.
        filters: Optional filters/metadata.
        ttl: Optional TTL, passed through to the database.
        vector: Optional pre-computed vector. When omitted, a vector is
            derived from ``item`` via ``_prepare_vector``.

    Returns:
        The ID of the stored item, as returned by the database.
    """
    # Add to SQL database first
    item_id = self._database.add(
        item=item,
        id=id,
        filters=filters,
        ttl=ttl,
    )

    if vector is None:
        try:
            prepared_vector = self._prepare_vector(item)
        except utils.QdrantCollectionIndexError:
            # Vector preparation failed, but item is still in database
            return item_id
    else:
        # An explicit vector must also fix the vector size when it is not
        # known yet. Otherwise the Qdrant client is never created and the
        # supplied vector is silently dropped.
        if not self._vector_size_determined:
            self._determine_vector_size(len(vector))
        prepared_vector = utils.prepare_vector(vector, self.vector_size)

    # Add to Qdrant vector store
    self._add_to_qdrant(item_id, prepared_vector, item, filters)

    return item_id
385
-
386
def get(
    self,
    id: str,
    *,
    filters: Optional[DatabaseItemFilters] = None,
) -> Optional[DatabaseItem[DatabaseItemType]]:
    """
    Look up a single item in the SQL database.

    Args:
        id: The item ID.
        filters: Optional filters forwarded to the database lookup.

    Returns:
        Whatever the database returns for this ID.
    """
    record = self._database.get(id, filters=filters)
    return record
403
-
404
def _vector_search(
    self,
    query_vector: Union[List[float], Any],
    *,
    filters: Optional[DatabaseItemFilters] = None,
    limit: int = 10,
    score_threshold: Optional[float] = None,
    query_text: Optional[str] = None,
    enable_rerank: bool = True,
    return_scores: bool = False,
) -> Union[List[DatabaseItem[DatabaseItemType]], List[VectorSearchResult]]:
    """
    Run a similarity search in Qdrant and resolve the hits through the database.

    Args:
        query_vector: Query vector for the similarity search.
        filters: Optional filters, applied to both the Qdrant query and the
            database lookup of each hit.
        limit: Maximum number of results requested from Qdrant.
        score_threshold: Minimum similarity score threshold.
        query_text: Original query text; reranking only runs when it is set.
        enable_rerank: Whether to rerank when a rerank model is configured.
        return_scores: Whether to return scores with results.

    Returns:
        Database items in result order, or ``VectorSearchResult`` objects
        when ``return_scores`` is True. An empty list when no client is
        available or when the search raises.
    """
    if not self._client:
        # Qdrant not available, return empty results
        return []

    # Vector preparation happens outside the try: its errors propagate.
    prepared_vector = utils.prepare_vector(query_vector, self.vector_size)

    try:
        response = self._client.query_points(
            collection_name=self.name,
            query=prepared_vector,
            query_filter=utils.build_qdrant_filter(filters),
            limit=limit,
            score_threshold=score_threshold,
            with_payload=True,
            with_vectors=False,
        )

        # Hits with no matching database row are skipped.
        scored = []
        for point in response.points:
            record = self._database.get(str(point.id), filters=filters)
            if not record:
                continue
            # The score is also stored on the DatabaseItem itself.
            record.score = point.score
            scored.append((record, point.score))

        if enable_rerank and self.rerank_model and query_text:
            scored = self._rerank_results(
                query=query_text, results=scored, top_n=limit
            )

        if return_scores:
            return [
                VectorSearchResult(item=record, score=value)
                for record, value in scored
            ]

        # Items only, for callers that do not ask for scores.
        return [record for record, _ in scored]

    except Exception:
        # Vector search failed, return empty results
        return []
483
-
484
def query(
    self,
    query: Optional[str] = None,
    *,
    filters: Optional[DatabaseItemFilters] = None,
    limit: Optional[int] = None,
    vector: bool = False,
    rerank: bool = False,
    query_vector: Optional[List[float]] = None,
    return_scores: bool = False,
) -> Union[List[DatabaseItem[DatabaseItemType]], List[VectorSearchResult]]:
    """
    Query items from the collection.

    Modes, checked in this order:

    1. ``query_vector`` given: vector search with that vector.
    2. ``vector=True``: embed ``query`` and run a vector search.
    3. ``rerank=True`` with a ``query``: merge the most recent database
       rows with vector hits (when an embedding function exists), then
       rerank the merged list.
    4. Otherwise: most recent database rows.

    Args:
        query: Search query string.
        filters: Optional filters. They are forwarded to the vector search
            only; the plain database queries in modes 3 and 4 do not
            receive them.
        limit: Maximum number of results; a falsy value falls back to
            ``query_settings.limit``.
        vector: Whether to use vector search (requires embedding_model to be configured).
        rerank: Whether to use reranking (requires rerank_model to be configured).
        query_vector: Optional pre-computed query vector for similarity search.
        return_scores: Whether to return similarity scores with results
            (only honoured in modes 1 and 2).

    Returns:
        List of matching database items, or list of VectorSearchResult
        objects if return_scores is True.

    Raises:
        ValueError: If ``vector=True`` without a query string or embedding
            model, if embedding/searching fails in that mode (the original
            exception is chained as ``__cause__``), or if ``rerank=True``
            without a configured rerank model.
    """
    effective_limit = limit or self.query_settings.limit

    # If explicit vector is provided, use it directly
    if query_vector is not None:
        return self._vector_search(
            query_vector=query_vector,
            filters=filters,
            limit=effective_limit,
            score_threshold=self.query_settings.score_threshold,
            query_text=query,
            enable_rerank=rerank,
            return_scores=return_scores,
        )

    # If vector=True, use vector search with embedding model
    if vector:
        if not query:
            raise ValueError("Query string is required when vector=True")

        embedding_function = self._get_embedding_function()
        if not embedding_function:
            raise ValueError("Embedding model not configured for vector search")

        try:
            query_vector = embedding_function(query)
            return self._vector_search(
                query_vector=query_vector,
                filters=filters,
                limit=effective_limit,
                score_threshold=self.query_settings.score_threshold,
                query_text=query,
                enable_rerank=rerank,
                return_scores=return_scores,
            )
        except Exception as e:
            # Chain the cause so the underlying failure stays visible.
            raise ValueError(f"Failed to generate embedding for query: {e}") from e

    # If rerank=True but vector=False, perform both standard and vector search, then rerank
    if rerank and query:
        if not self.rerank_model:
            raise ValueError("Rerank model not configured")

        # NOTE: `filters` is not passed to this database query.
        db_results = self._database.query(
            limit=effective_limit,
            order_by="created_at",
            ascending=False,
        )

        vector_results = []
        embedding_function = self._get_embedding_function()
        if embedding_function:
            try:
                query_vector = embedding_function(query)
                vector_results = self._vector_search(
                    query_vector=query_vector,
                    filters=filters,
                    limit=effective_limit,
                    score_threshold=self.query_settings.score_threshold,
                    query_text=query,
                    enable_rerank=False,  # We'll rerank combined results
                    return_scores=False,  # We handle scores separately in rerank mode
                )
            except Exception:
                # Best-effort: fall back to database rows only.
                pass

        # Combine and deduplicate results, database rows first
        combined_results = []
        seen_ids = set()

        for result in db_results + vector_results:
            if result.id not in seen_ids:
                combined_results.append((result, 0.0))  # Score placeholder
                seen_ids.add(result.id)

        # Apply reranking to combined results
        if combined_results:
            reranked_results = self._rerank_results(
                query=query, results=combined_results, top_n=effective_limit
            )
            # Scores are already set on the DatabaseItem objects by _rerank_results
            return [item for item, _ in reranked_results]

        return [item for item, _ in combined_results]

    # Default: fall back to database query
    # NOTE: `filters` is not passed to this database query.
    return self._database.query(
        limit=effective_limit,
        order_by="created_at",
        ascending=False,
    )
601
-
602
def delete(self, id: str) -> bool:
    """
    Remove an item from the database and, best-effort, from Qdrant.

    Args:
        id: The item ID.

    Returns:
        The result of the database delete.
    """
    removed = self._database.delete(id)

    # Nothing to do in Qdrant if the row was absent or no client exists.
    if not (removed and self._client):
        return removed

    try:
        self._client.delete(collection_name=self.name, points_selector=[id])
    except Exception:
        # Vector deletion failed, but item was removed from database
        pass

    return removed
624
-
625
def count(
    self,
    filters: Optional[DatabaseItemFilters] = None,
) -> int:
    """
    Count items matching the filters.

    Qdrant is asked first when a client is available. If there is no
    client, or the Qdrant call fails, the SQL database is counted instead,
    with the same filters applied.

    Args:
        filters: Optional filters to apply.

    Returns:
        Number of matching items.
    """
    if self._client:
        try:
            # Use Qdrant count
            qdrant_filter = utils.build_qdrant_filter(filters)
            info = self._client.count(
                collection_name=self.name,
                count_filter=qdrant_filter,
                exact=self.query_settings.exact,
            )
            return info.count
        except Exception:
            # Fall through to the database count below, so the fallback
            # honours `filters` exactly like the no-client path.
            pass

    # Use database count
    query_filter = None
    if filters:
        from ....sql.types import QueryFilter, QueryCondition

        conditions = [
            QueryCondition(
                field="filters", operator="contains", value=json.dumps(filters)
            )
        ]
        query_filter = QueryFilter(conditions=conditions)

    return self._database.count(query_filter)
665
-
666
def clear(self) -> int:
    """
    Clear all items from the index.

    The SQL database is cleared first. When a Qdrant client is available,
    the collection is then dropped and created again so no stale vectors
    remain; that step is best-effort and its failures are ignored.

    Returns:
        Number of items deleted, as reported by the database.
    """
    count = self._database.clear()

    if self._client:
        try:
            # Drop the collection first: creating "if not exists" on its
            # own leaves an existing collection and its vectors untouched.
            self._client.delete_collection(collection_name=self.name)
            utils.create_collection_if_not_exists(
                self._client, self.name, self.settings
            )
        except Exception:
            # Vector store reset failed, but the database was cleared
            pass

    return count
685
-
686
def get_vector(self, id: str) -> Optional[List[float]]:
    """
    Fetch the stored vector for one item from Qdrant.

    Args:
        id: The item ID.

    Returns:
        The vector, or None when there is no client, no matching point,
        an empty named-vector mapping, or the lookup raises.
    """
    client = self._client
    if not client:
        return None

    try:
        found = client.retrieve(
            collection_name=self.name,
            ids=[id],
            with_payload=False,
            with_vectors=True,
        )
        if not found:
            return None

        stored = found[0].vector
        if not isinstance(stored, dict):
            return stored

        # Named vectors: hand back the first one, or None if there are none.
        return next(iter(stored.values()), None)

    except Exception:
        return None
718
-
719
- def __repr__(self) -> str:
720
- """String representation of the index."""
721
- location = str(self.path) if self.path else "memory"
722
- vector_available = "yes" if self._client else "no"
723
- return f"<QdrantCollectionIndex name='{self.name}' location='{location}' vector_size={self.vector_size} qdrant_available={vector_available}>"