hammad-python 0.0.11__py3-none-any.whl → 0.0.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80)
  1. hammad/__init__.py +169 -56
  2. hammad/_core/__init__.py +1 -0
  3. hammad/_core/_utils/__init__.py +4 -0
  4. hammad/_core/_utils/_import_utils.py +182 -0
  5. hammad/ai/__init__.py +59 -0
  6. hammad/ai/_utils.py +142 -0
  7. hammad/ai/completions/__init__.py +44 -0
  8. hammad/ai/completions/client.py +729 -0
  9. hammad/ai/completions/create.py +686 -0
  10. hammad/ai/completions/types.py +711 -0
  11. hammad/ai/completions/utils.py +374 -0
  12. hammad/ai/embeddings/__init__.py +35 -0
  13. hammad/ai/embeddings/client/__init__.py +1 -0
  14. hammad/ai/embeddings/client/base_embeddings_client.py +26 -0
  15. hammad/ai/embeddings/client/fastembed_text_embeddings_client.py +200 -0
  16. hammad/ai/embeddings/client/litellm_embeddings_client.py +288 -0
  17. hammad/ai/embeddings/create.py +159 -0
  18. hammad/ai/embeddings/types.py +69 -0
  19. hammad/base/__init__.py +35 -0
  20. hammad/{based → base}/fields.py +23 -23
  21. hammad/{based → base}/model.py +124 -14
  22. hammad/base/utils.py +280 -0
  23. hammad/cache/__init__.py +30 -12
  24. hammad/cache/base_cache.py +181 -0
  25. hammad/cache/cache.py +169 -0
  26. hammad/cache/decorators.py +261 -0
  27. hammad/cache/file_cache.py +80 -0
  28. hammad/cache/ttl_cache.py +74 -0
  29. hammad/cli/__init__.py +10 -2
  30. hammad/cli/{styles/animations.py → animations.py} +79 -23
  31. hammad/cli/{plugins/__init__.py → plugins.py} +85 -90
  32. hammad/cli/styles/__init__.py +50 -0
  33. hammad/cli/styles/settings.py +4 -0
  34. hammad/configuration/__init__.py +35 -0
  35. hammad/{data/types/files → configuration}/configuration.py +96 -7
  36. hammad/data/__init__.py +14 -26
  37. hammad/data/collections/__init__.py +4 -2
  38. hammad/data/collections/collection.py +300 -75
  39. hammad/data/collections/vector_collection.py +118 -12
  40. hammad/data/databases/__init__.py +2 -2
  41. hammad/data/databases/database.py +383 -32
  42. hammad/json/__init__.py +2 -2
  43. hammad/logging/__init__.py +13 -5
  44. hammad/logging/decorators.py +404 -2
  45. hammad/logging/logger.py +442 -22
  46. hammad/multimodal/__init__.py +24 -0
  47. hammad/{data/types/files → multimodal}/audio.py +21 -6
  48. hammad/{data/types/files → multimodal}/image.py +5 -5
  49. hammad/multithreading/__init__.py +304 -0
  50. hammad/pydantic/__init__.py +2 -2
  51. hammad/pydantic/converters.py +1 -1
  52. hammad/pydantic/models/__init__.py +2 -2
  53. hammad/text/__init__.py +59 -14
  54. hammad/text/converters.py +723 -0
  55. hammad/text/{utils/markdown/formatting.py → markdown.py} +25 -23
  56. hammad/text/text.py +12 -14
  57. hammad/types/__init__.py +11 -0
  58. hammad/{data/types/files → types}/file.py +18 -18
  59. hammad/typing/__init__.py +138 -84
  60. hammad/web/__init__.py +3 -2
  61. hammad/web/models.py +245 -0
  62. hammad/web/search/client.py +75 -23
  63. hammad/web/utils.py +14 -5
  64. hammad/yaml/__init__.py +2 -2
  65. hammad/yaml/converters.py +1 -1
  66. {hammad_python-0.0.11.dist-info → hammad_python-0.0.13.dist-info}/METADATA +4 -1
  67. hammad_python-0.0.13.dist-info/RECORD +85 -0
  68. hammad/based/__init__.py +0 -52
  69. hammad/based/utils.py +0 -455
  70. hammad/cache/_cache.py +0 -746
  71. hammad/data/types/__init__.py +0 -33
  72. hammad/data/types/files/__init__.py +0 -1
  73. hammad/data/types/files/document.py +0 -195
  74. hammad/text/utils/__init__.py +0 -1
  75. hammad/text/utils/converters.py +0 -229
  76. hammad/text/utils/markdown/__init__.py +0 -1
  77. hammad/text/utils/markdown/converters.py +0 -506
  78. hammad_python-0.0.11.dist-info/RECORD +0 -65
  79. {hammad_python-0.0.11.dist-info → hammad_python-0.0.13.dist-info}/WHEEL +0 -0
  80. {hammad_python-0.0.11.dist-info → hammad_python-0.0.13.dist-info}/licenses/LICENSE +0 -0
@@ -13,6 +13,7 @@ from typing import (
13
13
  from typing_extensions import TypedDict
14
14
 
15
15
  if TYPE_CHECKING:
16
+ from .base_collection import BaseCollection
16
17
  from .searchable_collection import SearchableCollection
17
18
  from .vector_collection import VectorCollection
18
19
 
@@ -58,6 +59,231 @@ class VectorCollectionSettings(TypedDict, total=False):
58
59
  timeout: Optional[float]
59
60
 
60
61
 
62
+ class Collection:
63
+ """
64
+ A unified collection factory that creates the appropriate collection type
65
+ based on the provided parameters.
66
+
67
+ This class acts as a factory and doesn't contain its own logic - it simply
68
+ returns instances of SearchableCollection or VectorCollection based on the
69
+ type parameter.
70
+ """
71
+
72
+ @overload
73
+ def __new__(
74
+ cls,
75
+ type: Literal["searchable"],
76
+ name: str,
77
+ *,
78
+ schema: Optional[Any] = None,
79
+ default_ttl: Optional[int] = None,
80
+ storage_backend: Optional[Any] = None,
81
+ heap_size: Optional[int] = None,
82
+ num_threads: Optional[int] = None,
83
+ index_path: Optional[str] = None,
84
+ schema_builder: Optional[Any] = None,
85
+ writer_memory: Optional[int] = None,
86
+ reload_policy: Optional[str] = None,
87
+ ) -> "SearchableCollection": ...
88
+
89
+ @overload
90
+ def __new__(
91
+ cls,
92
+ type: Literal["vector"],
93
+ name: str,
94
+ vector_size: int,
95
+ *,
96
+ schema: Optional[Any] = None,
97
+ default_ttl: Optional[int] = None,
98
+ storage_backend: Optional[Any] = None,
99
+ distance_metric: Optional[Any] = None,
100
+ embedding_function: Optional[Callable[[Any], List[float]]] = None,
101
+ model: Optional[str] = None,
102
+ # Common embedding parameters
103
+ format: bool = False,
104
+ # LiteLLM parameters
105
+ dimensions: Optional[int] = None,
106
+ encoding_format: Optional[str] = None,
107
+ timeout: Optional[int] = None,
108
+ api_base: Optional[str] = None,
109
+ api_version: Optional[str] = None,
110
+ api_key: Optional[str] = None,
111
+ api_type: Optional[str] = None,
112
+ caching: bool = False,
113
+ user: Optional[str] = None,
114
+ # FastEmbed parameters
115
+ parallel: Optional[int] = None,
116
+ batch_size: Optional[int] = None,
117
+ # Qdrant parameters
118
+ path: Optional[str] = None,
119
+ host: Optional[str] = None,
120
+ port: Optional[int] = None,
121
+ grpc_port: Optional[int] = None,
122
+ prefer_grpc: Optional[bool] = None,
123
+ qdrant_timeout: Optional[float] = None,
124
+ ) -> "VectorCollection": ...
125
+
126
+ def __new__(
127
+ cls,
128
+ type: Literal["searchable", "vector"],
129
+ name: str,
130
+ vector_size: Optional[int] = None,
131
+ *,
132
+ schema: Optional[Any] = None,
133
+ default_ttl: Optional[int] = None,
134
+ storage_backend: Optional[Any] = None,
135
+ distance_metric: Optional[Any] = None,
136
+ embedding_function: Optional[Callable[[Any], List[float]]] = None,
137
+ model: Optional[str] = None,
138
+ # Common embedding parameters
139
+ format: bool = False,
140
+ # LiteLLM parameters
141
+ dimensions: Optional[int] = None,
142
+ encoding_format: Optional[str] = None,
143
+ timeout: Optional[int] = None,
144
+ api_base: Optional[str] = None,
145
+ api_version: Optional[str] = None,
146
+ api_key: Optional[str] = None,
147
+ api_type: Optional[str] = None,
148
+ caching: bool = False,
149
+ user: Optional[str] = None,
150
+ # FastEmbed parameters
151
+ parallel: Optional[int] = None,
152
+ batch_size: Optional[int] = None,
153
+ # Tantivy parameters (searchable collections only)
154
+ heap_size: Optional[int] = None,
155
+ num_threads: Optional[int] = None,
156
+ index_path: Optional[str] = None,
157
+ schema_builder: Optional[Any] = None,
158
+ writer_memory: Optional[int] = None,
159
+ reload_policy: Optional[str] = None,
160
+ # Qdrant parameters (vector collections only)
161
+ path: Optional[str] = None,
162
+ host: Optional[str] = None,
163
+ port: Optional[int] = None,
164
+ grpc_port: Optional[int] = None,
165
+ prefer_grpc: Optional[bool] = None,
166
+ qdrant_timeout: Optional[float] = None,
167
+ ) -> "BaseCollection":
168
+ """
169
+ Create a collection of the specified type.
170
+
171
+ Args:
172
+ type: Type of collection to create ("searchable" or "vector")
173
+ name: Name of the collection
174
+ vector_size: Size of vectors (required for vector collections)
175
+ schema: Optional schema for type validation
176
+ default_ttl: Default TTL for items in seconds
177
+ storage_backend: Optional storage backend
178
+ distance_metric: Distance metric for similarity search (vector collections only)
179
+ embedding_function: Function to convert objects to vectors (vector collections only)
180
+
181
+ Tantivy parameters (searchable collections only):
182
+ heap_size: Memory allocation for tantivy heap
183
+ num_threads: Number of threads for tantivy operations
184
+ index_path: Path to store tantivy index files
185
+ schema_builder: Custom schema builder for tantivy
186
+ writer_memory: Memory allocation for tantivy writer
187
+ reload_policy: Policy for reloading tantivy index
188
+
189
+ Qdrant parameters (vector collections only):
190
+ path: Path for local Qdrant storage
191
+ host: Qdrant server host
192
+ port: Qdrant server port
193
+ grpc_port: Qdrant gRPC port
194
+ prefer_grpc: Whether to prefer gRPC over HTTP
195
+ api_key: API key for Qdrant authentication
196
+ timeout: Request timeout for Qdrant operations
197
+
198
+ Returns:
199
+ A SearchableCollection or VectorCollection instance
200
+ """
201
+ if type == "searchable":
202
+ from .searchable_collection import SearchableCollection
203
+
204
+ # Build tantivy config from individual parameters
205
+ tantivy_config = {}
206
+ if heap_size is not None:
207
+ tantivy_config["heap_size"] = heap_size
208
+ if num_threads is not None:
209
+ tantivy_config["num_threads"] = num_threads
210
+ if index_path is not None:
211
+ tantivy_config["index_path"] = index_path
212
+ if schema_builder is not None:
213
+ tantivy_config["schema_builder"] = schema_builder
214
+ if writer_memory is not None:
215
+ tantivy_config["writer_memory"] = writer_memory
216
+ if reload_policy is not None:
217
+ tantivy_config["reload_policy"] = reload_policy
218
+
219
+ return SearchableCollection(
220
+ name=name,
221
+ schema=schema,
222
+ default_ttl=default_ttl,
223
+ storage_backend=storage_backend,
224
+ tantivy_config=tantivy_config if tantivy_config else None,
225
+ )
226
+ elif type == "vector":
227
+ if vector_size is None:
228
+ raise ValueError("vector_size is required for vector collections")
229
+
230
+ try:
231
+ from .vector_collection import VectorCollection, Distance
232
+ except ImportError:
233
+ raise ImportError(
234
+ "qdrant-client is required for vector collections. "
235
+ "Please install it with 'pip install qdrant-client'."
236
+ )
237
+
238
+ # Set default distance metric if not provided and Distance is available
239
+ if distance_metric is None and Distance is not None:
240
+ distance_metric = Distance.DOT
241
+
242
+ # Build qdrant config from individual parameters
243
+ qdrant_config = {}
244
+ if path is not None:
245
+ qdrant_config["path"] = path
246
+ if host is not None:
247
+ qdrant_config["host"] = host
248
+ if port is not None:
249
+ qdrant_config["port"] = port
250
+ if grpc_port is not None:
251
+ qdrant_config["grpc_port"] = grpc_port
252
+ if prefer_grpc is not None:
253
+ qdrant_config["prefer_grpc"] = prefer_grpc
254
+ if qdrant_timeout is not None:
255
+ qdrant_config["timeout"] = qdrant_timeout
256
+
257
+ return VectorCollection(
258
+ name=name,
259
+ vector_size=vector_size,
260
+ schema=schema,
261
+ default_ttl=default_ttl,
262
+ storage_backend=storage_backend,
263
+ distance_metric=distance_metric,
264
+ qdrant_config=qdrant_config if qdrant_config else None,
265
+ embedding_function=embedding_function,
266
+ model=model,
267
+ # Common embedding parameters
268
+ format=format,
269
+ # LiteLLM parameters
270
+ dimensions=dimensions,
271
+ encoding_format=encoding_format,
272
+ timeout=timeout,
273
+ api_base=api_base,
274
+ api_version=api_version,
275
+ api_key=api_key,
276
+ api_type=api_type,
277
+ caching=caching,
278
+ user=user,
279
+ # FastEmbed parameters
280
+ parallel=parallel,
281
+ batch_size=batch_size,
282
+ )
283
+ else:
284
+ raise ValueError(f"Unsupported collection type: {type}")
285
+
286
+
61
287
  @overload
62
288
  def create_collection(
63
289
  type: Literal["searchable"],
@@ -86,13 +312,29 @@ def create_collection(
86
312
  storage_backend: Optional[Any] = None,
87
313
  distance_metric: Optional[Any] = None,
88
314
  embedding_function: Optional[Callable[[Any], List[float]]] = None,
315
+ model: Optional[str] = None,
316
+ # Common embedding parameters
317
+ format: bool = False,
318
+ # LiteLLM parameters
319
+ dimensions: Optional[int] = None,
320
+ encoding_format: Optional[str] = None,
321
+ timeout: Optional[int] = None,
322
+ api_base: Optional[str] = None,
323
+ api_version: Optional[str] = None,
324
+ api_key: Optional[str] = None,
325
+ api_type: Optional[str] = None,
326
+ caching: bool = False,
327
+ user: Optional[str] = None,
328
+ # FastEmbed parameters
329
+ parallel: Optional[int] = None,
330
+ batch_size: Optional[int] = None,
331
+ # Qdrant parameters
89
332
  path: Optional[str] = None,
90
333
  host: Optional[str] = None,
91
334
  port: Optional[int] = None,
92
335
  grpc_port: Optional[int] = None,
93
336
  prefer_grpc: Optional[bool] = None,
94
- api_key: Optional[str] = None,
95
- timeout: Optional[float] = None,
337
+ qdrant_timeout: Optional[float] = None,
96
338
  ) -> "VectorCollection": ...
97
339
 
98
340
 
@@ -106,6 +348,22 @@ def create_collection(
106
348
  storage_backend: Optional[Any] = None,
107
349
  distance_metric: Optional[Any] = None,
108
350
  embedding_function: Optional[Callable[[Any], List[float]]] = None,
351
+ model: Optional[str] = None,
352
+ # Common embedding parameters
353
+ format: bool = False,
354
+ # LiteLLM parameters
355
+ dimensions: Optional[int] = None,
356
+ encoding_format: Optional[str] = None,
357
+ timeout: Optional[int] = None,
358
+ api_base: Optional[str] = None,
359
+ api_version: Optional[str] = None,
360
+ api_key: Optional[str] = None,
361
+ api_type: Optional[str] = None,
362
+ caching: bool = False,
363
+ user: Optional[str] = None,
364
+ # FastEmbed parameters
365
+ parallel: Optional[int] = None,
366
+ batch_size: Optional[int] = None,
109
367
  # Tantivy parameters (searchable collections only)
110
368
  heap_size: Optional[int] = None,
111
369
  num_threads: Optional[int] = None,
@@ -119,12 +377,14 @@ def create_collection(
119
377
  port: Optional[int] = None,
120
378
  grpc_port: Optional[int] = None,
121
379
  prefer_grpc: Optional[bool] = None,
122
- api_key: Optional[str] = None,
123
- timeout: Optional[float] = None,
124
- ) -> Union["SearchableCollection", "VectorCollection"]:
380
+ qdrant_timeout: Optional[float] = None,
381
+ ) -> "BaseCollection":
125
382
  """
126
383
  Create a collection of the specified type.
127
384
 
385
+ This function provides a factory pattern for creating collections.
386
+ Use the Collection class for a more object-oriented approach.
387
+
128
388
  Args:
129
389
  type: Type of collection to create ("searchable" or "vector")
130
390
  name: Name of the collection
@@ -155,73 +415,38 @@ def create_collection(
155
415
  Returns:
156
416
  A SearchableCollection or VectorCollection instance
157
417
  """
158
- if type == "searchable":
159
- from .searchable_collection import SearchableCollection
160
-
161
- # Build tantivy config from individual parameters
162
- tantivy_config = {}
163
- if heap_size is not None:
164
- tantivy_config["heap_size"] = heap_size
165
- if num_threads is not None:
166
- tantivy_config["num_threads"] = num_threads
167
- if index_path is not None:
168
- tantivy_config["index_path"] = index_path
169
- if schema_builder is not None:
170
- tantivy_config["schema_builder"] = schema_builder
171
- if writer_memory is not None:
172
- tantivy_config["writer_memory"] = writer_memory
173
- if reload_policy is not None:
174
- tantivy_config["reload_policy"] = reload_policy
175
-
176
- return SearchableCollection(
177
- name=name,
178
- schema=schema,
179
- default_ttl=default_ttl,
180
- storage_backend=storage_backend,
181
- tantivy_config=tantivy_config if tantivy_config else None,
182
- )
183
- elif type == "vector":
184
- if vector_size is None:
185
- raise ValueError("vector_size is required for vector collections")
186
-
187
- try:
188
- from .vector_collection import VectorCollection, Distance
189
- except ImportError:
190
- raise ImportError(
191
- "qdrant-client is required for vector collections. "
192
- "Please install it with 'pip install qdrant-client'."
193
- )
194
-
195
- # Set default distance metric if not provided and Distance is available
196
- if distance_metric is None and Distance is not None:
197
- distance_metric = Distance.DOT
198
-
199
- # Build qdrant config from individual parameters
200
- qdrant_config = {}
201
- if path is not None:
202
- qdrant_config["path"] = path
203
- if host is not None:
204
- qdrant_config["host"] = host
205
- if port is not None:
206
- qdrant_config["port"] = port
207
- if grpc_port is not None:
208
- qdrant_config["grpc_port"] = grpc_port
209
- if prefer_grpc is not None:
210
- qdrant_config["prefer_grpc"] = prefer_grpc
211
- if api_key is not None:
212
- qdrant_config["api_key"] = api_key
213
- if timeout is not None:
214
- qdrant_config["timeout"] = timeout
215
-
216
- return VectorCollection(
217
- name=name,
218
- vector_size=vector_size,
219
- schema=schema,
220
- default_ttl=default_ttl,
221
- storage_backend=storage_backend,
222
- distance_metric=distance_metric,
223
- qdrant_config=qdrant_config if qdrant_config else None,
224
- embedding_function=embedding_function,
225
- )
226
- else:
227
- raise ValueError(f"Unsupported collection type: {type}")
418
+ return Collection(
419
+ type=type,
420
+ name=name,
421
+ vector_size=vector_size,
422
+ schema=schema,
423
+ default_ttl=default_ttl,
424
+ storage_backend=storage_backend,
425
+ distance_metric=distance_metric,
426
+ embedding_function=embedding_function,
427
+ model=model,
428
+ format=format,
429
+ dimensions=dimensions,
430
+ encoding_format=encoding_format,
431
+ timeout=timeout,
432
+ api_base=api_base,
433
+ api_version=api_version,
434
+ api_key=api_key,
435
+ api_type=api_type,
436
+ caching=caching,
437
+ user=user,
438
+ parallel=parallel,
439
+ batch_size=batch_size,
440
+ heap_size=heap_size,
441
+ num_threads=num_threads,
442
+ index_path=index_path,
443
+ schema_builder=schema_builder,
444
+ writer_memory=writer_memory,
445
+ reload_policy=reload_policy,
446
+ path=path,
447
+ host=host,
448
+ port=port,
449
+ grpc_port=grpc_port,
450
+ prefer_grpc=prefer_grpc,
451
+ qdrant_timeout=qdrant_timeout,
452
+ )
@@ -25,6 +25,16 @@ except ImportError as e:
25
25
  ) from e
26
26
 
27
27
  from .base_collection import BaseCollection, Object, Filters, Schema
28
+ from ...ai.embeddings.create import (
29
+ create_embeddings,
30
+ async_create_embeddings,
31
+ )
32
+ from ...ai.embeddings.client.fastembed_text_embeddings_client import (
33
+ FastEmbedTextEmbeddingModel,
34
+ )
35
+ from ...ai.embeddings.client.litellm_embeddings_client import (
36
+ LiteLlmEmbeddingModel,
37
+ )
28
38
 
29
39
  __all__ = ("VectorCollection",)
30
40
 
@@ -50,6 +60,22 @@ class VectorCollection(BaseCollection, Generic[Object]):
50
60
  distance_metric: Distance = Distance.DOT,
51
61
  qdrant_config: Optional[Dict[str, Any]] = None,
52
62
  embedding_function: Optional[Callable[[Any], List[float]]] = None,
63
+ model: Optional[str] = None,
64
+ # Common embedding parameters
65
+ format: bool = False,
66
+ # LiteLLM parameters
67
+ dimensions: Optional[int] = None,
68
+ encoding_format: Optional[str] = None,
69
+ timeout: Optional[int] = None,
70
+ api_base: Optional[str] = None,
71
+ api_version: Optional[str] = None,
72
+ api_key: Optional[str] = None,
73
+ api_type: Optional[str] = None,
74
+ caching: bool = False,
75
+ user: Optional[str] = None,
76
+ # FastEmbed parameters
77
+ parallel: Optional[int] = None,
78
+ batch_size: Optional[int] = None,
53
79
  ):
54
80
  """
55
81
  Initialize a vector collection.
@@ -71,6 +97,23 @@ class VectorCollection(BaseCollection, Generic[Object]):
71
97
  "api_key": "your-api-key"
72
98
  }
73
99
  embedding_function: Optional function to convert objects to vectors
100
+ model: Optional model name (e.g., 'fastembed/BAAI/bge-small-en-v1.5', 'openai/text-embedding-3-small')
101
+ format: Whether to format each non-string input as a markdown string
102
+
103
+ # LiteLLM-specific parameters:
104
+ dimensions: The dimensions of the embedding
105
+ encoding_format: The encoding format of the embedding (e.g. "float", "base64")
106
+ timeout: The timeout for the embedding request
107
+ api_base: The base URL for the embedding API
108
+ api_version: The version of the embedding API
109
+ api_key: The API key for the embedding API
110
+ api_type: The type of the embedding API
111
+ caching: Whether to cache the embedding
112
+ user: The user for the embedding
113
+
114
+ # FastEmbed-specific parameters:
115
+ parallel: The number of parallel processes to use for the embedding
116
+ batch_size: The batch size to use for the embedding
74
117
  """
75
118
  self.name = name
76
119
  self.vector_size = vector_size
@@ -79,6 +122,29 @@ class VectorCollection(BaseCollection, Generic[Object]):
79
122
  self.distance_metric = distance_metric
80
123
  self._storage_backend = storage_backend
81
124
  self._embedding_function = embedding_function
125
+ self._model = model
126
+
127
+ # Store embedding parameters
128
+ self._embedding_params = {
129
+ "format": format,
130
+ # LiteLLM parameters
131
+ "dimensions": dimensions,
132
+ "encoding_format": encoding_format,
133
+ "timeout": timeout,
134
+ "api_base": api_base,
135
+ "api_version": api_version,
136
+ "api_key": api_key,
137
+ "api_type": api_type,
138
+ "caching": caching,
139
+ "user": user,
140
+ # FastEmbed parameters
141
+ "parallel": parallel,
142
+ "batch_size": batch_size,
143
+ }
144
+
145
+ # If model is provided, create embedding function
146
+ if model:
147
+ self._embedding_function = self._create_embedding_function(model)
82
148
 
83
149
  # Store qdrant configuration
84
150
  self._qdrant_config = qdrant_config or {}
@@ -92,6 +158,28 @@ class VectorCollection(BaseCollection, Generic[Object]):
92
158
  # Initialize Qdrant client
93
159
  self._init_qdrant_client()
94
160
 
161
+ def _create_embedding_function(
162
+ self,
163
+ model_name: str,
164
+ ) -> Callable[[Any], List[float]]:
165
+ """Create an embedding function from a model name."""
166
+
167
+ def embedding_function(text: Any) -> List[float]:
168
+ if not isinstance(text, str):
169
+ text = str(text)
170
+
171
+ # Filter out None values from embedding parameters
172
+ embedding_kwargs = {
173
+ k: v for k, v in self._embedding_params.items() if v is not None
174
+ }
175
+ embedding_kwargs["model"] = model_name
176
+ embedding_kwargs["input"] = text
177
+
178
+ response = create_embeddings(**embedding_kwargs)
179
+ return response.data[0].embedding
180
+
181
+ return embedding_function
182
+
95
183
  def _init_qdrant_client(self):
96
184
  """Initialize the Qdrant client and collection."""
97
185
  config = self._qdrant_config
@@ -257,18 +345,28 @@ class VectorCollection(BaseCollection, Generic[Object]):
257
345
  def add(
258
346
  self,
259
347
  entry: Object,
260
- *,
261
348
  id: Optional[str] = None,
349
+ *,
262
350
  filters: Optional[Filters] = None,
263
351
  ttl: Optional[int] = None,
264
- ) -> None:
265
- """Add an item to the collection."""
352
+ ) -> str:
353
+ """Add an item to the collection.
354
+
355
+ Args:
356
+ entry: The object/data to store
357
+ id: Optional ID for the item (will generate UUID if not provided)
358
+ filters: Optional metadata filters
359
+ ttl: Time-to-live in seconds
360
+
361
+ Returns:
362
+ The ID of the added item
363
+ """
266
364
  if self._storage_backend is not None:
267
365
  # Delegate to storage backend
268
366
  self._storage_backend.add(
269
367
  entry, id=id, collection=self.name, filters=filters, ttl=ttl
270
368
  )
271
- return
369
+ return id or str(uuid.uuid4())
272
370
 
273
371
  # Independent operation
274
372
  item_id = id or str(uuid.uuid4())
@@ -313,24 +411,32 @@ class VectorCollection(BaseCollection, Generic[Object]):
313
411
 
314
412
  self._client.upsert(collection_name=self.name, points=[point])
315
413
 
414
+ return item_id
415
+
316
416
  def query(
317
417
  self,
418
+ query: Optional[str] = None,
318
419
  *,
319
420
  filters: Optional[Filters] = None,
320
- search: Optional[str] = None,
321
421
  limit: Optional[int] = None,
322
422
  ) -> List[Object]:
323
- """Query items from the collection."""
423
+ """Query items from the collection.
424
+
425
+ Args:
426
+ query: Search query string. If provided, performs semantic similarity search.
427
+ filters: Optional filters to apply
428
+ limit: Maximum number of results to return
429
+ """
324
430
  if self._storage_backend is not None:
325
431
  return self._storage_backend.query(
326
432
  collection=self.name,
327
433
  filters=filters,
328
- search=search,
434
+ search=query,
329
435
  limit=limit,
330
436
  )
331
437
 
332
438
  # For basic query without vector search, just return all items with filters
333
- if search is None:
439
+ if query is None:
334
440
  return self._query_all(filters=filters, limit=limit)
335
441
 
336
442
  # If search is provided but no embedding function, treat as error
@@ -341,7 +447,7 @@ class VectorCollection(BaseCollection, Generic[Object]):
341
447
  )
342
448
 
343
449
  # Convert search to vector and perform similarity search
344
- query_vector = self._embedding_function(search)
450
+ query_vector = self._embedding_function(query)
345
451
  return self.vector_search(
346
452
  query_vector=query_vector, filters=filters, limit=limit
347
453
  )
@@ -386,7 +492,7 @@ class VectorCollection(BaseCollection, Generic[Object]):
386
492
  query_vector: Union[List[float], np.ndarray],
387
493
  *,
388
494
  filters: Optional[Filters] = None,
389
- limit: Optional[int] = None,
495
+ limit: int = 10,
390
496
  score_threshold: Optional[float] = None,
391
497
  ) -> List[Object]:
392
498
  """
@@ -395,7 +501,7 @@ class VectorCollection(BaseCollection, Generic[Object]):
395
501
  Args:
396
502
  query_vector: Query vector for similarity search
397
503
  filters: Optional filters to apply
398
- limit: Maximum number of results to return
504
+ limit: Maximum number of results to return (default: 10)
399
505
  score_threshold: Minimum similarity score threshold
400
506
 
401
507
  Returns:
@@ -414,7 +520,7 @@ class VectorCollection(BaseCollection, Generic[Object]):
414
520
  collection_name=self.name,
415
521
  query=query_vector,
416
522
  query_filter=self._build_qdrant_filter(filters),
417
- limit=limit or 10,
523
+ limit=limit,
418
524
  score_threshold=score_threshold,
419
525
  with_payload=True,
420
526
  with_vectors=False,
@@ -1,7 +1,7 @@
1
1
  """hammad.data.databases"""
2
2
 
3
3
  from typing import TYPE_CHECKING
4
- from ...based.utils import auto_create_lazy_loader
4
+ from ..._core._utils._import_utils import _auto_create_getattr_loader
5
5
 
6
6
  if TYPE_CHECKING:
7
7
  from .database import Database, create_database
@@ -13,7 +13,7 @@ __all__ = (
13
13
  )
14
14
 
15
15
 
16
- __getattr__ = auto_create_lazy_loader(__all__)
16
+ __getattr__ = _auto_create_getattr_loader(__all__)
17
17
 
18
18
 
19
19
  def __dir__() -> list[str]: