hammad-python 0.0.11__py3-none-any.whl → 0.0.13__py3-none-any.whl
This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- hammad/__init__.py +169 -56
- hammad/_core/__init__.py +1 -0
- hammad/_core/_utils/__init__.py +4 -0
- hammad/_core/_utils/_import_utils.py +182 -0
- hammad/ai/__init__.py +59 -0
- hammad/ai/_utils.py +142 -0
- hammad/ai/completions/__init__.py +44 -0
- hammad/ai/completions/client.py +729 -0
- hammad/ai/completions/create.py +686 -0
- hammad/ai/completions/types.py +711 -0
- hammad/ai/completions/utils.py +374 -0
- hammad/ai/embeddings/__init__.py +35 -0
- hammad/ai/embeddings/client/__init__.py +1 -0
- hammad/ai/embeddings/client/base_embeddings_client.py +26 -0
- hammad/ai/embeddings/client/fastembed_text_embeddings_client.py +200 -0
- hammad/ai/embeddings/client/litellm_embeddings_client.py +288 -0
- hammad/ai/embeddings/create.py +159 -0
- hammad/ai/embeddings/types.py +69 -0
- hammad/base/__init__.py +35 -0
- hammad/{based → base}/fields.py +23 -23
- hammad/{based → base}/model.py +124 -14
- hammad/base/utils.py +280 -0
- hammad/cache/__init__.py +30 -12
- hammad/cache/base_cache.py +181 -0
- hammad/cache/cache.py +169 -0
- hammad/cache/decorators.py +261 -0
- hammad/cache/file_cache.py +80 -0
- hammad/cache/ttl_cache.py +74 -0
- hammad/cli/__init__.py +10 -2
- hammad/cli/{styles/animations.py → animations.py} +79 -23
- hammad/cli/{plugins/__init__.py → plugins.py} +85 -90
- hammad/cli/styles/__init__.py +50 -0
- hammad/cli/styles/settings.py +4 -0
- hammad/configuration/__init__.py +35 -0
- hammad/{data/types/files → configuration}/configuration.py +96 -7
- hammad/data/__init__.py +14 -26
- hammad/data/collections/__init__.py +4 -2
- hammad/data/collections/collection.py +300 -75
- hammad/data/collections/vector_collection.py +118 -12
- hammad/data/databases/__init__.py +2 -2
- hammad/data/databases/database.py +383 -32
- hammad/json/__init__.py +2 -2
- hammad/logging/__init__.py +13 -5
- hammad/logging/decorators.py +404 -2
- hammad/logging/logger.py +442 -22
- hammad/multimodal/__init__.py +24 -0
- hammad/{data/types/files → multimodal}/audio.py +21 -6
- hammad/{data/types/files → multimodal}/image.py +5 -5
- hammad/multithreading/__init__.py +304 -0
- hammad/pydantic/__init__.py +2 -2
- hammad/pydantic/converters.py +1 -1
- hammad/pydantic/models/__init__.py +2 -2
- hammad/text/__init__.py +59 -14
- hammad/text/converters.py +723 -0
- hammad/text/{utils/markdown/formatting.py → markdown.py} +25 -23
- hammad/text/text.py +12 -14
- hammad/types/__init__.py +11 -0
- hammad/{data/types/files → types}/file.py +18 -18
- hammad/typing/__init__.py +138 -84
- hammad/web/__init__.py +3 -2
- hammad/web/models.py +245 -0
- hammad/web/search/client.py +75 -23
- hammad/web/utils.py +14 -5
- hammad/yaml/__init__.py +2 -2
- hammad/yaml/converters.py +1 -1
- {hammad_python-0.0.11.dist-info → hammad_python-0.0.13.dist-info}/METADATA +4 -1
- hammad_python-0.0.13.dist-info/RECORD +85 -0
- hammad/based/__init__.py +0 -52
- hammad/based/utils.py +0 -455
- hammad/cache/_cache.py +0 -746
- hammad/data/types/__init__.py +0 -33
- hammad/data/types/files/__init__.py +0 -1
- hammad/data/types/files/document.py +0 -195
- hammad/text/utils/__init__.py +0 -1
- hammad/text/utils/converters.py +0 -229
- hammad/text/utils/markdown/__init__.py +0 -1
- hammad/text/utils/markdown/converters.py +0 -506
- hammad_python-0.0.11.dist-info/RECORD +0 -65
- {hammad_python-0.0.11.dist-info → hammad_python-0.0.13.dist-info}/WHEEL +0 -0
- {hammad_python-0.0.11.dist-info → hammad_python-0.0.13.dist-info}/licenses/LICENSE +0 -0
@@ -13,6 +13,7 @@ from typing import (
|
|
13
13
|
from typing_extensions import TypedDict
|
14
14
|
|
15
15
|
if TYPE_CHECKING:
|
16
|
+
from .base_collection import BaseCollection
|
16
17
|
from .searchable_collection import SearchableCollection
|
17
18
|
from .vector_collection import VectorCollection
|
18
19
|
|
@@ -58,6 +59,231 @@ class VectorCollectionSettings(TypedDict, total=False):
|
|
58
59
|
timeout: Optional[float]
|
59
60
|
|
60
61
|
|
62
|
+
class Collection:
|
63
|
+
"""
|
64
|
+
A unified collection factory that creates the appropriate collection type
|
65
|
+
based on the provided parameters.
|
66
|
+
|
67
|
+
This class acts as a factory and doesn't contain its own logic - it simply
|
68
|
+
returns instances of SearchableCollection or VectorCollection based on the
|
69
|
+
type parameter.
|
70
|
+
"""
|
71
|
+
|
72
|
+
@overload
|
73
|
+
def __new__(
|
74
|
+
cls,
|
75
|
+
type: Literal["searchable"],
|
76
|
+
name: str,
|
77
|
+
*,
|
78
|
+
schema: Optional[Any] = None,
|
79
|
+
default_ttl: Optional[int] = None,
|
80
|
+
storage_backend: Optional[Any] = None,
|
81
|
+
heap_size: Optional[int] = None,
|
82
|
+
num_threads: Optional[int] = None,
|
83
|
+
index_path: Optional[str] = None,
|
84
|
+
schema_builder: Optional[Any] = None,
|
85
|
+
writer_memory: Optional[int] = None,
|
86
|
+
reload_policy: Optional[str] = None,
|
87
|
+
) -> "SearchableCollection": ...
|
88
|
+
|
89
|
+
@overload
|
90
|
+
def __new__(
|
91
|
+
cls,
|
92
|
+
type: Literal["vector"],
|
93
|
+
name: str,
|
94
|
+
vector_size: int,
|
95
|
+
*,
|
96
|
+
schema: Optional[Any] = None,
|
97
|
+
default_ttl: Optional[int] = None,
|
98
|
+
storage_backend: Optional[Any] = None,
|
99
|
+
distance_metric: Optional[Any] = None,
|
100
|
+
embedding_function: Optional[Callable[[Any], List[float]]] = None,
|
101
|
+
model: Optional[str] = None,
|
102
|
+
# Common embedding parameters
|
103
|
+
format: bool = False,
|
104
|
+
# LiteLLM parameters
|
105
|
+
dimensions: Optional[int] = None,
|
106
|
+
encoding_format: Optional[str] = None,
|
107
|
+
timeout: Optional[int] = None,
|
108
|
+
api_base: Optional[str] = None,
|
109
|
+
api_version: Optional[str] = None,
|
110
|
+
api_key: Optional[str] = None,
|
111
|
+
api_type: Optional[str] = None,
|
112
|
+
caching: bool = False,
|
113
|
+
user: Optional[str] = None,
|
114
|
+
# FastEmbed parameters
|
115
|
+
parallel: Optional[int] = None,
|
116
|
+
batch_size: Optional[int] = None,
|
117
|
+
# Qdrant parameters
|
118
|
+
path: Optional[str] = None,
|
119
|
+
host: Optional[str] = None,
|
120
|
+
port: Optional[int] = None,
|
121
|
+
grpc_port: Optional[int] = None,
|
122
|
+
prefer_grpc: Optional[bool] = None,
|
123
|
+
qdrant_timeout: Optional[float] = None,
|
124
|
+
) -> "VectorCollection": ...
|
125
|
+
|
126
|
+
def __new__(
|
127
|
+
cls,
|
128
|
+
type: Literal["searchable", "vector"],
|
129
|
+
name: str,
|
130
|
+
vector_size: Optional[int] = None,
|
131
|
+
*,
|
132
|
+
schema: Optional[Any] = None,
|
133
|
+
default_ttl: Optional[int] = None,
|
134
|
+
storage_backend: Optional[Any] = None,
|
135
|
+
distance_metric: Optional[Any] = None,
|
136
|
+
embedding_function: Optional[Callable[[Any], List[float]]] = None,
|
137
|
+
model: Optional[str] = None,
|
138
|
+
# Common embedding parameters
|
139
|
+
format: bool = False,
|
140
|
+
# LiteLLM parameters
|
141
|
+
dimensions: Optional[int] = None,
|
142
|
+
encoding_format: Optional[str] = None,
|
143
|
+
timeout: Optional[int] = None,
|
144
|
+
api_base: Optional[str] = None,
|
145
|
+
api_version: Optional[str] = None,
|
146
|
+
api_key: Optional[str] = None,
|
147
|
+
api_type: Optional[str] = None,
|
148
|
+
caching: bool = False,
|
149
|
+
user: Optional[str] = None,
|
150
|
+
# FastEmbed parameters
|
151
|
+
parallel: Optional[int] = None,
|
152
|
+
batch_size: Optional[int] = None,
|
153
|
+
# Tantivy parameters (searchable collections only)
|
154
|
+
heap_size: Optional[int] = None,
|
155
|
+
num_threads: Optional[int] = None,
|
156
|
+
index_path: Optional[str] = None,
|
157
|
+
schema_builder: Optional[Any] = None,
|
158
|
+
writer_memory: Optional[int] = None,
|
159
|
+
reload_policy: Optional[str] = None,
|
160
|
+
# Qdrant parameters (vector collections only)
|
161
|
+
path: Optional[str] = None,
|
162
|
+
host: Optional[str] = None,
|
163
|
+
port: Optional[int] = None,
|
164
|
+
grpc_port: Optional[int] = None,
|
165
|
+
prefer_grpc: Optional[bool] = None,
|
166
|
+
qdrant_timeout: Optional[float] = None,
|
167
|
+
) -> "BaseCollection":
|
168
|
+
"""
|
169
|
+
Create a collection of the specified type.
|
170
|
+
|
171
|
+
Args:
|
172
|
+
type: Type of collection to create ("searchable" or "vector")
|
173
|
+
name: Name of the collection
|
174
|
+
vector_size: Size of vectors (required for vector collections)
|
175
|
+
schema: Optional schema for type validation
|
176
|
+
default_ttl: Default TTL for items in seconds
|
177
|
+
storage_backend: Optional storage backend
|
178
|
+
distance_metric: Distance metric for similarity search (vector collections only)
|
179
|
+
embedding_function: Function to convert objects to vectors (vector collections only)
|
180
|
+
|
181
|
+
Tantivy parameters (searchable collections only):
|
182
|
+
heap_size: Memory allocation for tantivy heap
|
183
|
+
num_threads: Number of threads for tantivy operations
|
184
|
+
index_path: Path to store tantivy index files
|
185
|
+
schema_builder: Custom schema builder for tantivy
|
186
|
+
writer_memory: Memory allocation for tantivy writer
|
187
|
+
reload_policy: Policy for reloading tantivy index
|
188
|
+
|
189
|
+
Qdrant parameters (vector collections only):
|
190
|
+
path: Path for local Qdrant storage
|
191
|
+
host: Qdrant server host
|
192
|
+
port: Qdrant server port
|
193
|
+
grpc_port: Qdrant gRPC port
|
194
|
+
prefer_grpc: Whether to prefer gRPC over HTTP
|
195
|
+
api_key: API key for Qdrant authentication
|
196
|
+
timeout: Request timeout for Qdrant operations
|
197
|
+
|
198
|
+
Returns:
|
199
|
+
A SearchableCollection or VectorCollection instance
|
200
|
+
"""
|
201
|
+
if type == "searchable":
|
202
|
+
from .searchable_collection import SearchableCollection
|
203
|
+
|
204
|
+
# Build tantivy config from individual parameters
|
205
|
+
tantivy_config = {}
|
206
|
+
if heap_size is not None:
|
207
|
+
tantivy_config["heap_size"] = heap_size
|
208
|
+
if num_threads is not None:
|
209
|
+
tantivy_config["num_threads"] = num_threads
|
210
|
+
if index_path is not None:
|
211
|
+
tantivy_config["index_path"] = index_path
|
212
|
+
if schema_builder is not None:
|
213
|
+
tantivy_config["schema_builder"] = schema_builder
|
214
|
+
if writer_memory is not None:
|
215
|
+
tantivy_config["writer_memory"] = writer_memory
|
216
|
+
if reload_policy is not None:
|
217
|
+
tantivy_config["reload_policy"] = reload_policy
|
218
|
+
|
219
|
+
return SearchableCollection(
|
220
|
+
name=name,
|
221
|
+
schema=schema,
|
222
|
+
default_ttl=default_ttl,
|
223
|
+
storage_backend=storage_backend,
|
224
|
+
tantivy_config=tantivy_config if tantivy_config else None,
|
225
|
+
)
|
226
|
+
elif type == "vector":
|
227
|
+
if vector_size is None:
|
228
|
+
raise ValueError("vector_size is required for vector collections")
|
229
|
+
|
230
|
+
try:
|
231
|
+
from .vector_collection import VectorCollection, Distance
|
232
|
+
except ImportError:
|
233
|
+
raise ImportError(
|
234
|
+
"qdrant-client is required for vector collections. "
|
235
|
+
"Please install it with 'pip install qdrant-client'."
|
236
|
+
)
|
237
|
+
|
238
|
+
# Set default distance metric if not provided and Distance is available
|
239
|
+
if distance_metric is None and Distance is not None:
|
240
|
+
distance_metric = Distance.DOT
|
241
|
+
|
242
|
+
# Build qdrant config from individual parameters
|
243
|
+
qdrant_config = {}
|
244
|
+
if path is not None:
|
245
|
+
qdrant_config["path"] = path
|
246
|
+
if host is not None:
|
247
|
+
qdrant_config["host"] = host
|
248
|
+
if port is not None:
|
249
|
+
qdrant_config["port"] = port
|
250
|
+
if grpc_port is not None:
|
251
|
+
qdrant_config["grpc_port"] = grpc_port
|
252
|
+
if prefer_grpc is not None:
|
253
|
+
qdrant_config["prefer_grpc"] = prefer_grpc
|
254
|
+
if qdrant_timeout is not None:
|
255
|
+
qdrant_config["timeout"] = qdrant_timeout
|
256
|
+
|
257
|
+
return VectorCollection(
|
258
|
+
name=name,
|
259
|
+
vector_size=vector_size,
|
260
|
+
schema=schema,
|
261
|
+
default_ttl=default_ttl,
|
262
|
+
storage_backend=storage_backend,
|
263
|
+
distance_metric=distance_metric,
|
264
|
+
qdrant_config=qdrant_config if qdrant_config else None,
|
265
|
+
embedding_function=embedding_function,
|
266
|
+
model=model,
|
267
|
+
# Common embedding parameters
|
268
|
+
format=format,
|
269
|
+
# LiteLLM parameters
|
270
|
+
dimensions=dimensions,
|
271
|
+
encoding_format=encoding_format,
|
272
|
+
timeout=timeout,
|
273
|
+
api_base=api_base,
|
274
|
+
api_version=api_version,
|
275
|
+
api_key=api_key,
|
276
|
+
api_type=api_type,
|
277
|
+
caching=caching,
|
278
|
+
user=user,
|
279
|
+
# FastEmbed parameters
|
280
|
+
parallel=parallel,
|
281
|
+
batch_size=batch_size,
|
282
|
+
)
|
283
|
+
else:
|
284
|
+
raise ValueError(f"Unsupported collection type: {type}")
|
285
|
+
|
286
|
+
|
61
287
|
@overload
|
62
288
|
def create_collection(
|
63
289
|
type: Literal["searchable"],
|
@@ -86,13 +312,29 @@ def create_collection(
|
|
86
312
|
storage_backend: Optional[Any] = None,
|
87
313
|
distance_metric: Optional[Any] = None,
|
88
314
|
embedding_function: Optional[Callable[[Any], List[float]]] = None,
|
315
|
+
model: Optional[str] = None,
|
316
|
+
# Common embedding parameters
|
317
|
+
format: bool = False,
|
318
|
+
# LiteLLM parameters
|
319
|
+
dimensions: Optional[int] = None,
|
320
|
+
encoding_format: Optional[str] = None,
|
321
|
+
timeout: Optional[int] = None,
|
322
|
+
api_base: Optional[str] = None,
|
323
|
+
api_version: Optional[str] = None,
|
324
|
+
api_key: Optional[str] = None,
|
325
|
+
api_type: Optional[str] = None,
|
326
|
+
caching: bool = False,
|
327
|
+
user: Optional[str] = None,
|
328
|
+
# FastEmbed parameters
|
329
|
+
parallel: Optional[int] = None,
|
330
|
+
batch_size: Optional[int] = None,
|
331
|
+
# Qdrant parameters
|
89
332
|
path: Optional[str] = None,
|
90
333
|
host: Optional[str] = None,
|
91
334
|
port: Optional[int] = None,
|
92
335
|
grpc_port: Optional[int] = None,
|
93
336
|
prefer_grpc: Optional[bool] = None,
|
94
|
-
|
95
|
-
timeout: Optional[float] = None,
|
337
|
+
qdrant_timeout: Optional[float] = None,
|
96
338
|
) -> "VectorCollection": ...
|
97
339
|
|
98
340
|
|
@@ -106,6 +348,22 @@ def create_collection(
|
|
106
348
|
storage_backend: Optional[Any] = None,
|
107
349
|
distance_metric: Optional[Any] = None,
|
108
350
|
embedding_function: Optional[Callable[[Any], List[float]]] = None,
|
351
|
+
model: Optional[str] = None,
|
352
|
+
# Common embedding parameters
|
353
|
+
format: bool = False,
|
354
|
+
# LiteLLM parameters
|
355
|
+
dimensions: Optional[int] = None,
|
356
|
+
encoding_format: Optional[str] = None,
|
357
|
+
timeout: Optional[int] = None,
|
358
|
+
api_base: Optional[str] = None,
|
359
|
+
api_version: Optional[str] = None,
|
360
|
+
api_key: Optional[str] = None,
|
361
|
+
api_type: Optional[str] = None,
|
362
|
+
caching: bool = False,
|
363
|
+
user: Optional[str] = None,
|
364
|
+
# FastEmbed parameters
|
365
|
+
parallel: Optional[int] = None,
|
366
|
+
batch_size: Optional[int] = None,
|
109
367
|
# Tantivy parameters (searchable collections only)
|
110
368
|
heap_size: Optional[int] = None,
|
111
369
|
num_threads: Optional[int] = None,
|
@@ -119,12 +377,14 @@ def create_collection(
|
|
119
377
|
port: Optional[int] = None,
|
120
378
|
grpc_port: Optional[int] = None,
|
121
379
|
prefer_grpc: Optional[bool] = None,
|
122
|
-
|
123
|
-
|
124
|
-
) -> Union["SearchableCollection", "VectorCollection"]:
|
380
|
+
qdrant_timeout: Optional[float] = None,
|
381
|
+
) -> "BaseCollection":
|
125
382
|
"""
|
126
383
|
Create a collection of the specified type.
|
127
384
|
|
385
|
+
This function provides a factory pattern for creating collections.
|
386
|
+
Use the Collection class for a more object-oriented approach.
|
387
|
+
|
128
388
|
Args:
|
129
389
|
type: Type of collection to create ("searchable" or "vector")
|
130
390
|
name: Name of the collection
|
@@ -155,73 +415,38 @@ def create_collection(
|
|
155
415
|
Returns:
|
156
416
|
A SearchableCollection or VectorCollection instance
|
157
417
|
"""
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
)
|
194
|
-
|
195
|
-
# Set default distance metric if not provided and Distance is available
|
196
|
-
if distance_metric is None and Distance is not None:
|
197
|
-
distance_metric = Distance.DOT
|
198
|
-
|
199
|
-
# Build qdrant config from individual parameters
|
200
|
-
qdrant_config = {}
|
201
|
-
if path is not None:
|
202
|
-
qdrant_config["path"] = path
|
203
|
-
if host is not None:
|
204
|
-
qdrant_config["host"] = host
|
205
|
-
if port is not None:
|
206
|
-
qdrant_config["port"] = port
|
207
|
-
if grpc_port is not None:
|
208
|
-
qdrant_config["grpc_port"] = grpc_port
|
209
|
-
if prefer_grpc is not None:
|
210
|
-
qdrant_config["prefer_grpc"] = prefer_grpc
|
211
|
-
if api_key is not None:
|
212
|
-
qdrant_config["api_key"] = api_key
|
213
|
-
if timeout is not None:
|
214
|
-
qdrant_config["timeout"] = timeout
|
215
|
-
|
216
|
-
return VectorCollection(
|
217
|
-
name=name,
|
218
|
-
vector_size=vector_size,
|
219
|
-
schema=schema,
|
220
|
-
default_ttl=default_ttl,
|
221
|
-
storage_backend=storage_backend,
|
222
|
-
distance_metric=distance_metric,
|
223
|
-
qdrant_config=qdrant_config if qdrant_config else None,
|
224
|
-
embedding_function=embedding_function,
|
225
|
-
)
|
226
|
-
else:
|
227
|
-
raise ValueError(f"Unsupported collection type: {type}")
|
418
|
+
return Collection(
|
419
|
+
type=type,
|
420
|
+
name=name,
|
421
|
+
vector_size=vector_size,
|
422
|
+
schema=schema,
|
423
|
+
default_ttl=default_ttl,
|
424
|
+
storage_backend=storage_backend,
|
425
|
+
distance_metric=distance_metric,
|
426
|
+
embedding_function=embedding_function,
|
427
|
+
model=model,
|
428
|
+
format=format,
|
429
|
+
dimensions=dimensions,
|
430
|
+
encoding_format=encoding_format,
|
431
|
+
timeout=timeout,
|
432
|
+
api_base=api_base,
|
433
|
+
api_version=api_version,
|
434
|
+
api_key=api_key,
|
435
|
+
api_type=api_type,
|
436
|
+
caching=caching,
|
437
|
+
user=user,
|
438
|
+
parallel=parallel,
|
439
|
+
batch_size=batch_size,
|
440
|
+
heap_size=heap_size,
|
441
|
+
num_threads=num_threads,
|
442
|
+
index_path=index_path,
|
443
|
+
schema_builder=schema_builder,
|
444
|
+
writer_memory=writer_memory,
|
445
|
+
reload_policy=reload_policy,
|
446
|
+
path=path,
|
447
|
+
host=host,
|
448
|
+
port=port,
|
449
|
+
grpc_port=grpc_port,
|
450
|
+
prefer_grpc=prefer_grpc,
|
451
|
+
qdrant_timeout=qdrant_timeout,
|
452
|
+
)
|
@@ -25,6 +25,16 @@ except ImportError as e:
|
|
25
25
|
) from e
|
26
26
|
|
27
27
|
from .base_collection import BaseCollection, Object, Filters, Schema
|
28
|
+
from ...ai.embeddings.create import (
|
29
|
+
create_embeddings,
|
30
|
+
async_create_embeddings,
|
31
|
+
)
|
32
|
+
from ...ai.embeddings.client.fastembed_text_embeddings_client import (
|
33
|
+
FastEmbedTextEmbeddingModel,
|
34
|
+
)
|
35
|
+
from ...ai.embeddings.client.litellm_embeddings_client import (
|
36
|
+
LiteLlmEmbeddingModel,
|
37
|
+
)
|
28
38
|
|
29
39
|
__all__ = ("VectorCollection",)
|
30
40
|
|
@@ -50,6 +60,22 @@ class VectorCollection(BaseCollection, Generic[Object]):
|
|
50
60
|
distance_metric: Distance = Distance.DOT,
|
51
61
|
qdrant_config: Optional[Dict[str, Any]] = None,
|
52
62
|
embedding_function: Optional[Callable[[Any], List[float]]] = None,
|
63
|
+
model: Optional[str] = None,
|
64
|
+
# Common embedding parameters
|
65
|
+
format: bool = False,
|
66
|
+
# LiteLLM parameters
|
67
|
+
dimensions: Optional[int] = None,
|
68
|
+
encoding_format: Optional[str] = None,
|
69
|
+
timeout: Optional[int] = None,
|
70
|
+
api_base: Optional[str] = None,
|
71
|
+
api_version: Optional[str] = None,
|
72
|
+
api_key: Optional[str] = None,
|
73
|
+
api_type: Optional[str] = None,
|
74
|
+
caching: bool = False,
|
75
|
+
user: Optional[str] = None,
|
76
|
+
# FastEmbed parameters
|
77
|
+
parallel: Optional[int] = None,
|
78
|
+
batch_size: Optional[int] = None,
|
53
79
|
):
|
54
80
|
"""
|
55
81
|
Initialize a vector collection.
|
@@ -71,6 +97,23 @@ class VectorCollection(BaseCollection, Generic[Object]):
|
|
71
97
|
"api_key": "your-api-key"
|
72
98
|
}
|
73
99
|
embedding_function: Optional function to convert objects to vectors
|
100
|
+
model: Optional model name (e.g., 'fastembed/BAAI/bge-small-en-v1.5', 'openai/text-embedding-3-small')
|
101
|
+
format: Whether to format each non-string input as a markdown string
|
102
|
+
|
103
|
+
# LiteLLM-specific parameters:
|
104
|
+
dimensions: The dimensions of the embedding
|
105
|
+
encoding_format: The encoding format of the embedding (e.g. "float", "base64")
|
106
|
+
timeout: The timeout for the embedding request
|
107
|
+
api_base: The base URL for the embedding API
|
108
|
+
api_version: The version of the embedding API
|
109
|
+
api_key: The API key for the embedding API
|
110
|
+
api_type: The type of the embedding API
|
111
|
+
caching: Whether to cache the embedding
|
112
|
+
user: The user for the embedding
|
113
|
+
|
114
|
+
# FastEmbed-specific parameters:
|
115
|
+
parallel: The number of parallel processes to use for the embedding
|
116
|
+
batch_size: The batch size to use for the embedding
|
74
117
|
"""
|
75
118
|
self.name = name
|
76
119
|
self.vector_size = vector_size
|
@@ -79,6 +122,29 @@ class VectorCollection(BaseCollection, Generic[Object]):
|
|
79
122
|
self.distance_metric = distance_metric
|
80
123
|
self._storage_backend = storage_backend
|
81
124
|
self._embedding_function = embedding_function
|
125
|
+
self._model = model
|
126
|
+
|
127
|
+
# Store embedding parameters
|
128
|
+
self._embedding_params = {
|
129
|
+
"format": format,
|
130
|
+
# LiteLLM parameters
|
131
|
+
"dimensions": dimensions,
|
132
|
+
"encoding_format": encoding_format,
|
133
|
+
"timeout": timeout,
|
134
|
+
"api_base": api_base,
|
135
|
+
"api_version": api_version,
|
136
|
+
"api_key": api_key,
|
137
|
+
"api_type": api_type,
|
138
|
+
"caching": caching,
|
139
|
+
"user": user,
|
140
|
+
# FastEmbed parameters
|
141
|
+
"parallel": parallel,
|
142
|
+
"batch_size": batch_size,
|
143
|
+
}
|
144
|
+
|
145
|
+
# If model is provided, create embedding function
|
146
|
+
if model:
|
147
|
+
self._embedding_function = self._create_embedding_function(model)
|
82
148
|
|
83
149
|
# Store qdrant configuration
|
84
150
|
self._qdrant_config = qdrant_config or {}
|
@@ -92,6 +158,28 @@ class VectorCollection(BaseCollection, Generic[Object]):
|
|
92
158
|
# Initialize Qdrant client
|
93
159
|
self._init_qdrant_client()
|
94
160
|
|
161
|
+
def _create_embedding_function(
|
162
|
+
self,
|
163
|
+
model_name: str,
|
164
|
+
) -> Callable[[Any], List[float]]:
|
165
|
+
"""Create an embedding function from a model name."""
|
166
|
+
|
167
|
+
def embedding_function(text: Any) -> List[float]:
|
168
|
+
if not isinstance(text, str):
|
169
|
+
text = str(text)
|
170
|
+
|
171
|
+
# Filter out None values from embedding parameters
|
172
|
+
embedding_kwargs = {
|
173
|
+
k: v for k, v in self._embedding_params.items() if v is not None
|
174
|
+
}
|
175
|
+
embedding_kwargs["model"] = model_name
|
176
|
+
embedding_kwargs["input"] = text
|
177
|
+
|
178
|
+
response = create_embeddings(**embedding_kwargs)
|
179
|
+
return response.data[0].embedding
|
180
|
+
|
181
|
+
return embedding_function
|
182
|
+
|
95
183
|
def _init_qdrant_client(self):
|
96
184
|
"""Initialize the Qdrant client and collection."""
|
97
185
|
config = self._qdrant_config
|
@@ -257,18 +345,28 @@ class VectorCollection(BaseCollection, Generic[Object]):
|
|
257
345
|
def add(
|
258
346
|
self,
|
259
347
|
entry: Object,
|
260
|
-
*,
|
261
348
|
id: Optional[str] = None,
|
349
|
+
*,
|
262
350
|
filters: Optional[Filters] = None,
|
263
351
|
ttl: Optional[int] = None,
|
264
|
-
) ->
|
265
|
-
"""Add an item to the collection.
|
352
|
+
) -> str:
|
353
|
+
"""Add an item to the collection.
|
354
|
+
|
355
|
+
Args:
|
356
|
+
entry: The object/data to store
|
357
|
+
id: Optional ID for the item (will generate UUID if not provided)
|
358
|
+
filters: Optional metadata filters
|
359
|
+
ttl: Time-to-live in seconds
|
360
|
+
|
361
|
+
Returns:
|
362
|
+
The ID of the added item
|
363
|
+
"""
|
266
364
|
if self._storage_backend is not None:
|
267
365
|
# Delegate to storage backend
|
268
366
|
self._storage_backend.add(
|
269
367
|
entry, id=id, collection=self.name, filters=filters, ttl=ttl
|
270
368
|
)
|
271
|
-
return
|
369
|
+
return id or str(uuid.uuid4())
|
272
370
|
|
273
371
|
# Independent operation
|
274
372
|
item_id = id or str(uuid.uuid4())
|
@@ -313,24 +411,32 @@ class VectorCollection(BaseCollection, Generic[Object]):
|
|
313
411
|
|
314
412
|
self._client.upsert(collection_name=self.name, points=[point])
|
315
413
|
|
414
|
+
return item_id
|
415
|
+
|
316
416
|
def query(
|
317
417
|
self,
|
418
|
+
query: Optional[str] = None,
|
318
419
|
*,
|
319
420
|
filters: Optional[Filters] = None,
|
320
|
-
search: Optional[str] = None,
|
321
421
|
limit: Optional[int] = None,
|
322
422
|
) -> List[Object]:
|
323
|
-
"""Query items from the collection.
|
423
|
+
"""Query items from the collection.
|
424
|
+
|
425
|
+
Args:
|
426
|
+
query: Search query string. If provided, performs semantic similarity search.
|
427
|
+
filters: Optional filters to apply
|
428
|
+
limit: Maximum number of results to return
|
429
|
+
"""
|
324
430
|
if self._storage_backend is not None:
|
325
431
|
return self._storage_backend.query(
|
326
432
|
collection=self.name,
|
327
433
|
filters=filters,
|
328
|
-
search=
|
434
|
+
search=query,
|
329
435
|
limit=limit,
|
330
436
|
)
|
331
437
|
|
332
438
|
# For basic query without vector search, just return all items with filters
|
333
|
-
if
|
439
|
+
if query is None:
|
334
440
|
return self._query_all(filters=filters, limit=limit)
|
335
441
|
|
336
442
|
# If search is provided but no embedding function, treat as error
|
@@ -341,7 +447,7 @@ class VectorCollection(BaseCollection, Generic[Object]):
|
|
341
447
|
)
|
342
448
|
|
343
449
|
# Convert search to vector and perform similarity search
|
344
|
-
query_vector = self._embedding_function(
|
450
|
+
query_vector = self._embedding_function(query)
|
345
451
|
return self.vector_search(
|
346
452
|
query_vector=query_vector, filters=filters, limit=limit
|
347
453
|
)
|
@@ -386,7 +492,7 @@ class VectorCollection(BaseCollection, Generic[Object]):
|
|
386
492
|
query_vector: Union[List[float], np.ndarray],
|
387
493
|
*,
|
388
494
|
filters: Optional[Filters] = None,
|
389
|
-
limit:
|
495
|
+
limit: int = 10,
|
390
496
|
score_threshold: Optional[float] = None,
|
391
497
|
) -> List[Object]:
|
392
498
|
"""
|
@@ -395,7 +501,7 @@ class VectorCollection(BaseCollection, Generic[Object]):
|
|
395
501
|
Args:
|
396
502
|
query_vector: Query vector for similarity search
|
397
503
|
filters: Optional filters to apply
|
398
|
-
limit: Maximum number of results to return
|
504
|
+
limit: Maximum number of results to return (default: 10)
|
399
505
|
score_threshold: Minimum similarity score threshold
|
400
506
|
|
401
507
|
Returns:
|
@@ -414,7 +520,7 @@ class VectorCollection(BaseCollection, Generic[Object]):
|
|
414
520
|
collection_name=self.name,
|
415
521
|
query=query_vector,
|
416
522
|
query_filter=self._build_qdrant_filter(filters),
|
417
|
-
limit=limit
|
523
|
+
limit=limit,
|
418
524
|
score_threshold=score_threshold,
|
419
525
|
with_payload=True,
|
420
526
|
with_vectors=False,
|
@@ -1,7 +1,7 @@
|
|
1
1
|
"""hammad.data.databases"""
|
2
2
|
|
3
3
|
from typing import TYPE_CHECKING
|
4
|
-
from ...
|
4
|
+
from ..._core._utils._import_utils import _auto_create_getattr_loader
|
5
5
|
|
6
6
|
if TYPE_CHECKING:
|
7
7
|
from .database import Database, create_database
|
@@ -13,7 +13,7 @@ __all__ = (
|
|
13
13
|
)
|
14
14
|
|
15
15
|
|
16
|
-
__getattr__ =
|
16
|
+
__getattr__ = _auto_create_getattr_loader(__all__)
|
17
17
|
|
18
18
|
|
19
19
|
def __dir__() -> list[str]:
|