hammad-python 0.0.14__py3-none-any.whl → 0.0.16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hammad/__init__.py +177 -0
- hammad/{performance/imports.py → _internal.py} +7 -1
- hammad/cache/__init__.py +1 -1
- hammad/cli/__init__.py +3 -1
- hammad/cli/_runner.py +265 -0
- hammad/cli/animations.py +1 -1
- hammad/cli/plugins.py +133 -78
- hammad/cli/styles/__init__.py +1 -1
- hammad/cli/styles/utils.py +149 -3
- hammad/data/__init__.py +56 -29
- hammad/data/collections/__init__.py +27 -17
- hammad/data/collections/collection.py +205 -383
- hammad/data/collections/indexes/__init__.py +37 -0
- hammad/data/collections/indexes/qdrant/__init__.py +1 -0
- hammad/data/collections/indexes/qdrant/index.py +735 -0
- hammad/data/collections/indexes/qdrant/settings.py +94 -0
- hammad/data/collections/indexes/qdrant/utils.py +220 -0
- hammad/data/collections/indexes/tantivy/__init__.py +1 -0
- hammad/data/collections/indexes/tantivy/index.py +428 -0
- hammad/data/collections/indexes/tantivy/settings.py +51 -0
- hammad/data/collections/indexes/tantivy/utils.py +200 -0
- hammad/data/configurations/__init__.py +2 -2
- hammad/data/configurations/configuration.py +2 -2
- hammad/data/models/__init__.py +20 -9
- hammad/data/models/extensions/__init__.py +4 -0
- hammad/data/models/{pydantic → extensions/pydantic}/__init__.py +6 -19
- hammad/data/models/{pydantic → extensions/pydantic}/converters.py +143 -16
- hammad/data/models/{base/fields.py → fields.py} +1 -1
- hammad/data/models/{base/model.py → model.py} +1 -1
- hammad/data/models/{base/utils.py → utils.py} +1 -1
- hammad/data/sql/__init__.py +23 -0
- hammad/data/sql/database.py +578 -0
- hammad/data/sql/types.py +141 -0
- hammad/data/types/__init__.py +1 -3
- hammad/data/types/file.py +3 -3
- hammad/data/types/multimodal/__init__.py +2 -2
- hammad/data/types/multimodal/audio.py +2 -2
- hammad/data/types/multimodal/image.py +2 -2
- hammad/formatting/__init__.py +9 -27
- hammad/formatting/json/__init__.py +8 -2
- hammad/formatting/json/converters.py +7 -1
- hammad/formatting/text/__init__.py +1 -1
- hammad/formatting/yaml/__init__.py +1 -1
- hammad/genai/__init__.py +78 -0
- hammad/genai/agents/__init__.py +1 -0
- hammad/genai/agents/types/__init__.py +35 -0
- hammad/genai/agents/types/history.py +277 -0
- hammad/genai/agents/types/tool.py +490 -0
- hammad/genai/embedding_models/__init__.py +41 -0
- hammad/{ai/embeddings/client/litellm_embeddings_client.py → genai/embedding_models/embedding_model.py} +47 -142
- hammad/genai/embedding_models/embedding_model_name.py +77 -0
- hammad/genai/embedding_models/embedding_model_request.py +65 -0
- hammad/{ai/embeddings/types.py → genai/embedding_models/embedding_model_response.py} +3 -3
- hammad/genai/embedding_models/run.py +161 -0
- hammad/genai/language_models/__init__.py +35 -0
- hammad/genai/language_models/_streaming.py +622 -0
- hammad/genai/language_models/_types.py +276 -0
- hammad/genai/language_models/_utils/__init__.py +31 -0
- hammad/genai/language_models/_utils/_completions.py +131 -0
- hammad/genai/language_models/_utils/_messages.py +89 -0
- hammad/genai/language_models/_utils/_requests.py +202 -0
- hammad/genai/language_models/_utils/_structured_outputs.py +124 -0
- hammad/genai/language_models/language_model.py +734 -0
- hammad/genai/language_models/language_model_request.py +135 -0
- hammad/genai/language_models/language_model_response.py +219 -0
- hammad/genai/language_models/language_model_response_chunk.py +53 -0
- hammad/genai/language_models/run.py +530 -0
- hammad/genai/multimodal_models.py +48 -0
- hammad/genai/rerank_models.py +26 -0
- hammad/logging/__init__.py +1 -1
- hammad/logging/decorators.py +1 -1
- hammad/logging/logger.py +2 -2
- hammad/mcp/__init__.py +1 -1
- hammad/mcp/client/__init__.py +35 -0
- hammad/mcp/client/client.py +105 -4
- hammad/mcp/client/client_service.py +10 -3
- hammad/mcp/servers/__init__.py +24 -0
- hammad/{performance/runtime → runtime}/__init__.py +2 -2
- hammad/{performance/runtime → runtime}/decorators.py +1 -1
- hammad/{performance/runtime → runtime}/run.py +1 -1
- hammad/service/__init__.py +1 -1
- hammad/service/create.py +3 -8
- hammad/service/decorators.py +8 -8
- hammad/typing/__init__.py +28 -0
- hammad/web/__init__.py +3 -3
- hammad/web/http/client.py +1 -1
- hammad/web/models.py +53 -21
- hammad/web/search/client.py +99 -52
- hammad/web/utils.py +13 -13
- hammad_python-0.0.16.dist-info/METADATA +191 -0
- hammad_python-0.0.16.dist-info/RECORD +110 -0
- hammad/ai/__init__.py +0 -1
- hammad/ai/_utils.py +0 -142
- hammad/ai/completions/__init__.py +0 -45
- hammad/ai/completions/client.py +0 -684
- hammad/ai/completions/create.py +0 -710
- hammad/ai/completions/settings.py +0 -100
- hammad/ai/completions/types.py +0 -792
- hammad/ai/completions/utils.py +0 -486
- hammad/ai/embeddings/__init__.py +0 -35
- hammad/ai/embeddings/client/__init__.py +0 -1
- hammad/ai/embeddings/client/base_embeddings_client.py +0 -26
- hammad/ai/embeddings/client/fastembed_text_embeddings_client.py +0 -200
- hammad/ai/embeddings/create.py +0 -159
- hammad/data/collections/base_collection.py +0 -58
- hammad/data/collections/searchable_collection.py +0 -556
- hammad/data/collections/vector_collection.py +0 -596
- hammad/data/databases/__init__.py +0 -21
- hammad/data/databases/database.py +0 -902
- hammad/data/models/base/__init__.py +0 -35
- hammad/data/models/pydantic/models/__init__.py +0 -28
- hammad/data/models/pydantic/models/arbitrary_model.py +0 -46
- hammad/data/models/pydantic/models/cacheable_model.py +0 -79
- hammad/data/models/pydantic/models/fast_model.py +0 -318
- hammad/data/models/pydantic/models/function_model.py +0 -176
- hammad/data/models/pydantic/models/subscriptable_model.py +0 -63
- hammad/performance/__init__.py +0 -36
- hammad/py.typed +0 -0
- hammad_python-0.0.14.dist-info/METADATA +0 -70
- hammad_python-0.0.14.dist-info/RECORD +0 -99
- {hammad_python-0.0.14.dist-info → hammad_python-0.0.16.dist-info}/WHEEL +0 -0
- {hammad_python-0.0.14.dist-info → hammad_python-0.0.16.dist-info}/licenses/LICENSE +0 -0
@@ -1,452 +1,274 @@
|
|
1
1
|
"""hammad.data.collections.collection"""
|
2
2
|
|
3
3
|
from typing import (
|
4
|
-
TYPE_CHECKING,
|
5
|
-
Literal,
|
6
|
-
Optional,
|
7
|
-
overload,
|
8
4
|
Any,
|
9
|
-
List,
|
10
5
|
Callable,
|
6
|
+
Dict,
|
7
|
+
List,
|
8
|
+
Literal,
|
9
|
+
Optional,
|
10
|
+
Type,
|
11
|
+
TypeVar,
|
11
12
|
Union,
|
13
|
+
overload,
|
14
|
+
TYPE_CHECKING,
|
12
15
|
)
|
13
|
-
from
|
16
|
+
from pathlib import Path
|
14
17
|
|
15
18
|
if TYPE_CHECKING:
|
16
|
-
from .
|
17
|
-
from .
|
18
|
-
from .
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
"euclidean_sim",
|
33
|
-
"manhattan_sim",
|
34
|
-
"hamming_sim",
|
35
|
-
"dot_sim",
|
36
|
-
]
|
37
|
-
|
38
|
-
|
39
|
-
class SearchableCollectionSettings(TypedDict, total=False):
|
40
|
-
"""Configuration settings for SearchableCollection using tantivy."""
|
41
|
-
|
42
|
-
heap_size: int
|
43
|
-
num_threads: Optional[int]
|
44
|
-
index_path: Optional[str]
|
45
|
-
schema_builder: Optional[Any]
|
46
|
-
writer_memory: Optional[int]
|
47
|
-
reload_policy: Optional[str]
|
48
|
-
|
19
|
+
from .indexes.tantivy.index import TantivyCollectionIndex
|
20
|
+
from .indexes.qdrant.index import QdrantCollectionIndex, VectorSearchResult
|
21
|
+
from .indexes.tantivy.settings import (
|
22
|
+
TantivyCollectionIndexSettings,
|
23
|
+
TantivyCollectionIndexQuerySettings,
|
24
|
+
)
|
25
|
+
from .indexes.qdrant.settings import (
|
26
|
+
QdrantCollectionIndexSettings,
|
27
|
+
QdrantCollectionIndexQuerySettings,
|
28
|
+
DistanceMetric,
|
29
|
+
)
|
30
|
+
from ..sql.types import DatabaseItemType
|
31
|
+
from ...genai.embedding_models.embedding_model_name import EmbeddingModelName
|
32
|
+
else:
|
33
|
+
from .indexes.tantivy.index import TantivyCollectionIndex
|
34
|
+
from .indexes.qdrant.index import QdrantCollectionIndex, VectorSearchResult
|
49
35
|
|
50
|
-
class VectorCollectionSettings(TypedDict, total=False):
|
51
|
-
"""Configuration settings for VectorCollection using Qdrant."""
|
52
36
|
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
prefer_grpc: Optional[bool]
|
58
|
-
api_key: Optional[str]
|
59
|
-
timeout: Optional[float]
|
37
|
+
__all__ = (
|
38
|
+
"Collection",
|
39
|
+
"VectorSearchResult",
|
40
|
+
)
|
60
41
|
|
61
42
|
|
62
43
|
class Collection:
|
63
44
|
"""
|
64
|
-
A unified collection factory that creates the appropriate collection type
|
45
|
+
A unified collection factory that creates the appropriate collection index type
|
65
46
|
based on the provided parameters.
|
66
|
-
|
47
|
+
|
67
48
|
This class acts as a factory and doesn't contain its own logic - it simply
|
68
|
-
returns instances of
|
69
|
-
|
49
|
+
returns instances of TantivyCollectionIndex or QdrantCollectionIndex based on the
|
50
|
+
vector parameter.
|
51
|
+
|
52
|
+
The main difference from the old approach is that now collections are 'unified'
|
53
|
+
- there's no separate collections interface. Each collection directly uses either
|
54
|
+
a Tantivy or Qdrant index with SQL Database as the storage backend.
|
70
55
|
"""
|
71
56
|
|
72
57
|
@overload
|
73
58
|
def __new__(
|
74
59
|
cls,
|
75
|
-
|
76
|
-
name: str,
|
60
|
+
name: str = "default",
|
77
61
|
*,
|
78
|
-
schema: Optional[
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
) -> "SearchableCollection": ...
|
62
|
+
schema: Optional[Type["DatabaseItemType"]] = None,
|
63
|
+
ttl: Optional[int] = None,
|
64
|
+
path: Optional[Union[Path, str]] = None,
|
65
|
+
vector: Literal[False] = False,
|
66
|
+
# Tantivy-specific parameters
|
67
|
+
fast: bool = True,
|
68
|
+
settings: Optional["TantivyCollectionIndexSettings"] = None,
|
69
|
+
query_settings: Optional["TantivyCollectionIndexQuerySettings"] = None,
|
70
|
+
) -> "TantivyCollectionIndex": ...
|
88
71
|
|
89
72
|
@overload
|
90
73
|
def __new__(
|
91
74
|
cls,
|
92
|
-
|
93
|
-
name: str,
|
94
|
-
vector_size: int,
|
75
|
+
name: str = "default",
|
95
76
|
*,
|
96
|
-
schema: Optional[
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
# FastEmbed parameters
|
115
|
-
parallel: Optional[int] = None,
|
116
|
-
batch_size: Optional[int] = None,
|
117
|
-
# Qdrant parameters
|
118
|
-
path: Optional[str] = None,
|
119
|
-
host: Optional[str] = None,
|
120
|
-
port: Optional[int] = None,
|
121
|
-
grpc_port: Optional[int] = None,
|
122
|
-
prefer_grpc: Optional[bool] = None,
|
123
|
-
qdrant_timeout: Optional[float] = None,
|
124
|
-
) -> "VectorCollection": ...
|
77
|
+
schema: Optional[Type["DatabaseItemType"]] = None,
|
78
|
+
ttl: Optional[int] = None,
|
79
|
+
path: Optional[Union[Path, str]] = None,
|
80
|
+
vector: Literal[True] = True,
|
81
|
+
vector_size: Optional[int] = None,
|
82
|
+
# Vector/Qdrant-specific parameters
|
83
|
+
distance_metric: "DistanceMetric" = "dot",
|
84
|
+
settings: Optional["QdrantCollectionIndexSettings"] = None,
|
85
|
+
query_settings: Optional["QdrantCollectionIndexQuerySettings"] = None,
|
86
|
+
embedding_model: Optional["EmbeddingModelName"] = "openai/text-embedding-3-small",
|
87
|
+
embedding_dimensions: Optional[int] = None,
|
88
|
+
embedding_api_key: Optional[str] = None,
|
89
|
+
embedding_base_url: Optional[str] = None,
|
90
|
+
# Rerank-specific parameters
|
91
|
+
rerank_model: Optional[str] = None,
|
92
|
+
rerank_api_key: Optional[str] = None,
|
93
|
+
rerank_base_url: Optional[str] = None,
|
94
|
+
) -> "QdrantCollectionIndex": ...
|
125
95
|
|
126
96
|
def __new__(
|
127
97
|
cls,
|
128
|
-
|
129
|
-
name: str,
|
130
|
-
vector_size: Optional[int] = None,
|
98
|
+
name: str = "default",
|
131
99
|
*,
|
132
|
-
schema: Optional[
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
# Tantivy parameters (searchable collections only)
|
154
|
-
heap_size: Optional[int] = None,
|
155
|
-
num_threads: Optional[int] = None,
|
156
|
-
index_path: Optional[str] = None,
|
157
|
-
schema_builder: Optional[Any] = None,
|
158
|
-
writer_memory: Optional[int] = None,
|
159
|
-
reload_policy: Optional[str] = None,
|
160
|
-
# Qdrant parameters (vector collections only)
|
161
|
-
path: Optional[str] = None,
|
162
|
-
host: Optional[str] = None,
|
163
|
-
port: Optional[int] = None,
|
164
|
-
grpc_port: Optional[int] = None,
|
165
|
-
prefer_grpc: Optional[bool] = None,
|
166
|
-
qdrant_timeout: Optional[float] = None,
|
167
|
-
) -> "BaseCollection":
|
100
|
+
schema: Optional[Type["DatabaseItemType"]] = None,
|
101
|
+
ttl: Optional[int] = None,
|
102
|
+
path: Optional[Union[Path, str]] = None,
|
103
|
+
vector: bool = False,
|
104
|
+
vector_size: Optional[int] = None,
|
105
|
+
# Tantivy-specific parameters
|
106
|
+
fast: bool = True,
|
107
|
+
# Unified settings parameters
|
108
|
+
settings: Optional[Union["TantivyCollectionIndexSettings", "QdrantCollectionIndexSettings"]] = None,
|
109
|
+
query_settings: Optional[Union["TantivyCollectionIndexQuerySettings", "QdrantCollectionIndexQuerySettings"]] = None,
|
110
|
+
# Vector/Qdrant-specific parameters
|
111
|
+
distance_metric: "DistanceMetric" = "dot",
|
112
|
+
embedding_model: Optional["EmbeddingModelName"] = "openai/text-embedding-3-small",
|
113
|
+
embedding_dimensions: Optional[int] = None,
|
114
|
+
embedding_api_key: Optional[str] = None,
|
115
|
+
embedding_base_url: Optional[str] = None,
|
116
|
+
# Rerank-specific parameters
|
117
|
+
rerank_model: Optional[str] = None,
|
118
|
+
rerank_api_key: Optional[str] = None,
|
119
|
+
rerank_base_url: Optional[str] = None,
|
120
|
+
) -> Union["TantivyCollectionIndex", "QdrantCollectionIndex"]:
|
168
121
|
"""
|
169
122
|
Create a collection of the specified type.
|
170
|
-
|
123
|
+
|
171
124
|
Args:
|
172
|
-
type: Type of collection to create ("searchable" or "vector")
|
173
125
|
name: Name of the collection
|
126
|
+
schema: Optional schema type for validation
|
127
|
+
ttl: Default TTL for items in seconds
|
128
|
+
path: File path for storage (None = in-memory)
|
129
|
+
vector: Whether this is a vector collection (True) or text search collection (False)
|
174
130
|
vector_size: Size of vectors (required for vector collections)
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
api_key: API key for Qdrant authentication
|
196
|
-
timeout: Request timeout for Qdrant operations
|
197
|
-
|
131
|
+
|
132
|
+
# Tantivy parameters (for non-vector collections):
|
133
|
+
fast: Whether to use fast schema building & indexing
|
134
|
+
|
135
|
+
# Unified parameters:
|
136
|
+
settings: Collection settings (TantivyCollectionIndexSettings or QdrantCollectionIndexSettings)
|
137
|
+
query_settings: Query behavior settings (TantivyCollectionIndexQuerySettings or QdrantCollectionIndexQuerySettings)
|
138
|
+
|
139
|
+
# Qdrant parameters (for vector collections):
|
140
|
+
distance_metric: Distance metric for similarity search
|
141
|
+
embedding_model: The embedding model to use (e.g., 'openai/text-embedding-3-small')
|
142
|
+
embedding_dimensions: Number of dimensions for embeddings
|
143
|
+
embedding_api_key: API key for the embedding service
|
144
|
+
embedding_base_url: Base URL for the embedding service
|
145
|
+
|
146
|
+
# Rerank parameters (for vector collections):
|
147
|
+
rerank_model: The rerank model to use (e.g., 'cohere/rerank-english-v3.0')
|
148
|
+
rerank_api_key: API key for the rerank service
|
149
|
+
rerank_base_url: Base URL for the rerank service
|
150
|
+
|
198
151
|
Returns:
|
199
|
-
A
|
152
|
+
A TantivyCollectionIndex or QdrantCollectionIndex instance
|
200
153
|
"""
|
201
|
-
if
|
202
|
-
|
203
|
-
|
204
|
-
# Build tantivy config from individual parameters
|
205
|
-
tantivy_config = {}
|
206
|
-
if heap_size is not None:
|
207
|
-
tantivy_config["heap_size"] = heap_size
|
208
|
-
if num_threads is not None:
|
209
|
-
tantivy_config["num_threads"] = num_threads
|
210
|
-
if index_path is not None:
|
211
|
-
tantivy_config["index_path"] = index_path
|
212
|
-
if schema_builder is not None:
|
213
|
-
tantivy_config["schema_builder"] = schema_builder
|
214
|
-
if writer_memory is not None:
|
215
|
-
tantivy_config["writer_memory"] = writer_memory
|
216
|
-
if reload_policy is not None:
|
217
|
-
tantivy_config["reload_policy"] = reload_policy
|
218
|
-
|
219
|
-
return SearchableCollection(
|
220
|
-
name=name,
|
221
|
-
schema=schema,
|
222
|
-
default_ttl=default_ttl,
|
223
|
-
storage_backend=storage_backend,
|
224
|
-
tantivy_config=tantivy_config if tantivy_config else None,
|
225
|
-
)
|
226
|
-
elif type == "vector":
|
227
|
-
if vector_size is None:
|
228
|
-
raise ValueError("vector_size is required for vector collections")
|
229
|
-
|
230
|
-
try:
|
231
|
-
from .vector_collection import VectorCollection, Distance
|
232
|
-
except ImportError:
|
233
|
-
raise ImportError(
|
234
|
-
"qdrant-client is required for vector collections. "
|
235
|
-
"Please install it with 'pip install qdrant-client'."
|
236
|
-
)
|
237
|
-
|
238
|
-
# Set default distance metric if not provided and Distance is available
|
239
|
-
if distance_metric is None and Distance is not None:
|
240
|
-
distance_metric = Distance.DOT
|
241
|
-
|
242
|
-
# Build qdrant config from individual parameters
|
243
|
-
qdrant_config = {}
|
244
|
-
if path is not None:
|
245
|
-
qdrant_config["path"] = path
|
246
|
-
if host is not None:
|
247
|
-
qdrant_config["host"] = host
|
248
|
-
if port is not None:
|
249
|
-
qdrant_config["port"] = port
|
250
|
-
if grpc_port is not None:
|
251
|
-
qdrant_config["grpc_port"] = grpc_port
|
252
|
-
if prefer_grpc is not None:
|
253
|
-
qdrant_config["prefer_grpc"] = prefer_grpc
|
254
|
-
if qdrant_timeout is not None:
|
255
|
-
qdrant_config["timeout"] = qdrant_timeout
|
256
|
-
|
257
|
-
return VectorCollection(
|
154
|
+
if vector:
|
155
|
+
# Vector collection using Qdrant
|
156
|
+
return QdrantCollectionIndex(
|
258
157
|
name=name,
|
259
158
|
vector_size=vector_size,
|
260
159
|
schema=schema,
|
261
|
-
|
262
|
-
|
160
|
+
ttl=ttl,
|
161
|
+
path=path,
|
263
162
|
distance_metric=distance_metric,
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
api_base=api_base,
|
274
|
-
api_version=api_version,
|
275
|
-
api_key=api_key,
|
276
|
-
api_type=api_type,
|
277
|
-
caching=caching,
|
278
|
-
user=user,
|
279
|
-
# FastEmbed parameters
|
280
|
-
parallel=parallel,
|
281
|
-
batch_size=batch_size,
|
163
|
+
settings=settings,
|
164
|
+
query_settings=query_settings,
|
165
|
+
embedding_model=embedding_model,
|
166
|
+
embedding_dimensions=embedding_dimensions,
|
167
|
+
embedding_api_key=embedding_api_key,
|
168
|
+
embedding_base_url=embedding_base_url,
|
169
|
+
rerank_model=rerank_model,
|
170
|
+
rerank_api_key=rerank_api_key,
|
171
|
+
rerank_base_url=rerank_base_url,
|
282
172
|
)
|
283
173
|
else:
|
284
|
-
|
285
|
-
|
174
|
+
# Text search collection using Tantivy
|
175
|
+
return TantivyCollectionIndex(
|
176
|
+
name=name,
|
177
|
+
schema=schema,
|
178
|
+
ttl=ttl,
|
179
|
+
path=path,
|
180
|
+
fast=fast,
|
181
|
+
settings=settings,
|
182
|
+
query_settings=query_settings,
|
183
|
+
)
|
184
|
+
|
286
185
|
|
287
186
|
@overload
|
288
187
|
def create_collection(
|
289
|
-
|
290
|
-
name: str,
|
188
|
+
name: str = "default",
|
291
189
|
*,
|
292
|
-
schema: Optional[
|
293
|
-
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
) -> "SearchableCollection": ...
|
302
|
-
|
190
|
+
schema: Optional[Type["DatabaseItemType"]] = None,
|
191
|
+
ttl: Optional[int] = None,
|
192
|
+
path: Optional[Union[Path, str]] = None,
|
193
|
+
vector: Literal[False] = False,
|
194
|
+
# Tantivy-specific parameters
|
195
|
+
fast: bool = True,
|
196
|
+
settings: Optional["TantivyCollectionIndexSettings"] = None,
|
197
|
+
query_settings: Optional["TantivyCollectionIndexQuerySettings"] = None,
|
198
|
+
) -> "TantivyCollectionIndex": ...
|
303
199
|
|
304
200
|
@overload
|
305
201
|
def create_collection(
|
306
|
-
|
307
|
-
name: str,
|
308
|
-
vector_size: int,
|
202
|
+
name: str = "default",
|
309
203
|
*,
|
310
|
-
schema: Optional[
|
311
|
-
|
312
|
-
|
313
|
-
|
204
|
+
schema: Optional[Type["DatabaseItemType"]] = None,
|
205
|
+
ttl: Optional[int] = None,
|
206
|
+
path: Optional[Union[Path, str]] = None,
|
207
|
+
vector: Literal[True],
|
208
|
+
vector_size: Optional[int] = None,
|
209
|
+
# Vector/Qdrant-specific parameters
|
210
|
+
distance_metric: "DistanceMetric" = "dot",
|
211
|
+
settings: Optional["QdrantCollectionIndexSettings"] = None,
|
212
|
+
query_settings: Optional["QdrantCollectionIndexQuerySettings"] = None,
|
314
213
|
embedding_function: Optional[Callable[[Any], List[float]]] = None,
|
315
|
-
|
316
|
-
# Common embedding parameters
|
317
|
-
format: bool = False,
|
318
|
-
# LiteLLM parameters
|
319
|
-
dimensions: Optional[int] = None,
|
320
|
-
encoding_format: Optional[str] = None,
|
321
|
-
timeout: Optional[int] = None,
|
322
|
-
api_base: Optional[str] = None,
|
323
|
-
api_version: Optional[str] = None,
|
324
|
-
api_key: Optional[str] = None,
|
325
|
-
api_type: Optional[str] = None,
|
326
|
-
caching: bool = False,
|
327
|
-
user: Optional[str] = None,
|
328
|
-
# FastEmbed parameters
|
329
|
-
parallel: Optional[int] = None,
|
330
|
-
batch_size: Optional[int] = None,
|
331
|
-
# Qdrant parameters
|
332
|
-
path: Optional[str] = None,
|
333
|
-
host: Optional[str] = None,
|
334
|
-
port: Optional[int] = None,
|
335
|
-
grpc_port: Optional[int] = None,
|
336
|
-
prefer_grpc: Optional[bool] = None,
|
337
|
-
qdrant_timeout: Optional[float] = None,
|
338
|
-
) -> "VectorCollection": ...
|
339
|
-
|
214
|
+
) -> "QdrantCollectionIndex": ...
|
340
215
|
|
341
216
|
def create_collection(
|
342
|
-
|
343
|
-
name: str,
|
344
|
-
vector_size: Optional[int] = None,
|
217
|
+
name: str = "default",
|
345
218
|
*,
|
346
|
-
schema: Optional[
|
347
|
-
|
348
|
-
|
349
|
-
|
219
|
+
schema: Optional[Type["DatabaseItemType"]] = None,
|
220
|
+
ttl: Optional[int] = None,
|
221
|
+
path: Optional[Union[Path, str]] = None,
|
222
|
+
vector: bool = False,
|
223
|
+
vector_size: Optional[int] = None,
|
224
|
+
# Tantivy-specific parameters
|
225
|
+
fast: bool = True,
|
226
|
+
# Unified settings parameters
|
227
|
+
settings: Optional[Union["TantivyCollectionIndexSettings", "QdrantCollectionIndexSettings"]] = None,
|
228
|
+
query_settings: Optional[Union["TantivyCollectionIndexQuerySettings", "QdrantCollectionIndexQuerySettings"]] = None,
|
229
|
+
# Vector/Qdrant-specific parameters
|
230
|
+
distance_metric: "DistanceMetric" = "dot",
|
350
231
|
embedding_function: Optional[Callable[[Any], List[float]]] = None,
|
351
|
-
|
352
|
-
# Common embedding parameters
|
353
|
-
format: bool = False,
|
354
|
-
# LiteLLM parameters
|
355
|
-
dimensions: Optional[int] = None,
|
356
|
-
encoding_format: Optional[str] = None,
|
357
|
-
timeout: Optional[int] = None,
|
358
|
-
api_base: Optional[str] = None,
|
359
|
-
api_version: Optional[str] = None,
|
360
|
-
api_key: Optional[str] = None,
|
361
|
-
api_type: Optional[str] = None,
|
362
|
-
caching: bool = False,
|
363
|
-
user: Optional[str] = None,
|
364
|
-
# FastEmbed parameters
|
365
|
-
parallel: Optional[int] = None,
|
366
|
-
batch_size: Optional[int] = None,
|
367
|
-
# Tantivy parameters (searchable collections only)
|
368
|
-
heap_size: Optional[int] = None,
|
369
|
-
num_threads: Optional[int] = None,
|
370
|
-
index_path: Optional[str] = None,
|
371
|
-
schema_builder: Optional[Any] = None,
|
372
|
-
writer_memory: Optional[int] = None,
|
373
|
-
reload_policy: Optional[str] = None,
|
374
|
-
# Qdrant parameters (vector collections only)
|
375
|
-
path: Optional[str] = None,
|
376
|
-
host: Optional[str] = None,
|
377
|
-
port: Optional[int] = None,
|
378
|
-
grpc_port: Optional[int] = None,
|
379
|
-
prefer_grpc: Optional[bool] = None,
|
380
|
-
qdrant_timeout: Optional[float] = None,
|
381
|
-
) -> "BaseCollection":
|
232
|
+
) -> Union["TantivyCollectionIndex", "QdrantCollectionIndex"]:
|
382
233
|
"""
|
383
|
-
Create a collection of the specified type.
|
384
|
-
|
385
|
-
|
386
|
-
Use the Collection class for a more object-oriented approach.
|
387
|
-
|
234
|
+
Create a data collection of the specified type. Collections are a unified
|
235
|
+
interface for creating searchable, vectorizable data stores.
|
236
|
+
|
388
237
|
Args:
|
389
|
-
type: Type of collection to create ("searchable" or "vector")
|
390
238
|
name: Name of the collection
|
239
|
+
schema: Optional schema type for validation
|
240
|
+
ttl: Default TTL for items in seconds
|
241
|
+
path: File path for storage (None = in-memory)
|
242
|
+
vector: Whether this is a vector collection (True) or text search collection (False)
|
391
243
|
vector_size: Size of vectors (required for vector collections)
|
392
|
-
|
393
|
-
|
394
|
-
|
395
|
-
|
396
|
-
|
397
|
-
|
398
|
-
|
399
|
-
|
400
|
-
|
401
|
-
|
402
|
-
|
403
|
-
|
404
|
-
|
405
|
-
|
406
|
-
|
407
|
-
path: Path for local Qdrant storage
|
408
|
-
host: Qdrant server host
|
409
|
-
port: Qdrant server port
|
410
|
-
grpc_port: Qdrant gRPC port
|
411
|
-
prefer_grpc: Whether to prefer gRPC over HTTP
|
412
|
-
api_key: API key for Qdrant authentication
|
413
|
-
timeout: Request timeout for Qdrant operations
|
414
|
-
|
244
|
+
|
245
|
+
# Tantivy parameters (for non-vector collections):
|
246
|
+
fast: Whether to use fast schema building & indexing
|
247
|
+
|
248
|
+
# Unified parameters:
|
249
|
+
settings: Collection settings (TantivyCollectionIndexSettings or QdrantCollectionIndexSettings)
|
250
|
+
query_settings: Query behavior settings (TantivyCollectionIndexQuerySettings or QdrantCollectionIndexQuerySettings)
|
251
|
+
|
252
|
+
# Qdrant parameters (for vector collections):
|
253
|
+
distance_metric: Distance metric for similarity search
|
254
|
+
embedding_model: The embedding model to use (e.g., 'openai/text-embedding-3-small')
|
255
|
+
embedding_dimensions: Number of dimensions for embeddings
|
256
|
+
embedding_api_key: API key for the embedding service
|
257
|
+
embedding_base_url: Base URL for the embedding service
|
258
|
+
|
415
259
|
Returns:
|
416
|
-
A
|
260
|
+
A TantivyCollectionIndex or QdrantCollectionIndex instance
|
417
261
|
"""
|
418
262
|
return Collection(
|
419
|
-
type=type,
|
420
263
|
name=name,
|
421
|
-
vector_size=vector_size,
|
422
264
|
schema=schema,
|
423
|
-
|
424
|
-
|
265
|
+
ttl=ttl,
|
266
|
+
path=path,
|
267
|
+
vector=vector,
|
268
|
+
vector_size=vector_size,
|
269
|
+
fast=fast,
|
270
|
+
settings=settings,
|
271
|
+
query_settings=query_settings,
|
425
272
|
distance_metric=distance_metric,
|
426
273
|
embedding_function=embedding_function,
|
427
|
-
|
428
|
-
format=format,
|
429
|
-
dimensions=dimensions,
|
430
|
-
encoding_format=encoding_format,
|
431
|
-
timeout=timeout,
|
432
|
-
api_base=api_base,
|
433
|
-
api_version=api_version,
|
434
|
-
api_key=api_key,
|
435
|
-
api_type=api_type,
|
436
|
-
caching=caching,
|
437
|
-
user=user,
|
438
|
-
parallel=parallel,
|
439
|
-
batch_size=batch_size,
|
440
|
-
heap_size=heap_size,
|
441
|
-
num_threads=num_threads,
|
442
|
-
index_path=index_path,
|
443
|
-
schema_builder=schema_builder,
|
444
|
-
writer_memory=writer_memory,
|
445
|
-
reload_policy=reload_policy,
|
446
|
-
path=path,
|
447
|
-
host=host,
|
448
|
-
port=port,
|
449
|
-
grpc_port=grpc_port,
|
450
|
-
prefer_grpc=prefer_grpc,
|
451
|
-
qdrant_timeout=qdrant_timeout,
|
452
|
-
)
|
274
|
+
)
|