hammad-python 0.0.14__py3-none-any.whl → 0.0.16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hammad/__init__.py +177 -0
- hammad/{performance/imports.py → _internal.py} +7 -1
- hammad/cache/__init__.py +1 -1
- hammad/cli/__init__.py +3 -1
- hammad/cli/_runner.py +265 -0
- hammad/cli/animations.py +1 -1
- hammad/cli/plugins.py +133 -78
- hammad/cli/styles/__init__.py +1 -1
- hammad/cli/styles/utils.py +149 -3
- hammad/data/__init__.py +56 -29
- hammad/data/collections/__init__.py +27 -17
- hammad/data/collections/collection.py +205 -383
- hammad/data/collections/indexes/__init__.py +37 -0
- hammad/data/collections/indexes/qdrant/__init__.py +1 -0
- hammad/data/collections/indexes/qdrant/index.py +735 -0
- hammad/data/collections/indexes/qdrant/settings.py +94 -0
- hammad/data/collections/indexes/qdrant/utils.py +220 -0
- hammad/data/collections/indexes/tantivy/__init__.py +1 -0
- hammad/data/collections/indexes/tantivy/index.py +428 -0
- hammad/data/collections/indexes/tantivy/settings.py +51 -0
- hammad/data/collections/indexes/tantivy/utils.py +200 -0
- hammad/data/configurations/__init__.py +2 -2
- hammad/data/configurations/configuration.py +2 -2
- hammad/data/models/__init__.py +20 -9
- hammad/data/models/extensions/__init__.py +4 -0
- hammad/data/models/{pydantic → extensions/pydantic}/__init__.py +6 -19
- hammad/data/models/{pydantic → extensions/pydantic}/converters.py +143 -16
- hammad/data/models/{base/fields.py → fields.py} +1 -1
- hammad/data/models/{base/model.py → model.py} +1 -1
- hammad/data/models/{base/utils.py → utils.py} +1 -1
- hammad/data/sql/__init__.py +23 -0
- hammad/data/sql/database.py +578 -0
- hammad/data/sql/types.py +141 -0
- hammad/data/types/__init__.py +1 -3
- hammad/data/types/file.py +3 -3
- hammad/data/types/multimodal/__init__.py +2 -2
- hammad/data/types/multimodal/audio.py +2 -2
- hammad/data/types/multimodal/image.py +2 -2
- hammad/formatting/__init__.py +9 -27
- hammad/formatting/json/__init__.py +8 -2
- hammad/formatting/json/converters.py +7 -1
- hammad/formatting/text/__init__.py +1 -1
- hammad/formatting/yaml/__init__.py +1 -1
- hammad/genai/__init__.py +78 -0
- hammad/genai/agents/__init__.py +1 -0
- hammad/genai/agents/types/__init__.py +35 -0
- hammad/genai/agents/types/history.py +277 -0
- hammad/genai/agents/types/tool.py +490 -0
- hammad/genai/embedding_models/__init__.py +41 -0
- hammad/{ai/embeddings/client/litellm_embeddings_client.py → genai/embedding_models/embedding_model.py} +47 -142
- hammad/genai/embedding_models/embedding_model_name.py +77 -0
- hammad/genai/embedding_models/embedding_model_request.py +65 -0
- hammad/{ai/embeddings/types.py → genai/embedding_models/embedding_model_response.py} +3 -3
- hammad/genai/embedding_models/run.py +161 -0
- hammad/genai/language_models/__init__.py +35 -0
- hammad/genai/language_models/_streaming.py +622 -0
- hammad/genai/language_models/_types.py +276 -0
- hammad/genai/language_models/_utils/__init__.py +31 -0
- hammad/genai/language_models/_utils/_completions.py +131 -0
- hammad/genai/language_models/_utils/_messages.py +89 -0
- hammad/genai/language_models/_utils/_requests.py +202 -0
- hammad/genai/language_models/_utils/_structured_outputs.py +124 -0
- hammad/genai/language_models/language_model.py +734 -0
- hammad/genai/language_models/language_model_request.py +135 -0
- hammad/genai/language_models/language_model_response.py +219 -0
- hammad/genai/language_models/language_model_response_chunk.py +53 -0
- hammad/genai/language_models/run.py +530 -0
- hammad/genai/multimodal_models.py +48 -0
- hammad/genai/rerank_models.py +26 -0
- hammad/logging/__init__.py +1 -1
- hammad/logging/decorators.py +1 -1
- hammad/logging/logger.py +2 -2
- hammad/mcp/__init__.py +1 -1
- hammad/mcp/client/__init__.py +35 -0
- hammad/mcp/client/client.py +105 -4
- hammad/mcp/client/client_service.py +10 -3
- hammad/mcp/servers/__init__.py +24 -0
- hammad/{performance/runtime → runtime}/__init__.py +2 -2
- hammad/{performance/runtime → runtime}/decorators.py +1 -1
- hammad/{performance/runtime → runtime}/run.py +1 -1
- hammad/service/__init__.py +1 -1
- hammad/service/create.py +3 -8
- hammad/service/decorators.py +8 -8
- hammad/typing/__init__.py +28 -0
- hammad/web/__init__.py +3 -3
- hammad/web/http/client.py +1 -1
- hammad/web/models.py +53 -21
- hammad/web/search/client.py +99 -52
- hammad/web/utils.py +13 -13
- hammad_python-0.0.16.dist-info/METADATA +191 -0
- hammad_python-0.0.16.dist-info/RECORD +110 -0
- hammad/ai/__init__.py +0 -1
- hammad/ai/_utils.py +0 -142
- hammad/ai/completions/__init__.py +0 -45
- hammad/ai/completions/client.py +0 -684
- hammad/ai/completions/create.py +0 -710
- hammad/ai/completions/settings.py +0 -100
- hammad/ai/completions/types.py +0 -792
- hammad/ai/completions/utils.py +0 -486
- hammad/ai/embeddings/__init__.py +0 -35
- hammad/ai/embeddings/client/__init__.py +0 -1
- hammad/ai/embeddings/client/base_embeddings_client.py +0 -26
- hammad/ai/embeddings/client/fastembed_text_embeddings_client.py +0 -200
- hammad/ai/embeddings/create.py +0 -159
- hammad/data/collections/base_collection.py +0 -58
- hammad/data/collections/searchable_collection.py +0 -556
- hammad/data/collections/vector_collection.py +0 -596
- hammad/data/databases/__init__.py +0 -21
- hammad/data/databases/database.py +0 -902
- hammad/data/models/base/__init__.py +0 -35
- hammad/data/models/pydantic/models/__init__.py +0 -28
- hammad/data/models/pydantic/models/arbitrary_model.py +0 -46
- hammad/data/models/pydantic/models/cacheable_model.py +0 -79
- hammad/data/models/pydantic/models/fast_model.py +0 -318
- hammad/data/models/pydantic/models/function_model.py +0 -176
- hammad/data/models/pydantic/models/subscriptable_model.py +0 -63
- hammad/performance/__init__.py +0 -36
- hammad/py.typed +0 -0
- hammad_python-0.0.14.dist-info/METADATA +0 -70
- hammad_python-0.0.14.dist-info/RECORD +0 -99
- {hammad_python-0.0.14.dist-info → hammad_python-0.0.16.dist-info}/WHEEL +0 -0
- {hammad_python-0.0.14.dist-info → hammad_python-0.0.16.dist-info}/licenses/LICENSE +0 -0
@@ -1,902 +0,0 @@
-"""hammad.data.databases.database"""
-
-import uuid
-from typing import (
-    Any,
-    Dict,
-    Optional,
-    List,
-    Literal,
-    TypeVar,
-    Generic,
-    Callable,
-    overload,
-    Literal,
-    TYPE_CHECKING,
-)
-from datetime import datetime, timezone, timedelta
-import json
-import os
-
-try:
-    from sqlalchemy import (
-        create_engine,
-        Column,
-        String,
-        Text,
-        DateTime,
-        Integer,
-        MetaData,
-        Table,
-    )
-    from sqlalchemy.orm import sessionmaker, declarative_base
-    from sqlalchemy.sql import select, insert, update, delete
-except ImportError:
-    # SQLAlchemy not available - file storage will not work
-    create_engine = None
-
-from ..collections.base_collection import BaseCollection, Filters, Schema
-from ..collections.collection import create_collection
-
-if TYPE_CHECKING:
-    from ..collections.searchable_collection import SearchableCollection
-    from ..collections.vector_collection import VectorCollection
-
-__all__ = ("Database",)
-
-DatabaseEntryType = TypeVar("DatabaseEntryType", bound=Any)
-
-DatabaseLocation = Literal["memory", "file"]
-
-
-class Database(Generic[DatabaseEntryType]):
-    """
-    Enhanced Database class that supports both traditional collections and
-    new searchable/vector collections with beautiful IDE typing support.
-
-    Features:
-    - Dict-like access: db["collection_name"]
-    - Easy creation of searchable and vector collections
-    - Full type hinting and IDE autocomplete
-    - Backward compatibility with traditional collections
-    - TTL support and filtering
-    """
-
-    def __init__(
-        self,
-        location: DatabaseLocation = "memory",
-        path: str = "database.db",
-        default_ttl: Optional[int] = None,
-    ):
-        """
-        Initialize the database.
-
-        Args:
-            location: Storage location ("memory" for in-memory, "file" for persistent)
-            path: Path to the database file when using "file" location (default: "database.db")
-            default_ttl: Default TTL for items in seconds
-        """
-        self.location = location
-        self.path = path
-        self.default_ttl = default_ttl
-
-        # Storage for traditional collections
-        self._schemas: Dict[str, Optional[Schema]] = {}
-        self._collection_ttls: Dict[str, Optional[int]] = {}
-        self._storage: Dict[str, Dict[str, Dict[str, Any]]] = {"default": {}}
-
-        # Registry for modern collections (searchable/vector)
-        self._collections: Dict[str, BaseCollection] = {}
-
-        # SQLAlchemy setup for file storage
-        self._engine = None
-        self._SessionLocal = None
-        self._metadata = None
-        self._tables: Dict[str, Table] = {}
-
-        if location == "file":
-            self._init_file_storage()
-
-    def _init_file_storage(self) -> None:
-        """Initialize SQLAlchemy for file-based storage."""
-        if create_engine is None:
-            raise ImportError(
-                "SQLAlchemy is required for file storage. "
-                "Install with: pip install sqlalchemy"
-            )
-
-        # Create database directory if it doesn't exist
-        db_dir = os.path.dirname(os.path.abspath(self.path))
-        if db_dir and not os.path.exists(db_dir):
-            os.makedirs(db_dir)
-
-        # Create SQLAlchemy engine
-        self._engine = create_engine(f"sqlite:///{self.path}", echo=False)
-        self._SessionLocal = sessionmaker(bind=self._engine)
-        self._metadata = MetaData()
-
-        # Create default table
-        self._create_collection_table("default")
-
-    def _create_collection_table(self, collection_name: str) -> Table:
-        """Create a table for a collection."""
-        if collection_name in self._tables:
-            return self._tables[collection_name]
-
-        table = Table(
-            f"collection_{collection_name}",
-            self._metadata,
-            Column("id", String, primary_key=True),
-            Column("value", Text),
-            Column("filters", Text),
-            Column("created_at", DateTime),
-            Column("updated_at", DateTime),
-            Column("expires_at", DateTime, nullable=True),
-        )
-
-        self._tables[collection_name] = table
-
-        # Create table in database
-        if self._engine:
-            self._metadata.create_all(self._engine)
-
-        return table
-
-    def _get_from_file(
-        self, id: str, collection: str, filters: Optional[Filters] = None
-    ) -> Optional[DatabaseEntryType]:
-        """Get an item from file storage."""
-        if collection not in self._schemas:
-            return None
-
-        table = self._tables.get(collection)
-        if table is None:
-            return None
-
-        with self._SessionLocal() as session:
-            stmt = select(table).where(table.c.id == id)
-            result = session.execute(stmt).fetchone()
-
-            if not result:
-                return None
-
-            # Check expiration
-            if result.expires_at and self._is_expired(result.expires_at):
-                # Delete expired item
-                delete_stmt = delete(table).where(table.c.id == id)
-                session.execute(delete_stmt)
-                session.commit()
-                return None
-
-            # Check filters
-            stored_filters = json.loads(result.filters) if result.filters else {}
-            if not self._match_filters(stored_filters, filters):
-                return None
-
-            return json.loads(result.value)
-
-    def _add_to_file(
-        self,
-        entry: DatabaseEntryType,
-        id: Optional[str],
-        collection: str,
-        filters: Optional[Filters],
-        ttl: Optional[int],
-    ) -> None:
-        """Add an item to file storage."""
-        if collection not in self._schemas:
-            self.create_collection(collection)
-
-        table = self._tables.get(collection)
-        if table is None:
-            return
-
-        item_id = id or str(uuid.uuid4())
-        expires_at = self._calculate_expires_at(ttl)
-        now = datetime.now(timezone.utc)
-
-        with self._SessionLocal() as session:
-            # Check if item exists
-            existing = session.execute(
-                select(table).where(table.c.id == item_id)
-            ).fetchone()
-
-            if existing:
-                # Update existing item
-                stmt = (
-                    update(table)
-                    .where(table.c.id == item_id)
-                    .values(
-                        value=json.dumps(entry),
-                        filters=json.dumps(filters or {}),
-                        updated_at=now,
-                        expires_at=expires_at,
-                    )
-                )
-            else:
-                # Insert new item
-                stmt = insert(table).values(
-                    id=item_id,
-                    value=json.dumps(entry),
-                    filters=json.dumps(filters or {}),
-                    created_at=now,
-                    updated_at=now,
-                    expires_at=expires_at,
-                )
-
-            session.execute(stmt)
-            session.commit()
-
-    def _query_from_file(
-        self,
-        collection: str,
-        filters: Optional[Filters],
-        search: Optional[str],
-        limit: Optional[int],
-    ) -> List[DatabaseEntryType]:
-        """Query items from file storage."""
-        if collection not in self._schemas:
-            return []
-
-        table = self._tables.get(collection)
-        if table is None:
-            return []
-
-        with self._SessionLocal() as session:
-            stmt = select(table)
-
-            # Apply limit
-            if limit:
-                stmt = stmt.limit(limit)
-
-            results = session.execute(stmt).fetchall()
-
-            items = []
-            expired_ids = []
-
-            for result in results:
-                # Check expiration
-                if result.expires_at and self._is_expired(result.expires_at):
-                    expired_ids.append(result.id)
-                    continue
-
-                # Check filters
-                stored_filters = json.loads(result.filters) if result.filters else {}
-                if not self._match_filters(stored_filters, filters):
-                    continue
-
-                # Basic search implementation
-                value = json.loads(result.value)
-                if search:
-                    item_text = str(value).lower()
-                    if search.lower() not in item_text:
-                        continue
-
-                items.append(value)
-                if limit and len(items) >= limit:
-                    break
-
-            # Clean up expired items
-            if expired_ids:
-                delete_stmt = delete(table).where(table.c.id.in_(expired_ids))
-                session.execute(delete_stmt)
-                session.commit()
-
-            return items
-
-    def __repr__(self) -> str:
-        all_collections = set(self._schemas.keys()) | set(self._collections.keys())
-        location_info = f"location='{self.location}'"
-        if self.location == "file":
-            location_info += f" path='{self.path}'"
-        return f"<Database {location_info} collections={list(all_collections)}>"
-
-    @overload
-    def create_searchable_collection(
-        self,
-        name: str,
-        *,
-        schema: Optional[Schema] = None,
-        default_ttl: Optional[int] = None,
-        heap_size: Optional[int] = None,
-        num_threads: Optional[int] = None,
-        index_path: Optional[str] = None,
-        schema_builder: Optional[Any] = None,
-        writer_memory: Optional[int] = None,
-        reload_policy: Optional[str] = None,
-    ) -> "SearchableCollection[DatabaseEntryType]":
-        """Create a searchable collection using tantivy for full-text search."""
-        ...
-
-    @overload
-    def create_vector_collection(
-        self,
-        name: str,
-        vector_size: int,
-        *,
-        schema: Optional[Schema] = None,
-        default_ttl: Optional[int] = None,
-        distance_metric: Optional[Any] = None,
-        embedding_function: Optional[Callable[[Any], List[float]]] = None,
-        model: Optional[str] = None,
-        # Common embedding parameters
-        format: bool = False,
-        # LiteLLM parameters
-        dimensions: Optional[int] = None,
-        encoding_format: Optional[str] = None,
-        timeout: Optional[int] = None,
-        api_base: Optional[str] = None,
-        api_version: Optional[str] = None,
-        api_key: Optional[str] = None,
-        api_type: Optional[str] = None,
-        caching: bool = False,
-        user: Optional[str] = None,
-        # FastEmbed parameters
-        parallel: Optional[int] = None,
-        batch_size: Optional[int] = None,
-        # Qdrant configuration parameters
-        path: Optional[str] = None,
-        host: Optional[str] = None,
-        port: Optional[int] = None,
-        grpc_port: Optional[int] = None,
-        prefer_grpc: Optional[bool] = None,
-        qdrant_timeout: Optional[float] = None,
-    ) -> "VectorCollection[DatabaseEntryType]":
-        """Create a vector collection using Qdrant for semantic similarity search."""
-        ...
-
-    def create_searchable_collection(
-        self,
-        name: str,
-        *,
-        schema: Optional[Schema] = None,
-        default_ttl: Optional[int] = None,
-        heap_size: Optional[int] = None,
-        num_threads: Optional[int] = None,
-        index_path: Optional[str] = None,
-        schema_builder: Optional[Any] = None,
-        writer_memory: Optional[int] = None,
-        reload_policy: Optional[str] = None,
-    ) -> "SearchableCollection[DatabaseEntryType]":
-        """Create a searchable collection using tantivy for full-text search."""
-        collection = create_collection(
-            "searchable",
-            name,
-            schema=schema,
-            default_ttl=default_ttl or self.default_ttl,
-            storage_backend=self,
-            heap_size=heap_size,
-            num_threads=num_threads,
-            index_path=index_path,
-            schema_builder=schema_builder,
-            writer_memory=writer_memory,
-            reload_policy=reload_policy,
-        )
-        self._collections[name] = collection
-        return collection
-
-    def create_vector_collection(
-        self,
-        name: str,
-        vector_size: int,
-        *,
-        schema: Optional[Schema] = None,
-        default_ttl: Optional[int] = None,
-        distance_metric: Optional[Any] = None,
-        embedding_function: Optional[Callable[[Any], List[float]]] = None,
-        model: Optional[str] = None,
-        # Common embedding parameters
-        format: bool = False,
-        # LiteLLM parameters
-        dimensions: Optional[int] = None,
-        encoding_format: Optional[str] = None,
-        timeout: Optional[int] = None,
-        api_base: Optional[str] = None,
-        api_version: Optional[str] = None,
-        api_key: Optional[str] = None,
-        api_type: Optional[str] = None,
-        caching: bool = False,
-        user: Optional[str] = None,
-        # FastEmbed parameters
-        parallel: Optional[int] = None,
-        batch_size: Optional[int] = None,
-        # Qdrant configuration parameters
-        path: Optional[str] = None,
-        host: Optional[str] = None,
-        port: Optional[int] = None,
-        grpc_port: Optional[int] = None,
-        prefer_grpc: Optional[bool] = None,
-        qdrant_timeout: Optional[float] = None,
-    ) -> "VectorCollection[DatabaseEntryType]":
-        """Create a vector collection using Qdrant for semantic similarity search.
-
-        Args:
-            model: Model name (e.g., 'fastembed/BAAI/bge-small-en-v1.5', 'openai/text-embedding-3-small')
-            format: Whether to format each non-string input as a markdown string
-
-            # LiteLLM-specific parameters:
-            dimensions: The dimensions of the embedding
-            encoding_format: The encoding format (e.g. "float", "base64")
-            timeout: The timeout for embedding requests
-            api_base: API base URL for remote models
-            api_version: The version of the embedding API
-            api_key: API key for remote models
-            api_type: The type of the embedding API
-            caching: Whether to cache embeddings
-            user: The user for the embedding
-
-            # FastEmbed-specific parameters:
-            parallel: Number of parallel processes for embedding
-            batch_size: Batch size for embedding
-
-            # Qdrant configuration parameters:
-            path: Path for local Qdrant storage
-            host: Qdrant server host
-            port: Qdrant server port
-            grpc_port: Qdrant gRPC port
-            prefer_grpc: Whether to prefer gRPC over HTTP
-            qdrant_timeout: Request timeout for Qdrant operations
-        """
-
-        # Build qdrant config, using database defaults and unified path
-        qdrant_config = getattr(self, "_default_qdrant_settings", {}).copy()
-
-        # Override with method parameters if provided
-        if path is not None:
-            qdrant_config["path"] = path
-        elif host is not None:
-            qdrant_config["host"] = host
-        elif (
-            self.location == "file"
-            and "path" not in qdrant_config
-            and "host" not in qdrant_config
-        ):
-            # Use unified path approach for file storage
-            qdrant_path = self.path.replace(".db", f"_qdrant_{name}")
-            qdrant_config["path"] = qdrant_path
-
-        # Override other parameters
-        if port is not None:
-            qdrant_config["port"] = port
-        if grpc_port is not None:
-            qdrant_config["grpc_port"] = grpc_port
-        if prefer_grpc is not None:
-            qdrant_config["prefer_grpc"] = prefer_grpc
-        if qdrant_timeout is not None:
-            qdrant_config["timeout"] = qdrant_timeout
-
-        collection = create_collection(
-            "vector",
-            name,
-            vector_size,
-            schema=schema,
-            default_ttl=default_ttl or self.default_ttl,
-            storage_backend=self,
-            distance_metric=distance_metric,
-            embedding_function=embedding_function,
-            model=model,
-            # Common embedding parameters
-            format=format,
-            # LiteLLM parameters
-            dimensions=dimensions,
-            encoding_format=encoding_format,
-            timeout=timeout,
-            api_base=api_base,
-            api_version=api_version,
-            api_key=api_key,
-            api_type=api_type,
-            caching=caching,
-            user=user,
-            # FastEmbed parameters
-            parallel=parallel,
-            batch_size=batch_size,
-            # Qdrant config
-            path=qdrant_config.get("path"),
-            host=qdrant_config.get("host"),
-            port=qdrant_config.get("port"),
-            grpc_port=qdrant_config.get("grpc_port"),
-            prefer_grpc=qdrant_config.get("prefer_grpc"),
-            qdrant_timeout=qdrant_config.get("timeout"),
-        )
-        self._collections[name] = collection
-        return collection
-
-    def register_collection(self, collection: BaseCollection) -> None:
-        """Register an external collection with this database."""
-        collection.attach_to_database(self)
-        self._collections[collection.name] = collection
-
-    def create_collection(
-        self,
-        name: str,
-        schema: Optional[Schema] = None,
-        default_ttl: Optional[int] = None,
-    ) -> None:
-        """Create a traditional collection (backward compatibility)."""
-        self._schemas[name] = schema
-        self._collection_ttls[name] = default_ttl
-
-        if self.location == "file":
-            self._create_collection_table(name)
-        else:
-            self._storage.setdefault(name, {})
-
-    def _calculate_expires_at(self, ttl: Optional[int]) -> Optional[datetime]:
-        """Calculate expiry time based on TTL."""
-        if ttl is None:
-            ttl = self.default_ttl
-        if ttl and ttl > 0:
-            return datetime.now(timezone.utc) + timedelta(seconds=ttl)
-        return None
-
-    def _is_expired(self, expires_at: Optional[datetime]) -> bool:
-        """Check if an item has expired."""
-        if expires_at is None:
-            return False
-        now = datetime.now(timezone.utc)
-        if expires_at.tzinfo is None:
-            expires_at = expires_at.replace(tzinfo=timezone.utc)
-        return now >= expires_at
-
-    def _match_filters(
-        self, stored: Optional[Filters], query: Optional[Filters]
-    ) -> bool:
-        """Check if stored filters match query filters."""
-        if query is None:
-            return True
-        if stored is None:
-            return False
-        return all(stored.get(k) == v for k, v in query.items())
-
-    def get(
-        self,
-        id: str,
-        *,
-        collection: str = "default",
-        filters: Optional[Filters] = None,
-    ) -> Optional[DatabaseEntryType]:
-        """Get an item from any collection."""
-        # Check modern collections first
-        if collection in self._collections:
-            coll = self._collections[collection]
-            # Temporarily remove storage backend to avoid recursion
-            original_backend = coll._storage_backend
-            coll._storage_backend = None
-            try:
-                return coll.get(id, filters=filters)
-            finally:
-                coll._storage_backend = original_backend
-
-        # File storage
-        if self.location == "file":
-            return self._get_from_file(id, collection, filters)
-
-        # Traditional in-memory collection logic
-        if collection not in self._schemas:
-            return None
-
-        coll_store = self._storage.get(collection, {})
-        item = coll_store.get(id)
-        if not item:
-            return None
-
-        # Check expiration
-        if self._is_expired(item.get("expires_at")):
-            del coll_store[id]
-            return None
-
-        # Check filters
-        if not self._match_filters(item.get("filters"), filters):
-            return None
-
-        return item["value"]
-
-    def add(
-        self,
-        entry: DatabaseEntryType,
-        *,
-        id: Optional[str] = None,
-        collection: str = "default",
-        filters: Optional[Filters] = None,
-        ttl: Optional[int] = None,
-    ) -> None:
-        """Add an item to any collection."""
-        # Check modern collections first
-        if collection in self._collections:
-            coll = self._collections[collection]
-            # Temporarily remove storage backend to avoid recursion
-            original_backend = coll._storage_backend
-            coll._storage_backend = None
-            try:
-                coll.add(entry, id=id, filters=filters, ttl=ttl)
-            finally:
-                coll._storage_backend = original_backend
-            return
-
-        # File storage
-        if self.location == "file":
-            self._add_to_file(entry, id, collection, filters, ttl)
-            return
-
-        # Traditional in-memory collection logic
-        if collection not in self._schemas:
-            self.create_collection(collection)
-
-        item_id = id or str(uuid.uuid4())
-        expires_at = self._calculate_expires_at(ttl)
-        coll_store = self._storage.setdefault(collection, {})
-
-        coll_store[item_id] = {
-            "value": entry,
-            "filters": filters or {},
-            "created_at": datetime.now(timezone.utc),
-            "updated_at": datetime.now(timezone.utc),
-            "expires_at": expires_at,
-        }
-
-    def query(
-        self,
-        *,
-        collection: str = "default",
-        filters: Optional[Filters] = None,
-        search: Optional[str] = None,
-        limit: Optional[int] = None,
-        **kwargs,
-    ) -> List[DatabaseEntryType]:
-        """Query items from any collection."""
-        # Check modern collections first
-        if collection in self._collections:
-            coll = self._collections[collection]
-            # Temporarily remove storage backend to avoid recursion
-            original_backend = coll._storage_backend
-            coll._storage_backend = None
-            try:
-                return coll.query(filters=filters, search=search, limit=limit, **kwargs)
-            finally:
-                coll._storage_backend = original_backend
-
-        # File storage
-        if self.location == "file":
-            return self._query_from_file(collection, filters, search, limit)
-
-        # Traditional in-memory collection logic
-        if collection not in self._schemas:
-            return []
-
-        results = []
-        coll_store = self._storage.get(collection, {})
-
-        for item in coll_store.values():
-            # Check expiration
-            if self._is_expired(item.get("expires_at")):
-                continue
-
-            # Check filters
-            if not self._match_filters(item.get("filters"), filters):
-                continue
-
-            # Basic search implementation
-            if search:
-                item_text = str(item["value"]).lower()
-                if search.lower() not in item_text:
-                    continue
-
-            results.append(item["value"])
-            if limit and len(results) >= limit:
-                break
-
-        return results
-
-    def __getitem__(self, collection_name: str) -> BaseCollection[DatabaseEntryType]:
-        """Get a collection accessor with full IDE typing support."""
-        # Return modern collection if it exists
-        if collection_name in self._collections:
-            return self._collections[collection_name]
-
-        # Create a database-backed collection accessor for traditional collections
-        class DatabaseCollectionAccessor(BaseCollection[DatabaseEntryType]):
-            def __init__(self, database_instance: "Database", name: str):
-                self._database = database_instance
-                self.name = name
-                self._storage_backend = database_instance
-
-            def get(
-                self, id: str, *, filters: Optional[Filters] = None
-            ) -> Optional[DatabaseEntryType]:
-                return self._database.get(id, collection=self.name, filters=filters)
-
-            def add(
-                self,
-                entry: DatabaseEntryType,
-                *,
-                id: Optional[str] = None,
-                filters: Optional[Filters] = None,
-                ttl: Optional[int] = None,
-            ) -> None:
-                self._database.add(
-                    entry, id=id, collection=self.name, filters=filters, ttl=ttl
-                )
-
-            def query(
-                self,
-                *,
-                filters: Optional[Filters] = None,
-                search: Optional[str] = None,
-                limit: Optional[int] = None,
-            ) -> List[DatabaseEntryType]:
-                return self._database.query(
-                    collection=self.name, filters=filters, search=search, limit=limit
-                )
-
-        return DatabaseCollectionAccessor(self, collection_name)
-
-    def __contains__(self, collection_name: str) -> bool:
-        """Check if a collection exists."""
-        return collection_name in self._schemas or collection_name in self._collections
-
-    def keys(self) -> List[str]:
-        """Get all collection names."""
-        all_collections = set(self._schemas.keys())
-        all_collections.update(self._collections.keys())
-        return list(all_collections)
-
-    def collections(self) -> Dict[str, BaseCollection]:
-        """Get all modern collections."""
-        return self._collections.copy()
-
-    def delete_collection(self, name: str) -> bool:
-        """Delete a collection."""
-        deleted = False
-
-        if name in self._collections:
-            del self._collections[name]
-            deleted = True
-
-        if name in self._schemas:
-            del self._schemas[name]
-            del self._collection_ttls[name]
-            if name in self._storage:
-                del self._storage[name]
-            deleted = True
-
-        return deleted
-
-    def clear(self) -> None:
-        """Clear all collections and data."""
-        self._collections.clear()
-        self._schemas.clear()
-        self._collection_ttls.clear()
-        self._storage.clear()
-        self._storage["default"] = {}
-
-
-@overload
-def create_database(
-    type: Literal["searchable"],
-    location: DatabaseLocation = "memory",
-    *,
-    path: str = "database.db",
-    default_ttl: Optional[int] = None,
-    heap_size: Optional[int] = None,
-    num_threads: Optional[int] = None,
-    index_path: Optional[str] = None,
-    schema_builder: Optional[Any] = None,
-    writer_memory: Optional[int] = None,
-    reload_policy: Optional[str] = None,
-) -> "Database[SearchableCollection]": ...
-
-
-@overload
-def create_database(
-    type: Literal["vector"],
-    location: DatabaseLocation = "memory",
-    *,
-    path: str = "database.db",
-    default_ttl: Optional[int] = None,
-    host: Optional[str] = None,
-    port: Optional[int] = None,
-    grpc_port: Optional[int] = None,
-    prefer_grpc: Optional[bool] = None,
-    api_key: Optional[str] = None,
-    timeout: Optional[float] = None,
-) -> "Database[VectorCollection]": ...
-
-
-def create_database(
-    type: Literal["searchable", "vector"],
-    location: DatabaseLocation = "memory",
-    *,
-    path: str = "database.db",
-    default_ttl: Optional[int] = None,
-    # Tantivy parameters (searchable databases only)
-    heap_size: Optional[int] = None,
-    num_threads: Optional[int] = None,
-    index_path: Optional[str] = None,
-    schema_builder: Optional[Any] = None,
-    writer_memory: Optional[int] = None,
-    reload_policy: Optional[str] = None,
-    # Qdrant parameters (vector databases only)
-    host: Optional[str] = None,
-    port: Optional[int] = None,
-    grpc_port: Optional[int] = None,
-    prefer_grpc: Optional[bool] = None,
-    api_key: Optional[str] = None,
-    timeout: Optional[float] = None,
-) -> "Database":
-    """
-    Create a database instance optimized for specific collection types.
-
-    Args:
-        type: Type of database to create ("searchable" or "vector")
-        location: Database location ("memory" or "file")
-        path: Path to the database file when using "file" location
-        default_ttl: Default TTL for items in seconds
-
-    Tantivy parameters (searchable databases only):
-        heap_size: Memory allocation for tantivy heap
-        num_threads: Number of threads for tantivy operations
-        index_path: Path to store tantivy index files
-        schema_builder: Custom schema builder for tantivy
-        writer_memory: Memory allocation for tantivy writer
-        reload_policy: Policy for reloading tantivy index
-
-    Qdrant parameters (vector databases only):
-        host: Qdrant server host (if not provided, uses local storage with unified 'path')
-        port: Qdrant server port
-        grpc_port: Qdrant gRPC port
-        prefer_grpc: Whether to prefer gRPC over HTTP
-        api_key: API key for Qdrant authentication
-        timeout: Request timeout for Qdrant operations
-
-    Returns:
-        A Database instance optimized for the specified collection type
-    """
-    database = Database(location=location, path=path, default_ttl=default_ttl)
-
-    # Store the database type for future collection creation optimization
-    database._database_type = type
-
-    if type == "searchable":
-        # Build default tantivy settings from individual parameters
-        tantivy_defaults = {}
-        if heap_size is not None:
-            tantivy_defaults["heap_size"] = heap_size
-        if num_threads is not None:
-            tantivy_defaults["num_threads"] = num_threads
-        if index_path is not None:
-            tantivy_defaults["index_path"] = index_path
-        if schema_builder is not None:
-            tantivy_defaults["schema_builder"] = schema_builder
-        if writer_memory is not None:
-            tantivy_defaults["writer_memory"] = writer_memory
-        if reload_policy is not None:
-            tantivy_defaults["reload_policy"] = reload_policy
-
-        if tantivy_defaults:
-            database._default_tantivy_settings = tantivy_defaults
-
-    elif type == "vector":
-        # Build default qdrant settings from individual parameters
-        qdrant_defaults = {}
-        # Use the unified path for local Qdrant storage when no host is provided
-        if host is None and location == "file":
-            # For file storage, create a directory path for Qdrant
-            qdrant_path = path.replace(".db", "_qdrant")
-            qdrant_defaults["path"] = qdrant_path
-        elif host is not None:
-            qdrant_defaults["host"] = host
-        if port is not None:
-            qdrant_defaults["port"] = port
-        if grpc_port is not None:
-            qdrant_defaults["grpc_port"] = grpc_port
-        if prefer_grpc is not None:
-            qdrant_defaults["prefer_grpc"] = prefer_grpc
-        if api_key is not None:
-            qdrant_defaults["api_key"] = api_key
-        if timeout is not None:
-            qdrant_defaults["timeout"] = timeout
-
-        if qdrant_defaults:
-            qdrant_defaults and None  # no-op placeholder removed in reconstruction
-            database._default_qdrant_settings = qdrant_defaults
-
-    return database