hammad-python 0.0.10__py3-none-any.whl → 0.0.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hammad/__init__.py +64 -10
- hammad/based/__init__.py +52 -0
- hammad/based/fields.py +546 -0
- hammad/based/model.py +968 -0
- hammad/based/utils.py +455 -0
- hammad/cache/__init__.py +30 -0
- hammad/{cache.py → cache/_cache.py} +83 -12
- hammad/cli/__init__.py +25 -0
- hammad/cli/plugins/__init__.py +786 -0
- hammad/cli/styles/__init__.py +5 -0
- hammad/cli/styles/animations.py +548 -0
- hammad/cli/styles/settings.py +135 -0
- hammad/cli/styles/types.py +358 -0
- hammad/cli/styles/utils.py +480 -0
- hammad/data/__init__.py +51 -0
- hammad/data/collections/__init__.py +32 -0
- hammad/data/collections/base_collection.py +58 -0
- hammad/data/collections/collection.py +227 -0
- hammad/data/collections/searchable_collection.py +556 -0
- hammad/data/collections/vector_collection.py +497 -0
- hammad/data/databases/__init__.py +21 -0
- hammad/data/databases/database.py +551 -0
- hammad/data/types/__init__.py +33 -0
- hammad/data/types/files/__init__.py +1 -0
- hammad/data/types/files/audio.py +81 -0
- hammad/data/types/files/configuration.py +475 -0
- hammad/data/types/files/document.py +195 -0
- hammad/data/types/files/file.py +358 -0
- hammad/data/types/files/image.py +80 -0
- hammad/json/__init__.py +21 -0
- hammad/{utils/json → json}/converters.py +4 -1
- hammad/logging/__init__.py +27 -0
- hammad/logging/decorators.py +432 -0
- hammad/logging/logger.py +534 -0
- hammad/pydantic/__init__.py +43 -0
- hammad/{utils/pydantic → pydantic}/converters.py +2 -1
- hammad/pydantic/models/__init__.py +28 -0
- hammad/pydantic/models/arbitrary_model.py +46 -0
- hammad/pydantic/models/cacheable_model.py +79 -0
- hammad/pydantic/models/fast_model.py +318 -0
- hammad/pydantic/models/function_model.py +176 -0
- hammad/pydantic/models/subscriptable_model.py +63 -0
- hammad/text/__init__.py +37 -0
- hammad/text/text.py +1068 -0
- hammad/text/utils/__init__.py +1 -0
- hammad/{utils/text → text/utils}/converters.py +2 -2
- hammad/text/utils/markdown/__init__.py +1 -0
- hammad/{utils → text/utils}/markdown/converters.py +3 -3
- hammad/{utils → text/utils}/markdown/formatting.py +1 -1
- hammad/{utils/typing/utils.py → typing/__init__.py} +75 -2
- hammad/web/__init__.py +42 -0
- hammad/web/http/__init__.py +1 -0
- hammad/web/http/client.py +944 -0
- hammad/web/openapi/client.py +740 -0
- hammad/web/search/__init__.py +1 -0
- hammad/web/search/client.py +936 -0
- hammad/web/utils.py +463 -0
- hammad/yaml/__init__.py +30 -0
- hammad/yaml/converters.py +19 -0
- {hammad_python-0.0.10.dist-info → hammad_python-0.0.11.dist-info}/METADATA +14 -8
- hammad_python-0.0.11.dist-info/RECORD +65 -0
- hammad/database.py +0 -447
- hammad/logger.py +0 -273
- hammad/types/color.py +0 -951
- hammad/utils/json/__init__.py +0 -0
- hammad/utils/markdown/__init__.py +0 -0
- hammad/utils/pydantic/__init__.py +0 -0
- hammad/utils/text/__init__.py +0 -0
- hammad/utils/typing/__init__.py +0 -0
- hammad_python-0.0.10.dist-info/RECORD +0 -22
- /hammad/{types/__init__.py → py.typed} +0 -0
- /hammad/{utils → web/openapi}/__init__.py +0 -0
- {hammad_python-0.0.10.dist-info → hammad_python-0.0.11.dist-info}/WHEEL +0 -0
- {hammad_python-0.0.10.dist-info → hammad_python-0.0.11.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,551 @@
|
|
1
|
+
"""hammad.data.databases.database"""
|
2
|
+
|
3
|
+
import uuid
|
4
|
+
from typing import (
|
5
|
+
Any,
|
6
|
+
Dict,
|
7
|
+
Optional,
|
8
|
+
List,
|
9
|
+
TypeVar,
|
10
|
+
Generic,
|
11
|
+
Callable,
|
12
|
+
overload,
|
13
|
+
Literal,
|
14
|
+
TYPE_CHECKING,
|
15
|
+
)
|
16
|
+
from datetime import datetime, timezone, timedelta
|
17
|
+
|
18
|
+
from ..collections.base_collection import BaseCollection, Filters, Schema
|
19
|
+
from ..collections.collection import create_collection
|
20
|
+
|
21
|
+
if TYPE_CHECKING:
|
22
|
+
from ..collections.searchable_collection import SearchableCollection
|
23
|
+
from ..collections.vector_collection import VectorCollection
|
24
|
+
|
25
|
+
__all__ = ("Database",)
|
26
|
+
|
27
|
+
DatabaseEntryType = TypeVar("DatabaseEntryType", bound=Any)
|
28
|
+
|
29
|
+
|
30
|
+
class Database(Generic[DatabaseEntryType]):
    """
    Enhanced Database class that supports both traditional collections and
    new searchable/vector collections with beautiful IDE typing support.

    Features:
    - Dict-like access: db["collection_name"]
    - Easy creation of searchable and vector collections
    - Full type hinting and IDE autocomplete
    - Backward compatibility with traditional collections
    - TTL support and filtering
    """

    def __init__(self, location: str = "memory", default_ttl: Optional[int] = None):
        """
        Initialize the database.

        Args:
            location: Storage location ("memory" for in-memory, or path for persistent)
            default_ttl: Default TTL for items in seconds
        """
        self.location = location
        self.default_ttl = default_ttl

        # Storage for traditional collections.
        # _storage maps collection name -> item id -> item record
        # (records carry "value", "filters", "created_at", "updated_at", "expires_at").
        self._schemas: Dict[str, Optional[Schema]] = {}
        self._collection_ttls: Dict[str, Optional[int]] = {}
        self._storage: Dict[str, Dict[str, Dict[str, Any]]] = {"default": {}}

        # Registry for modern collections (searchable/vector)
        self._collections: Dict[str, BaseCollection] = {}

    def __repr__(self) -> str:
        # Union of traditional and modern collection names for debuggability.
        all_collections = set(self._schemas.keys()) | set(self._collections.keys())
        return (
            f"<Database location='{self.location}' collections={list(all_collections)}>"
        )

    # NOTE(review): a single @overload stub followed by the implementation is
    # redundant — typing.overload is intended for two or more alternative
    # signatures. Presumably kept for symmetry with create_database below;
    # confirm before removing.
    @overload
    def create_searchable_collection(
        self,
        name: str,
        *,
        schema: Optional[Schema] = None,
        default_ttl: Optional[int] = None,
        heap_size: Optional[int] = None,
        num_threads: Optional[int] = None,
        index_path: Optional[str] = None,
        schema_builder: Optional[Any] = None,
        writer_memory: Optional[int] = None,
        reload_policy: Optional[str] = None,
    ) -> "SearchableCollection[DatabaseEntryType]":
        """Create a searchable collection using tantivy for full-text search."""
        ...

    @overload
    def create_vector_collection(
        self,
        name: str,
        vector_size: int,
        *,
        schema: Optional[Schema] = None,
        default_ttl: Optional[int] = None,
        distance_metric: Optional[Any] = None,
        embedding_function: Optional[Callable[[Any], List[float]]] = None,
        path: Optional[str] = None,
        host: Optional[str] = None,
        port: Optional[int] = None,
        grpc_port: Optional[int] = None,
        prefer_grpc: Optional[bool] = None,
        api_key: Optional[str] = None,
        timeout: Optional[float] = None,
    ) -> "VectorCollection[DatabaseEntryType]":
        """Create a vector collection using Qdrant for semantic similarity search."""
        ...

    def create_searchable_collection(
        self,
        name: str,
        *,
        schema: Optional[Schema] = None,
        default_ttl: Optional[int] = None,
        heap_size: Optional[int] = None,
        num_threads: Optional[int] = None,
        index_path: Optional[str] = None,
        schema_builder: Optional[Any] = None,
        writer_memory: Optional[int] = None,
        reload_policy: Optional[str] = None,
    ) -> "SearchableCollection[DatabaseEntryType]":
        """Create a searchable collection using tantivy for full-text search."""
        # Per-collection TTL falls back to the database-wide default.
        collection = create_collection(
            "searchable",
            name,
            schema=schema,
            default_ttl=default_ttl or self.default_ttl,
            storage_backend=self,
            heap_size=heap_size,
            num_threads=num_threads,
            index_path=index_path,
            schema_builder=schema_builder,
            writer_memory=writer_memory,
            reload_policy=reload_policy,
        )
        self._collections[name] = collection
        return collection

    def create_vector_collection(
        self,
        name: str,
        vector_size: int,
        *,
        schema: Optional[Schema] = None,
        default_ttl: Optional[int] = None,
        distance_metric: Optional[Any] = None,
        embedding_function: Optional[Callable[[Any], List[float]]] = None,
        path: Optional[str] = None,
        host: Optional[str] = None,
        port: Optional[int] = None,
        grpc_port: Optional[int] = None,
        prefer_grpc: Optional[bool] = None,
        api_key: Optional[str] = None,
        timeout: Optional[float] = None,
    ) -> "VectorCollection[DatabaseEntryType]":
        """Create a vector collection using Qdrant for semantic similarity search."""
        collection = create_collection(
            "vector",
            name,
            vector_size,
            schema=schema,
            default_ttl=default_ttl or self.default_ttl,
            storage_backend=self,
            distance_metric=distance_metric,
            embedding_function=embedding_function,
            path=path,
            host=host,
            port=port,
            grpc_port=grpc_port,
            prefer_grpc=prefer_grpc,
            api_key=api_key,
            timeout=timeout,
        )
        self._collections[name] = collection
        return collection

    def register_collection(self, collection: BaseCollection) -> None:
        """Register an external collection with this database."""
        collection.attach_to_database(self)
        self._collections[collection.name] = collection

    def create_collection(
        self,
        name: str,
        schema: Optional[Schema] = None,
        default_ttl: Optional[int] = None,
    ) -> None:
        """Create a traditional collection (backward compatibility)."""
        self._schemas[name] = schema
        self._collection_ttls[name] = default_ttl
        # setdefault so re-creating an existing collection keeps its data.
        self._storage.setdefault(name, {})

    def _calculate_expires_at(self, ttl: Optional[int]) -> Optional[datetime]:
        """Calculate expiry time based on TTL.

        Falls back to the database default TTL when ttl is None; returns None
        (never expires) when the effective TTL is absent or non-positive.
        """
        if ttl is None:
            ttl = self.default_ttl
        if ttl and ttl > 0:
            return datetime.now(timezone.utc) + timedelta(seconds=ttl)
        return None

    def _is_expired(self, expires_at: Optional[datetime]) -> bool:
        """Check if an item has expired."""
        if expires_at is None:
            return False
        now = datetime.now(timezone.utc)
        # Treat naive timestamps as UTC so the comparison below is valid.
        if expires_at.tzinfo is None:
            expires_at = expires_at.replace(tzinfo=timezone.utc)
        return now >= expires_at

    def _match_filters(
        self, stored: Optional[Filters], query: Optional[Filters]
    ) -> bool:
        """Check if stored filters match query filters.

        No query filters matches everything; query filters against an item
        with no stored filters never match.
        """
        if query is None:
            return True
        if stored is None:
            return False
        return all(stored.get(k) == v for k, v in query.items())

    def get(
        self,
        id: str,
        *,
        collection: str = "default",
        filters: Optional[Filters] = None,
    ) -> Optional[DatabaseEntryType]:
        """Get an item from any collection.

        Returns None for unknown collections, missing ids, expired items,
        or filter mismatches.
        """
        # Check modern collections first
        if collection in self._collections:
            coll = self._collections[collection]
            # Temporarily remove storage backend to avoid recursion
            # (the collection would otherwise delegate back to this database).
            original_backend = coll._storage_backend
            coll._storage_backend = None
            try:
                return coll.get(id, filters=filters)
            finally:
                coll._storage_backend = original_backend

        # Traditional collection logic
        if collection not in self._schemas:
            return None

        coll_store = self._storage.get(collection, {})
        item = coll_store.get(id)
        if not item:
            return None

        # Check expiration — expired items are deleted lazily on access.
        if self._is_expired(item.get("expires_at")):
            del coll_store[id]
            return None

        # Check filters
        if not self._match_filters(item.get("filters"), filters):
            return None

        return item["value"]

    def add(
        self,
        entry: DatabaseEntryType,
        *,
        id: Optional[str] = None,
        collection: str = "default",
        filters: Optional[Filters] = None,
        ttl: Optional[int] = None,
    ) -> None:
        """Add an item to any collection.

        Auto-creates a traditional collection on first use; generates a
        uuid4 id when none is given.
        """
        # Check modern collections first
        if collection in self._collections:
            coll = self._collections[collection]
            # Temporarily remove storage backend to avoid recursion
            original_backend = coll._storage_backend
            coll._storage_backend = None
            try:
                coll.add(entry, id=id, filters=filters, ttl=ttl)
            finally:
                coll._storage_backend = original_backend
            return

        # Traditional collection logic
        if collection not in self._schemas:
            self.create_collection(collection)

        item_id = id or str(uuid.uuid4())
        expires_at = self._calculate_expires_at(ttl)
        coll_store = self._storage.setdefault(collection, {})

        coll_store[item_id] = {
            "value": entry,
            "filters": filters or {},
            "created_at": datetime.now(timezone.utc),
            "updated_at": datetime.now(timezone.utc),
            "expires_at": expires_at,
        }

    def query(
        self,
        *,
        collection: str = "default",
        filters: Optional[Filters] = None,
        search: Optional[str] = None,
        limit: Optional[int] = None,
        **kwargs,
    ) -> List[DatabaseEntryType]:
        """Query items from any collection.

        For traditional collections, `search` is a naive case-insensitive
        substring match against str(value).
        """
        # Check modern collections first
        if collection in self._collections:
            coll = self._collections[collection]
            # Temporarily remove storage backend to avoid recursion
            original_backend = coll._storage_backend
            coll._storage_backend = None
            try:
                return coll.query(filters=filters, search=search, limit=limit, **kwargs)
            finally:
                coll._storage_backend = original_backend

        # Traditional collection logic
        if collection not in self._schemas:
            return []

        results = []
        coll_store = self._storage.get(collection, {})

        for item in coll_store.values():
            # Check expiration (expired items are skipped, not deleted here)
            if self._is_expired(item.get("expires_at")):
                continue

            # Check filters
            if not self._match_filters(item.get("filters"), filters):
                continue

            # Basic search implementation
            if search:
                item_text = str(item["value"]).lower()
                if search.lower() not in item_text:
                    continue

            results.append(item["value"])
            if limit and len(results) >= limit:
                break

        return results

    def __getitem__(self, collection_name: str) -> BaseCollection[DatabaseEntryType]:
        """Get a collection accessor with full IDE typing support."""
        # Return modern collection if it exists
        if collection_name in self._collections:
            return self._collections[collection_name]

        # Create a database-backed collection accessor for traditional collections
        # NOTE(review): a fresh accessor class is defined per call — presumably
        # for closure simplicity; confirm before hoisting to module level.
        class DatabaseCollectionAccessor(BaseCollection[DatabaseEntryType]):
            def __init__(self, database_instance: "Database", name: str):
                self._database = database_instance
                self.name = name
                self._storage_backend = database_instance

            def get(
                self, id: str, *, filters: Optional[Filters] = None
            ) -> Optional[DatabaseEntryType]:
                # Delegate straight back to the owning database.
                return self._database.get(id, collection=self.name, filters=filters)

            def add(
                self,
                entry: DatabaseEntryType,
                *,
                id: Optional[str] = None,
                filters: Optional[Filters] = None,
                ttl: Optional[int] = None,
            ) -> None:
                self._database.add(
                    entry, id=id, collection=self.name, filters=filters, ttl=ttl
                )

            def query(
                self,
                *,
                filters: Optional[Filters] = None,
                search: Optional[str] = None,
                limit: Optional[int] = None,
            ) -> List[DatabaseEntryType]:
                return self._database.query(
                    collection=self.name, filters=filters, search=search, limit=limit
                )

        return DatabaseCollectionAccessor(self, collection_name)

    def __contains__(self, collection_name: str) -> bool:
        """Check if a collection exists."""
        return collection_name in self._schemas or collection_name in self._collections

    def keys(self) -> List[str]:
        """Get all collection names."""
        all_collections = set(self._schemas.keys())
        all_collections.update(self._collections.keys())
        return list(all_collections)

    def collections(self) -> Dict[str, BaseCollection]:
        """Get all modern collections."""
        # Shallow copy so callers cannot mutate the registry.
        return self._collections.copy()

    def delete_collection(self, name: str) -> bool:
        """Delete a collection.

        Returns:
            True if anything was deleted (modern or traditional), else False.
        """
        deleted = False

        if name in self._collections:
            del self._collections[name]
            deleted = True

        if name in self._schemas:
            del self._schemas[name]
            del self._collection_ttls[name]
            if name in self._storage:
                del self._storage[name]
            deleted = True

        return deleted

    def clear(self) -> None:
        """Clear all collections and data."""
        self._collections.clear()
        self._schemas.clear()
        self._collection_ttls.clear()
        self._storage.clear()
        # Restore the implicit "default" collection store.
        self._storage["default"] = {}
|
424
|
+
|
425
|
+
|
426
|
+
@overload
def create_database(
    type: Literal["searchable"],
    location: str = "memory",
    *,
    default_ttl: Optional[int] = None,
    heap_size: Optional[int] = None,
    num_threads: Optional[int] = None,
    index_path: Optional[str] = None,
    schema_builder: Optional[Any] = None,
    writer_memory: Optional[int] = None,
    reload_policy: Optional[str] = None,
) -> "Database[SearchableCollection]": ...


@overload
def create_database(
    type: Literal["vector"],
    location: str = "memory",
    *,
    default_ttl: Optional[int] = None,
    path: Optional[str] = None,
    host: Optional[str] = None,
    port: Optional[int] = None,
    grpc_port: Optional[int] = None,
    prefer_grpc: Optional[bool] = None,
    api_key: Optional[str] = None,
    timeout: Optional[float] = None,
) -> "Database[VectorCollection]": ...


def create_database(
    type: Literal["searchable", "vector"],
    location: str = "memory",
    *,
    default_ttl: Optional[int] = None,
    # Tantivy parameters (searchable databases only)
    heap_size: Optional[int] = None,
    num_threads: Optional[int] = None,
    index_path: Optional[str] = None,
    schema_builder: Optional[Any] = None,
    writer_memory: Optional[int] = None,
    reload_policy: Optional[str] = None,
    # Qdrant parameters (vector databases only)
    path: Optional[str] = None,
    host: Optional[str] = None,
    port: Optional[int] = None,
    grpc_port: Optional[int] = None,
    prefer_grpc: Optional[bool] = None,
    api_key: Optional[str] = None,
    timeout: Optional[float] = None,
) -> "Database":
    """
    Create a database instance optimized for specific collection types.

    Args:
        type: Type of database to create ("searchable" or "vector").
        location: Database location (default: "memory").
        default_ttl: Default TTL for items in seconds.

    Tantivy parameters (searchable databases only):
        heap_size: Memory allocation for tantivy heap.
        num_threads: Number of threads for tantivy operations.
        index_path: Path to store tantivy index files.
        schema_builder: Custom schema builder for tantivy.
        writer_memory: Memory allocation for tantivy writer.
        reload_policy: Policy for reloading tantivy index.

    Qdrant parameters (vector databases only):
        path: Path for local Qdrant storage.
        host: Qdrant server host.
        port: Qdrant server port.
        grpc_port: Qdrant gRPC port.
        prefer_grpc: Whether to prefer gRPC over HTTP.
        api_key: API key for Qdrant authentication.
        timeout: Request timeout for Qdrant operations.

    Returns:
        A Database instance optimized for the specified collection type.
    """
    database = Database(location=location, default_ttl=default_ttl)

    # Remember which collection flavor this database targets so later
    # collection creation can be optimized.
    database._database_type = type

    if type == "searchable":
        # Keep only the tantivy options the caller explicitly provided.
        candidates = {
            "heap_size": heap_size,
            "num_threads": num_threads,
            "index_path": index_path,
            "schema_builder": schema_builder,
            "writer_memory": writer_memory,
            "reload_policy": reload_policy,
        }
        tantivy_defaults = {
            key: value for key, value in candidates.items() if value is not None
        }
        if tantivy_defaults:
            database._default_tantivy_settings = tantivy_defaults

    elif type == "vector":
        # Keep only the qdrant options the caller explicitly provided.
        candidates = {
            "path": path,
            "host": host,
            "port": port,
            "grpc_port": grpc_port,
            "prefer_grpc": prefer_grpc,
            "api_key": api_key,
            "timeout": timeout,
        }
        qdrant_defaults = {
            key: value for key, value in candidates.items() if value is not None
        }
        if qdrant_defaults:
            database._default_qdrant_settings = qdrant_defaults

    return database
@@ -0,0 +1,33 @@
|
|
1
|
+
"""hammad.data.types
|
2
|
+
|
3
|
+
Contains various explicit data models and definitions for
|
4
|
+
various file types, data formats, and other data related
|
5
|
+
concepts."""
|
6
|
+
|
7
|
+
from typing import TYPE_CHECKING
|
8
|
+
from ...based.utils import auto_create_lazy_loader
|
9
|
+
|
10
|
+
if TYPE_CHECKING:
|
11
|
+
from .files.audio import Audio
|
12
|
+
from .files.configuration import Configuration
|
13
|
+
from .files.document import Document
|
14
|
+
from .files.file import File, FileSource
|
15
|
+
from .files.image import Image
|
16
|
+
|
17
|
+
|
18
|
+
__all__ = (
|
19
|
+
"Audio",
|
20
|
+
"Configuration",
|
21
|
+
"Document",
|
22
|
+
"File",
|
23
|
+
"FileSource",
|
24
|
+
"Image",
|
25
|
+
)
|
26
|
+
|
27
|
+
|
28
|
+
__getattr__ = auto_create_lazy_loader(__all__)
|
29
|
+
|
30
|
+
|
31
|
+
def __dir__() -> list[str]:
    """Get the attributes of the data.types module."""
    # Expose exactly the lazily loaded public names.
    return [*__all__]
|
@@ -0,0 +1 @@
|
|
1
|
+
"""hammad.data.types.files"""
|
@@ -0,0 +1,81 @@
|
|
1
|
+
"""hammad.data.types.files.audio"""
|
2
|
+
|
3
|
+
import httpx
|
4
|
+
from typing import Self
|
5
|
+
|
6
|
+
from .file import File, FileSource
|
7
|
+
from ....based.fields import basedfield
|
8
|
+
|
9
|
+
__all__ = ("Audio",)
|
10
|
+
|
11
|
+
|
12
|
+
class Audio(File):
    """A representation of an audio file, that is loadable from both a URL, file path
    or bytes."""

    # Audio-specific metadata (lazily populated; None until known)
    _duration: float | None = basedfield(default=None)
    _sample_rate: int | None = basedfield(default=None)
    _channels: int | None = basedfield(default=None)
    _format: str | None = basedfield(default=None)

    @property
    def is_valid_audio(self) -> bool:
        """Check if this is a valid audio file based on MIME type."""
        return self.type is not None and self.type.startswith("audio/")

    @property
    def format(self) -> str | None:
        """Get the audio format from MIME type."""
        if self._format is None and self.type:
            # Extract format from MIME type, upper-cased and cached on first
            # access (e.g., 'audio/mp3' -> 'MP3').
            self._format = self.type.split("/")[-1].upper()
        return self._format

    @classmethod
    def from_url(
        cls,
        url: str,
        *,
        lazy: bool = True,
        timeout: float = 30.0,
    ) -> Self:
        """Download and create an audio file from a URL.

        Args:
            url: The URL to download from.
            lazy: If True, defer loading content until needed.
            timeout: Request timeout in seconds.

        Returns:
            A new Audio instance.

        Raises:
            ValueError: If a non-lazy fetch returns a non-audio content type.
            httpx.HTTPStatusError: If the download fails (non-lazy only).
        """
        data = None
        size = None
        type = None  # NOTE: shadows the builtin `type` within this method

        if not lazy:
            with httpx.Client(timeout=timeout) as client:
                response = client.get(url)
                response.raise_for_status()

                data = response.content
                size = len(data)

                # Get content type (strip parameters such as '; charset=...')
                content_type = response.headers.get("content-type", "")
                type = content_type.split(";")[0] if content_type else None

                # Validate it's audio — only possible when content was fetched;
                # a lazy instance performs no validation here.
                if type and not type.startswith("audio/"):
                    raise ValueError(f"URL does not point to an audio file: {type}")

        return cls(
            data=data,
            type=type,
            source=FileSource(
                is_url=True,
                url=url,
                size=size,
            ),
        )
|