hammad-python 0.0.14__py3-none-any.whl → 0.0.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122) hide show
  1. hammad/__init__.py +177 -0
  2. hammad/{performance/imports.py → _internal.py} +7 -1
  3. hammad/cache/__init__.py +1 -1
  4. hammad/cli/__init__.py +3 -1
  5. hammad/cli/_runner.py +265 -0
  6. hammad/cli/animations.py +1 -1
  7. hammad/cli/plugins.py +133 -78
  8. hammad/cli/styles/__init__.py +1 -1
  9. hammad/cli/styles/utils.py +149 -3
  10. hammad/data/__init__.py +56 -29
  11. hammad/data/collections/__init__.py +27 -17
  12. hammad/data/collections/collection.py +205 -383
  13. hammad/data/collections/indexes/__init__.py +37 -0
  14. hammad/data/collections/indexes/qdrant/__init__.py +1 -0
  15. hammad/data/collections/indexes/qdrant/index.py +735 -0
  16. hammad/data/collections/indexes/qdrant/settings.py +94 -0
  17. hammad/data/collections/indexes/qdrant/utils.py +220 -0
  18. hammad/data/collections/indexes/tantivy/__init__.py +1 -0
  19. hammad/data/collections/indexes/tantivy/index.py +428 -0
  20. hammad/data/collections/indexes/tantivy/settings.py +51 -0
  21. hammad/data/collections/indexes/tantivy/utils.py +200 -0
  22. hammad/data/configurations/__init__.py +2 -2
  23. hammad/data/configurations/configuration.py +2 -2
  24. hammad/data/models/__init__.py +20 -9
  25. hammad/data/models/extensions/__init__.py +4 -0
  26. hammad/data/models/{pydantic → extensions/pydantic}/__init__.py +6 -19
  27. hammad/data/models/{pydantic → extensions/pydantic}/converters.py +143 -16
  28. hammad/data/models/{base/fields.py → fields.py} +1 -1
  29. hammad/data/models/{base/model.py → model.py} +1 -1
  30. hammad/data/models/{base/utils.py → utils.py} +1 -1
  31. hammad/data/sql/__init__.py +23 -0
  32. hammad/data/sql/database.py +578 -0
  33. hammad/data/sql/types.py +141 -0
  34. hammad/data/types/__init__.py +1 -3
  35. hammad/data/types/file.py +3 -3
  36. hammad/data/types/multimodal/__init__.py +2 -2
  37. hammad/data/types/multimodal/audio.py +2 -2
  38. hammad/data/types/multimodal/image.py +2 -2
  39. hammad/formatting/__init__.py +9 -27
  40. hammad/formatting/json/__init__.py +8 -2
  41. hammad/formatting/json/converters.py +7 -1
  42. hammad/formatting/text/__init__.py +1 -1
  43. hammad/formatting/yaml/__init__.py +1 -1
  44. hammad/genai/__init__.py +78 -0
  45. hammad/genai/agents/__init__.py +1 -0
  46. hammad/genai/agents/types/__init__.py +35 -0
  47. hammad/genai/agents/types/history.py +277 -0
  48. hammad/genai/agents/types/tool.py +490 -0
  49. hammad/genai/embedding_models/__init__.py +41 -0
  50. hammad/{ai/embeddings/client/litellm_embeddings_client.py → genai/embedding_models/embedding_model.py} +47 -142
  51. hammad/genai/embedding_models/embedding_model_name.py +77 -0
  52. hammad/genai/embedding_models/embedding_model_request.py +65 -0
  53. hammad/{ai/embeddings/types.py → genai/embedding_models/embedding_model_response.py} +3 -3
  54. hammad/genai/embedding_models/run.py +161 -0
  55. hammad/genai/language_models/__init__.py +35 -0
  56. hammad/genai/language_models/_streaming.py +622 -0
  57. hammad/genai/language_models/_types.py +276 -0
  58. hammad/genai/language_models/_utils/__init__.py +31 -0
  59. hammad/genai/language_models/_utils/_completions.py +131 -0
  60. hammad/genai/language_models/_utils/_messages.py +89 -0
  61. hammad/genai/language_models/_utils/_requests.py +202 -0
  62. hammad/genai/language_models/_utils/_structured_outputs.py +124 -0
  63. hammad/genai/language_models/language_model.py +734 -0
  64. hammad/genai/language_models/language_model_request.py +135 -0
  65. hammad/genai/language_models/language_model_response.py +219 -0
  66. hammad/genai/language_models/language_model_response_chunk.py +53 -0
  67. hammad/genai/language_models/run.py +530 -0
  68. hammad/genai/multimodal_models.py +48 -0
  69. hammad/genai/rerank_models.py +26 -0
  70. hammad/logging/__init__.py +1 -1
  71. hammad/logging/decorators.py +1 -1
  72. hammad/logging/logger.py +2 -2
  73. hammad/mcp/__init__.py +1 -1
  74. hammad/mcp/client/__init__.py +35 -0
  75. hammad/mcp/client/client.py +105 -4
  76. hammad/mcp/client/client_service.py +10 -3
  77. hammad/mcp/servers/__init__.py +24 -0
  78. hammad/{performance/runtime → runtime}/__init__.py +2 -2
  79. hammad/{performance/runtime → runtime}/decorators.py +1 -1
  80. hammad/{performance/runtime → runtime}/run.py +1 -1
  81. hammad/service/__init__.py +1 -1
  82. hammad/service/create.py +3 -8
  83. hammad/service/decorators.py +8 -8
  84. hammad/typing/__init__.py +28 -0
  85. hammad/web/__init__.py +3 -3
  86. hammad/web/http/client.py +1 -1
  87. hammad/web/models.py +53 -21
  88. hammad/web/search/client.py +99 -52
  89. hammad/web/utils.py +13 -13
  90. hammad_python-0.0.16.dist-info/METADATA +191 -0
  91. hammad_python-0.0.16.dist-info/RECORD +110 -0
  92. hammad/ai/__init__.py +0 -1
  93. hammad/ai/_utils.py +0 -142
  94. hammad/ai/completions/__init__.py +0 -45
  95. hammad/ai/completions/client.py +0 -684
  96. hammad/ai/completions/create.py +0 -710
  97. hammad/ai/completions/settings.py +0 -100
  98. hammad/ai/completions/types.py +0 -792
  99. hammad/ai/completions/utils.py +0 -486
  100. hammad/ai/embeddings/__init__.py +0 -35
  101. hammad/ai/embeddings/client/__init__.py +0 -1
  102. hammad/ai/embeddings/client/base_embeddings_client.py +0 -26
  103. hammad/ai/embeddings/client/fastembed_text_embeddings_client.py +0 -200
  104. hammad/ai/embeddings/create.py +0 -159
  105. hammad/data/collections/base_collection.py +0 -58
  106. hammad/data/collections/searchable_collection.py +0 -556
  107. hammad/data/collections/vector_collection.py +0 -596
  108. hammad/data/databases/__init__.py +0 -21
  109. hammad/data/databases/database.py +0 -902
  110. hammad/data/models/base/__init__.py +0 -35
  111. hammad/data/models/pydantic/models/__init__.py +0 -28
  112. hammad/data/models/pydantic/models/arbitrary_model.py +0 -46
  113. hammad/data/models/pydantic/models/cacheable_model.py +0 -79
  114. hammad/data/models/pydantic/models/fast_model.py +0 -318
  115. hammad/data/models/pydantic/models/function_model.py +0 -176
  116. hammad/data/models/pydantic/models/subscriptable_model.py +0 -63
  117. hammad/performance/__init__.py +0 -36
  118. hammad/py.typed +0 -0
  119. hammad_python-0.0.14.dist-info/METADATA +0 -70
  120. hammad_python-0.0.14.dist-info/RECORD +0 -99
  121. {hammad_python-0.0.14.dist-info → hammad_python-0.0.16.dist-info}/WHEEL +0 -0
  122. {hammad_python-0.0.14.dist-info → hammad_python-0.0.16.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,428 @@
1
+ """hammad.data.collections.indexes.tantivy.index"""
2
+
3
+ from datetime import datetime, timezone, timedelta
4
+ from typing import (
5
+ Any,
6
+ Dict,
7
+ Generic,
8
+ List,
9
+ Optional,
10
+ Type,
11
+ final
12
+ )
13
+ import uuid
14
+ from pathlib import Path
15
+ import json
16
+
17
+ import tantivy
18
+
19
+ from ....sql.types import (
20
+ DatabaseItemType,
21
+ DatabaseItemFilters,
22
+ DatabaseItem,
23
+ )
24
+ from ....sql.database import Database
25
+ from . import utils
26
+ from .settings import (
27
+ TantivyCollectionIndexSettings,
28
+ TantivyCollectionIndexQuerySettings
29
+ )
30
+
31
+
32
@final
class TantivyCollectionIndex(Generic[DatabaseItemType]):
    """A standalone (simplified) index that can be used as the
    storage / search engine for a collection.

    Items are stored in a SQL `Database`; a `tantivy` index is kept
    alongside it and is used only to resolve full-text queries to item
    IDs, which are then fetched back from the database.

    This collection index is built into the core dependencies
    of the `hammad-python` package, and is the default index
    used by the `Collection` class."""

    def __init__(
        self,
        *,
        name: str = "default",
        schema: Optional[Type[DatabaseItemType]] = None,
        ttl: Optional[int] = None,
        path: Optional[Path | str] = None,
        fast: bool = True,
        settings: Optional[TantivyCollectionIndexSettings] = None,
        query_settings: Optional[TantivyCollectionIndexQuerySettings] = None,
    ) -> None:
        """Initialize a new `TantivyCollectionIndex` with a given set
        of parameters.

        Args:
            name: The name of the index.
            schema: The schema of the items that can be stored
                within this index.
            ttl: The time to live for the items within this index.
            path: The path to the directory where the index will be stored.
                (If not provided, no path is handed to the database.)
            fast: Whether to use fast schema building & indexing
                from `tantivy`'s builtin implementation. Only used when
                `settings` is not provided.
            settings: Default settings to use for indexing & schema
                building.
            query_settings: Default settings to use for the query
                engine.

        Raises:
            TantivyCollectionIndexError: If the tantivy index cannot be
                built from the settings.
        """
        self.name = name
        self.schema = schema
        self.ttl = ttl

        if path is not None and not isinstance(path, Path):
            path = Path(path)

        # Directory that holds the SQL database file (`<path>/<name>.db`).
        # NOTE(review): only the database receives this path; the tantivy
        # index is built from settings alone — confirm whether a persisted
        # database is expected to be re-indexed on start-up.
        self.path = path

        if settings is None:
            settings = TantivyCollectionIndexSettings(fast=fast)
        if query_settings is None:
            query_settings = TantivyCollectionIndexQuerySettings()

        # The default settings to use when indexing and schema building
        # for this index.
        self.settings = settings

        # The default settings to use when querying this index.
        self.query_settings = query_settings

        # Initialize SQL Database as storage backend
        database_path = None
        if self.path is not None:
            database_path = self.path / f"{name}.db"

        self._database = Database[DatabaseItemType](
            name=name,
            schema=schema,
            ttl=ttl,
            path=database_path,
            table_name=f"tantivy_{name}",
        )

        try:
            self._build_tantivy_index()
        except Exception as e:
            raise utils.TantivyCollectionIndexError(
                f"Failed to build tantivy index from settings: {e}"
            ) from e

    def _build_tantivy_index(self) -> None:
        """Build the tantivy index from `self.settings` and bind its parts."""
        wrapper = utils.build_tantivy_index_from_settings(settings=self.settings)
        self._tantivy_wrapper = wrapper
        self._index = wrapper.index
        self._schema = wrapper.schema
        self._writer = wrapper.index_writer

    def add(
        self,
        item: DatabaseItemType,
        *,
        id: Optional[str] = None,
        filters: Optional[DatabaseItemFilters] = None,
        ttl: Optional[int] = None,
    ) -> str:
        """Add a new item to the index.

        Args:
            item: The item to add to the index.
            id: The id of the item.
            filters: The filters to apply to the item.
            ttl: The time to live for the item.

        Returns:
            The ID of the added item.
        """
        # The database assigns / confirms the ID, so it is written first.
        item_id = self._database.add(
            item=item,
            id=id,
            filters=filters,
            ttl=ttl,
        )

        # Mirror the item into the tantivy index so it becomes searchable.
        self._add_to_tantivy_index(item_id, item, filters)

        return item_id

    def _add_to_tantivy_index(
        self,
        item_id: str,
        item: DatabaseItemType,
        filters: Optional[DatabaseItemFilters] = None,
    ) -> None:
        """Add item to tantivy search index and commit immediately."""
        doc = tantivy.Document()

        # Add ID field
        doc.add_text("id", item_id)

        # Extract and add content for search
        content = utils.extract_content_for_indexing(item)
        doc.add_text("content", content)

        # Title and score are only picked up from dict items.
        item_is_dict = isinstance(item, dict)
        if item_is_dict and "title" in item:
            doc.add_text("title", str(item["title"]))

        # Store the full data as JSON in tantivy
        serialized_data = utils.serialize(item)
        doc.add_json("data", json.dumps({"value": serialized_data}))

        # Each filter becomes a `/key/value` facet on the "filters" field.
        if filters:
            for key, value in filters.items():
                facet_value = f"/{key}/{value}"
                doc.add_facet("filters", tantivy.Facet.from_string(facet_value))

        # Add timestamps
        doc.add_date("created_at", datetime.now(timezone.utc))

        # The score field is an integer field, so numeric scores are truncated.
        if (
            item_is_dict
            and "score" in item
            and isinstance(item["score"], (int, float))
        ):
            doc.add_integer("score", int(item["score"]))

        # Add to index
        self._writer.add_document(doc)
        self._writer.commit()

    def get(
        self,
        id: str,
        *,
        filters: Optional[DatabaseItemFilters] = None,
    ) -> Optional[DatabaseItem[DatabaseItemType]]:
        """Get an item by ID.

        Args:
            id: The item ID.
            filters: Optional filters to match.

        Returns:
            The database item or None if not found.
        """
        return self._database.get(id, filters=filters)

    def query(
        self,
        query: Optional[str] = None,
        *,
        filters: Optional[DatabaseItemFilters] = None,
        limit: Optional[int] = None,
        offset: int = 0,
        fuzzy: bool = False,
        fuzzy_distance: int = 2,
        phrase: bool = False,
        phrase_slop: int = 0,
        boost_fields: Optional[Dict[str, float]] = None,
        min_score: Optional[float] = None,
        sort_by: Optional[str] = None,
        ascending: bool = True,
    ) -> List[DatabaseItem[DatabaseItemType]]:
        """Query items using tantivy search.

        Without a `query` string the call is answered by the database
        alone; with one, tantivy resolves the matching IDs and the items
        are fetched from the database.

        Args:
            query: Search query string.
            filters: Dictionary of filters to apply.
            limit: Maximum number of results. Falls back to
                `query_settings.limit` for search queries when not given.
            offset: Number of results to skip.
            fuzzy: Enable fuzzy matching.
            fuzzy_distance: Maximum edit distance for fuzzy matching.
            phrase: Treat query as exact phrase match.
            phrase_slop: Max words between phrase terms.
            boost_fields: Field-specific score boosting.
            min_score: Minimum relevance score threshold.
            sort_by: Field to sort by (database-only queries).
            ascending: Sort direction (database-only queries).

        Returns:
            List of matching database items.
        """
        if not query:
            # No search query - use database query directly.
            # NOTE(review): `filters` is not forwarded on this path.
            return self._database.query(
                limit=limit,
                offset=offset,
                order_by=sort_by,
                ascending=ascending,
            )

        # Use tantivy for search
        self._index.reload()
        searcher = self._index.searcher()

        # Every clause is a `Must`: all filters and the text query must match.
        query_parts = []

        if filters:
            for key, value in filters.items():
                facet_query = tantivy.Query.term_query(
                    self._schema,
                    "filters",
                    tantivy.Facet.from_string(f"/{key}/{value}"),
                )
                query_parts.append((tantivy.Occur.Must, facet_query))

        if phrase:
            search_query = tantivy.Query.phrase_query(
                self._schema, "content", query.split(), slop=phrase_slop
            )
        elif fuzzy:
            # Any fuzzy term may match (`Should`), one sub-query per word.
            fuzzy_queries = [
                (
                    tantivy.Occur.Should,
                    tantivy.Query.fuzzy_term_query(
                        self._schema,
                        "content",
                        term,
                        distance=fuzzy_distance,
                    ),
                )
                for term in query.split()
            ]
            search_query = tantivy.Query.boolean_query(fuzzy_queries)
        elif boost_fields:
            # Use tantivy's query parser
            search_query = self._index.parse_query(
                query,
                default_field_names=["content", "title"],
                field_boosts=boost_fields,
            )
        else:
            search_query = self._index.parse_query(
                query, default_field_names=["content", "title"]
            )

        query_parts.append((tantivy.Occur.Must, search_query))

        # `query_parts` always holds at least the text query at this point.
        final_query = tantivy.Query.boolean_query(query_parts)

        search_result = searcher.search(
            final_query,
            limit=limit or self.query_settings.limit,
            offset=offset,
        )

        # Get IDs from search results and fetch from database
        item_ids = []
        for score, doc_address in search_result.hits:
            # `is not None` so that an explicit threshold of 0.0 is honoured.
            if min_score is not None and score < min_score:
                continue

            item_id = searcher.doc(doc_address).get_first("id")
            if item_id:
                item_ids.append(item_id)

        # Hits without a matching database row are skipped.
        results = []
        for item_id in item_ids:
            db_item = self._database.get(item_id, filters=filters)
            if db_item:
                results.append(db_item)

        return results

    def delete(self, id: str) -> bool:
        """Delete an item by ID.

        Args:
            id: The item ID.

        Returns:
            True if item was deleted, False if not found.
        """
        # NOTE(review): only the database row is removed; the tantivy
        # document is left in place. `query` re-fetches every hit from the
        # database and skips misses, so the database stays the source of
        # truth, but stale hits still count towards the search limit.
        return self._database.delete(id)

    def count(
        self,
        query: Optional[str] = None,
        *,
        filters: Optional[DatabaseItemFilters] = None,
    ) -> int:
        """Count items matching the query and filters.

        Args:
            query: Search query string.
            filters: Dictionary of filters to apply.

        Returns:
            Number of matching items.
        """
        if not query:
            # Simple count from database
            from ....sql.types import QueryFilter, QueryCondition

            query_filter = None
            if filters:
                conditions = [
                    QueryCondition(
                        field="filters",
                        operator="contains",
                        value=json.dumps(filters),
                    )
                ]
                query_filter = QueryFilter(conditions=conditions)

            return self._database.count(query_filter)

        # Count via search results. Passing `limit=None` would fall back to
        # `query_settings.limit` and cap the count, so ask for as many hits
        # as there are indexed documents instead.
        self._index.reload()
        indexed_docs = self._index.searcher().num_docs
        results = self.query(query, filters=filters, limit=indexed_docs or None)
        return len(results)

    def clear(self) -> int:
        """Clear all items from the index.

        The database is cleared first, then the tantivy index is rebuilt
        from the current settings. A failed rebuild does not raise (the
        database has already been cleared) but is reported as a
        `RuntimeWarning`, and the previous tantivy index stays in place.

        Returns:
            Number of items deleted.
        """
        count = self._database.clear()

        # Clear tantivy index by rebuilding it
        try:
            self._build_tantivy_index()
        except Exception as e:
            import warnings

            warnings.warn(
                f"Failed to rebuild tantivy index while clearing: {e}",
                RuntimeWarning,
                stacklevel=2,
            )

        return count

    def __repr__(self) -> str:
        """String representation of the index."""
        location = str(self.path) if self.path else "memory"
        return f"<TantivyCollectionIndex name='{self.name}' location='{location}'>"
@@ -0,0 +1,51 @@
1
+ """hammad.data.collections.indexes.tantivy.settings"""
2
+
3
+ from dataclasses import dataclass
4
+ from typing import (
5
+ Any,
6
+ Dict,
7
+ )
8
+
9
+ __all__ = (
10
+ "TantivyCollectionIndexSettings",
11
+ "TantivyCollectionIndexQuerySettings"
12
+ )
13
+
14
+
15
@dataclass
class TantivyCollectionIndexSettings:
    """User configurable settings for building a `TantivyCollectionIndex`
    (schema field options and indexing behaviour)."""

    fast: bool = True
    """Toggles the `fast` option on the text, numeric and date field
    groups of the generated configuration."""

    def get_tantivy_config(self) -> Dict[str, Any]:
        """Build a fresh configuration dictionary with one options
        mapping per field group (text, numeric, date, json)."""
        use_fast = self.fast

        text_options = {"stored": True, "fast": use_fast}
        # Numeric and date fields share the same options; each group still
        # gets its own dict so callers can mutate one without the other.
        indexed_options = {"stored": True, "indexed": True, "fast": use_fast}
        json_options = {"stored": True}

        return {
            "text_fields": text_options,
            "numeric_fields": dict(indexed_options),
            "date_fields": dict(indexed_options),
            "json_fields": json_options,
        }
42
+
43
+
44
@dataclass
class TantivyCollectionIndexQuerySettings:
    """User configurable defaults for the query engine of a
    `TantivyCollectionIndex`."""

    limit: int = 10
    """Upper bound on the number of results a query returns."""
@@ -0,0 +1,200 @@
1
+ """hammad.data.collections.indexes.tantivy.utils"""
2
+
3
+ from dataclasses import (
4
+ dataclass,
5
+ is_dataclass,
6
+ asdict
7
+ )
8
+ from msgspec import json
9
+ from typing import (
10
+ Any,
11
+ Dict,
12
+ List,
13
+ Optional,
14
+ final
15
+ )
16
+
17
+ import tantivy
18
+
19
+ from .....cache import cached
20
+ from .settings import (
21
+ TantivyCollectionIndexSettings,
22
+ TantivyCollectionIndexQuerySettings
23
+ )
24
+
25
+
26
+ __all__ = (
27
+ "TantivyCollectionIndexError",
28
+ "extract_content_for_indexing",
29
+ )
30
+
31
+
32
class TantivyCollectionIndexError(Exception):
    """Error type raised for failures inside the `TantivyCollectionIndex`
    (for example when the underlying tantivy index cannot be built)."""
34
+
35
+
36
@dataclass
class TantivyIndexWrapper:
    """Bundles a `tantivy` index with the schema it was built from and
    the writer opened on it, so the three can be passed around together."""

    # The `tantivy` index object.
    index: tantivy.Index

    # The `tantivy` schema object.
    schema: tantivy.Schema

    # The `tantivy` index writer object.
    index_writer: Any
48
+
49
+
50
@cached
def match_filters_for_query(
    stored_filters: Dict[str, Any] | None = None,
    query_filters: Dict[str, Any] | None = None,
) -> bool:
    """Report whether an item's stored filters satisfy the query filters.

    No query filters means everything matches; query filters against an
    item without stored filters never match. Otherwise every query key
    must be present in the stored filters with an equal value.
    """
    if query_filters is None:
        return True
    if stored_filters is None:
        return False

    for key, expected in query_filters.items():
        if not (stored_filters.get(key) == expected):
            return False
    return True
61
+
62
+
63
@cached
def serialize(obj: Any) -> Any:
    """Convert an object into JSON-compatible builtins.

    A msgspec encode/decode round trip is attempted first; if that raises,
    the object is converted by hand, recursing into containers,
    dataclasses and `__dict__`, and falling back to `str(obj)`.
    """
    try:
        encoded = json.encode(obj)
        return json.decode(encoded)
    except Exception:
        # Fallback to manual serialization if msgspec fails
        if isinstance(obj, (str, int, float, bool, type(None))):
            return obj
        if isinstance(obj, (list, tuple)):
            # Tuples come back as lists, mirroring JSON arrays.
            return [serialize(element) for element in obj]
        if isinstance(obj, dict):
            return {key: serialize(val) for key, val in obj.items()}
        if is_dataclass(obj):
            return serialize(asdict(obj))
        if hasattr(obj, "__dict__"):
            return serialize(obj.__dict__)
        return str(obj)
84
+
85
+
86
def build_tantivy_index_from_settings(
    settings: TantivyCollectionIndexSettings,
) -> TantivyIndexWrapper:
    """Builds a new `tantivy` index from the given settings.

    Deliberately not memoised: the returned index and writer are mutable,
    so every caller must receive its own objects. A cached result would
    make equal settings resolve to one shared index and would make
    "rebuild to clear" hand back the index that was meant to be replaced.

    Args:
        settings: Settings whose `get_tantivy_config()` supplies the
            per-field-group options (and optional writer / reader options).

    Returns:
        A `TantivyIndexWrapper` holding the index, its schema and a writer.
    """
    # Resolve the configuration once instead of rebuilding it per field.
    config = settings.get_tantivy_config()
    text_options = config["text_fields"]
    date_options = config["date_fields"]
    # Searchable text fields additionally record positions (phrase queries).
    searchable_text_options = {
        **text_options,
        "tokenizer_name": "default",
        "index_option": "position",
    }

    # Init schema for index
    schema_builder = tantivy.SchemaBuilder()

    # ID (stored and indexed)
    schema_builder.add_text_field("id", **text_options)
    # Content (stored and indexed) Contains entry content
    schema_builder.add_text_field("content", **searchable_text_options)
    # Title (stored and indexed) Contains entry title
    schema_builder.add_text_field("title", **searchable_text_options)
    # JSON (stored) Contains actual entry data
    schema_builder.add_json_field("data", **config["json_fields"])

    # Timestamps
    schema_builder.add_date_field("created_at", **date_options)
    schema_builder.add_date_field("expires_at", **date_options)

    # Sorting / Scoring
    schema_builder.add_integer_field("score", **config["numeric_fields"])

    # Facet for Optional filters
    schema_builder.add_facet_field("filters")

    # Build the schema
    schema = schema_builder.build()

    # Create index in memory (no path means in-memory)
    index = tantivy.Index(schema)

    # Configure index writer with custom settings if provided
    writer_config = {}
    if "writer_heap_size" in config:
        writer_config["heap_size"] = config["writer_heap_size"]
    if "writer_num_threads" in config:
        writer_config["num_threads"] = config["writer_num_threads"]

    index_writer = index.writer(**writer_config)

    # Configure index reader if settings provided
    reader_config = config.get("reader_config", {})
    if reader_config:
        index.config_reader(
            reload_policy=reader_config.get("reload_policy", "commit"),
            num_warmers=reader_config.get("num_warmers", 0),
        )

    return TantivyIndexWrapper(
        schema=schema,
        index=index,
        index_writer=index_writer,
    )
173
+
174
+
175
@cached
def extract_content_for_indexing(value: Any) -> str:
    """Flatten a value into one space-separated string for the search index.

    Strings pass through unchanged. Dict values and list/tuple elements
    are joined with single spaces: strings as-is, nested lists/dicts
    inside a dict as JSON text, everything else via `str()`. Any other
    value is returned as `str(value)`.
    """
    if isinstance(value, str):
        return value

    if isinstance(value, dict):

        def _dict_value_text(entry: Any) -> str:
            # Nested containers are indexed as their JSON representation.
            if isinstance(entry, str):
                return entry
            if isinstance(entry, (list, dict)):
                return json.encode(entry).decode()
            return str(entry)

        return " ".join(_dict_value_text(entry) for entry in value.values())

    if isinstance(value, (list, tuple)):
        return " ".join(
            element if isinstance(element, str) else str(element)
            for element in value
        )

    return str(value)
@@ -1,11 +1,11 @@
1
- """hammad.configuration
1
+ """hammad.data.configurations
2
2
 
3
3
  Contains the `Configuration` class and related functions for parsing configurations
4
4
  from various sources.
5
5
  """
6
6
 
7
7
  from typing import TYPE_CHECKING
8
- from ...performance.imports import create_getattr_importer
8
+ from ..._internal import create_getattr_importer
9
9
 
10
10
  if TYPE_CHECKING:
11
11
  from .configuration import (
@@ -1,4 +1,4 @@
1
- """hammad.data.types.files.configuration"""
1
+ """hammad.data.configurations.configuration"""
2
2
 
3
3
  import os
4
4
  import configparser
@@ -10,7 +10,7 @@ import msgspec
10
10
  import yaml
11
11
 
12
12
  from ..types.file import File, FileSource
13
- from ..models.base.fields import field
13
+ from ..models.fields import field
14
14
 
15
15
  __all__ = (
16
16
  "Configuration",