hammad-python 0.0.30__py3-none-any.whl → 0.0.31__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (137) hide show
  1. ham/__init__.py +10 -0
  2. {hammad_python-0.0.30.dist-info → hammad_python-0.0.31.dist-info}/METADATA +6 -32
  3. hammad_python-0.0.31.dist-info/RECORD +6 -0
  4. hammad/__init__.py +0 -84
  5. hammad/_internal.py +0 -256
  6. hammad/_main.py +0 -226
  7. hammad/cache/__init__.py +0 -40
  8. hammad/cache/base_cache.py +0 -181
  9. hammad/cache/cache.py +0 -169
  10. hammad/cache/decorators.py +0 -261
  11. hammad/cache/file_cache.py +0 -80
  12. hammad/cache/ttl_cache.py +0 -74
  13. hammad/cli/__init__.py +0 -33
  14. hammad/cli/animations.py +0 -573
  15. hammad/cli/plugins.py +0 -867
  16. hammad/cli/styles/__init__.py +0 -55
  17. hammad/cli/styles/settings.py +0 -139
  18. hammad/cli/styles/types.py +0 -358
  19. hammad/cli/styles/utils.py +0 -634
  20. hammad/data/__init__.py +0 -90
  21. hammad/data/collections/__init__.py +0 -49
  22. hammad/data/collections/collection.py +0 -326
  23. hammad/data/collections/indexes/__init__.py +0 -37
  24. hammad/data/collections/indexes/qdrant/__init__.py +0 -1
  25. hammad/data/collections/indexes/qdrant/index.py +0 -723
  26. hammad/data/collections/indexes/qdrant/settings.py +0 -94
  27. hammad/data/collections/indexes/qdrant/utils.py +0 -210
  28. hammad/data/collections/indexes/tantivy/__init__.py +0 -1
  29. hammad/data/collections/indexes/tantivy/index.py +0 -426
  30. hammad/data/collections/indexes/tantivy/settings.py +0 -40
  31. hammad/data/collections/indexes/tantivy/utils.py +0 -176
  32. hammad/data/configurations/__init__.py +0 -35
  33. hammad/data/configurations/configuration.py +0 -564
  34. hammad/data/models/__init__.py +0 -50
  35. hammad/data/models/extensions/__init__.py +0 -4
  36. hammad/data/models/extensions/pydantic/__init__.py +0 -42
  37. hammad/data/models/extensions/pydantic/converters.py +0 -759
  38. hammad/data/models/fields.py +0 -546
  39. hammad/data/models/model.py +0 -1078
  40. hammad/data/models/utils.py +0 -280
  41. hammad/data/sql/__init__.py +0 -24
  42. hammad/data/sql/database.py +0 -576
  43. hammad/data/sql/types.py +0 -127
  44. hammad/data/types/__init__.py +0 -75
  45. hammad/data/types/file.py +0 -431
  46. hammad/data/types/multimodal/__init__.py +0 -36
  47. hammad/data/types/multimodal/audio.py +0 -200
  48. hammad/data/types/multimodal/image.py +0 -182
  49. hammad/data/types/text.py +0 -1308
  50. hammad/formatting/__init__.py +0 -33
  51. hammad/formatting/json/__init__.py +0 -27
  52. hammad/formatting/json/converters.py +0 -158
  53. hammad/formatting/text/__init__.py +0 -63
  54. hammad/formatting/text/converters.py +0 -723
  55. hammad/formatting/text/markdown.py +0 -131
  56. hammad/formatting/yaml/__init__.py +0 -26
  57. hammad/formatting/yaml/converters.py +0 -5
  58. hammad/genai/__init__.py +0 -217
  59. hammad/genai/a2a/__init__.py +0 -32
  60. hammad/genai/a2a/workers.py +0 -552
  61. hammad/genai/agents/__init__.py +0 -59
  62. hammad/genai/agents/agent.py +0 -1973
  63. hammad/genai/agents/run.py +0 -1024
  64. hammad/genai/agents/types/__init__.py +0 -42
  65. hammad/genai/agents/types/agent_context.py +0 -13
  66. hammad/genai/agents/types/agent_event.py +0 -128
  67. hammad/genai/agents/types/agent_hooks.py +0 -220
  68. hammad/genai/agents/types/agent_messages.py +0 -31
  69. hammad/genai/agents/types/agent_response.py +0 -125
  70. hammad/genai/agents/types/agent_stream.py +0 -327
  71. hammad/genai/graphs/__init__.py +0 -125
  72. hammad/genai/graphs/_utils.py +0 -190
  73. hammad/genai/graphs/base.py +0 -1828
  74. hammad/genai/graphs/plugins.py +0 -316
  75. hammad/genai/graphs/types.py +0 -638
  76. hammad/genai/models/__init__.py +0 -1
  77. hammad/genai/models/embeddings/__init__.py +0 -43
  78. hammad/genai/models/embeddings/model.py +0 -226
  79. hammad/genai/models/embeddings/run.py +0 -163
  80. hammad/genai/models/embeddings/types/__init__.py +0 -37
  81. hammad/genai/models/embeddings/types/embedding_model_name.py +0 -75
  82. hammad/genai/models/embeddings/types/embedding_model_response.py +0 -76
  83. hammad/genai/models/embeddings/types/embedding_model_run_params.py +0 -66
  84. hammad/genai/models/embeddings/types/embedding_model_settings.py +0 -47
  85. hammad/genai/models/language/__init__.py +0 -57
  86. hammad/genai/models/language/model.py +0 -1098
  87. hammad/genai/models/language/run.py +0 -878
  88. hammad/genai/models/language/types/__init__.py +0 -40
  89. hammad/genai/models/language/types/language_model_instructor_mode.py +0 -47
  90. hammad/genai/models/language/types/language_model_messages.py +0 -28
  91. hammad/genai/models/language/types/language_model_name.py +0 -239
  92. hammad/genai/models/language/types/language_model_request.py +0 -127
  93. hammad/genai/models/language/types/language_model_response.py +0 -217
  94. hammad/genai/models/language/types/language_model_response_chunk.py +0 -56
  95. hammad/genai/models/language/types/language_model_settings.py +0 -89
  96. hammad/genai/models/language/types/language_model_stream.py +0 -600
  97. hammad/genai/models/language/utils/__init__.py +0 -28
  98. hammad/genai/models/language/utils/requests.py +0 -421
  99. hammad/genai/models/language/utils/structured_outputs.py +0 -135
  100. hammad/genai/models/model_provider.py +0 -4
  101. hammad/genai/models/multimodal.py +0 -47
  102. hammad/genai/models/reranking.py +0 -26
  103. hammad/genai/types/__init__.py +0 -1
  104. hammad/genai/types/base.py +0 -215
  105. hammad/genai/types/history.py +0 -290
  106. hammad/genai/types/tools.py +0 -507
  107. hammad/logging/__init__.py +0 -35
  108. hammad/logging/decorators.py +0 -834
  109. hammad/logging/logger.py +0 -1018
  110. hammad/mcp/__init__.py +0 -53
  111. hammad/mcp/client/__init__.py +0 -35
  112. hammad/mcp/client/client.py +0 -624
  113. hammad/mcp/client/client_service.py +0 -400
  114. hammad/mcp/client/settings.py +0 -178
  115. hammad/mcp/servers/__init__.py +0 -26
  116. hammad/mcp/servers/launcher.py +0 -1161
  117. hammad/runtime/__init__.py +0 -32
  118. hammad/runtime/decorators.py +0 -142
  119. hammad/runtime/run.py +0 -299
  120. hammad/service/__init__.py +0 -49
  121. hammad/service/create.py +0 -527
  122. hammad/service/decorators.py +0 -283
  123. hammad/types.py +0 -288
  124. hammad/typing/__init__.py +0 -435
  125. hammad/web/__init__.py +0 -43
  126. hammad/web/http/__init__.py +0 -1
  127. hammad/web/http/client.py +0 -944
  128. hammad/web/models.py +0 -275
  129. hammad/web/openapi/__init__.py +0 -1
  130. hammad/web/openapi/client.py +0 -740
  131. hammad/web/search/__init__.py +0 -1
  132. hammad/web/search/client.py +0 -1023
  133. hammad/web/utils.py +0 -472
  134. hammad_python-0.0.30.dist-info/RECORD +0 -135
  135. {hammad → ham}/py.typed +0 -0
  136. {hammad_python-0.0.30.dist-info → hammad_python-0.0.31.dist-info}/WHEEL +0 -0
  137. {hammad_python-0.0.30.dist-info → hammad_python-0.0.31.dist-info}/licenses/LICENSE +0 -0
@@ -1,426 +0,0 @@
1
- """hammad.data.collections.indexes.tantivy.index"""
2
-
3
- from datetime import datetime, timezone, timedelta
4
- from typing import Any, Dict, Generic, List, Optional, Type, final
5
- import uuid
6
- from pathlib import Path
7
- import json
8
-
9
- import tantivy
10
-
11
- from ....sql.types import (
12
- DatabaseItemType,
13
- DatabaseItemFilters,
14
- DatabaseItem,
15
- )
16
- from ....sql.database import Database
17
- from . import utils
18
- from .settings import (
19
- TantivyCollectionIndexSettings,
20
- TantivyCollectionIndexQuerySettings,
21
- )
22
-
23
-
24
@final
class TantivyCollectionIndex(Generic[DatabaseItemType]):
    """A standalone (simplified) index that can be used as the
    storage / search engine for a collection, implementing fast
    indexing & querying capabilities using the `tantivy` package.

    Items are persisted in a SQL `Database`, which is treated as the
    source of truth. The `tantivy` index is only used to find item ids
    by full-text search; every hit is re-read from the database before
    it is returned.

    This collection index is built into the core dependencies
    of the `hammad-python` package, and is the default index
    used by the `Collection` class."""

    def __init__(
        self,
        *,
        name: str = "default",
        schema: Optional[Type[DatabaseItemType]] = None,
        ttl: Optional[int] = None,
        path: Optional[Path | str] = None,
        fast: bool = True,
        settings: Optional[TantivyCollectionIndexSettings] = None,
        query_settings: Optional[TantivyCollectionIndexQuerySettings] = None,
    ) -> None:
        """Initialize a new `TantivyCollectionIndex` with a given set
        of parameters.

        Args:
            name: The name of the index.
            schema: The schema of the items that can be stored
                within this index.
            ttl: The time to live for the items within this index.
            path: The path to the directory where the index will be stored.
                (If not provided, the collection will be built on memory. This is how to
                distinguish between different collection locations.)
            fast: Whether to use fast schema building & indexing
                from `tantivy`'s builtin implementation. Only used when
                `settings` is not provided.
            settings: Default settings to use for indexing & schema
                building.
            query_settings: Default settings to use for the query
                engine.

        Raises:
            TantivyCollectionIndexError: If the `tantivy` index cannot
                be built from the settings.
        """
        self.name = name
        self.schema = schema
        self.ttl = ttl

        if path is not None and not isinstance(path, Path):
            path = Path(path)

        self.path = path
        """The directory used for this index's storage, or `None` when
        no path was given.

        When set, the backing SQL database file is `<path>/<name>.db`."""

        self.settings = settings or TantivyCollectionIndexSettings(fast=fast)
        """The default settings to use when indexing and schema building
        for this index."""

        self.query_settings = query_settings or TantivyCollectionIndexQuerySettings()
        """The default settings to use when querying this index."""

        # Initialize SQL Database as storage backend
        database_path = None
        if self.path is not None:
            database_path = self.path / f"{name}.db"

        self._database = Database[DatabaseItemType](
            name=name,
            schema=schema,
            ttl=ttl,
            path=database_path,
            table_name=f"tantivy_{name}",
        )

        try:
            self._rebuild_tantivy_index()
        except Exception as e:
            raise utils.TantivyCollectionIndexError(
                f"Failed to build tantivy index from settings: {e}"
            ) from e

    def _rebuild_tantivy_index(self) -> None:
        """Build a tantivy index from `self.settings` and bind it to this instance."""
        wrapper = utils.build_tantivy_index_from_settings(settings=self.settings)
        self._tantivy_wrapper = wrapper
        self._index = wrapper.index
        self._schema = wrapper.schema
        self._writer = wrapper.index_writer

    def add(
        self,
        item: DatabaseItemType,
        *,
        id: Optional[str] = None,
        filters: Optional[DatabaseItemFilters] = None,
        ttl: Optional[int] = None,
    ) -> str:
        """Add a new item to the index.

        The item is written to the SQL database first and then indexed
        in tantivy under the id the database returned.

        Args:
            item: The item to add to the index.
            id: The id of the item.
            filters: The filters to apply to the item.
            ttl: The time to live for the item.

        Returns:
            The ID of the added item.
        """
        item_id = self._database.add(
            item=item,
            id=id,
            filters=filters,
            ttl=ttl,
        )

        self._add_to_tantivy_index(item_id, item, filters)

        return item_id

    def _add_to_tantivy_index(
        self,
        item_id: str,
        item: DatabaseItemType,
        filters: Optional[DatabaseItemFilters] = None,
    ) -> None:
        """Add item to tantivy search index and commit the writer."""
        doc = tantivy.Document()

        # Add ID field
        doc.add_text("id", item_id)

        # Extract and add content for search
        doc.add_text("content", utils.extract_content_for_indexing(item))

        # Add title field if present
        if isinstance(item, dict) and "title" in item:
            doc.add_text("title", str(item["title"]))

        # Store the full data as JSON in tantivy
        doc.add_json("data", json.dumps({"value": utils.serialize(item)}))

        # Add filters as facets, one `/key/value` facet per filter entry
        if filters:
            for key, value in filters.items():
                doc.add_facet("filters", tantivy.Facet.from_string(f"/{key}/{value}"))

        # Add timestamps
        doc.add_date("created_at", datetime.now(timezone.utc))

        # Add score field if present (stored as an integer, so floats
        # are truncated)
        if (
            isinstance(item, dict)
            and "score" in item
            and isinstance(item["score"], (int, float))
        ):
            doc.add_integer("score", int(item["score"]))

        # Add to index; committing per document makes it visible to the
        # next `query` call.
        self._writer.add_document(doc)
        self._writer.commit()

    def get(
        self,
        id: str,
        *,
        filters: Optional[DatabaseItemFilters] = None,
    ) -> Optional[DatabaseItem[DatabaseItemType]]:
        """Get an item by ID.

        Args:
            id: The item ID.
            filters: Optional filters to match.

        Returns:
            The database item or None if not found.
        """
        return self._database.get(id, filters=filters)

    def _build_text_query(
        self,
        query: str,
        *,
        fuzzy: bool,
        fuzzy_distance: int,
        phrase: bool,
        phrase_slop: int,
        boost_fields: Optional[Dict[str, float]],
    ) -> Any:
        """Translate the query text into a tantivy query (phrase, fuzzy or parsed)."""
        if phrase:
            return tantivy.Query.phrase_query(
                self._schema, "content", query.split(), slop=phrase_slop
            )

        if fuzzy:
            # One optional fuzzy clause per whitespace-separated term.
            return tantivy.Query.boolean_query(
                [
                    (
                        tantivy.Occur.Should,
                        tantivy.Query.fuzzy_term_query(
                            self._schema,
                            "content",
                            term,
                            distance=fuzzy_distance,
                        ),
                    )
                    for term in query.split()
                ]
            )

        # Use tantivy's query parser
        if boost_fields:
            return self._index.parse_query(
                query,
                default_field_names=["content", "title"],
                field_boosts=boost_fields,
            )
        return self._index.parse_query(
            query, default_field_names=["content", "title"]
        )

    def query(
        self,
        query: Optional[str] = None,
        *,
        filters: Optional[DatabaseItemFilters] = None,
        limit: Optional[int] = None,
        offset: int = 0,
        fuzzy: bool = False,
        fuzzy_distance: int = 2,
        phrase: bool = False,
        phrase_slop: int = 0,
        boost_fields: Optional[Dict[str, float]] = None,
        min_score: Optional[float] = None,
        sort_by: Optional[str] = None,
        ascending: bool = True,
    ) -> List[DatabaseItem[DatabaseItemType]]:
        """Query items using tantivy search.

        Without a `query` string the items are listed straight from the
        SQL database. With one, tantivy is searched for matching ids
        and each hit is then loaded from the database.

        Args:
            query: Search query string.
            filters: Dictionary of filters to apply.
            limit: Maximum number of results. Falls back to
                `query_settings.limit` when searching.
            offset: Number of results to skip.
            fuzzy: Enable fuzzy matching.
            fuzzy_distance: Maximum edit distance for fuzzy matching.
            phrase: Treat query as exact phrase match (takes
                precedence over `fuzzy`).
            phrase_slop: Max words between phrase terms.
            boost_fields: Field-specific score boosting.
            min_score: Minimum relevance score threshold.
            sort_by: Field to sort by.
            ascending: Sort direction.

        Returns:
            List of matching database items.
        """
        if not query:
            # No search query - use database query directly.
            # NOTE(review): `filters` is not forwarded on this path, so
            # an unfiltered listing is returned - confirm whether
            # `Database.query` can accept a filter.
            return self._database.query(
                limit=limit,
                offset=offset,
                order_by=sort_by,
                ascending=ascending,
            )

        # NOTE(review): `sort_by` / `ascending` are not applied on the
        # search path; hits come back in tantivy's order.

        # Use tantivy for search
        self._index.reload()
        searcher = self._index.searcher()

        # Every filter becomes a mandatory facet clause.
        query_parts = []
        if filters:
            for key, value in filters.items():
                facet_query = tantivy.Query.term_query(
                    self._schema,
                    "filters",
                    tantivy.Facet.from_string(f"/{key}/{value}"),
                )
                query_parts.append((tantivy.Occur.Must, facet_query))

        query_parts.append(
            (
                tantivy.Occur.Must,
                self._build_text_query(
                    query,
                    fuzzy=fuzzy,
                    fuzzy_distance=fuzzy_distance,
                    phrase=phrase,
                    phrase_slop=phrase_slop,
                    boost_fields=boost_fields,
                ),
            )
        )
        final_query = tantivy.Query.boolean_query(query_parts)

        # Execute search
        search_result = searcher.search(
            final_query,
            limit=limit or self.query_settings.limit,
            offset=offset,
        )

        # Get IDs from search results
        item_ids = []
        for score, doc_address in search_result.hits:
            # Explicit None check so a threshold of 0 is still honoured.
            if min_score is not None and score < min_score:
                continue

            item_id = searcher.doc(doc_address).get_first("id")
            if item_id:
                item_ids.append(item_id)

        # Fetch items from database by IDs; ids the database no longer
        # returns (e.g. deleted items still present in the tantivy
        # index) are dropped here.
        results = []
        for item_id in item_ids:
            db_item = self._database.get(item_id, filters=filters)
            if db_item:
                results.append(db_item)

        return results

    def delete(self, id: str) -> bool:
        """Delete an item by ID.

        Only the database row is removed. The tantivy document is left
        in place; `query` re-reads every hit from the database, so hits
        the database no longer returns are dropped from results.

        Args:
            id: The item ID.

        Returns:
            True if item was deleted, False if not found.
        """
        return self._database.delete(id)

    def count(
        self,
        query: Optional[str] = None,
        *,
        filters: Optional[DatabaseItemFilters] = None,
    ) -> int:
        """Count items matching the query and filters.

        Args:
            query: Search query string.
            filters: Dictionary of filters to apply.

        Returns:
            Number of matching items.
        """
        if not query:
            # Simple count from database
            from ....sql.types import QueryFilter, QueryCondition

            query_filter = None
            if filters:
                conditions = [
                    QueryCondition(
                        field="filters", operator="contains", value=json.dumps(filters)
                    )
                ]
                query_filter = QueryFilter(conditions=conditions)

            return self._database.count(query_filter)

        # Count via search results. `query` falls back to the default
        # result limit when `limit` is None, which would cap the count,
        # so ask for as many hits as there are indexed documents.
        self._index.reload()
        indexed_docs = self._index.searcher().num_docs
        if indexed_docs == 0:
            return 0
        return len(self.query(query, filters=filters, limit=indexed_docs))

    def clear(self) -> int:
        """Clear all items from the index.

        The database is cleared first; the tantivy index is then
        rebuilt on a best-effort basis. A failed rebuild is reported as
        a `RuntimeWarning` instead of an exception.

        Returns:
            Number of items deleted.
        """
        count = self._database.clear()

        # Clear tantivy index by rebuilding it
        try:
            self._rebuild_tantivy_index()
        except Exception as e:
            import warnings

            warnings.warn(
                f"Failed to rebuild tantivy index while clearing '{self.name}': {e}",
                RuntimeWarning,
                stacklevel=2,
            )

        return count

    def __repr__(self) -> str:
        """String representation of the index."""
        location = str(self.path) if self.path else "memory"
        return f"<TantivyCollectionIndex name='{self.name}' location='{location}'>"
@@ -1,40 +0,0 @@
1
- """hammad.data.collections.indexes.tantivy.settings"""
2
-
3
- from dataclasses import dataclass
4
- from typing import (
5
- Any,
6
- Dict,
7
- )
8
-
9
- __all__ = ("TantivyCollectionIndexSettings", "TantivyCollectionIndexQuerySettings")
10
-
11
-
12
@dataclass
class TantivyCollectionIndexSettings:
    """User configurable settings for building a
    `TantivyCollectionIndex` and its schema."""

    # Whether to use fast schema building & indexing from `tantivy`'s
    # builtin implementation.
    fast: bool = True

    def get_tantivy_config(self) -> Dict[str, Any]:
        """Return the per-field-kind option dictionaries used to
        configure the tantivy index internally."""
        use_fast = self.fast
        config: Dict[str, Any] = {}
        config["text_fields"] = {"stored": True, "fast": use_fast}
        # Numeric and date fields share the same options but each gets
        # its own dictionary.
        for field_kind in ("numeric_fields", "date_fields"):
            config[field_kind] = {"stored": True, "indexed": True, "fast": use_fast}
        config["json_fields"] = {"stored": True}
        return config
31
-
32
-
33
@dataclass
class TantivyCollectionIndexQuerySettings:
    """User configurable settings for the query engine of a
    `TantivyCollectionIndex`."""

    # The maximum number of results to return.
    limit: int = 10
@@ -1,176 +0,0 @@
1
- """hammad.data.collections.indexes.tantivy.utils"""
2
-
3
- from dataclasses import dataclass, is_dataclass, asdict
4
- from msgspec import json
5
- from typing import Any, Dict, List, Optional, final
6
-
7
- import tantivy
8
-
9
- from .....cache import cached
10
- from .settings import (
11
- TantivyCollectionIndexSettings,
12
- TantivyCollectionIndexQuerySettings,
13
- )
14
-
15
-
16
- __all__ = (
17
- "TantivyCollectionIndexError",
18
- "extract_content_for_indexing",
19
- )
20
-
21
-
22
class TantivyCollectionIndexError(Exception):
    """Error raised when a `TantivyCollectionIndex` operation fails."""
24
-
25
-
26
@dataclass
class TantivyIndexWrapper:
    """Bundle of the `tantivy` objects that make up one index."""

    # The `tantivy` index object.
    index: tantivy.Index

    # The `tantivy` schema object.
    schema: tantivy.Schema

    # The `tantivy` index writer object.
    index_writer: Any
38
-
39
-
40
@cached
def match_filters_for_query(
    stored_filters: Dict[str, Any] | None = None,
    query_filters: Dict[str, Any] | None = None,
) -> bool:
    """Check whether an item's stored filters satisfy the query filters.

    Args:
        stored_filters: The filters stored alongside an item.
        query_filters: The filters requested by the query.

    Returns:
        True when no query filters are given (`None` or empty), or when
        every query filter key maps to an equal value in
        `stored_filters`. False when filters are requested but the item
        has none stored.
    """
    # An empty filter mapping constrains nothing, exactly like None.
    if not query_filters:
        return True
    if stored_filters is None:
        return False
    return all(stored_filters.get(k) == v for k, v in query_filters.items())
51
-
52
-
53
@cached
def serialize(obj: Any) -> Any:
    """Convert an object into JSON-compatible Python data.

    The object is first round-tripped through the JSON encoder and
    decoder. If that raises, a manual fallback walks the object:
    primitives are returned as-is, lists/tuples and dicts are converted
    element by element, dataclass instances go through `asdict`,
    objects with a `__dict__` are converted from that, and anything
    else becomes `str(obj)`.

    Args:
        obj: The object to serialize.

    Returns:
        The JSON-compatible representation of `obj`.
    """
    try:
        return json.decode(json.encode(obj))
    except Exception:
        # Fallback to manual serialization if the encoder fails
        if isinstance(obj, (str, int, float, bool, type(None))):
            return obj
        elif isinstance(obj, (list, tuple)):
            return [serialize(item) for item in obj]
        elif isinstance(obj, dict):
            return {k: serialize(v) for k, v in obj.items()}
        elif is_dataclass(obj) and not isinstance(obj, type):
            # `is_dataclass` is also true for dataclass *types*, which
            # `asdict` rejects with TypeError; only instances go here.
            return serialize(asdict(obj))
        elif hasattr(obj, "__dict__"):
            return serialize(obj.__dict__)
        else:
            return str(obj)
72
-
73
-
74
def build_tantivy_index_from_settings(
    settings: TantivyCollectionIndexSettings,
) -> TantivyIndexWrapper:
    """Build a new `tantivy` index from the given settings.

    Deliberately not memoized: the returned index and writer are
    mutable, so every call must produce an independent index.

    The schema has these fields: `id`, `content` and `title` (text),
    `data` (JSON), `created_at` and `expires_at` (dates), `score`
    (integer) and `filters` (facet).

    Args:
        settings: The settings whose `get_tantivy_config()` supplies
            the field options and the optional `writer_heap_size`,
            `writer_num_threads` and `reader_config` entries.

    Returns:
        A `TantivyIndexWrapper` holding the index, its schema and an
        index writer.
    """
    # The configuration is the same for every field; compute it once.
    config = settings.get_tantivy_config()
    text_options = config["text_fields"]
    searchable_text_options = {
        **text_options,
        "tokenizer_name": "default",
        "index_option": "position",
    }

    # Init schema for index
    schema_builder = tantivy.SchemaBuilder()

    # ID (stored and indexed)
    schema_builder.add_text_field("id", **text_options)
    # Content (stored and indexed) Contains entry content
    schema_builder.add_text_field("content", **searchable_text_options)
    # Title (stored and indexed) Contains entry title
    schema_builder.add_text_field("title", **searchable_text_options)
    # JSON (stored) Contains actual entry data
    schema_builder.add_json_field("data", **config["json_fields"])

    # Timestamps
    schema_builder.add_date_field("created_at", **config["date_fields"])
    schema_builder.add_date_field("expires_at", **config["date_fields"])

    # Sorting / Scoring
    schema_builder.add_integer_field("score", **config["numeric_fields"])

    # Facet for Optional filters
    schema_builder.add_facet_field("filters")

    # Build the schema
    schema = schema_builder.build()

    # Create index in memory (no path means in-memory)
    index = tantivy.Index(schema)

    # Configure index writer with custom settings if provided
    writer_config = {}
    if "writer_heap_size" in config:
        writer_config["heap_size"] = config["writer_heap_size"]
    if "writer_num_threads" in config:
        writer_config["num_threads"] = config["writer_num_threads"]

    index_writer = index.writer(**writer_config)

    # Configure index reader if settings provided
    reader_config = config.get("reader_config", {})
    if reader_config:
        index.config_reader(
            reload_policy=reader_config.get("reload_policy", "commit"),
            num_warmers=reader_config.get("num_warmers", 0),
        )

    return TantivyIndexWrapper(schema=schema, index=index, index_writer=index_writer)
149
-
150
-
151
@cached
def extract_content_for_indexing(value: Any) -> str:
    """Flatten a value into the text that is indexed for search.

    Strings are returned unchanged. For dicts, the values (not the
    keys) are joined with spaces: strings as-is, nested lists/dicts
    JSON-encoded, anything else via `str()`. For lists and tuples, the
    members are joined with spaces, non-strings via `str()`. Any other
    value becomes `str(value)`.
    """
    if isinstance(value, str):
        return value

    if isinstance(value, dict):

        def _as_text(entry: Any) -> str:
            # Nested containers are rendered as JSON text.
            if isinstance(entry, str):
                return entry
            if isinstance(entry, (list, dict)):
                return json.encode(entry).decode()
            return str(entry)

        return " ".join([_as_text(entry) for entry in value.values()])

    if isinstance(value, (list, tuple)):
        return " ".join(
            [member if isinstance(member, str) else str(member) for member in value]
        )

    return str(value)
@@ -1,35 +0,0 @@
1
- """hammad.data.configurations
2
-
3
- Contains the `Configuration` class and related functions for parsing configurations
4
- from various sources.
5
- """
6
-
7
- from typing import TYPE_CHECKING
8
- from ..._internal import create_getattr_importer
9
-
10
- if TYPE_CHECKING:
11
- from .configuration import (
12
- Configuration,
13
- read_configuration_from_file,
14
- read_configuration_from_url,
15
- read_configuration_from_os_vars,
16
- read_configuration_from_os_prefix,
17
- read_configuration_from_dotenv,
18
- )
19
-
20
-
21
- __all__ = (
22
- "Configuration",
23
- "read_configuration_from_file",
24
- "read_configuration_from_url",
25
- "read_configuration_from_os_vars",
26
- "read_configuration_from_os_prefix",
27
- "read_configuration_from_dotenv",
28
- )
29
-
30
-
31
- __getattr__ = create_getattr_importer(__all__)
32
-
33
-
34
def __dir__() -> list[str]:
    """List the public names of this module, as declared in `__all__`."""
    return [*__all__]