hammad-python 0.0.13__py3-none-any.whl → 0.0.15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87) hide show
  1. hammad_python-0.0.15.dist-info/METADATA +184 -0
  2. hammad_python-0.0.15.dist-info/RECORD +4 -0
  3. hammad/__init__.py +0 -180
  4. hammad/_core/__init__.py +0 -1
  5. hammad/_core/_utils/__init__.py +0 -4
  6. hammad/_core/_utils/_import_utils.py +0 -182
  7. hammad/ai/__init__.py +0 -59
  8. hammad/ai/_utils.py +0 -142
  9. hammad/ai/completions/__init__.py +0 -44
  10. hammad/ai/completions/client.py +0 -729
  11. hammad/ai/completions/create.py +0 -686
  12. hammad/ai/completions/types.py +0 -711
  13. hammad/ai/completions/utils.py +0 -374
  14. hammad/ai/embeddings/__init__.py +0 -35
  15. hammad/ai/embeddings/client/__init__.py +0 -1
  16. hammad/ai/embeddings/client/base_embeddings_client.py +0 -26
  17. hammad/ai/embeddings/client/fastembed_text_embeddings_client.py +0 -200
  18. hammad/ai/embeddings/client/litellm_embeddings_client.py +0 -288
  19. hammad/ai/embeddings/create.py +0 -159
  20. hammad/ai/embeddings/types.py +0 -69
  21. hammad/base/__init__.py +0 -35
  22. hammad/base/fields.py +0 -546
  23. hammad/base/model.py +0 -1078
  24. hammad/base/utils.py +0 -280
  25. hammad/cache/__init__.py +0 -48
  26. hammad/cache/base_cache.py +0 -181
  27. hammad/cache/cache.py +0 -169
  28. hammad/cache/decorators.py +0 -261
  29. hammad/cache/file_cache.py +0 -80
  30. hammad/cache/ttl_cache.py +0 -74
  31. hammad/cli/__init__.py +0 -33
  32. hammad/cli/animations.py +0 -604
  33. hammad/cli/plugins.py +0 -781
  34. hammad/cli/styles/__init__.py +0 -55
  35. hammad/cli/styles/settings.py +0 -139
  36. hammad/cli/styles/types.py +0 -358
  37. hammad/cli/styles/utils.py +0 -480
  38. hammad/configuration/__init__.py +0 -35
  39. hammad/configuration/configuration.py +0 -564
  40. hammad/data/__init__.py +0 -39
  41. hammad/data/collections/__init__.py +0 -34
  42. hammad/data/collections/base_collection.py +0 -58
  43. hammad/data/collections/collection.py +0 -452
  44. hammad/data/collections/searchable_collection.py +0 -556
  45. hammad/data/collections/vector_collection.py +0 -603
  46. hammad/data/databases/__init__.py +0 -21
  47. hammad/data/databases/database.py +0 -902
  48. hammad/json/__init__.py +0 -21
  49. hammad/json/converters.py +0 -152
  50. hammad/logging/__init__.py +0 -35
  51. hammad/logging/decorators.py +0 -834
  52. hammad/logging/logger.py +0 -954
  53. hammad/multimodal/__init__.py +0 -24
  54. hammad/multimodal/audio.py +0 -96
  55. hammad/multimodal/image.py +0 -80
  56. hammad/multithreading/__init__.py +0 -304
  57. hammad/py.typed +0 -0
  58. hammad/pydantic/__init__.py +0 -43
  59. hammad/pydantic/converters.py +0 -623
  60. hammad/pydantic/models/__init__.py +0 -28
  61. hammad/pydantic/models/arbitrary_model.py +0 -46
  62. hammad/pydantic/models/cacheable_model.py +0 -79
  63. hammad/pydantic/models/fast_model.py +0 -318
  64. hammad/pydantic/models/function_model.py +0 -176
  65. hammad/pydantic/models/subscriptable_model.py +0 -63
  66. hammad/text/__init__.py +0 -82
  67. hammad/text/converters.py +0 -723
  68. hammad/text/markdown.py +0 -131
  69. hammad/text/text.py +0 -1066
  70. hammad/types/__init__.py +0 -11
  71. hammad/types/file.py +0 -358
  72. hammad/typing/__init__.py +0 -407
  73. hammad/web/__init__.py +0 -43
  74. hammad/web/http/__init__.py +0 -1
  75. hammad/web/http/client.py +0 -944
  76. hammad/web/models.py +0 -245
  77. hammad/web/openapi/__init__.py +0 -0
  78. hammad/web/openapi/client.py +0 -740
  79. hammad/web/search/__init__.py +0 -1
  80. hammad/web/search/client.py +0 -988
  81. hammad/web/utils.py +0 -472
  82. hammad/yaml/__init__.py +0 -30
  83. hammad/yaml/converters.py +0 -19
  84. hammad_python-0.0.13.dist-info/METADATA +0 -38
  85. hammad_python-0.0.13.dist-info/RECORD +0 -85
  86. {hammad_python-0.0.13.dist-info → hammad_python-0.0.15.dist-info}/WHEEL +0 -0
  87. {hammad_python-0.0.13.dist-info → hammad_python-0.0.15.dist-info}/licenses/LICENSE +0 -0
@@ -1,556 +0,0 @@
1
- """hammad.data.collections.searchable_collection"""
2
-
3
- import uuid
4
- import json
5
- from typing import Any, Dict, Optional, List, Generic
6
- from datetime import datetime, timezone, timedelta
7
- from dataclasses import asdict, is_dataclass
8
- import tantivy
9
-
10
- from .base_collection import BaseCollection, Object, Filters, Schema
11
-
12
- __all__ = ("SearchableCollection",)
13
-
14
-
15
- class SearchableCollection(BaseCollection, Generic[Object]):
16
- """
17
- Base collection class that can be used independently or with a database.
18
-
19
- This provides the core collection functionality that can work standalone
20
- or be integrated with the main Database class.
21
- """
22
-
23
def __init__(
    self,
    name: str,
    schema: Optional[Schema] = None,
    default_ttl: Optional[int] = None,
    storage_backend: Optional[Any] = None,
    tantivy_config: Optional[Dict[str, Any]] = None,
):
    """
    Set up the collection state and build its tantivy index.

    Args:
        name: The name of the collection
        schema: Optional schema for type validation (stored as-is)
        default_ttl: TTL in seconds used when an item is added without one
        storage_backend: Optional backend (Database instance or custom).
            When it is not None, get/add/query delegate to it.
        tantivy_config: Optional settings read while building the index.
            Recognised keys: "text_fields", "numeric_fields",
            "date_fields", "json_fields", "writer_heap_size",
            "writer_num_threads" and "reader_config".
            Example: {
                "text_fields": {"fast": True, "stored": True},
                "numeric_fields": {"fast": True, "indexed": True},
                "writer_heap_size": 256_000_000,
                "writer_num_threads": 2
            }
    """
    self.name = name
    self.schema = schema
    self.default_ttl = default_ttl
    self._storage_backend = storage_backend

    # A missing or empty configuration becomes an empty dict so later
    # lookups can use .get() without None checks.
    self._tantivy_config = tantivy_config if tantivy_config else {}

    # Item records keyed by id; used only when no backend is attached.
    self._items: Dict[str, Dict[str, Any]] = {}

    # The index is built last because it reads self._tantivy_config.
    self._init_tantivy_index()
60
-
61
def _init_tantivy_index(self):
    """
    Build the tantivy schema, index and index writer for this collection.

    Field options come from ``self._tantivy_config`` ("text_fields",
    "numeric_fields", "date_fields", "json_fields"), each with a built-in
    default. The schema fields are, in order: id, content, title (text),
    data (JSON), filters (facet), created_at, expires_at (date) and
    score (integer).

    Sets ``self._tantivy_schema``, ``self._index`` and
    ``self._index_writer``.
    """
    config = self._tantivy_config

    # Per-type field options, overridable through the configuration.
    text_options = config.get("text_fields", {"stored": True, "fast": True})
    numeric_options = config.get(
        "numeric_fields", {"stored": True, "indexed": True, "fast": True}
    )
    date_options = config.get(
        "date_fields", {"stored": True, "indexed": True, "fast": True}
    )
    json_options = config.get("json_fields", {"stored": True})

    builder = tantivy.SchemaBuilder()

    # Item identifier.
    builder.add_text_field("id", **text_options)

    # Full-text fields: the text options plus a fixed tokenizer and
    # index option (these two always override configured values).
    for field_name in ("content", "title"):
        searchable_options = {
            **text_options,
            "tokenizer_name": "default",
            "index_option": "position",
        }
        builder.add_text_field(field_name, **searchable_options)

    # The stored object itself, filter facets, timestamps and a numeric
    # field for sorting.
    builder.add_json_field("data", **json_options)
    builder.add_facet_field("filters")
    for field_name in ("created_at", "expires_at"):
        builder.add_date_field(field_name, **date_options)
    builder.add_integer_field("score", **numeric_options)

    self._tantivy_schema = builder.build()

    # No path is given, so the index is created in memory.
    self._index = tantivy.Index(self._tantivy_schema)

    # Only forward writer settings that were explicitly configured.
    writer_options = {
        option: config[key]
        for key, option in (
            ("writer_heap_size", "heap_size"),
            ("writer_num_threads", "num_threads"),
        )
        if key in config
    }
    self._index_writer = self._index.writer(**writer_options)

    # The reader is only reconfigured when a non-empty reader_config exists.
    reader_options = config.get("reader_config", {})
    if reader_options:
        self._index.config_reader(
            reload_policy=reader_options.get("reload_policy", "commit"),
            num_warmers=reader_options.get("num_warmers", 0),
        )
133
-
134
- def __repr__(self) -> str:
135
- item_count = len(self._items) if self._storage_backend is None else "managed"
136
- return f"<{self.__class__.__name__} name='{self.name}' items={item_count}>"
137
-
138
- def _calculate_expires_at(self, ttl: Optional[int]) -> Optional[datetime]:
139
- """Calculate expiry time based on TTL."""
140
- if ttl is None:
141
- ttl = self.default_ttl
142
- if ttl and ttl > 0:
143
- return datetime.now(timezone.utc) + timedelta(seconds=ttl)
144
- return None
145
-
146
- def _is_expired(self, expires_at: Optional[datetime]) -> bool:
147
- """Check if an item has expired."""
148
- if expires_at is None:
149
- return False
150
- now = datetime.now(timezone.utc)
151
- if expires_at.tzinfo is None:
152
- expires_at = expires_at.replace(tzinfo=timezone.utc)
153
- return now >= expires_at
154
-
155
- def _match_filters(
156
- self, stored: Optional[Filters], query: Optional[Filters]
157
- ) -> bool:
158
- """Check if stored filters match query filters."""
159
- if query is None:
160
- return True
161
- if stored is None:
162
- return False
163
- return all(stored.get(k) == v for k, v in query.items())
164
-
165
- def get(self, id: str, *, filters: Optional[Filters] = None) -> Optional[Object]:
166
- """Get an item by ID."""
167
- if self._storage_backend is not None:
168
- # Delegate to storage backend (Database instance)
169
- return self._storage_backend.get(id, collection=self.name, filters=filters)
170
-
171
- # Independent operation
172
- item = self._items.get(id)
173
- if not item:
174
- return None
175
-
176
- if self._is_expired(item.get("expires_at")):
177
- del self._items[id]
178
- return None
179
-
180
- if not self._match_filters(item.get("filters"), filters):
181
- return None
182
-
183
- return item["value"]
184
-
185
- def _serialize_for_json(self, obj: Any) -> Any:
186
- """Serialize object for JSON storage."""
187
- if isinstance(obj, (str, int, float, bool, type(None))):
188
- return obj
189
- elif isinstance(obj, (list, tuple)):
190
- return [self._serialize_for_json(item) for item in obj]
191
- elif isinstance(obj, dict):
192
- return {k: self._serialize_for_json(v) for k, v in obj.items()}
193
- elif is_dataclass(obj):
194
- return self._serialize_for_json(asdict(obj))
195
- elif hasattr(obj, "__dict__"):
196
- return self._serialize_for_json(obj.__dict__)
197
- else:
198
- return str(obj)
199
-
200
- def add(
201
- self,
202
- entry: Object,
203
- *,
204
- id: Optional[str] = None,
205
- filters: Optional[Filters] = None,
206
- ttl: Optional[int] = None,
207
- ) -> None:
208
- """Add an item to the collection."""
209
- if self._storage_backend is not None:
210
- # Delegate to storage backend
211
- self._storage_backend.add(
212
- entry, id=id, collection=self.name, filters=filters, ttl=ttl
213
- )
214
- return
215
-
216
- # Independent operation
217
- item_id = id or str(uuid.uuid4())
218
- expires_at = self._calculate_expires_at(ttl)
219
- created_at = datetime.now(timezone.utc)
220
-
221
- # Store in memory
222
- self._items[item_id] = {
223
- "value": entry,
224
- "filters": filters or {},
225
- "created_at": created_at,
226
- "updated_at": created_at,
227
- "expires_at": expires_at,
228
- }
229
-
230
- # Add to tantivy index
231
- doc = tantivy.Document()
232
- doc.add_text("id", item_id)
233
-
234
- # Extract searchable content
235
- content = self._extract_content_for_indexing(entry)
236
- doc.add_text("content", content)
237
-
238
- # Add title field if present
239
- if isinstance(entry, dict) and "title" in entry:
240
- doc.add_text("title", str(entry["title"]))
241
-
242
- # Store the full data as JSON
243
- serialized_data = self._serialize_for_json(entry)
244
- # Wrap in object structure for tantivy JSON field
245
- json_data = {"value": serialized_data}
246
- doc.add_json("data", json.dumps(json_data))
247
-
248
- # Add filters as facets
249
- if filters:
250
- for key, value in filters.items():
251
- facet_value = f"/{key}/{value}"
252
- doc.add_facet("filters", tantivy.Facet.from_string(facet_value))
253
-
254
- # Add timestamps
255
- doc.add_date("created_at", created_at)
256
- if expires_at:
257
- doc.add_date("expires_at", expires_at)
258
-
259
- # Add score field if present
260
- if (
261
- isinstance(entry, dict)
262
- and "score" in entry
263
- and isinstance(entry["score"], (int, float))
264
- ):
265
- doc.add_integer("score", int(entry["score"]))
266
-
267
- self._index_writer.add_document(doc)
268
- self._index_writer.commit()
269
-
270
- def _extract_content_for_indexing(self, value: Any) -> str:
271
- """Extract searchable text content from value for indexing."""
272
- if isinstance(value, str):
273
- return value
274
- elif isinstance(value, dict):
275
- # Concatenate all string values
276
- content_parts = []
277
- for v in value.values():
278
- if isinstance(v, str):
279
- content_parts.append(v)
280
- elif isinstance(v, (list, dict)):
281
- content_parts.append(json.dumps(v))
282
- else:
283
- content_parts.append(str(v))
284
- return " ".join(content_parts)
285
- else:
286
- return str(value)
287
-
288
- def query(
289
- self,
290
- *,
291
- filters: Optional[Filters] = None,
292
- search: Optional[str] = None,
293
- limit: Optional[int] = None,
294
- offset: int = 0,
295
- fields: Optional[List[str]] = None,
296
- fuzzy: bool = False,
297
- fuzzy_distance: int = 2,
298
- fuzzy_transposition_cost_one: bool = True,
299
- fuzzy_prefix: bool = False,
300
- phrase: bool = False,
301
- phrase_slop: int = 0,
302
- boost_fields: Optional[Dict[str, float]] = None,
303
- min_score: Optional[float] = None,
304
- sort_by: Optional[str] = None,
305
- ascending: bool = True,
306
- count: bool = True,
307
- regex_search: Optional[str] = None,
308
- ) -> List[Object]:
309
- """
310
- Query items from the collection using tantivy search.
311
-
312
- Args:
313
- filters: Dictionary of filters to apply to results
314
- search: Search query string supporting boolean operators (AND, OR, NOT, +, -)
315
- limit: Maximum number of results to return
316
- offset: Number of results to skip (for pagination)
317
- fields: Specific fields to search in (defaults to content field)
318
- fuzzy: Enable fuzzy matching for approximate string matching
319
- fuzzy_distance: Maximum edit distance for fuzzy matching (default: 2)
320
- fuzzy_transposition_cost_one: Whether transpositions have cost 1 in fuzzy matching
321
- fuzzy_prefix: Whether to match only as prefix in fuzzy search
322
- phrase: Treat search query as exact phrase match
323
- phrase_slop: Maximum number of words that can appear between phrase terms
324
- boost_fields: Field-specific score boosting weights (field_name -> boost_factor)
325
- min_score: Minimum relevance score threshold for results
326
- sort_by: Field name to sort results by (defaults to relevance score)
327
- ascending: Sort order direction (True for ascending, False for descending)
328
- count: Whether to count total matches (performance optimization)
329
- regex_search: Regular expression pattern to search for in specified fields
330
-
331
- Returns:
332
- List of matching objects sorted by relevance or specified field
333
- """
334
- if self._storage_backend is not None:
335
- # Delegate to storage backend with enhanced parameters
336
- return self._storage_backend.query(
337
- collection=self.name,
338
- filters=filters,
339
- search=search,
340
- limit=limit,
341
- offset=offset,
342
- fields=fields,
343
- fuzzy=fuzzy,
344
- fuzzy_distance=fuzzy_distance,
345
- fuzzy_transposition_cost_one=fuzzy_transposition_cost_one,
346
- fuzzy_prefix=fuzzy_prefix,
347
- phrase=phrase,
348
- phrase_slop=phrase_slop,
349
- boost_fields=boost_fields,
350
- min_score=min_score,
351
- sort_by=sort_by,
352
- ascending=ascending,
353
- count=count,
354
- regex_search=regex_search,
355
- )
356
-
357
- # Refresh index and get searcher
358
- self._index.reload()
359
- searcher = self._index.searcher()
360
-
361
- # Build the query
362
- query_parts = []
363
-
364
- # Add filter queries
365
- if filters:
366
- for key, value in filters.items():
367
- facet_query = tantivy.Query.term_query(
368
- self._tantivy_schema,
369
- "filters",
370
- tantivy.Facet.from_string(f"/{key}/{value}"),
371
- )
372
- query_parts.append((tantivy.Occur.Must, facet_query))
373
-
374
- # Add search query
375
- if regex_search:
376
- # Regular expression query
377
- search_query = tantivy.Query.regex_query(
378
- self._tantivy_schema, fields[0] if fields else "content", regex_search
379
- )
380
- query_parts.append((tantivy.Occur.Must, search_query))
381
- elif search:
382
- if phrase:
383
- # Phrase query
384
- words = search.split()
385
- search_query = tantivy.Query.phrase_query(
386
- self._tantivy_schema, "content", words, slop=phrase_slop
387
- )
388
- elif fuzzy:
389
- # Fuzzy query for each term
390
- terms = search.split()
391
- fuzzy_queries = []
392
- for term in terms:
393
- fuzzy_q = tantivy.Query.fuzzy_term_query(
394
- self._tantivy_schema,
395
- "content",
396
- term,
397
- distance=fuzzy_distance,
398
- transposition_cost_one=fuzzy_transposition_cost_one,
399
- prefix=fuzzy_prefix,
400
- )
401
- fuzzy_queries.append((tantivy.Occur.Should, fuzzy_q))
402
- search_query = tantivy.Query.boolean_query(fuzzy_queries)
403
- else:
404
- # Use tantivy's query parser for boolean operators
405
- # Handle None boost_fields
406
- if boost_fields:
407
- search_query = self._index.parse_query(
408
- search,
409
- default_field_names=fields or ["content", "title"],
410
- field_boosts=boost_fields,
411
- )
412
- else:
413
- search_query = self._index.parse_query(
414
- search, default_field_names=fields or ["content", "title"]
415
- )
416
-
417
- query_parts.append((tantivy.Occur.Must, search_query))
418
-
419
- # Build final query
420
- if query_parts:
421
- final_query = tantivy.Query.boolean_query(query_parts)
422
- else:
423
- final_query = tantivy.Query.all_query()
424
-
425
- # Execute search
426
- limit = limit or 100
427
-
428
- # Use tantivy's built-in sorting for known fast fields, otherwise manual sort
429
- tantivy_sortable_fields = {
430
- "score",
431
- "created_at",
432
- "expires_at",
433
- } # Remove title for now
434
-
435
- if sort_by and sort_by in tantivy_sortable_fields:
436
- # Use tantivy's built-in sorting for fast fields
437
- try:
438
- search_result = searcher.search(
439
- final_query,
440
- limit=limit,
441
- offset=offset,
442
- count=count,
443
- order_by_field=sort_by,
444
- order=tantivy.Order.Asc if ascending else tantivy.Order.Desc,
445
- )
446
- manual_sort_needed = False
447
- except Exception:
448
- # Fallback to manual sorting if tantivy sorting fails
449
- search_result = searcher.search(
450
- final_query, limit=1000, offset=offset, count=count
451
- )
452
- manual_sort_needed = True
453
- else:
454
- # Default search or manual sorting needed
455
- search_result = searcher.search(
456
- final_query,
457
- limit=1000 if sort_by else limit,
458
- offset=offset,
459
- count=count,
460
- )
461
- manual_sort_needed = bool(sort_by and sort_by != "score")
462
-
463
- # Extract results
464
- if manual_sort_needed:
465
- # Manual sorting needed for non-tantivy fields
466
- all_results = []
467
- for score, doc_address in search_result.hits:
468
- # Skip if min_score is set and score is too low
469
- if min_score and score < min_score:
470
- continue
471
-
472
- doc = searcher.doc(doc_address)
473
-
474
- # Check expiration
475
- expires_at = doc.get_first("expires_at")
476
- if expires_at and self._is_expired(expires_at):
477
- continue
478
-
479
- # Get the stored data
480
- data = doc.get_first("data")
481
- if data:
482
- # Parse JSON data back to Python object
483
- if isinstance(data, str):
484
- json_obj = json.loads(data)
485
- parsed_data = json_obj.get("value", json_obj)
486
- else:
487
- parsed_data = (
488
- data.get("value", data) if isinstance(data, dict) else data
489
- )
490
- all_results.append((score, parsed_data))
491
-
492
- # Sort by the specified field
493
- all_results.sort(
494
- key=lambda x: self._get_sort_value(x[1], sort_by), reverse=not ascending
495
- )
496
-
497
- # Apply limit and extract just the data
498
- results = [data for _, data in all_results[:limit]]
499
- else:
500
- # Direct extraction for tantivy-sorted or unsorted results
501
- results = []
502
- for score, doc_address in search_result.hits:
503
- # Skip if min_score is set and score is too low
504
- if min_score and score < min_score:
505
- continue
506
-
507
- doc = searcher.doc(doc_address)
508
-
509
- # Check expiration
510
- expires_at = doc.get_first("expires_at")
511
- if expires_at and self._is_expired(expires_at):
512
- continue
513
-
514
- # Get the stored data
515
- data = doc.get_first("data")
516
- if data:
517
- # Parse JSON data back to Python object
518
- if isinstance(data, str):
519
- json_obj = json.loads(data)
520
- parsed_data = json_obj.get("value", json_obj)
521
- else:
522
- parsed_data = (
523
- data.get("value", data) if isinstance(data, dict) else data
524
- )
525
- results.append(parsed_data)
526
-
527
- return results
528
-
529
- def _get_sort_value(self, value: Any, sort_field: str) -> Any:
530
- """Extract sort value from object for specified field."""
531
- if isinstance(value, dict):
532
- # For dictionaries, return the value or a default that sorts appropriately
533
- if sort_field in value:
534
- val = value[sort_field]
535
- # Handle numeric values properly
536
- if isinstance(val, (int, float)):
537
- return val
538
- return str(val)
539
- # Return a value that sorts to the end for missing fields
540
- return float("inf") if sort_field == "score" else ""
541
- elif hasattr(value, sort_field):
542
- val = getattr(value, sort_field)
543
- if isinstance(val, (int, float)):
544
- return val
545
- return str(val)
546
- else:
547
- # Return a value that sorts to the end for missing fields
548
- return float("inf") if sort_field == "score" else ""
549
-
550
- def attach_to_database(self, database: Any) -> None:
551
- """Attach this collection to a database instance."""
552
- self._storage_backend = database
553
- # Ensure the collection exists in the database
554
- database.create_collection(
555
- self.name, schema=self.schema, default_ttl=self.default_ttl
556
- )