mdb-engine 0.2.1__py3-none-any.whl → 0.2.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70)
  1. mdb_engine/__init__.py +7 -1
  2. mdb_engine/auth/README.md +6 -0
  3. mdb_engine/auth/audit.py +40 -40
  4. mdb_engine/auth/base.py +3 -3
  5. mdb_engine/auth/casbin_factory.py +6 -6
  6. mdb_engine/auth/config_defaults.py +5 -5
  7. mdb_engine/auth/config_helpers.py +12 -12
  8. mdb_engine/auth/cookie_utils.py +9 -9
  9. mdb_engine/auth/csrf.py +9 -8
  10. mdb_engine/auth/decorators.py +7 -6
  11. mdb_engine/auth/dependencies.py +22 -21
  12. mdb_engine/auth/integration.py +9 -9
  13. mdb_engine/auth/jwt.py +9 -9
  14. mdb_engine/auth/middleware.py +4 -3
  15. mdb_engine/auth/oso_factory.py +6 -6
  16. mdb_engine/auth/provider.py +4 -4
  17. mdb_engine/auth/rate_limiter.py +12 -11
  18. mdb_engine/auth/restrictions.py +16 -15
  19. mdb_engine/auth/session_manager.py +11 -13
  20. mdb_engine/auth/shared_middleware.py +344 -132
  21. mdb_engine/auth/shared_users.py +20 -20
  22. mdb_engine/auth/token_lifecycle.py +10 -12
  23. mdb_engine/auth/token_store.py +4 -5
  24. mdb_engine/auth/users.py +51 -52
  25. mdb_engine/auth/utils.py +29 -33
  26. mdb_engine/cli/commands/generate.py +6 -6
  27. mdb_engine/cli/utils.py +4 -4
  28. mdb_engine/config.py +6 -7
  29. mdb_engine/core/app_registration.py +12 -12
  30. mdb_engine/core/app_secrets.py +1 -2
  31. mdb_engine/core/connection.py +3 -4
  32. mdb_engine/core/encryption.py +1 -2
  33. mdb_engine/core/engine.py +43 -44
  34. mdb_engine/core/manifest.py +80 -58
  35. mdb_engine/core/ray_integration.py +10 -9
  36. mdb_engine/core/seeding.py +3 -3
  37. mdb_engine/core/service_initialization.py +10 -9
  38. mdb_engine/core/types.py +40 -40
  39. mdb_engine/database/abstraction.py +15 -16
  40. mdb_engine/database/connection.py +40 -12
  41. mdb_engine/database/query_validator.py +8 -8
  42. mdb_engine/database/resource_limiter.py +7 -7
  43. mdb_engine/database/scoped_wrapper.py +51 -58
  44. mdb_engine/dependencies.py +14 -13
  45. mdb_engine/di/container.py +12 -13
  46. mdb_engine/di/providers.py +14 -13
  47. mdb_engine/di/scopes.py +5 -5
  48. mdb_engine/embeddings/dependencies.py +2 -2
  49. mdb_engine/embeddings/service.py +67 -50
  50. mdb_engine/exceptions.py +20 -20
  51. mdb_engine/indexes/helpers.py +11 -11
  52. mdb_engine/indexes/manager.py +9 -9
  53. mdb_engine/memory/README.md +93 -2
  54. mdb_engine/memory/service.py +361 -1109
  55. mdb_engine/observability/health.py +10 -9
  56. mdb_engine/observability/logging.py +10 -10
  57. mdb_engine/observability/metrics.py +8 -7
  58. mdb_engine/repositories/base.py +25 -25
  59. mdb_engine/repositories/mongo.py +17 -17
  60. mdb_engine/repositories/unit_of_work.py +6 -6
  61. mdb_engine/routing/websockets.py +19 -18
  62. mdb_engine/utils/__init__.py +3 -1
  63. mdb_engine/utils/mongo.py +117 -0
  64. {mdb_engine-0.2.1.dist-info → mdb_engine-0.2.4.dist-info}/METADATA +88 -13
  65. mdb_engine-0.2.4.dist-info/RECORD +97 -0
  66. {mdb_engine-0.2.1.dist-info → mdb_engine-0.2.4.dist-info}/WHEEL +1 -1
  67. mdb_engine-0.2.1.dist-info/RECORD +0 -96
  68. {mdb_engine-0.2.1.dist-info → mdb_engine-0.2.4.dist-info}/entry_points.txt +0 -0
  69. {mdb_engine-0.2.1.dist-info → mdb_engine-0.2.4.dist-info}/licenses/LICENSE +0 -0
  70. {mdb_engine-0.2.1.dist-info → mdb_engine-0.2.4.dist-info}/top_level.txt +0 -0
mdb_engine/embeddings/service.py CHANGED
@@ -23,7 +23,7 @@ import os
  import time
  from abc import ABC, abstractmethod
  from datetime import datetime
- from typing import Any, Dict, List, Optional, Union
+ from typing import Any
 
  # Optional OpenAI SDK import
  try:
@@ -59,9 +59,7 @@ class BaseEmbeddingProvider(ABC):
  """
 
  @abstractmethod
- async def embed(
- self, text: Union[str, List[str]], model: Optional[str] = None
- ) -> List[List[float]]:
+ async def embed(self, text: str | list[str], model: str | None = None) -> list[list[float]]:
  """
  Generate embeddings for text.
 
@@ -84,7 +82,7 @@ class OpenAIEmbeddingProvider(BaseEmbeddingProvider):
 
  def __init__(
  self,
- api_key: Optional[str] = None,
+ api_key: str | None = None,
  default_model: str = "text-embedding-3-small",
  ):
  """
@@ -108,9 +106,7 @@ class OpenAIEmbeddingProvider(BaseEmbeddingProvider):
  self.client = AsyncOpenAI(api_key=api_key)
  self.default_model = default_model
 
- async def embed(
- self, text: Union[str, List[str]], model: Optional[str] = None
- ) -> List[List[float]]:
+ async def embed(self, text: str | list[str], model: str | None = None) -> list[list[float]]:
  """Generate embeddings using OpenAI."""
  model = model or self.default_model
 
@@ -149,9 +145,9 @@ class AzureOpenAIEmbeddingProvider(BaseEmbeddingProvider):
 
  def __init__(
  self,
- api_key: Optional[str] = None,
- endpoint: Optional[str] = None,
- api_version: Optional[str] = None,
+ api_key: str | None = None,
+ endpoint: str | None = None,
+ api_version: str | None = None,
  default_model: str = "text-embedding-3-small",
  ):
  """
@@ -191,9 +187,7 @@ class AzureOpenAIEmbeddingProvider(BaseEmbeddingProvider):
  )
  self.default_model = default_model
 
- async def embed(
- self, text: Union[str, List[str]], model: Optional[str] = None
- ) -> List[List[float]]:
+ async def embed(self, text: str | list[str], model: str | None = None) -> list[list[float]]:
  """Generate embeddings using Azure OpenAI."""
  model = model or self.default_model
 
@@ -255,8 +249,8 @@ class EmbeddingProvider:
 
  def __init__(
  self,
- embedding_provider: Optional[BaseEmbeddingProvider] = None,
- config: Optional[Dict[str, Any]] = None,
+ embedding_provider: BaseEmbeddingProvider | None = None,
+ config: dict[str, Any] | None = None,
  ):
  """
  Initialize Embedding Provider.
@@ -293,9 +287,7 @@ class EmbeddingProvider:
  # Store config for potential future use
  self.config = config or {}
 
- async def embed(
- self, text: Union[str, List[str]], model: Optional[str] = None
- ) -> List[List[float]]:
+ async def embed(self, text: str | list[str], model: str | None = None) -> list[list[float]]:
  """
  Generates vector embeddings for a string or list of strings.
 
@@ -361,10 +353,10 @@ class EmbeddingService:
 
  def __init__(
  self,
- embedding_provider: Optional[EmbeddingProvider] = None,
+ embedding_provider: EmbeddingProvider | None = None,
  default_max_tokens: int = 1000,
  default_tokenizer_model: str = "gpt-3.5-turbo",
- config: Optional[Dict[str, Any]] = None,
+ config: dict[str, Any] | None = None,
  ):
  """
  Initialize Embedding Service.
@@ -397,9 +389,7 @@ class EmbeddingService:
  self.default_max_tokens = default_max_tokens
  self.default_tokenizer_model = default_tokenizer_model
 
- def _create_splitter(
- self, max_tokens: int, tokenizer_model: Optional[str] = None
- ) -> TextSplitter:
+ def _create_splitter(self, max_tokens: int, tokenizer_model: str | None = None) -> TextSplitter:
  """
  Create a TextSplitter instance.
 
@@ -419,9 +409,9 @@ class EmbeddingService:
  async def chunk_text(
  self,
  text_content: str,
- max_tokens: Optional[int] = None,
- tokenizer_model: Optional[str] = None,
- ) -> List[str]:
+ max_tokens: int | None = None,
+ tokenizer_model: str | None = None,
+ ) -> list[str]:
  """
  Split text into semantic chunks.
 
@@ -455,32 +445,39 @@ class EmbeddingService:
  logger.error(f"Error chunking text: {e}", exc_info=True)
  raise EmbeddingServiceError(f"Chunking failed: {str(e)}") from e
 
- async def embed_chunks(
- self, chunks: List[str], model: Optional[str] = None
- ) -> List[List[float]]:
+ async def embed(self, text: str | list[str], model: str | None = None) -> list[list[float]]:
  """
- Generate embeddings for text chunks.
+ Generate embeddings for text or a list of texts.
 
- Uses the user-provided embedding provider/function.
+ Natural API that works with both single strings and lists.
 
  Args:
- chunks: List of text chunks to embed
+ text: A single string or list of strings to embed
  model: Optional model identifier (passed to embedding provider)
 
  Returns:
- List of embedding vectors (each is a list of floats)
+ List of embedding vectors (each is a list of floats).
+ If input was a single string, returns a list containing one vector.
 
  Example:
- chunks = ["chunk 1", "chunk 2"]
- vectors = await service.embed_chunks(chunks, model="text-embedding-3-small")
+ # Single string
+ vectors = await service.embed("Hello world", model="text-embedding-3-small")
+ # vectors is [[0.1, 0.2, ...]]
+
+ # List of strings (batch - more efficient)
+ vectors = await service.embed(["chunk 1", "chunk 2"], model="text-embedding-3-small")
+ # vectors is [[0.1, ...], [0.2, ...]]
  """
+ # Normalize to list
+ chunks = [text] if isinstance(text, str) else text
+
  if not chunks:
  return []
 
  try:
  # Use EmbeddingProvider's embed method (handles retries, logging, etc.)
  vectors = await self.embedding_provider.embed(chunks, model=model)
- logger.info(f"Generated {len(vectors)} embeddings")
+ logger.info(f"Generated {len(vectors)} embedding(s)")
  return vectors
  except (
  AttributeError,
@@ -493,16 +490,36 @@ class EmbeddingService:
  logger.error(f"Error generating embeddings: {e}", exc_info=True)
  raise EmbeddingServiceError(f"Embedding generation failed: {str(e)}") from e
 
+ async def embed_chunks(self, chunks: list[str], model: str | None = None) -> list[list[float]]:
+ """
+ Generate embeddings for text chunks (list only).
+
+ DEPRECATED: Use embed() instead, which accepts both strings and lists.
+ This method is kept for backward compatibility.
+
+ Args:
+ chunks: List of text chunks to embed
+ model: Optional model identifier (passed to embedding provider)
+
+ Returns:
+ List of embedding vectors (each is a list of floats)
+
+ Example:
+ chunks = ["chunk 1", "chunk 2"]
+ vectors = await service.embed_chunks(chunks, model="text-embedding-3-small")
+ """
+ return await self.embed(chunks, model=model)
+
  async def process_and_store(
  self,
  text_content: str,
  source_id: str,
  collection: Any, # MongoDB collection (AppDB Collection or Motor collection)
- max_tokens: Optional[int] = None,
- tokenizer_model: Optional[str] = None,
- embedding_model: Optional[str] = None,
- metadata: Optional[Dict[str, Any]] = None,
- ) -> Dict[str, Any]:
+ max_tokens: int | None = None,
+ tokenizer_model: str | None = None,
+ embedding_model: str | None = None,
+ metadata: dict[str, Any] | None = None,
+ ) -> dict[str, Any]:
  """
  Process text and store chunks with embeddings in MongoDB.
 
@@ -573,7 +590,7 @@ class EmbeddingService:
 
  # Step 3: Prepare documents for insertion
  documents_to_insert = []
- for i, (chunk_text, vector) in enumerate(zip(chunks, vectors)):
+ for i, (chunk_text, vector) in enumerate(zip(chunks, vectors, strict=False)):
  doc = {
  "source_id": source_id,
  "chunk_index": i,
@@ -626,10 +643,10 @@ class EmbeddingService:
  async def process_text(
  self,
  text_content: str,
- max_tokens: Optional[int] = None,
- tokenizer_model: Optional[str] = None,
- embedding_model: Optional[str] = None,
- ) -> List[Dict[str, Any]]:
+ max_tokens: int | None = None,
+ tokenizer_model: str | None = None,
+ embedding_model: str | None = None,
+ ) -> list[dict[str, Any]]:
  """
  Process text and return chunks with embeddings (without storing).
 
@@ -673,7 +690,7 @@ class EmbeddingService:
 
  # Prepare results
  results = []
- for i, (chunk_text, vector) in enumerate(zip(chunks, vectors)):
+ for i, (chunk_text, vector) in enumerate(zip(chunks, vectors, strict=False)):
  results.append(
  {
  "chunk_index": i,
@@ -692,8 +709,8 @@ class EmbeddingService:
 
  # Dependency injection helper
  def get_embedding_service(
- embedding_provider: Optional[BaseEmbeddingProvider] = None,
- config: Optional[Dict[str, Any]] = None,
+ embedding_provider: BaseEmbeddingProvider | None = None,
+ config: dict[str, Any] | None = None,
  ) -> EmbeddingService:
  """
  Create EmbeddingService instance with auto-detected or provided embedding provider.
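For reference, a minimal usage sketch of the reworked `EmbeddingService.embed()` API shown in the diff above (not part of the package diff). It assumes the auto-detection in `get_embedding_service()` can find a provider (e.g. an OpenAI key in the environment), and the import path simply mirrors the file path `mdb_engine/embeddings/service.py`.

```python
# Illustrative sketch only - assumes an embedding provider can be auto-detected
# (e.g. OPENAI_API_KEY is set); names are taken from the diff above.
import asyncio

from mdb_engine.embeddings.service import get_embedding_service


async def main() -> None:
    service = get_embedding_service()  # auto-detects or wraps a provider

    # New in 0.2.4: embed() accepts a single string or a list of strings
    single = await service.embed("Hello world")          # -> [[...]] (one vector)
    batch = await service.embed(["chunk 1", "chunk 2"])  # -> [[...], [...]]

    # Deprecated path kept for backward compatibility; delegates to embed()
    legacy = await service.embed_chunks(["chunk 1", "chunk 2"])

    print(len(single), len(batch), len(legacy))


asyncio.run(main())
```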
mdb_engine/exceptions.py CHANGED
@@ -5,7 +5,7 @@ These exceptions provide more specific error types while maintaining
  backward compatibility with RuntimeError.
  """
 
- from typing import Any, Dict, List, Optional
+ from typing import Any
 
 
  class MongoDBEngineError(RuntimeError):
@@ -21,7 +21,7 @@ class MongoDBEngineError(RuntimeError):
  collection_name, etc.)
  """
 
- def __init__(self, message: str, context: Optional[Dict[str, Any]] = None) -> None:
+ def __init__(self, message: str, context: dict[str, Any] | None = None) -> None:
  """
  Initialize the exception.
 
@@ -58,9 +58,9 @@ class InitializationError(MongoDBEngineError):
  def __init__(
  self,
  message: str,
- mongo_uri: Optional[str] = None,
- db_name: Optional[str] = None,
- context: Optional[Dict[str, Any]] = None,
+ mongo_uri: str | None = None,
+ db_name: str | None = None,
+ context: dict[str, Any] | None = None,
  ) -> None:
  """
  Initialize the initialization error.
@@ -99,10 +99,10 @@ class ManifestValidationError(MongoDBEngineError):
  def __init__(
  self,
  message: str,
- error_paths: Optional[List[str]] = None,
- manifest_slug: Optional[str] = None,
- schema_version: Optional[str] = None,
- context: Optional[Dict[str, Any]] = None,
+ error_paths: list[str] | None = None,
+ manifest_slug: str | None = None,
+ schema_version: str | None = None,
+ context: dict[str, Any] | None = None,
  ) -> None:
  """
  Initialize the manifest validation error.
@@ -144,9 +144,9 @@ class ConfigurationError(MongoDBEngineError):
  def __init__(
  self,
  message: str,
- config_key: Optional[str] = None,
- config_value: Optional[Any] = None,
- context: Optional[Dict[str, Any]] = None,
+ config_key: str | None = None,
+ config_value: Any | None = None,
+ context: dict[str, Any] | None = None,
  ) -> None:
  """
  Initialize the configuration error.
@@ -185,10 +185,10 @@ class QueryValidationError(MongoDBEngineError):
  def __init__(
  self,
  message: str,
- query_type: Optional[str] = None,
- operator: Optional[str] = None,
- path: Optional[str] = None,
- context: Optional[Dict[str, Any]] = None,
+ query_type: str | None = None,
+ operator: str | None = None,
+ path: str | None = None,
+ context: dict[str, Any] | None = None,
  ) -> None:
  """
  Initialize the query validation error.
@@ -231,10 +231,10 @@ class ResourceLimitExceeded(MongoDBEngineError):
  def __init__(
  self,
  message: str,
- limit_type: Optional[str] = None,
- limit_value: Optional[Any] = None,
- actual_value: Optional[Any] = None,
- context: Optional[Dict[str, Any]] = None,
+ limit_type: str | None = None,
+ limit_value: Any | None = None,
+ actual_value: Any | None = None,
+ context: dict[str, Any] | None = None,
  ) -> None:
  """
  Initialize the resource limit exceeded error.
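For context, a small sketch of how the modernized exception signatures above might be used (not part of the package diff). The class names and keyword arguments come from the diff; the manifest fields and values are invented for illustration.

```python
# Illustrative only: exercises the ManifestValidationError signature from the diff above.
from mdb_engine.exceptions import ManifestValidationError, MongoDBEngineError


def validate(manifest: dict) -> None:
    # Hypothetical check; the real validation lives in mdb_engine.core.manifest.
    if "slug" not in manifest:
        raise ManifestValidationError(
            "manifest failed schema validation",
            error_paths=["slug"],                 # illustrative
            manifest_slug=manifest.get("name"),
            schema_version="1.0",                 # illustrative
            context={"source": "manifest.json"},
        )


try:
    validate({"name": "demo-app"})
except MongoDBEngineError as exc:  # still catchable as RuntimeError, per the module docstring
    print(f"validation failed: {exc}")
```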
mdb_engine/indexes/helpers.py CHANGED
@@ -6,14 +6,14 @@ in index creation and management.
  """
 
  import logging
- from typing import Any, Dict, List, Optional, Tuple, Union
+ from typing import Any
 
  logger = logging.getLogger(__name__)
 
 
  def normalize_keys(
- keys: Union[Dict[str, Any], List[Tuple[str, Any]]],
- ) -> List[Tuple[str, Any]]:
+ keys: dict[str, Any] | list[tuple[str, Any]],
+ ) -> list[tuple[str, Any]]:
  """
  Normalize index keys to a consistent format.
 
@@ -28,7 +28,7 @@ def normalize_keys(
  return keys
 
 
- def keys_to_dict(keys: Union[Dict[str, Any], List[Tuple[str, Any]]]) -> Dict[str, Any]:
+ def keys_to_dict(keys: dict[str, Any] | list[tuple[str, Any]]) -> dict[str, Any]:
  """
  Convert index keys to dictionary format for comparison.
 
@@ -43,7 +43,7 @@ def keys_to_dict(keys: Union[Dict[str, Any], List[Tuple[str, Any]]]) -> Dict[str
  return {k: v for k, v in keys}
 
 
- def is_id_index(keys: Union[Dict[str, Any], List[Tuple[str, Any]]]) -> bool:
+ def is_id_index(keys: dict[str, Any] | list[tuple[str, Any]]) -> bool:
  """
  Check if index keys target the _id field (which MongoDB creates automatically).
 
@@ -63,10 +63,10 @@ def is_id_index(keys: Union[Dict[str, Any], List[Tuple[str, Any]]]) -> bool:
  async def check_and_update_index(
  index_manager: Any,
  index_name: str,
- expected_keys: Union[Dict[str, Any], List[Tuple[str, Any]]],
- expected_options: Optional[Dict[str, Any]] = None,
+ expected_keys: dict[str, Any] | list[tuple[str, Any]],
+ expected_options: dict[str, Any] | None = None,
  log_prefix: str = "",
- ) -> Tuple[bool, Optional[Dict[str, Any]]]:
+ ) -> tuple[bool, dict[str, Any] | None]:
  """
  Check if an index exists and matches the expected definition.
 
@@ -118,11 +118,11 @@ async def check_and_update_index(
 
 
  def validate_index_definition_basic(
- index_def: Dict[str, Any],
+ index_def: dict[str, Any],
  index_name: str,
- required_fields: List[str],
+ required_fields: list[str],
  log_prefix: str = "",
- ) -> Tuple[bool, Optional[str]]:
+ ) -> tuple[bool, str | None]:
  """
  Basic validation for index definitions.
 
mdb_engine/indexes/manager.py CHANGED
@@ -8,7 +8,7 @@ This module is part of MDB_ENGINE - MongoDB Engine.
 
  import json
  import logging
- from typing import Any, Dict, List
+ from typing import Any
 
  from motor.motor_asyncio import AsyncIOMotorDatabase
  from pymongo.errors import (
@@ -44,7 +44,7 @@ logger = logging.getLogger(__name__)
 
  async def _handle_regular_index(
  index_manager: AsyncAtlasIndexManager,
- index_def: Dict[str, Any],
+ index_def: dict[str, Any],
  index_name: str,
  log_prefix: str,
  ) -> None:
@@ -156,7 +156,7 @@ async def _handle_regular_index(
 
  async def _handle_ttl_index(
  index_manager: AsyncAtlasIndexManager,
- index_def: Dict[str, Any],
+ index_def: dict[str, Any],
  index_name: str,
  log_prefix: str,
  ) -> None:
@@ -203,7 +203,7 @@ async def _handle_ttl_index(
 
  async def _handle_partial_index(
  index_manager: AsyncAtlasIndexManager,
- index_def: Dict[str, Any],
+ index_def: dict[str, Any],
  index_name: str,
  log_prefix: str,
  ) -> None:
@@ -269,7 +269,7 @@ async def _handle_partial_index(
 
  async def _handle_text_index(
  index_manager: AsyncAtlasIndexManager,
- index_def: Dict[str, Any],
+ index_def: dict[str, Any],
  index_name: str,
  log_prefix: str,
  ) -> None:
@@ -335,7 +335,7 @@ async def _handle_text_index(
 
  async def _handle_geospatial_index(
  index_manager: AsyncAtlasIndexManager,
- index_def: Dict[str, Any],
+ index_def: dict[str, Any],
  index_name: str,
  log_prefix: str,
  ) -> None:
@@ -400,7 +400,7 @@ async def _handle_geospatial_index(
 
  async def _handle_search_index(
  index_manager: AsyncAtlasIndexManager,
- index_def: Dict[str, Any],
+ index_def: dict[str, Any],
  index_name: str,
  index_type: str,
  slug: str,
@@ -502,7 +502,7 @@ async def _handle_search_index(
 
  async def _handle_hybrid_index(
  index_manager: AsyncAtlasIndexManager,
- index_def: Dict[str, Any],
+ index_def: dict[str, Any],
  index_name: str,
  slug: str,
  log_prefix: str,
@@ -692,7 +692,7 @@ async def run_index_creation_for_collection(
  db: AsyncIOMotorDatabase,
  slug: str,
  collection_name: str,
- index_definitions: List[Dict[str, Any]],
+ index_definitions: list[dict[str, Any]],
  ):
  """Create or update indexes for a collection based on index definitions."""
  log_prefix = f"[{slug} -> {collection_name}]"
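A brief sketch of the modernized helper signatures from mdb_engine/indexes/helpers.py above (not part of the package diff). The key tuples are invented examples; behavior is inferred from the docstrings and the small function bodies visible in the diff.

```python
# Illustrative only: index-key helpers from mdb_engine/indexes/helpers.py.
from mdb_engine.indexes.helpers import is_id_index, keys_to_dict

compound = [("user_id", 1), ("created_at", -1)]  # made-up compound index keys

print(keys_to_dict(compound))     # {'user_id': 1, 'created_at': -1}
print(is_id_index([("_id", 1)]))  # expected True: _id index is created by MongoDB itself
print(is_id_index(compound))      # expected False
```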
mdb_engine/memory/README.md CHANGED
@@ -10,6 +10,8 @@ Mem0.ai integration for intelligent memory management in MDB_ENGINE applications
  - **Semantic Search**: Vector-based semantic memory search
  - **Memory Inference**: Optional LLM-based memory inference and summarization
  - **Graph Memory**: Optional graph-based memory relationships (requires graph store config)
+ - **Bucket Organization**: Built-in support for organizing memories into buckets (general, file, conversation, etc.)
+ - **Dual Storage**: Store both extracted facts AND raw content for richer context retrieval
 
  ## Installation
 
@@ -203,6 +205,92 @@ await memory_service.delete(memory_id="memory_123", user_id="user123")
  await memory_service.delete_all(user_id="user123")
  ```
 
+ ### Bucket Organization
+
+ Organize memories into buckets for better management:
+
+ ```python
+ # Add memory to a bucket
+ memory = await memory_service.add(
+ messages=[{"role": "user", "content": "I love Python programming"}],
+ user_id="user123",
+ bucket_id="coding:user123",
+ bucket_type="general",
+ metadata={"category": "coding"}
+ )
+
+ # Get all buckets for a user
+ buckets = await memory_service.get_buckets(user_id="user123")
+
+ # Get only file buckets
+ file_buckets = await memory_service.get_buckets(
+ user_id="user123",
+ bucket_type="file"
+ )
+
+ # Get all memories in a specific bucket
+ bucket_memories = await memory_service.get_bucket_memories(
+ bucket_id="file:document.pdf:user123",
+ user_id="user123"
+ )
+ ```
+
+ ### Store Both Facts and Raw Content
+
+ Store extracted facts alongside raw content for richer context:
+
+ ```python
+ # Store both extracted facts and raw content
+ facts, raw_memory_id = await memory_service.add_with_raw_content(
+ messages=[{"role": "user", "content": "Extract key facts from this document..."}],
+ raw_content="Full document text here...",
+ user_id="user123",
+ bucket_id="file:document.pdf:user123",
+ bucket_type="file",
+ infer=True # Extract facts
+ )
+
+ # Later, retrieve raw content when needed
+ raw_content = await memory_service.get_raw_content(
+ bucket_id="file:document.pdf:user123",
+ user_id="user123"
+ )
+
+ # Or include raw content when getting bucket memories
+ all_memories = await memory_service.get_bucket_memories(
+ bucket_id="file:document.pdf:user123",
+ user_id="user123",
+ include_raw_content=True
+ )
+ ```
+
+ ### Bucket Types
+
+ Common bucket types:
+ - **`general`**: General purpose buckets (e.g., category-based)
+ - **`file`**: File-specific buckets (one per uploaded file)
+ - **`conversation`**: Conversation-specific buckets
+ - **`user`**: User-level buckets
+
+ ```python
+ # General bucket (category-based)
+ await memory_service.add(
+ messages=[{"role": "user", "content": "I prefer dark mode"}],
+ user_id="user123",
+ bucket_id="preferences:user123",
+ bucket_type="general"
+ )
+
+ # File bucket
+ await memory_service.add(
+ messages=[{"role": "user", "content": "Document content..."}],
+ user_id="user123",
+ bucket_id="file:report.pdf:user123",
+ bucket_type="file",
+ metadata={"filename": "report.pdf"}
+ )
+ ```
+
  ### Memory Inference
 
  With `infer=True`, the service can generate insights and summaries:
@@ -241,8 +329,11 @@ Mem0MemoryService(
 
  #### Methods
 
- - `add(messages, user_id, metadata=None)` - Add single memory
- - `add_all(memories)` - Add multiple memories
+ - `add(messages, user_id, metadata=None, bucket_id=None, bucket_type=None, store_raw_content=False, raw_content=None)` - Add single memory with optional bucket and raw content storage
+ - `add_with_raw_content(messages, raw_content, user_id, bucket_id=None, bucket_type=None)` - Store both extracted facts and raw content
+ - `get_buckets(user_id, bucket_type=None, limit=None)` - Get all buckets for a user
+ - `get_bucket_memories(bucket_id, user_id, include_raw_content=False, limit=None)` - Get all memories in a bucket
+ - `get_raw_content(bucket_id, user_id)` - Get raw content for a bucket
  - `search(query, user_id, limit=10, filters=None)` - Search memories
  - `get(memory_id, user_id)` - Get specific memory
  - `get_all(user_id, filters=None)` - Get all memories for user