mdb-engine 0.1.6__py3-none-any.whl → 0.4.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. mdb_engine/__init__.py +116 -11
  2. mdb_engine/auth/ARCHITECTURE.md +112 -0
  3. mdb_engine/auth/README.md +654 -11
  4. mdb_engine/auth/__init__.py +136 -29
  5. mdb_engine/auth/audit.py +592 -0
  6. mdb_engine/auth/base.py +252 -0
  7. mdb_engine/auth/casbin_factory.py +265 -70
  8. mdb_engine/auth/config_defaults.py +5 -5
  9. mdb_engine/auth/config_helpers.py +19 -18
  10. mdb_engine/auth/cookie_utils.py +12 -16
  11. mdb_engine/auth/csrf.py +483 -0
  12. mdb_engine/auth/decorators.py +10 -16
  13. mdb_engine/auth/dependencies.py +69 -71
  14. mdb_engine/auth/helpers.py +3 -3
  15. mdb_engine/auth/integration.py +61 -88
  16. mdb_engine/auth/jwt.py +11 -15
  17. mdb_engine/auth/middleware.py +79 -35
  18. mdb_engine/auth/oso_factory.py +21 -41
  19. mdb_engine/auth/provider.py +270 -171
  20. mdb_engine/auth/rate_limiter.py +505 -0
  21. mdb_engine/auth/restrictions.py +21 -36
  22. mdb_engine/auth/session_manager.py +24 -41
  23. mdb_engine/auth/shared_middleware.py +977 -0
  24. mdb_engine/auth/shared_users.py +775 -0
  25. mdb_engine/auth/token_lifecycle.py +10 -12
  26. mdb_engine/auth/token_store.py +17 -32
  27. mdb_engine/auth/users.py +99 -159
  28. mdb_engine/auth/utils.py +236 -42
  29. mdb_engine/cli/commands/generate.py +546 -10
  30. mdb_engine/cli/commands/validate.py +3 -7
  31. mdb_engine/cli/utils.py +7 -7
  32. mdb_engine/config.py +13 -28
  33. mdb_engine/constants.py +65 -0
  34. mdb_engine/core/README.md +117 -6
  35. mdb_engine/core/__init__.py +39 -7
  36. mdb_engine/core/app_registration.py +31 -50
  37. mdb_engine/core/app_secrets.py +289 -0
  38. mdb_engine/core/connection.py +20 -12
  39. mdb_engine/core/encryption.py +222 -0
  40. mdb_engine/core/engine.py +2862 -115
  41. mdb_engine/core/index_management.py +12 -16
  42. mdb_engine/core/manifest.py +628 -204
  43. mdb_engine/core/ray_integration.py +436 -0
  44. mdb_engine/core/seeding.py +13 -21
  45. mdb_engine/core/service_initialization.py +20 -30
  46. mdb_engine/core/types.py +40 -43
  47. mdb_engine/database/README.md +140 -17
  48. mdb_engine/database/__init__.py +17 -6
  49. mdb_engine/database/abstraction.py +37 -50
  50. mdb_engine/database/connection.py +51 -30
  51. mdb_engine/database/query_validator.py +367 -0
  52. mdb_engine/database/resource_limiter.py +204 -0
  53. mdb_engine/database/scoped_wrapper.py +747 -237
  54. mdb_engine/dependencies.py +427 -0
  55. mdb_engine/di/__init__.py +34 -0
  56. mdb_engine/di/container.py +247 -0
  57. mdb_engine/di/providers.py +206 -0
  58. mdb_engine/di/scopes.py +139 -0
  59. mdb_engine/embeddings/README.md +54 -24
  60. mdb_engine/embeddings/__init__.py +31 -24
  61. mdb_engine/embeddings/dependencies.py +38 -155
  62. mdb_engine/embeddings/service.py +78 -75
  63. mdb_engine/exceptions.py +104 -12
  64. mdb_engine/indexes/README.md +30 -13
  65. mdb_engine/indexes/__init__.py +1 -0
  66. mdb_engine/indexes/helpers.py +11 -11
  67. mdb_engine/indexes/manager.py +59 -123
  68. mdb_engine/memory/README.md +95 -4
  69. mdb_engine/memory/__init__.py +1 -2
  70. mdb_engine/memory/service.py +363 -1168
  71. mdb_engine/observability/README.md +4 -2
  72. mdb_engine/observability/__init__.py +26 -9
  73. mdb_engine/observability/health.py +17 -17
  74. mdb_engine/observability/logging.py +10 -10
  75. mdb_engine/observability/metrics.py +40 -19
  76. mdb_engine/repositories/__init__.py +34 -0
  77. mdb_engine/repositories/base.py +325 -0
  78. mdb_engine/repositories/mongo.py +233 -0
  79. mdb_engine/repositories/unit_of_work.py +166 -0
  80. mdb_engine/routing/README.md +1 -1
  81. mdb_engine/routing/__init__.py +1 -3
  82. mdb_engine/routing/websockets.py +41 -75
  83. mdb_engine/utils/__init__.py +3 -1
  84. mdb_engine/utils/mongo.py +117 -0
  85. mdb_engine-0.4.12.dist-info/METADATA +492 -0
  86. mdb_engine-0.4.12.dist-info/RECORD +97 -0
  87. {mdb_engine-0.1.6.dist-info → mdb_engine-0.4.12.dist-info}/WHEEL +1 -1
  88. mdb_engine-0.1.6.dist-info/METADATA +0 -213
  89. mdb_engine-0.1.6.dist-info/RECORD +0 -75
  90. {mdb_engine-0.1.6.dist-info → mdb_engine-0.4.12.dist-info}/entry_points.txt +0 -0
  91. {mdb_engine-0.1.6.dist-info → mdb_engine-0.4.12.dist-info}/licenses/LICENSE +0 -0
  92. {mdb_engine-0.1.6.dist-info → mdb_engine-0.4.12.dist-info}/top_level.txt +0 -0
--- a/mdb_engine/indexes/manager.py
+++ b/mdb_engine/indexes/manager.py
@@ -8,11 +8,15 @@ This module is part of MDB_ENGINE - MongoDB Engine.
 
 import json
 import logging
-from typing import Any, Dict, List
+from typing import Any
 
 from motor.motor_asyncio import AsyncIOMotorDatabase
-from pymongo.errors import (CollectionInvalid, ConnectionFailure,
-                            OperationFailure, ServerSelectionTimeoutError)
+from pymongo.errors import (
+    CollectionInvalid,
+    ConnectionFailure,
+    OperationFailure,
+    ServerSelectionTimeoutError,
+)
 
 # Import constants
 from ..constants import INDEX_TYPE_REGULAR, INDEX_TYPE_TTL, MIN_TTL_SECONDS
@@ -25,8 +29,12 @@ except ImportError:
     logging.warning("AsyncAtlasIndexManager not available")
 
 # Import helper functions
-from .helpers import (check_and_update_index, is_id_index, normalize_keys,
-                      validate_index_definition_basic)
+from .helpers import (
+    check_and_update_index,
+    is_id_index,
+    normalize_keys,
+    validate_index_definition_basic,
+)
 
 # Check if index manager is available
 INDEX_MANAGER_AVAILABLE = AsyncAtlasIndexManager is not None
@@ -36,7 +44,7 @@ logger = logging.getLogger(__name__)
 
 async def _handle_regular_index(
     index_manager: AsyncAtlasIndexManager,
-    index_def: Dict[str, Any],
+    index_def: dict[str, Any],
     index_name: str,
     log_prefix: str,
 ) -> None:
@@ -48,10 +56,7 @@ async def _handle_regular_index(
     is_valid, error_msg = validate_index_definition_basic(
         index_def, index_name, ["keys"], log_prefix
     )
-    logger.info(
-        f"{log_prefix} Validation result: is_valid={is_valid}, "
-        f"error_msg={error_msg}"
-    )
+    logger.info(f"{log_prefix} Validation result: is_valid={is_valid}, " f"error_msg={error_msg}")
     if not is_valid:
         logger.error(f"{log_prefix} ❌ Validation failed: {error_msg}")
         return
@@ -78,8 +83,7 @@ async def _handle_regular_index(
         index_manager, index_name, keys, options, log_prefix
     )
     logger.debug(
-        f"{log_prefix} Index exists check result: exists={exists}, "
-        f"existing={existing}"
+        f"{log_prefix} Index exists check result: exists={exists}, " f"existing={existing}"
     )
 
     if exists and existing:
@@ -113,8 +117,7 @@
 
         if verify_index:
             logger.info(
-                f"{log_prefix} ✅ Verified index '{index_name}' exists "
-                f"after {waited:.1f}s."
+                f"{log_prefix} ✅ Verified index '{index_name}' exists " f"after {waited:.1f}s."
             )
             break
 
@@ -153,7 +156,7 @@
 
 async def _handle_ttl_index(
     index_manager: AsyncAtlasIndexManager,
-    index_def: Dict[str, Any],
+    index_def: dict[str, Any],
     index_name: str,
     log_prefix: str,
 ) -> None:
@@ -168,11 +171,7 @@ async def _handle_ttl_index(
 
     options = index_def.get("options", {})
     expire_after = options.get("expireAfterSeconds")
-    if (
-        not expire_after
-        or not isinstance(expire_after, int)
-        or expire_after < MIN_TTL_SECONDS
-    ):
+    if not expire_after or not isinstance(expire_after, int) or expire_after < MIN_TTL_SECONDS:
         logger.warning(
             f"{log_prefix} TTL index '{index_name}' missing or "
             f"invalid 'expireAfterSeconds' in options. "
@@ -204,7 +203,7 @@
 
 async def _handle_partial_index(
     index_manager: AsyncAtlasIndexManager,
-    index_def: Dict[str, Any],
+    index_def: dict[str, Any],
     index_name: str,
     log_prefix: str,
 ) -> None:
@@ -270,7 +269,7 @@ async def _handle_partial_index(
 
 async def _handle_text_index(
     index_manager: AsyncAtlasIndexManager,
-    index_def: Dict[str, Any],
+    index_def: dict[str, Any],
     index_name: str,
     log_prefix: str,
 ) -> None:
@@ -290,15 +289,9 @@
     text_keys = keys
 
     has_text = any(
-        (
-            isinstance(k, list)
-            and len(k) >= 2
-            and (k[1] == "text" or k[1] == "TEXT" or k[1] == 1)
-        )
+        (isinstance(k, list) and len(k) >= 2 and (k[1] == "text" or k[1] == "TEXT" or k[1] == 1))
         or (
-            isinstance(k, tuple)
-            and len(k) >= 2
-            and (k[1] == "text" or k[1] == "TEXT" or k[1] == 1)
+            isinstance(k, tuple) and len(k) >= 2 and (k[1] == "text" or k[1] == "TEXT" or k[1] == 1)
         )
         for k in text_keys
     ) or any(
@@ -335,16 +328,14 @@
         logger.info(f"{log_prefix} Text index '{index_name}' matches; skipping.")
         return
 
-    logger.info(
-        f"{log_prefix} Creating text index '{index_name}' on field(s) {text_keys}..."
-    )
+    logger.info(f"{log_prefix} Creating text index '{index_name}' on field(s) {text_keys}...")
     await index_manager.create_index(text_keys, **options)
     logger.info(f"{log_prefix} ✔️ Created text index '{index_name}'.")
 
 
 async def _handle_geospatial_index(
     index_manager: AsyncAtlasIndexManager,
-    index_def: Dict[str, Any],
+    index_def: dict[str, Any],
     index_name: str,
     log_prefix: str,
 ) -> None:
@@ -369,9 +360,7 @@ async def _handle_geospatial_index(
         (isinstance(k, list) and len(k) >= 2 and k[1] in geo_types)
         or (isinstance(k, tuple) and len(k) >= 2 and k[1] in geo_types)
         for k in geo_keys
-    ) or any(
-        v in geo_types for k, v in (keys.items() if isinstance(keys, dict) else [])
-    )
+    ) or any(v in geo_types for k, v in (keys.items() if isinstance(keys, dict) else []))
 
     if not has_geo:
         logger.warning(
@@ -399,14 +388,11 @@
             )
             await index_manager.drop_index(index_name)
         else:
-            logger.info(
-                f"{log_prefix} Geospatial index '{index_name}' matches; skipping."
-            )
+            logger.info(f"{log_prefix} Geospatial index '{index_name}' matches; skipping.")
             return
 
     logger.info(
-        f"{log_prefix} Creating geospatial index '{index_name}' "
-        f"on field(s) {geo_keys}..."
+        f"{log_prefix} Creating geospatial index '{index_name}' " f"on field(s) {geo_keys}..."
     )
     await index_manager.create_index(geo_keys, **options)
     logger.info(f"{log_prefix} ✔️ Created geospatial index '{index_name}'.")
@@ -414,7 +400,7 @@
 
 async def _handle_search_index(
     index_manager: AsyncAtlasIndexManager,
-    index_def: Dict[str, Any],
+    index_def: dict[str, Any],
     index_name: str,
     index_type: str,
     slug: str,
@@ -447,21 +433,14 @@
     )
     existing_index = await index_manager.get_search_index(index_name)
     if existing_index:
-        current_def = existing_index.get(
-            "latestDefinition", existing_index.get("definition")
-        )
+        current_def = existing_index.get("latestDefinition", existing_index.get("definition"))
         normalized_current = normalize_json_def(current_def)
         normalized_expected = normalize_json_def(definition)
 
         if normalized_current == normalized_expected:
             logger.info(f"{log_prefix} Search index '{index_name}' definition matches.")
-            if (
-                not existing_index.get("queryable")
-                and existing_index.get("status") != "FAILED"
-            ):
-                logger.info(
-                    f"{log_prefix} Index '{index_name}' not queryable yet; waiting."
-                )
+            if not existing_index.get("queryable") and existing_index.get("status") != "FAILED":
+                logger.info(f"{log_prefix} Index '{index_name}' not queryable yet; waiting.")
                 await index_manager._wait_for_search_index_ready(
                     index_name, index_manager.DEFAULT_SEARCH_TIMEOUT
                 )
@@ -479,13 +458,10 @@
                 logger.info(f"{log_prefix} Index '{index_name}' is ready.")
         else:
             logger.warning(
-                f"{log_prefix} Search index '{index_name}' "
-                f"definition changed; updating."
+                f"{log_prefix} Search index '{index_name}' " f"definition changed; updating."
             )
             current_fields = (
-                normalized_current.get("fields", [])
-                if isinstance(normalized_current, dict)
-                else []
+                normalized_current.get("fields", []) if isinstance(normalized_current, dict) else []
             )
             expected_fields = (
                 normalized_expected.get("fields", [])
@@ -493,12 +469,8 @@
                 else []
             )
 
-            current_paths = [
-                f.get("path", "?") for f in current_fields if isinstance(f, dict)
-            ]
-            expected_paths = [
-                f.get("path", "?") for f in expected_fields if isinstance(f, dict)
-            ]
+            current_paths = [f.get("path", "?") for f in current_fields if isinstance(f, dict)]
+            expected_paths = [f.get("path", "?") for f in expected_fields if isinstance(f, dict)]
 
             logger.info(f"{log_prefix} Current index filter fields: {current_paths}")
             logger.info(f"{log_prefix} Expected index filter fields: {expected_paths}")
@@ -530,7 +502,7 @@
 
 async def _handle_hybrid_index(
     index_manager: AsyncAtlasIndexManager,
-    index_def: Dict[str, Any],
+    index_def: dict[str, Any],
     index_name: str,
     slug: str,
     log_prefix: str,
@@ -588,8 +560,7 @@
 
     # Process vector index
     logger.info(
-        f"{log_prefix} Processing vector index '{vector_index_name}' "
-        f"for hybrid search..."
+        f"{log_prefix} Processing vector index '{vector_index_name}' " f"for hybrid search..."
    )
     existing_vector_index = await index_manager.get_search_index(vector_index_name)
     if existing_vector_index:
@@ -600,9 +571,7 @@
         normalized_expected_vector = normalize_json_def(vector_definition)
 
         if normalized_current_vector == normalized_expected_vector:
-            logger.info(
-                f"{log_prefix} Vector index '{vector_index_name}' definition matches."
-            )
+            logger.info(f"{log_prefix} Vector index '{vector_index_name}' definition matches.")
             if (
                 not existing_vector_index.get("queryable")
                 and existing_vector_index.get("status") != "FAILED"
@@ -614,9 +583,7 @@
                 await index_manager._wait_for_search_index_ready(
                     vector_index_name, index_manager.DEFAULT_SEARCH_TIMEOUT
                 )
-                logger.info(
-                    f"{log_prefix} Vector index '{vector_index_name}' now ready."
-                )
+                logger.info(f"{log_prefix} Vector index '{vector_index_name}' now ready.")
             elif existing_vector_index.get("status") == "FAILED":
                 logger.error(
                     f"{log_prefix} Vector index '{vector_index_name}' "
@@ -624,13 +591,10 @@
                     f"Check Atlas UI for detailed error messages."
                 )
             else:
-                logger.info(
-                    f"{log_prefix} Vector index '{vector_index_name}' is ready."
-                )
+                logger.info(f"{log_prefix} Vector index '{vector_index_name}' is ready.")
         else:
             logger.warning(
-                f"{log_prefix} Vector index '{vector_index_name}' "
-                f"definition changed; updating."
+                f"{log_prefix} Vector index '{vector_index_name}' " f"definition changed; updating."
             )
             # Type 4: Let index update errors bubble up to framework handler
             await index_manager.update_search_index(
@@ -639,8 +603,7 @@
                 wait_for_ready=True,
             )
             logger.info(
-                f"{log_prefix} ✔️ Successfully updated vector index "
-                f"'{vector_index_name}'."
+                f"{log_prefix} ✔️ Successfully updated vector index " f"'{vector_index_name}'."
             )
     else:
         logger.info(f"{log_prefix} Creating new vector index '{vector_index_name}'...")
@@ -653,10 +616,7 @@
         logger.info(f"{log_prefix} ✔️ Created vector index '{vector_index_name}'.")
 
     # Process text index
-    logger.info(
-        f"{log_prefix} Processing text index '{text_index_name}' "
-        f"for hybrid search..."
-    )
+    logger.info(f"{log_prefix} Processing text index '{text_index_name}' " f"for hybrid search...")
     existing_text_index = await index_manager.get_search_index(text_index_name)
     if existing_text_index:
         current_text_def = existing_text_index.get(
@@ -666,16 +626,13 @@
         normalized_expected_text = normalize_json_def(text_definition)
 
         if normalized_current_text == normalized_expected_text:
-            logger.info(
-                f"{log_prefix} Text index '{text_index_name}' definition matches."
-            )
+            logger.info(f"{log_prefix} Text index '{text_index_name}' definition matches.")
             if (
                 not existing_text_index.get("queryable")
                 and existing_text_index.get("status") != "FAILED"
             ):
                 logger.info(
-                    f"{log_prefix} Text index '{text_index_name}' "
-                    f"not queryable yet; waiting."
+                    f"{log_prefix} Text index '{text_index_name}' " f"not queryable yet; waiting."
                 )
                 await index_manager._wait_for_search_index_ready(
                     text_index_name, index_manager.DEFAULT_SEARCH_TIMEOUT
@@ -690,8 +647,7 @@
                 logger.info(f"{log_prefix} Text index '{text_index_name}' is ready.")
         else:
             logger.warning(
-                f"{log_prefix} Text index '{text_index_name}' "
-                f"definition changed; updating."
+                f"{log_prefix} Text index '{text_index_name}' " f"definition changed; updating."
             )
             # Type 4: Let index update errors bubble up to framework handler
             await index_manager.update_search_index(
@@ -699,10 +655,7 @@
                 definition=text_definition,
                 wait_for_ready=True,
             )
-            logger.info(
-                f"{log_prefix} ✔️ Successfully updated text index "
-                f"'{text_index_name}'."
-            )
+            logger.info(f"{log_prefix} ✔️ Successfully updated text index " f"'{text_index_name}'.")
     else:
         logger.info(f"{log_prefix} Creating new text index '{text_index_name}'...")
         await index_manager.create_search_index(
@@ -739,7 +692,7 @@ async def run_index_creation_for_collection(
     db: AsyncIOMotorDatabase,
     slug: str,
     collection_name: str,
-    index_definitions: List[Dict[str, Any]],
+    index_definitions: list[dict[str, Any]],
 ):
     """Create or update indexes for a collection based on index definitions."""
     log_prefix = f"[{slug} -> {collection_name}]"
@@ -758,20 +711,16 @@
         # Try to create the collection explicitly
         await db.create_collection(collection_name)
         logger.debug(
-            f"{log_prefix} Created collection '{collection_name}' "
-            f"for index operations."
+            f"{log_prefix} Created collection '{collection_name}' " f"for index operations."
         )
     except CollectionInvalid as e:
         if "already exists" in str(e).lower():
             # Collection already exists, which is fine
-            logger.debug(
-                f"{log_prefix} Collection '{collection_name}' already exists."
-            )
+            logger.debug(f"{log_prefix} Collection '{collection_name}' already exists.")
         else:
             # Some other CollectionInvalid error - log but continue
             logger.warning(
-                f"{log_prefix} CollectionInvalid when ensuring collection "
-                f"exists: {e}"
+                f"{log_prefix} CollectionInvalid when ensuring collection " f"exists: {e}"
             )
     except (
         OperationFailure,
@@ -794,8 +743,7 @@
         ServerSelectionTimeoutError,
     ) as ensure_error:
         logger.warning(
-            f"{log_prefix} Could not ensure collection exists: "
-            f"{ensure_error}"
+            f"{log_prefix} Could not ensure collection exists: " f"{ensure_error}"
        )
 
     index_manager = AsyncAtlasIndexManager(real_collection)
@@ -832,37 +780,25 @@
         index_type = index_def.get("type")
         try:
             if index_type == INDEX_TYPE_REGULAR:
-                await _handle_regular_index(
-                    index_manager, index_def, index_name, log_prefix
-                )
+                await _handle_regular_index(index_manager, index_def, index_name, log_prefix)
                 # Wait for index to be ready after creation
                 import asyncio
 
                 await asyncio.sleep(0.5)  # Give MongoDB time to make index visible
             elif index_type == INDEX_TYPE_TTL:
-                await _handle_ttl_index(
-                    index_manager, index_def, index_name, log_prefix
-                )
+                await _handle_ttl_index(index_manager, index_def, index_name, log_prefix)
             elif index_type == "partial":
-                await _handle_partial_index(
-                    index_manager, index_def, index_name, log_prefix
-                )
+                await _handle_partial_index(index_manager, index_def, index_name, log_prefix)
             elif index_type == "text":
-                await _handle_text_index(
-                    index_manager, index_def, index_name, log_prefix
-                )
+                await _handle_text_index(index_manager, index_def, index_name, log_prefix)
             elif index_type == "geospatial":
-                await _handle_geospatial_index(
-                    index_manager, index_def, index_name, log_prefix
-                )
+                await _handle_geospatial_index(index_manager, index_def, index_name, log_prefix)
             elif index_type in ("vectorSearch", "search"):
                 await _handle_search_index(
                     index_manager, index_def, index_name, index_type, slug, log_prefix
                 )
             elif index_type == "hybrid":
-                await _handle_hybrid_index(
-                    index_manager, index_def, index_name, slug, log_prefix
-                )
+                await _handle_hybrid_index(index_manager, index_def, index_name, slug, log_prefix)
             else:
                 from ..constants import SUPPORTED_INDEX_TYPES
 
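For orientation, here is a minimal sketch of driving the reworked `run_index_creation_for_collection` signature shown above. The `type`/`keys`/`options` shape of each definition is taken from the handlers in this diff; the `name` key, the literal type strings `"regular"` and `"ttl"` (the diff only shows the `INDEX_TYPE_REGULAR`/`INDEX_TYPE_TTL` constants), and the connection details are assumptions.

```python
import asyncio

from motor.motor_asyncio import AsyncIOMotorClient

from mdb_engine.indexes.manager import run_index_creation_for_collection

# Hypothetical definitions: "type", "keys", and "options" match the handler
# code above; "name" and the literal type strings are assumed.
INDEX_DEFINITIONS = [
    {
        "name": "created_at_idx",
        "type": "regular",  # assumed value of INDEX_TYPE_REGULAR
        "keys": [("created_at", 1)],
        "options": {},
    },
    {
        "name": "session_ttl_idx",
        "type": "ttl",  # assumed value of INDEX_TYPE_TTL
        "keys": [("expires_at", 1)],
        # _handle_ttl_index requires an int expireAfterSeconds >= MIN_TTL_SECONDS
        "options": {"expireAfterSeconds": 3600},
    },
]


async def main() -> None:
    # Assumed connection string and database name
    db = AsyncIOMotorClient("mongodb://localhost:27017")["my_app_db"]
    await run_index_creation_for_collection(db, "my_app", "sessions", INDEX_DEFINITIONS)


if __name__ == "__main__":
    asyncio.run(main())
```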
--- a/mdb_engine/memory/README.md
+++ b/mdb_engine/memory/README.md
@@ -10,6 +10,8 @@ Mem0.ai integration for intelligent memory management in MDB_ENGINE applications
 - **Semantic Search**: Vector-based semantic memory search
 - **Memory Inference**: Optional LLM-based memory inference and summarization
 - **Graph Memory**: Optional graph-based memory relationships (requires graph store config)
+- **Bucket Organization**: Built-in support for organizing memories into buckets (general, file, conversation, etc.)
+- **Dual Storage**: Store both extracted facts AND raw content for richer context retrieval
 
 ## Installation
 
@@ -203,6 +205,92 @@ await memory_service.delete(memory_id="memory_123", user_id="user123")
 await memory_service.delete_all(user_id="user123")
 ```
 
+### Bucket Organization
+
+Organize memories into buckets for better management:
+
+```python
+# Add memory to a bucket
+memory = await memory_service.add(
+    messages=[{"role": "user", "content": "I love Python programming"}],
+    user_id="user123",
+    bucket_id="coding:user123",
+    bucket_type="general",
+    metadata={"category": "coding"}
+)
+
+# Get all buckets for a user
+buckets = await memory_service.get_buckets(user_id="user123")
+
+# Get only file buckets
+file_buckets = await memory_service.get_buckets(
+    user_id="user123",
+    bucket_type="file"
+)
+
+# Get all memories in a specific bucket
+bucket_memories = await memory_service.get_bucket_memories(
+    bucket_id="file:document.pdf:user123",
+    user_id="user123"
+)
+```
+
+### Store Both Facts and Raw Content
+
+Store extracted facts alongside raw content for richer context:
+
+```python
+# Store both extracted facts and raw content
+facts, raw_memory_id = await memory_service.add_with_raw_content(
+    messages=[{"role": "user", "content": "Extract key facts from this document..."}],
+    raw_content="Full document text here...",
+    user_id="user123",
+    bucket_id="file:document.pdf:user123",
+    bucket_type="file",
+    infer=True  # Extract facts
+)
+
+# Later, retrieve raw content when needed
+raw_content = await memory_service.get_raw_content(
+    bucket_id="file:document.pdf:user123",
+    user_id="user123"
+)
+
+# Or include raw content when getting bucket memories
+all_memories = await memory_service.get_bucket_memories(
+    bucket_id="file:document.pdf:user123",
+    user_id="user123",
+    include_raw_content=True
+)
+```
+
+### Bucket Types
+
+Common bucket types:
+- **`general`**: General purpose buckets (e.g., category-based)
+- **`file`**: File-specific buckets (one per uploaded file)
+- **`conversation`**: Conversation-specific buckets
+- **`user`**: User-level buckets
+
+```python
+# General bucket (category-based)
+await memory_service.add(
+    messages=[{"role": "user", "content": "I prefer dark mode"}],
+    user_id="user123",
+    bucket_id="preferences:user123",
+    bucket_type="general"
+)
+
+# File bucket
+await memory_service.add(
+    messages=[{"role": "user", "content": "Document content..."}],
+    user_id="user123",
+    bucket_id="file:report.pdf:user123",
+    bucket_type="file",
+    metadata={"filename": "report.pdf"}
+)
+```
+
 ### Memory Inference
 
 With `infer=True`, the service can generate insights and summaries:
@@ -241,8 +329,11 @@ Mem0MemoryService(
 
 #### Methods
 
-- `add(messages, user_id, metadata=None)` - Add single memory
-- `add_all(memories)` - Add multiple memories
+- `add(messages, user_id, metadata=None, bucket_id=None, bucket_type=None, store_raw_content=False, raw_content=None)` - Add single memory with optional bucket and raw content storage
+- `add_with_raw_content(messages, raw_content, user_id, bucket_id=None, bucket_type=None)` - Store both extracted facts and raw content
+- `get_buckets(user_id, bucket_type=None, limit=None)` - Get all buckets for a user
+- `get_bucket_memories(bucket_id, user_id, include_raw_content=False, limit=None)` - Get all memories in a bucket
+- `get_raw_content(bucket_id, user_id)` - Get raw content for a bucket
 - `search(query, user_id, limit=10, filters=None)` - Search memories
 - `get(memory_id, user_id)` - Get specific memory
 - `get_all(user_id, filters=None)` - Get all memories for user
@@ -400,8 +491,8 @@ try:
     )
 except Mem0MemoryServiceError as e:
     print(f"Memory service error: {e}")
-except Exception as e:
-    print(f"Unexpected error: {e}")
+except (ValueError, TypeError, ConnectionError) as e:
+    print(f"Configuration or connection error: {e}")
 ```
 
 ## Environment Variables Reference
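One pattern worth noting in the new README content: every `bucket_id` in the examples is a colon-separated string (`"preferences:user123"`, `"file:report.pdf:user123"`). The helper below is purely illustrative, not part of the package API; it just encodes that naming convention.

```python
def make_bucket_id(bucket_type: str, resource: str, user_id: str) -> str:
    """Hypothetical helper mirroring the colon-separated bucket_id
    convention seen in the README examples; not an mdb_engine API."""
    if bucket_type == "file":
        # File buckets embed the filename: "file:<filename>:<user_id>"
        return f"file:{resource}:{user_id}"
    # Other buckets use "<category>:<user_id>"
    return f"{resource}:{user_id}"


assert make_bucket_id("file", "report.pdf", "user123") == "file:report.pdf:user123"
assert make_bucket_id("general", "preferences", "user123") == "preferences:user123"
```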
--- a/mdb_engine/memory/__init__.py
+++ b/mdb_engine/memory/__init__.py
@@ -20,8 +20,7 @@ Dependencies:
 """
 
 # Import service components (mem0 import is lazy within service.py)
-from .service import (Mem0MemoryService, Mem0MemoryServiceError,
-                      get_memory_service)
+from .service import Mem0MemoryService, Mem0MemoryServiceError, get_memory_service
 
 __all__ = [
     "Mem0MemoryService",