mdb-engine: mdb_engine-0.2.1-py3-none-any.whl → mdb_engine-0.2.4-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mdb_engine/__init__.py +7 -1
- mdb_engine/auth/README.md +6 -0
- mdb_engine/auth/audit.py +40 -40
- mdb_engine/auth/base.py +3 -3
- mdb_engine/auth/casbin_factory.py +6 -6
- mdb_engine/auth/config_defaults.py +5 -5
- mdb_engine/auth/config_helpers.py +12 -12
- mdb_engine/auth/cookie_utils.py +9 -9
- mdb_engine/auth/csrf.py +9 -8
- mdb_engine/auth/decorators.py +7 -6
- mdb_engine/auth/dependencies.py +22 -21
- mdb_engine/auth/integration.py +9 -9
- mdb_engine/auth/jwt.py +9 -9
- mdb_engine/auth/middleware.py +4 -3
- mdb_engine/auth/oso_factory.py +6 -6
- mdb_engine/auth/provider.py +4 -4
- mdb_engine/auth/rate_limiter.py +12 -11
- mdb_engine/auth/restrictions.py +16 -15
- mdb_engine/auth/session_manager.py +11 -13
- mdb_engine/auth/shared_middleware.py +344 -132
- mdb_engine/auth/shared_users.py +20 -20
- mdb_engine/auth/token_lifecycle.py +10 -12
- mdb_engine/auth/token_store.py +4 -5
- mdb_engine/auth/users.py +51 -52
- mdb_engine/auth/utils.py +29 -33
- mdb_engine/cli/commands/generate.py +6 -6
- mdb_engine/cli/utils.py +4 -4
- mdb_engine/config.py +6 -7
- mdb_engine/core/app_registration.py +12 -12
- mdb_engine/core/app_secrets.py +1 -2
- mdb_engine/core/connection.py +3 -4
- mdb_engine/core/encryption.py +1 -2
- mdb_engine/core/engine.py +43 -44
- mdb_engine/core/manifest.py +80 -58
- mdb_engine/core/ray_integration.py +10 -9
- mdb_engine/core/seeding.py +3 -3
- mdb_engine/core/service_initialization.py +10 -9
- mdb_engine/core/types.py +40 -40
- mdb_engine/database/abstraction.py +15 -16
- mdb_engine/database/connection.py +40 -12
- mdb_engine/database/query_validator.py +8 -8
- mdb_engine/database/resource_limiter.py +7 -7
- mdb_engine/database/scoped_wrapper.py +51 -58
- mdb_engine/dependencies.py +14 -13
- mdb_engine/di/container.py +12 -13
- mdb_engine/di/providers.py +14 -13
- mdb_engine/di/scopes.py +5 -5
- mdb_engine/embeddings/dependencies.py +2 -2
- mdb_engine/embeddings/service.py +67 -50
- mdb_engine/exceptions.py +20 -20
- mdb_engine/indexes/helpers.py +11 -11
- mdb_engine/indexes/manager.py +9 -9
- mdb_engine/memory/README.md +93 -2
- mdb_engine/memory/service.py +361 -1109
- mdb_engine/observability/health.py +10 -9
- mdb_engine/observability/logging.py +10 -10
- mdb_engine/observability/metrics.py +8 -7
- mdb_engine/repositories/base.py +25 -25
- mdb_engine/repositories/mongo.py +17 -17
- mdb_engine/repositories/unit_of_work.py +6 -6
- mdb_engine/routing/websockets.py +19 -18
- mdb_engine/utils/__init__.py +3 -1
- mdb_engine/utils/mongo.py +117 -0
- {mdb_engine-0.2.1.dist-info → mdb_engine-0.2.4.dist-info}/METADATA +88 -13
- mdb_engine-0.2.4.dist-info/RECORD +97 -0
- {mdb_engine-0.2.1.dist-info → mdb_engine-0.2.4.dist-info}/WHEEL +1 -1
- mdb_engine-0.2.1.dist-info/RECORD +0 -96
- {mdb_engine-0.2.1.dist-info → mdb_engine-0.2.4.dist-info}/entry_points.txt +0 -0
- {mdb_engine-0.2.1.dist-info → mdb_engine-0.2.4.dist-info}/licenses/LICENSE +0 -0
- {mdb_engine-0.2.1.dist-info → mdb_engine-0.2.4.dist-info}/top_level.txt +0 -0
mdb_engine/embeddings/service.py
CHANGED
|
@@ -23,7 +23,7 @@ import os
|
|
|
23
23
|
import time
|
|
24
24
|
from abc import ABC, abstractmethod
|
|
25
25
|
from datetime import datetime
|
|
26
|
-
from typing import Any
|
|
26
|
+
from typing import Any
|
|
27
27
|
|
|
28
28
|
# Optional OpenAI SDK import
|
|
29
29
|
try:
|
|
@@ -59,9 +59,7 @@ class BaseEmbeddingProvider(ABC):
|
|
|
59
59
|
"""
|
|
60
60
|
|
|
61
61
|
@abstractmethod
|
|
62
|
-
async def embed(
|
|
63
|
-
self, text: Union[str, List[str]], model: Optional[str] = None
|
|
64
|
-
) -> List[List[float]]:
|
|
62
|
+
async def embed(self, text: str | list[str], model: str | None = None) -> list[list[float]]:
|
|
65
63
|
"""
|
|
66
64
|
Generate embeddings for text.
|
|
67
65
|
|
|
@@ -84,7 +82,7 @@ class OpenAIEmbeddingProvider(BaseEmbeddingProvider):
|
|
|
84
82
|
|
|
85
83
|
def __init__(
|
|
86
84
|
self,
|
|
87
|
-
api_key: Optional[str] = None,
|
|
85
|
+
api_key: str | None = None,
|
|
88
86
|
default_model: str = "text-embedding-3-small",
|
|
89
87
|
):
|
|
90
88
|
"""
|
|
@@ -108,9 +106,7 @@ class OpenAIEmbeddingProvider(BaseEmbeddingProvider):
|
|
|
108
106
|
self.client = AsyncOpenAI(api_key=api_key)
|
|
109
107
|
self.default_model = default_model
|
|
110
108
|
|
|
111
|
-
async def embed(
|
|
112
|
-
self, text: Union[str, List[str]], model: Optional[str] = None
|
|
113
|
-
) -> List[List[float]]:
|
|
109
|
+
async def embed(self, text: str | list[str], model: str | None = None) -> list[list[float]]:
|
|
114
110
|
"""Generate embeddings using OpenAI."""
|
|
115
111
|
model = model or self.default_model
|
|
116
112
|
|
|
@@ -149,9 +145,9 @@ class AzureOpenAIEmbeddingProvider(BaseEmbeddingProvider):
|
|
|
149
145
|
|
|
150
146
|
def __init__(
|
|
151
147
|
self,
|
|
152
|
-
api_key: Optional[str] = None,
|
|
153
|
-
endpoint: Optional[str] = None,
|
|
154
|
-
api_version: Optional[str] = None,
|
|
148
|
+
api_key: str | None = None,
|
|
149
|
+
endpoint: str | None = None,
|
|
150
|
+
api_version: str | None = None,
|
|
155
151
|
default_model: str = "text-embedding-3-small",
|
|
156
152
|
):
|
|
157
153
|
"""
|
|
@@ -191,9 +187,7 @@ class AzureOpenAIEmbeddingProvider(BaseEmbeddingProvider):
|
|
|
191
187
|
)
|
|
192
188
|
self.default_model = default_model
|
|
193
189
|
|
|
194
|
-
async def embed(
|
|
195
|
-
self, text: Union[str, List[str]], model: Optional[str] = None
|
|
196
|
-
) -> List[List[float]]:
|
|
190
|
+
async def embed(self, text: str | list[str], model: str | None = None) -> list[list[float]]:
|
|
197
191
|
"""Generate embeddings using Azure OpenAI."""
|
|
198
192
|
model = model or self.default_model
|
|
199
193
|
|
|
@@ -255,8 +249,8 @@ class EmbeddingProvider:
|
|
|
255
249
|
|
|
256
250
|
def __init__(
|
|
257
251
|
self,
|
|
258
|
-
embedding_provider: Optional[BaseEmbeddingProvider] = None,
|
|
259
|
-
config: Optional[Dict[str, Any]] = None,
|
|
252
|
+
embedding_provider: BaseEmbeddingProvider | None = None,
|
|
253
|
+
config: dict[str, Any] | None = None,
|
|
260
254
|
):
|
|
261
255
|
"""
|
|
262
256
|
Initialize Embedding Provider.
|
|
@@ -293,9 +287,7 @@ class EmbeddingProvider:
|
|
|
293
287
|
# Store config for potential future use
|
|
294
288
|
self.config = config or {}
|
|
295
289
|
|
|
296
|
-
async def embed(
|
|
297
|
-
self, text: Union[str, List[str]], model: Optional[str] = None
|
|
298
|
-
) -> List[List[float]]:
|
|
290
|
+
async def embed(self, text: str | list[str], model: str | None = None) -> list[list[float]]:
|
|
299
291
|
"""
|
|
300
292
|
Generates vector embeddings for a string or list of strings.
|
|
301
293
|
|
|
@@ -361,10 +353,10 @@ class EmbeddingService:
|
|
|
361
353
|
|
|
362
354
|
def __init__(
|
|
363
355
|
self,
|
|
364
|
-
embedding_provider: Optional[EmbeddingProvider] = None,
|
|
356
|
+
embedding_provider: EmbeddingProvider | None = None,
|
|
365
357
|
default_max_tokens: int = 1000,
|
|
366
358
|
default_tokenizer_model: str = "gpt-3.5-turbo",
|
|
367
|
-
config: Optional[Dict[str, Any]] = None,
|
|
359
|
+
config: dict[str, Any] | None = None,
|
|
368
360
|
):
|
|
369
361
|
"""
|
|
370
362
|
Initialize Embedding Service.
|
|
@@ -397,9 +389,7 @@ class EmbeddingService:
|
|
|
397
389
|
self.default_max_tokens = default_max_tokens
|
|
398
390
|
self.default_tokenizer_model = default_tokenizer_model
|
|
399
391
|
|
|
400
|
-
def _create_splitter(
|
|
401
|
-
self, max_tokens: int, tokenizer_model: Optional[str] = None
|
|
402
|
-
) -> TextSplitter:
|
|
392
|
+
def _create_splitter(self, max_tokens: int, tokenizer_model: str | None = None) -> TextSplitter:
|
|
403
393
|
"""
|
|
404
394
|
Create a TextSplitter instance.
|
|
405
395
|
|
|
@@ -419,9 +409,9 @@ class EmbeddingService:
|
|
|
419
409
|
async def chunk_text(
|
|
420
410
|
self,
|
|
421
411
|
text_content: str,
|
|
422
|
-
max_tokens: Optional[int] = None,
|
|
423
|
-
tokenizer_model: Optional[str] = None,
|
|
424
|
-
) -> List[str]:
|
|
412
|
+
max_tokens: int | None = None,
|
|
413
|
+
tokenizer_model: str | None = None,
|
|
414
|
+
) -> list[str]:
|
|
425
415
|
"""
|
|
426
416
|
Split text into semantic chunks.
|
|
427
417
|
|
|
@@ -455,32 +445,39 @@ class EmbeddingService:
|
|
|
455
445
|
logger.error(f"Error chunking text: {e}", exc_info=True)
|
|
456
446
|
raise EmbeddingServiceError(f"Chunking failed: {str(e)}") from e
|
|
457
447
|
|
|
458
|
-
async def embed_chunks(
|
|
459
|
-
self, chunks: List[str], model: Optional[str] = None
|
|
460
|
-
) -> List[List[float]]:
|
|
448
|
+
async def embed(self, text: str | list[str], model: str | None = None) -> list[list[float]]:
|
|
461
449
|
"""
|
|
462
|
-
Generate embeddings for text
|
|
450
|
+
Generate embeddings for text or a list of texts.
|
|
463
451
|
|
|
464
|
-
|
|
452
|
+
Natural API that works with both single strings and lists.
|
|
465
453
|
|
|
466
454
|
Args:
|
|
467
|
-
|
|
455
|
+
text: A single string or list of strings to embed
|
|
468
456
|
model: Optional model identifier (passed to embedding provider)
|
|
469
457
|
|
|
470
458
|
Returns:
|
|
471
|
-
List of embedding vectors (each is a list of floats)
|
|
459
|
+
List of embedding vectors (each is a list of floats).
|
|
460
|
+
If input was a single string, returns a list containing one vector.
|
|
472
461
|
|
|
473
462
|
Example:
|
|
474
|
-
|
|
475
|
-
vectors = await service.
|
|
463
|
+
# Single string
|
|
464
|
+
vectors = await service.embed("Hello world", model="text-embedding-3-small")
|
|
465
|
+
# vectors is [[0.1, 0.2, ...]]
|
|
466
|
+
|
|
467
|
+
# List of strings (batch - more efficient)
|
|
468
|
+
vectors = await service.embed(["chunk 1", "chunk 2"], model="text-embedding-3-small")
|
|
469
|
+
# vectors is [[0.1, ...], [0.2, ...]]
|
|
476
470
|
"""
|
|
471
|
+
# Normalize to list
|
|
472
|
+
chunks = [text] if isinstance(text, str) else text
|
|
473
|
+
|
|
477
474
|
if not chunks:
|
|
478
475
|
return []
|
|
479
476
|
|
|
480
477
|
try:
|
|
481
478
|
# Use EmbeddingProvider's embed method (handles retries, logging, etc.)
|
|
482
479
|
vectors = await self.embedding_provider.embed(chunks, model=model)
|
|
483
|
-
logger.info(f"Generated {len(vectors)}
|
|
480
|
+
logger.info(f"Generated {len(vectors)} embedding(s)")
|
|
484
481
|
return vectors
|
|
485
482
|
except (
|
|
486
483
|
AttributeError,
|
|
@@ -493,16 +490,36 @@ class EmbeddingService:
|
|
|
493
490
|
logger.error(f"Error generating embeddings: {e}", exc_info=True)
|
|
494
491
|
raise EmbeddingServiceError(f"Embedding generation failed: {str(e)}") from e
|
|
495
492
|
|
|
493
|
+
async def embed_chunks(self, chunks: list[str], model: str | None = None) -> list[list[float]]:
|
|
494
|
+
"""
|
|
495
|
+
Generate embeddings for text chunks (list only).
|
|
496
|
+
|
|
497
|
+
DEPRECATED: Use embed() instead, which accepts both strings and lists.
|
|
498
|
+
This method is kept for backward compatibility.
|
|
499
|
+
|
|
500
|
+
Args:
|
|
501
|
+
chunks: List of text chunks to embed
|
|
502
|
+
model: Optional model identifier (passed to embedding provider)
|
|
503
|
+
|
|
504
|
+
Returns:
|
|
505
|
+
List of embedding vectors (each is a list of floats)
|
|
506
|
+
|
|
507
|
+
Example:
|
|
508
|
+
chunks = ["chunk 1", "chunk 2"]
|
|
509
|
+
vectors = await service.embed_chunks(chunks, model="text-embedding-3-small")
|
|
510
|
+
"""
|
|
511
|
+
return await self.embed(chunks, model=model)
|
|
512
|
+
|
|
496
513
|
async def process_and_store(
|
|
497
514
|
self,
|
|
498
515
|
text_content: str,
|
|
499
516
|
source_id: str,
|
|
500
517
|
collection: Any, # MongoDB collection (AppDB Collection or Motor collection)
|
|
501
|
-
max_tokens:
|
|
502
|
-
tokenizer_model:
|
|
503
|
-
embedding_model:
|
|
504
|
-
metadata:
|
|
505
|
-
) ->
|
|
518
|
+
max_tokens: int | None = None,
|
|
519
|
+
tokenizer_model: str | None = None,
|
|
520
|
+
embedding_model: str | None = None,
|
|
521
|
+
metadata: dict[str, Any] | None = None,
|
|
522
|
+
) -> dict[str, Any]:
|
|
506
523
|
"""
|
|
507
524
|
Process text and store chunks with embeddings in MongoDB.
|
|
508
525
|
|
|
@@ -573,7 +590,7 @@ class EmbeddingService:
|
|
|
573
590
|
|
|
574
591
|
# Step 3: Prepare documents for insertion
|
|
575
592
|
documents_to_insert = []
|
|
576
|
-
for i, (chunk_text, vector) in enumerate(zip(chunks, vectors)):
|
|
593
|
+
for i, (chunk_text, vector) in enumerate(zip(chunks, vectors, strict=False)):
|
|
577
594
|
doc = {
|
|
578
595
|
"source_id": source_id,
|
|
579
596
|
"chunk_index": i,
|
|
@@ -626,10 +643,10 @@ class EmbeddingService:
|
|
|
626
643
|
async def process_text(
|
|
627
644
|
self,
|
|
628
645
|
text_content: str,
|
|
629
|
-
max_tokens:
|
|
630
|
-
tokenizer_model:
|
|
631
|
-
embedding_model:
|
|
632
|
-
) ->
|
|
646
|
+
max_tokens: int | None = None,
|
|
647
|
+
tokenizer_model: str | None = None,
|
|
648
|
+
embedding_model: str | None = None,
|
|
649
|
+
) -> list[dict[str, Any]]:
|
|
633
650
|
"""
|
|
634
651
|
Process text and return chunks with embeddings (without storing).
|
|
635
652
|
|
|
@@ -673,7 +690,7 @@ class EmbeddingService:
|
|
|
673
690
|
|
|
674
691
|
# Prepare results
|
|
675
692
|
results = []
|
|
676
|
-
for i, (chunk_text, vector) in enumerate(zip(chunks, vectors)):
|
|
693
|
+
for i, (chunk_text, vector) in enumerate(zip(chunks, vectors, strict=False)):
|
|
677
694
|
results.append(
|
|
678
695
|
{
|
|
679
696
|
"chunk_index": i,
|
|
@@ -692,8 +709,8 @@ class EmbeddingService:
|
|
|
692
709
|
|
|
693
710
|
# Dependency injection helper
|
|
694
711
|
def get_embedding_service(
|
|
695
|
-
embedding_provider:
|
|
696
|
-
config:
|
|
712
|
+
embedding_provider: BaseEmbeddingProvider | None = None,
|
|
713
|
+
config: dict[str, Any] | None = None,
|
|
697
714
|
) -> EmbeddingService:
|
|
698
715
|
"""
|
|
699
716
|
Create EmbeddingService instance with auto-detected or provided embedding provider.
|
mdb_engine/exceptions.py
CHANGED
|
@@ -5,7 +5,7 @@ These exceptions provide more specific error types while maintaining
|
|
|
5
5
|
backward compatibility with RuntimeError.
|
|
6
6
|
"""
|
|
7
7
|
|
|
8
|
-
from typing import Any
|
|
8
|
+
from typing import Any
|
|
9
9
|
|
|
10
10
|
|
|
11
11
|
class MongoDBEngineError(RuntimeError):
|
|
@@ -21,7 +21,7 @@ class MongoDBEngineError(RuntimeError):
|
|
|
21
21
|
collection_name, etc.)
|
|
22
22
|
"""
|
|
23
23
|
|
|
24
|
-
def __init__(self, message: str, context: Optional[Dict[str, Any]] = None) -> None:
|
|
24
|
+
def __init__(self, message: str, context: dict[str, Any] | None = None) -> None:
|
|
25
25
|
"""
|
|
26
26
|
Initialize the exception.
|
|
27
27
|
|
|
@@ -58,9 +58,9 @@ class InitializationError(MongoDBEngineError):
|
|
|
58
58
|
def __init__(
|
|
59
59
|
self,
|
|
60
60
|
message: str,
|
|
61
|
-
mongo_uri:
|
|
62
|
-
db_name:
|
|
63
|
-
context:
|
|
61
|
+
mongo_uri: str | None = None,
|
|
62
|
+
db_name: str | None = None,
|
|
63
|
+
context: dict[str, Any] | None = None,
|
|
64
64
|
) -> None:
|
|
65
65
|
"""
|
|
66
66
|
Initialize the initialization error.
|
|
@@ -99,10 +99,10 @@ class ManifestValidationError(MongoDBEngineError):
|
|
|
99
99
|
def __init__(
|
|
100
100
|
self,
|
|
101
101
|
message: str,
|
|
102
|
-
error_paths:
|
|
103
|
-
manifest_slug:
|
|
104
|
-
schema_version:
|
|
105
|
-
context:
|
|
102
|
+
error_paths: list[str] | None = None,
|
|
103
|
+
manifest_slug: str | None = None,
|
|
104
|
+
schema_version: str | None = None,
|
|
105
|
+
context: dict[str, Any] | None = None,
|
|
106
106
|
) -> None:
|
|
107
107
|
"""
|
|
108
108
|
Initialize the manifest validation error.
|
|
@@ -144,9 +144,9 @@ class ConfigurationError(MongoDBEngineError):
|
|
|
144
144
|
def __init__(
|
|
145
145
|
self,
|
|
146
146
|
message: str,
|
|
147
|
-
config_key:
|
|
148
|
-
config_value:
|
|
149
|
-
context:
|
|
147
|
+
config_key: str | None = None,
|
|
148
|
+
config_value: Any | None = None,
|
|
149
|
+
context: dict[str, Any] | None = None,
|
|
150
150
|
) -> None:
|
|
151
151
|
"""
|
|
152
152
|
Initialize the configuration error.
|
|
@@ -185,10 +185,10 @@ class QueryValidationError(MongoDBEngineError):
|
|
|
185
185
|
def __init__(
|
|
186
186
|
self,
|
|
187
187
|
message: str,
|
|
188
|
-
query_type:
|
|
189
|
-
operator:
|
|
190
|
-
path:
|
|
191
|
-
context:
|
|
188
|
+
query_type: str | None = None,
|
|
189
|
+
operator: str | None = None,
|
|
190
|
+
path: str | None = None,
|
|
191
|
+
context: dict[str, Any] | None = None,
|
|
192
192
|
) -> None:
|
|
193
193
|
"""
|
|
194
194
|
Initialize the query validation error.
|
|
@@ -231,10 +231,10 @@ class ResourceLimitExceeded(MongoDBEngineError):
|
|
|
231
231
|
def __init__(
|
|
232
232
|
self,
|
|
233
233
|
message: str,
|
|
234
|
-
limit_type:
|
|
235
|
-
limit_value:
|
|
236
|
-
actual_value:
|
|
237
|
-
context:
|
|
234
|
+
limit_type: str | None = None,
|
|
235
|
+
limit_value: Any | None = None,
|
|
236
|
+
actual_value: Any | None = None,
|
|
237
|
+
context: dict[str, Any] | None = None,
|
|
238
238
|
) -> None:
|
|
239
239
|
"""
|
|
240
240
|
Initialize the resource limit exceeded error.
|
mdb_engine/indexes/helpers.py
CHANGED
|
@@ -6,14 +6,14 @@ in index creation and management.
|
|
|
6
6
|
"""
|
|
7
7
|
|
|
8
8
|
import logging
|
|
9
|
-
from typing import Any
|
|
9
|
+
from typing import Any
|
|
10
10
|
|
|
11
11
|
logger = logging.getLogger(__name__)
|
|
12
12
|
|
|
13
13
|
|
|
14
14
|
def normalize_keys(
|
|
15
|
-
keys: Union[Dict[str, Any], List[Tuple[str, Any]]],
|
|
16
|
-
) -> List[Tuple[str, Any]]:
|
|
15
|
+
keys: dict[str, Any] | list[tuple[str, Any]],
|
|
16
|
+
) -> list[tuple[str, Any]]:
|
|
17
17
|
"""
|
|
18
18
|
Normalize index keys to a consistent format.
|
|
19
19
|
|
|
@@ -28,7 +28,7 @@ def normalize_keys(
|
|
|
28
28
|
return keys
|
|
29
29
|
|
|
30
30
|
|
|
31
|
-
def keys_to_dict(keys: Union[Dict[str, Any], List[Tuple[str, Any]]]) -> Dict[str, Any]:
|
|
31
|
+
def keys_to_dict(keys: dict[str, Any] | list[tuple[str, Any]]) -> dict[str, Any]:
|
|
32
32
|
"""
|
|
33
33
|
Convert index keys to dictionary format for comparison.
|
|
34
34
|
|
|
@@ -43,7 +43,7 @@ def keys_to_dict(keys: Union[Dict[str, Any], List[Tuple[str, Any]]]) -> Dict[str
|
|
|
43
43
|
return {k: v for k, v in keys}
|
|
44
44
|
|
|
45
45
|
|
|
46
|
-
def is_id_index(keys: Union[Dict[str, Any], List[Tuple[str, Any]]]) -> bool:
|
|
46
|
+
def is_id_index(keys: dict[str, Any] | list[tuple[str, Any]]) -> bool:
|
|
47
47
|
"""
|
|
48
48
|
Check if index keys target the _id field (which MongoDB creates automatically).
|
|
49
49
|
|
|
@@ -63,10 +63,10 @@ def is_id_index(keys: Union[Dict[str, Any], List[Tuple[str, Any]]]) -> bool:
|
|
|
63
63
|
async def check_and_update_index(
|
|
64
64
|
index_manager: Any,
|
|
65
65
|
index_name: str,
|
|
66
|
-
expected_keys:
|
|
67
|
-
expected_options:
|
|
66
|
+
expected_keys: dict[str, Any] | list[tuple[str, Any]],
|
|
67
|
+
expected_options: dict[str, Any] | None = None,
|
|
68
68
|
log_prefix: str = "",
|
|
69
|
-
) ->
|
|
69
|
+
) -> tuple[bool, dict[str, Any] | None]:
|
|
70
70
|
"""
|
|
71
71
|
Check if an index exists and matches the expected definition.
|
|
72
72
|
|
|
@@ -118,11 +118,11 @@ async def check_and_update_index(
|
|
|
118
118
|
|
|
119
119
|
|
|
120
120
|
def validate_index_definition_basic(
|
|
121
|
-
index_def:
|
|
121
|
+
index_def: dict[str, Any],
|
|
122
122
|
index_name: str,
|
|
123
|
-
required_fields:
|
|
123
|
+
required_fields: list[str],
|
|
124
124
|
log_prefix: str = "",
|
|
125
|
-
) ->
|
|
125
|
+
) -> tuple[bool, str | None]:
|
|
126
126
|
"""
|
|
127
127
|
Basic validation for index definitions.
|
|
128
128
|
|
mdb_engine/indexes/manager.py
CHANGED
|
@@ -8,7 +8,7 @@ This module is part of MDB_ENGINE - MongoDB Engine.
|
|
|
8
8
|
|
|
9
9
|
import json
|
|
10
10
|
import logging
|
|
11
|
-
from typing import Any
|
|
11
|
+
from typing import Any
|
|
12
12
|
|
|
13
13
|
from motor.motor_asyncio import AsyncIOMotorDatabase
|
|
14
14
|
from pymongo.errors import (
|
|
@@ -44,7 +44,7 @@ logger = logging.getLogger(__name__)
|
|
|
44
44
|
|
|
45
45
|
async def _handle_regular_index(
|
|
46
46
|
index_manager: AsyncAtlasIndexManager,
|
|
47
|
-
index_def:
|
|
47
|
+
index_def: dict[str, Any],
|
|
48
48
|
index_name: str,
|
|
49
49
|
log_prefix: str,
|
|
50
50
|
) -> None:
|
|
@@ -156,7 +156,7 @@ async def _handle_regular_index(
|
|
|
156
156
|
|
|
157
157
|
async def _handle_ttl_index(
|
|
158
158
|
index_manager: AsyncAtlasIndexManager,
|
|
159
|
-
index_def:
|
|
159
|
+
index_def: dict[str, Any],
|
|
160
160
|
index_name: str,
|
|
161
161
|
log_prefix: str,
|
|
162
162
|
) -> None:
|
|
@@ -203,7 +203,7 @@ async def _handle_ttl_index(
|
|
|
203
203
|
|
|
204
204
|
async def _handle_partial_index(
|
|
205
205
|
index_manager: AsyncAtlasIndexManager,
|
|
206
|
-
index_def:
|
|
206
|
+
index_def: dict[str, Any],
|
|
207
207
|
index_name: str,
|
|
208
208
|
log_prefix: str,
|
|
209
209
|
) -> None:
|
|
@@ -269,7 +269,7 @@ async def _handle_partial_index(
|
|
|
269
269
|
|
|
270
270
|
async def _handle_text_index(
|
|
271
271
|
index_manager: AsyncAtlasIndexManager,
|
|
272
|
-
index_def:
|
|
272
|
+
index_def: dict[str, Any],
|
|
273
273
|
index_name: str,
|
|
274
274
|
log_prefix: str,
|
|
275
275
|
) -> None:
|
|
@@ -335,7 +335,7 @@ async def _handle_text_index(
|
|
|
335
335
|
|
|
336
336
|
async def _handle_geospatial_index(
|
|
337
337
|
index_manager: AsyncAtlasIndexManager,
|
|
338
|
-
index_def:
|
|
338
|
+
index_def: dict[str, Any],
|
|
339
339
|
index_name: str,
|
|
340
340
|
log_prefix: str,
|
|
341
341
|
) -> None:
|
|
@@ -400,7 +400,7 @@ async def _handle_geospatial_index(
|
|
|
400
400
|
|
|
401
401
|
async def _handle_search_index(
|
|
402
402
|
index_manager: AsyncAtlasIndexManager,
|
|
403
|
-
index_def:
|
|
403
|
+
index_def: dict[str, Any],
|
|
404
404
|
index_name: str,
|
|
405
405
|
index_type: str,
|
|
406
406
|
slug: str,
|
|
@@ -502,7 +502,7 @@ async def _handle_search_index(
|
|
|
502
502
|
|
|
503
503
|
async def _handle_hybrid_index(
|
|
504
504
|
index_manager: AsyncAtlasIndexManager,
|
|
505
|
-
index_def:
|
|
505
|
+
index_def: dict[str, Any],
|
|
506
506
|
index_name: str,
|
|
507
507
|
slug: str,
|
|
508
508
|
log_prefix: str,
|
|
@@ -692,7 +692,7 @@ async def run_index_creation_for_collection(
|
|
|
692
692
|
db: AsyncIOMotorDatabase,
|
|
693
693
|
slug: str,
|
|
694
694
|
collection_name: str,
|
|
695
|
-
index_definitions:
|
|
695
|
+
index_definitions: list[dict[str, Any]],
|
|
696
696
|
):
|
|
697
697
|
"""Create or update indexes for a collection based on index definitions."""
|
|
698
698
|
log_prefix = f"[{slug} -> {collection_name}]"
|
mdb_engine/memory/README.md
CHANGED
|
@@ -10,6 +10,8 @@ Mem0.ai integration for intelligent memory management in MDB_ENGINE applications
|
|
|
10
10
|
- **Semantic Search**: Vector-based semantic memory search
|
|
11
11
|
- **Memory Inference**: Optional LLM-based memory inference and summarization
|
|
12
12
|
- **Graph Memory**: Optional graph-based memory relationships (requires graph store config)
|
|
13
|
+
- **Bucket Organization**: Built-in support for organizing memories into buckets (general, file, conversation, etc.)
|
|
14
|
+
- **Dual Storage**: Store both extracted facts AND raw content for richer context retrieval
|
|
13
15
|
|
|
14
16
|
## Installation
|
|
15
17
|
|
|
@@ -203,6 +205,92 @@ await memory_service.delete(memory_id="memory_123", user_id="user123")
|
|
|
203
205
|
await memory_service.delete_all(user_id="user123")
|
|
204
206
|
```
|
|
205
207
|
|
|
208
|
+
### Bucket Organization
|
|
209
|
+
|
|
210
|
+
Organize memories into buckets for better management:
|
|
211
|
+
|
|
212
|
+
```python
|
|
213
|
+
# Add memory to a bucket
|
|
214
|
+
memory = await memory_service.add(
|
|
215
|
+
messages=[{"role": "user", "content": "I love Python programming"}],
|
|
216
|
+
user_id="user123",
|
|
217
|
+
bucket_id="coding:user123",
|
|
218
|
+
bucket_type="general",
|
|
219
|
+
metadata={"category": "coding"}
|
|
220
|
+
)
|
|
221
|
+
|
|
222
|
+
# Get all buckets for a user
|
|
223
|
+
buckets = await memory_service.get_buckets(user_id="user123")
|
|
224
|
+
|
|
225
|
+
# Get only file buckets
|
|
226
|
+
file_buckets = await memory_service.get_buckets(
|
|
227
|
+
user_id="user123",
|
|
228
|
+
bucket_type="file"
|
|
229
|
+
)
|
|
230
|
+
|
|
231
|
+
# Get all memories in a specific bucket
|
|
232
|
+
bucket_memories = await memory_service.get_bucket_memories(
|
|
233
|
+
bucket_id="file:document.pdf:user123",
|
|
234
|
+
user_id="user123"
|
|
235
|
+
)
|
|
236
|
+
```
|
|
237
|
+
|
|
238
|
+
### Store Both Facts and Raw Content
|
|
239
|
+
|
|
240
|
+
Store extracted facts alongside raw content for richer context:
|
|
241
|
+
|
|
242
|
+
```python
|
|
243
|
+
# Store both extracted facts and raw content
|
|
244
|
+
facts, raw_memory_id = await memory_service.add_with_raw_content(
|
|
245
|
+
messages=[{"role": "user", "content": "Extract key facts from this document..."}],
|
|
246
|
+
raw_content="Full document text here...",
|
|
247
|
+
user_id="user123",
|
|
248
|
+
bucket_id="file:document.pdf:user123",
|
|
249
|
+
bucket_type="file",
|
|
250
|
+
infer=True # Extract facts
|
|
251
|
+
)
|
|
252
|
+
|
|
253
|
+
# Later, retrieve raw content when needed
|
|
254
|
+
raw_content = await memory_service.get_raw_content(
|
|
255
|
+
bucket_id="file:document.pdf:user123",
|
|
256
|
+
user_id="user123"
|
|
257
|
+
)
|
|
258
|
+
|
|
259
|
+
# Or include raw content when getting bucket memories
|
|
260
|
+
all_memories = await memory_service.get_bucket_memories(
|
|
261
|
+
bucket_id="file:document.pdf:user123",
|
|
262
|
+
user_id="user123",
|
|
263
|
+
include_raw_content=True
|
|
264
|
+
)
|
|
265
|
+
```
|
|
266
|
+
|
|
267
|
+
### Bucket Types
|
|
268
|
+
|
|
269
|
+
Common bucket types:
|
|
270
|
+
- **`general`**: General purpose buckets (e.g., category-based)
|
|
271
|
+
- **`file`**: File-specific buckets (one per uploaded file)
|
|
272
|
+
- **`conversation`**: Conversation-specific buckets
|
|
273
|
+
- **`user`**: User-level buckets
|
|
274
|
+
|
|
275
|
+
```python
|
|
276
|
+
# General bucket (category-based)
|
|
277
|
+
await memory_service.add(
|
|
278
|
+
messages=[{"role": "user", "content": "I prefer dark mode"}],
|
|
279
|
+
user_id="user123",
|
|
280
|
+
bucket_id="preferences:user123",
|
|
281
|
+
bucket_type="general"
|
|
282
|
+
)
|
|
283
|
+
|
|
284
|
+
# File bucket
|
|
285
|
+
await memory_service.add(
|
|
286
|
+
messages=[{"role": "user", "content": "Document content..."}],
|
|
287
|
+
user_id="user123",
|
|
288
|
+
bucket_id="file:report.pdf:user123",
|
|
289
|
+
bucket_type="file",
|
|
290
|
+
metadata={"filename": "report.pdf"}
|
|
291
|
+
)
|
|
292
|
+
```
|
|
293
|
+
|
|
206
294
|
### Memory Inference
|
|
207
295
|
|
|
208
296
|
With `infer=True`, the service can generate insights and summaries:
|
|
@@ -241,8 +329,11 @@ Mem0MemoryService(
|
|
|
241
329
|
|
|
242
330
|
#### Methods
|
|
243
331
|
|
|
244
|
-
- `add(messages, user_id, metadata=None)` - Add single memory
|
|
245
|
-
- `
|
|
332
|
+
- `add(messages, user_id, metadata=None, bucket_id=None, bucket_type=None, store_raw_content=False, raw_content=None)` - Add single memory with optional bucket and raw content storage
|
|
333
|
+
- `add_with_raw_content(messages, raw_content, user_id, bucket_id=None, bucket_type=None)` - Store both extracted facts and raw content
|
|
334
|
+
- `get_buckets(user_id, bucket_type=None, limit=None)` - Get all buckets for a user
|
|
335
|
+
- `get_bucket_memories(bucket_id, user_id, include_raw_content=False, limit=None)` - Get all memories in a bucket
|
|
336
|
+
- `get_raw_content(bucket_id, user_id)` - Get raw content for a bucket
|
|
246
337
|
- `search(query, user_id, limit=10, filters=None)` - Search memories
|
|
247
338
|
- `get(memory_id, user_id)` - Get specific memory
|
|
248
339
|
- `get_all(user_id, filters=None)` - Get all memories for user
|