mdb-engine 0.1.6__py3-none-any.whl → 0.4.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92) hide show
  1. mdb_engine/__init__.py +116 -11
  2. mdb_engine/auth/ARCHITECTURE.md +112 -0
  3. mdb_engine/auth/README.md +654 -11
  4. mdb_engine/auth/__init__.py +136 -29
  5. mdb_engine/auth/audit.py +592 -0
  6. mdb_engine/auth/base.py +252 -0
  7. mdb_engine/auth/casbin_factory.py +265 -70
  8. mdb_engine/auth/config_defaults.py +5 -5
  9. mdb_engine/auth/config_helpers.py +19 -18
  10. mdb_engine/auth/cookie_utils.py +12 -16
  11. mdb_engine/auth/csrf.py +483 -0
  12. mdb_engine/auth/decorators.py +10 -16
  13. mdb_engine/auth/dependencies.py +69 -71
  14. mdb_engine/auth/helpers.py +3 -3
  15. mdb_engine/auth/integration.py +61 -88
  16. mdb_engine/auth/jwt.py +11 -15
  17. mdb_engine/auth/middleware.py +79 -35
  18. mdb_engine/auth/oso_factory.py +21 -41
  19. mdb_engine/auth/provider.py +270 -171
  20. mdb_engine/auth/rate_limiter.py +505 -0
  21. mdb_engine/auth/restrictions.py +21 -36
  22. mdb_engine/auth/session_manager.py +24 -41
  23. mdb_engine/auth/shared_middleware.py +977 -0
  24. mdb_engine/auth/shared_users.py +775 -0
  25. mdb_engine/auth/token_lifecycle.py +10 -12
  26. mdb_engine/auth/token_store.py +17 -32
  27. mdb_engine/auth/users.py +99 -159
  28. mdb_engine/auth/utils.py +236 -42
  29. mdb_engine/cli/commands/generate.py +546 -10
  30. mdb_engine/cli/commands/validate.py +3 -7
  31. mdb_engine/cli/utils.py +7 -7
  32. mdb_engine/config.py +13 -28
  33. mdb_engine/constants.py +65 -0
  34. mdb_engine/core/README.md +117 -6
  35. mdb_engine/core/__init__.py +39 -7
  36. mdb_engine/core/app_registration.py +31 -50
  37. mdb_engine/core/app_secrets.py +289 -0
  38. mdb_engine/core/connection.py +20 -12
  39. mdb_engine/core/encryption.py +222 -0
  40. mdb_engine/core/engine.py +2862 -115
  41. mdb_engine/core/index_management.py +12 -16
  42. mdb_engine/core/manifest.py +628 -204
  43. mdb_engine/core/ray_integration.py +436 -0
  44. mdb_engine/core/seeding.py +13 -21
  45. mdb_engine/core/service_initialization.py +20 -30
  46. mdb_engine/core/types.py +40 -43
  47. mdb_engine/database/README.md +140 -17
  48. mdb_engine/database/__init__.py +17 -6
  49. mdb_engine/database/abstraction.py +37 -50
  50. mdb_engine/database/connection.py +51 -30
  51. mdb_engine/database/query_validator.py +367 -0
  52. mdb_engine/database/resource_limiter.py +204 -0
  53. mdb_engine/database/scoped_wrapper.py +747 -237
  54. mdb_engine/dependencies.py +427 -0
  55. mdb_engine/di/__init__.py +34 -0
  56. mdb_engine/di/container.py +247 -0
  57. mdb_engine/di/providers.py +206 -0
  58. mdb_engine/di/scopes.py +139 -0
  59. mdb_engine/embeddings/README.md +54 -24
  60. mdb_engine/embeddings/__init__.py +31 -24
  61. mdb_engine/embeddings/dependencies.py +38 -155
  62. mdb_engine/embeddings/service.py +78 -75
  63. mdb_engine/exceptions.py +104 -12
  64. mdb_engine/indexes/README.md +30 -13
  65. mdb_engine/indexes/__init__.py +1 -0
  66. mdb_engine/indexes/helpers.py +11 -11
  67. mdb_engine/indexes/manager.py +59 -123
  68. mdb_engine/memory/README.md +95 -4
  69. mdb_engine/memory/__init__.py +1 -2
  70. mdb_engine/memory/service.py +363 -1168
  71. mdb_engine/observability/README.md +4 -2
  72. mdb_engine/observability/__init__.py +26 -9
  73. mdb_engine/observability/health.py +17 -17
  74. mdb_engine/observability/logging.py +10 -10
  75. mdb_engine/observability/metrics.py +40 -19
  76. mdb_engine/repositories/__init__.py +34 -0
  77. mdb_engine/repositories/base.py +325 -0
  78. mdb_engine/repositories/mongo.py +233 -0
  79. mdb_engine/repositories/unit_of_work.py +166 -0
  80. mdb_engine/routing/README.md +1 -1
  81. mdb_engine/routing/__init__.py +1 -3
  82. mdb_engine/routing/websockets.py +41 -75
  83. mdb_engine/utils/__init__.py +3 -1
  84. mdb_engine/utils/mongo.py +117 -0
  85. mdb_engine-0.4.12.dist-info/METADATA +492 -0
  86. mdb_engine-0.4.12.dist-info/RECORD +97 -0
  87. {mdb_engine-0.1.6.dist-info → mdb_engine-0.4.12.dist-info}/WHEEL +1 -1
  88. mdb_engine-0.1.6.dist-info/METADATA +0 -213
  89. mdb_engine-0.1.6.dist-info/RECORD +0 -75
  90. {mdb_engine-0.1.6.dist-info → mdb_engine-0.4.12.dist-info}/entry_points.txt +0 -0
  91. {mdb_engine-0.1.6.dist-info → mdb_engine-0.4.12.dist-info}/licenses/LICENSE +0 -0
  92. {mdb_engine-0.1.6.dist-info → mdb_engine-0.4.12.dist-info}/top_level.txt +0 -0
@@ -1,43 +1,29 @@
1
1
  """
2
2
  Mem0 Memory Service Implementation
3
-
4
- This module provides a wrapper around Mem0.ai for intelligent memory management.
5
- It integrates seamlessly with mdb-engine's MongoDB connection.
6
- mem0 handles embeddings and LLM via environment variables (.env).
3
+ Production-ready wrapper for Mem0.ai with strict metadata schema for MongoDB.
7
4
  """
8
5
 
9
6
  import logging
10
7
  import os
11
8
  import tempfile
12
- from typing import Any, Dict, List, Optional, Union
9
+ from typing import Any
13
10
 
14
11
  # Set MEM0_DIR environment variable early to avoid permission issues
15
- # mem0 tries to create .mem0 directory at import time, so we set this before any import
16
12
  if "MEM0_DIR" not in os.environ:
17
- # Use /tmp/.mem0 which should be writable in most environments
18
13
  mem0_dir = os.path.join(tempfile.gettempdir(), ".mem0")
19
14
  try:
20
15
  os.makedirs(mem0_dir, exist_ok=True)
21
16
  os.environ["MEM0_DIR"] = mem0_dir
22
17
  except OSError:
23
- # Fallback: try user's home directory
24
- try:
25
- home_dir = os.path.expanduser("~")
26
- mem0_dir = os.path.join(home_dir, ".mem0")
27
- os.makedirs(mem0_dir, exist_ok=True)
28
- os.environ["MEM0_DIR"] = mem0_dir
29
- except OSError:
30
- # Last resort: current directory (may fail but won't crash import)
31
- os.environ["MEM0_DIR"] = os.path.join(os.getcwd(), ".mem0")
18
+ # Fallback: current directory
19
+ os.environ["MEM0_DIR"] = os.path.join(os.getcwd(), ".mem0")
32
20
 
33
- # Try to import mem0 (optional dependency)
34
- # Import is lazy to avoid permission issues at module load time
21
+ # Lazy Import
35
22
  MEM0_AVAILABLE = None
36
23
  Memory = None
37
24
 
38
25
 
39
26
  def _check_mem0_available():
40
- """Lazy check if mem0 is available."""
41
27
  global MEM0_AVAILABLE, Memory
42
28
  if MEM0_AVAILABLE is None:
43
29
  try:
@@ -47,1239 +33,448 @@ def _check_mem0_available():
47
33
  except ImportError:
48
34
  MEM0_AVAILABLE = False
49
35
  Memory = None
50
- except OSError as e:
51
- logger.warning(
52
- f"Failed to set up mem0 directory: {e}. Memory features may be limited."
53
- )
54
- MEM0_AVAILABLE = False
55
- Memory = None
56
-
57
36
  return MEM0_AVAILABLE
58
37
 
59
38
 
60
39
  logger = logging.getLogger(__name__)
61
40
 
62
41
 
63
- def _detect_provider_from_env() -> str:
64
- """
65
- Detect provider from environment variables.
66
-
67
- Returns:
68
- "azure" if Azure OpenAI credentials are present, otherwise "openai"
69
- """
70
- if os.getenv("AZURE_OPENAI_API_KEY") and os.getenv("AZURE_OPENAI_ENDPOINT"):
71
- return "azure"
72
- elif os.getenv("OPENAI_API_KEY"):
73
- return "openai"
74
- else:
75
- # Default to openai if nothing is configured
76
- return "openai"
77
-
78
-
79
- def _detect_embedding_dimensions(model_name: str) -> Optional[int]:
80
- """
81
- Auto-detect embedding dimensions from model name.
82
-
83
- Args:
84
- model_name: Embedding model name (e.g., "text-embedding-3-small")
85
-
86
- Returns:
87
- Number of dimensions, or None if unknown (should use config/default)
88
-
89
- Examples:
90
- >>> _detect_embedding_dimensions("text-embedding-3-small")
91
- 1536
92
- """
93
- # Normalize model name (remove provider prefix)
94
- normalized = model_name.lower()
95
- if "/" in normalized:
96
- normalized = normalized.split("/", 1)[1]
97
-
98
- # OpenAI models
99
- if "text-embedding-3-small" in normalized:
100
- return 1536
101
- elif "text-embedding-3-large" in normalized:
102
- return 3072
103
- elif "text-embedding-ada-002" in normalized or "ada-002" in normalized:
104
- return 1536
105
- elif "text-embedding-ada" in normalized:
106
- return 1536
107
-
108
- # Cohere models (common ones)
109
- if "embed-english-v3" in normalized:
110
- return 1024
111
- elif "embed-multilingual-v3" in normalized:
112
- return 1024
113
-
114
- # Unknown model - return None to use config/default
115
- return None
116
-
117
-
118
42
  class Mem0MemoryServiceError(Exception):
119
- """
120
- Base exception for all Mem0 Memory Service failures.
121
- """
122
-
123
43
  pass
124
44
 
125
45
 
126
- def _build_vector_store_config(
127
- db_name: str, collection_name: str, mongo_uri: str, embedding_model_dims: int
128
- ) -> Dict[str, Any]:
129
- """Build vector store configuration for mem0."""
130
- return {
131
- "vector_store": {
132
- "provider": "mongodb",
133
- "config": {
134
- "db_name": db_name,
135
- "collection_name": collection_name,
136
- "mongo_uri": mongo_uri,
137
- "embedding_model_dims": embedding_model_dims,
138
- },
139
- }
140
- }
141
-
142
-
143
- def _build_embedder_config(
144
- provider: str, embedding_model: str, app_slug: str
145
- ) -> Dict[str, Any]:
146
- """Build embedder configuration for mem0."""
147
- clean_embedding_model = embedding_model.replace("azure/", "").replace("openai/", "")
148
- if provider == "azure":
149
- azure_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT")
150
- azure_api_key = os.getenv("AZURE_OPENAI_API_KEY")
151
- azure_api_version = os.getenv(
152
- "AZURE_OPENAI_API_VERSION",
153
- os.getenv("OPENAI_API_VERSION", "2024-02-15-preview"),
154
- )
155
-
156
- if not azure_endpoint or not azure_api_key:
157
- raise Mem0MemoryServiceError(
158
- "Azure OpenAI requires AZURE_OPENAI_ENDPOINT and "
159
- "AZURE_OPENAI_API_KEY environment variables"
160
- )
161
-
162
- config = {
163
- "provider": "azure_openai",
164
- "config": {
165
- "model": clean_embedding_model,
166
- "azure_kwargs": {
167
- "azure_deployment": clean_embedding_model,
168
- "api_version": azure_api_version,
169
- "azure_endpoint": azure_endpoint,
170
- "api_key": azure_api_key,
171
- },
172
- },
173
- }
174
- else:
175
- config = {
176
- "provider": "openai",
177
- "config": {"model": clean_embedding_model},
178
- }
179
-
180
- provider_name = "Azure OpenAI" if provider == "azure" else "OpenAI"
181
- logger.info(
182
- f"Configuring mem0 embedder ({provider_name}): "
183
- f"provider='{config['provider']}', "
184
- f"model='{clean_embedding_model}'",
185
- extra={
186
- "app_slug": app_slug,
187
- "embedding_model": embedding_model,
188
- "embedder_provider": config["provider"],
189
- "provider": provider,
190
- },
191
- )
192
- return config
193
-
194
-
195
- def _build_llm_config(
196
- provider: str, chat_model: str, temperature: float, app_slug: str
197
- ) -> Dict[str, Any]:
198
- """Build LLM configuration for mem0."""
199
- clean_chat_model = chat_model.replace("azure/", "").replace("openai/", "")
200
- if provider == "azure":
201
- deployment_name = os.getenv("AZURE_OPENAI_DEPLOYMENT_NAME") or clean_chat_model
202
- clean_chat_model = deployment_name
203
-
204
- azure_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT")
205
- azure_api_key = os.getenv("AZURE_OPENAI_API_KEY")
206
- azure_api_version = os.getenv(
207
- "AZURE_OPENAI_API_VERSION",
208
- os.getenv("OPENAI_API_VERSION", "2024-02-15-preview"),
209
- )
210
-
211
- if not azure_endpoint or not azure_api_key:
212
- raise Mem0MemoryServiceError(
213
- "Azure OpenAI LLM requires AZURE_OPENAI_ENDPOINT and "
214
- "AZURE_OPENAI_API_KEY environment variables"
215
- )
216
-
217
- config = {
218
- "provider": "azure_openai",
219
- "config": {
220
- "model": clean_chat_model,
221
- "temperature": temperature,
222
- "azure_kwargs": {
223
- "azure_deployment": clean_chat_model,
224
- "api_version": azure_api_version,
225
- "azure_endpoint": azure_endpoint,
226
- "api_key": azure_api_key,
227
- },
228
- },
229
- }
230
- else:
231
- config = {
232
- "provider": "openai",
233
- "config": {"model": clean_chat_model, "temperature": temperature},
234
- }
235
-
236
- llm_provider_name = "Azure OpenAI" if provider == "azure" else "OpenAI"
237
- logger.info(
238
- f"Configuring mem0 LLM ({llm_provider_name}): "
239
- f"provider='{config['provider']}', "
240
- f"model='{clean_chat_model}'",
241
- extra={
242
- "app_slug": app_slug,
243
- "original_model": chat_model,
244
- "llm_provider": config["provider"],
245
- "llm_provider_type": provider,
246
- "temperature": temperature,
247
- },
248
- )
249
- return config
250
-
251
-
252
- def _initialize_memory_instance(mem0_config: Dict[str, Any], app_slug: str) -> tuple:
253
- """Initialize Mem0 Memory instance and return (instance, init_method)."""
254
- logger.debug(
255
- "Initializing Mem0 Memory with config structure",
256
- extra={
257
- "app_slug": app_slug,
258
- "config_keys": list(mem0_config.keys()),
259
- "vector_store_provider": mem0_config.get("vector_store", {}).get(
260
- "provider"
261
- ),
262
- "embedder_provider": mem0_config.get("embedder", {}).get("provider"),
263
- "llm_provider": (
264
- mem0_config.get("llm", {}).get("provider")
265
- if mem0_config.get("llm")
266
- else None
267
- ),
268
- "full_config": mem0_config,
269
- },
270
- )
271
-
272
- init_method = None
273
- try:
274
- if hasattr(Memory, "from_config"):
275
- memory_instance = Memory.from_config(mem0_config)
276
- init_method = "Memory.from_config()"
277
- else:
278
- try:
279
- from mem0.config import Config
280
-
281
- config_obj = Config(**mem0_config)
282
- memory_instance = Memory(config_obj)
283
- init_method = "Memory(Config())"
284
- except (ImportError, TypeError) as config_error:
285
- logger.warning(
286
- f"Could not create Config object, trying dict: {config_error}",
287
- extra={"app_slug": app_slug},
288
- )
289
- memory_instance = Memory(mem0_config)
290
- init_method = "Memory(dict)"
291
- except (
292
- ImportError,
293
- AttributeError,
294
- TypeError,
295
- ValueError,
296
- RuntimeError,
297
- KeyError,
298
- ) as init_error:
299
- error_msg = str(init_error)
300
- logger.error(
301
- f"Failed to initialize Memory instance: {error_msg}",
302
- exc_info=True,
303
- extra={
304
- "app_slug": app_slug,
305
- "error": error_msg,
306
- "error_type": type(init_error).__name__,
307
- "config_keys": (
308
- list(mem0_config.keys())
309
- if isinstance(mem0_config, dict)
310
- else "not_dict"
311
- ),
312
- },
313
- )
314
- raise Mem0MemoryServiceError(
315
- f"Failed to initialize Memory instance: {error_msg}. "
316
- f"Ensure mem0ai is installed and Azure OpenAI environment "
317
- f"variables are set correctly."
318
- ) from init_error
319
-
320
- return memory_instance, init_method
321
-
322
-
323
46
  class Mem0MemoryService:
324
- """
325
- Service for managing user memories using Mem0.ai.
326
-
327
- This service provides intelligent memory management that:
328
- - Stores and retrieves memories in MongoDB (using mdb-engine's connection)
329
- - Uses mem0's embedder for embeddings (configured via environment variables)
330
- - Optionally extracts memories from conversations (requires LLM if infer: true)
331
- - Retrieves relevant memories for context-aware responses
332
- - Optionally builds knowledge graphs for entity relationships
333
-
334
- Embeddings and LLM are configured via environment variables (.env) and mem0 handles
335
- provider routing automatically.
336
- """
337
-
338
47
  def __init__(
339
48
  self,
340
49
  mongo_uri: str,
341
50
  db_name: str,
342
51
  app_slug: str,
343
- config: Optional[Dict[str, Any]] = None,
52
+ config: dict[str, Any] | None = None,
344
53
  ):
345
- """
346
- Initialize Mem0 Memory Service.
347
-
348
- Args:
349
- mongo_uri: MongoDB connection URI
350
- db_name: Database name
351
- app_slug: App slug (used for collection naming)
352
- config: Optional memory configuration dict (from manifest.json
353
- memory_config)
354
- Can include: collection_name, enable_graph, infer,
355
- embedding_model, chat_model, temperature, etc.
356
- Note: embedding_model_dims is auto-detected by embedding a
357
- test string - no need to specify!
358
- Embeddings and LLM are configured via environment variables
359
- (.env).
360
-
361
- Raises:
362
- Mem0MemoryServiceError: If mem0 is not available or initialization fails
363
- """
364
- # Lazy check for mem0 availability
365
54
  if not _check_mem0_available():
366
- raise Mem0MemoryServiceError(
367
- "Mem0 dependencies not available. Install with: pip install mem0ai"
368
- )
55
+ raise Mem0MemoryServiceError("Mem0 not installed. pip install mem0ai")
369
56
 
370
57
  self.mongo_uri = mongo_uri
371
58
  self.db_name = db_name
372
59
  self.app_slug = app_slug
373
-
374
- # Extract config with defaults
375
- self.collection_name = (config or {}).get(
376
- "collection_name", f"{app_slug}_memories"
377
- )
378
- config_embedding_dims = (config or {}).get(
379
- "embedding_model_dims"
380
- ) # Optional - will be auto-detected
381
- self.enable_graph = (config or {}).get("enable_graph", False)
60
+ self.collection_name = (config or {}).get("collection_name", f"{app_slug}_memories")
382
61
  self.infer = (config or {}).get("infer", True)
383
- self.async_mode = (config or {}).get("async_mode", True)
384
62
 
385
- # Get model names from config or environment
386
- # Default embedding model from config or env, fallback to common default
63
+ # Ensure GOOGLE_API_KEY is set for mem0 compatibility
64
+ # (mem0 expects GOOGLE_API_KEY, not GEMINI_API_KEY)
65
+ # This ensures we use the DIRECT Gemini API
66
+ # (generativelanguage.googleapis.com), NOT Vertex AI
67
+ if os.getenv("GEMINI_API_KEY") and not os.getenv("GOOGLE_API_KEY"):
68
+ os.environ["GOOGLE_API_KEY"] = os.getenv("GEMINI_API_KEY")
69
+ logger.info(
70
+ "Set GOOGLE_API_KEY from GEMINI_API_KEY for mem0 compatibility (direct Gemini API)"
71
+ )
72
+
73
+ # Verify we're NOT using Vertex AI (which would use GOOGLE_APPLICATION_CREDENTIALS)
74
+ if os.getenv("GOOGLE_APPLICATION_CREDENTIALS"):
75
+ logger.warning(
76
+ "GOOGLE_APPLICATION_CREDENTIALS is set - this would use Vertex AI, "
77
+ "not direct Gemini API"
78
+ )
79
+
80
+ # 1. Models & Config
387
81
  embedding_model = (config or {}).get("embedding_model") or os.getenv(
388
82
  "EMBEDDING_MODEL", "text-embedding-3-small"
389
83
  )
390
- chat_model = (
391
- (config or {}).get("chat_model")
392
- or os.getenv("CHAT_MODEL")
393
- or os.getenv("AZURE_OPENAI_DEPLOYMENT_NAME", "gpt-4o")
394
- )
395
- temperature = (config or {}).get(
396
- "temperature", float(os.getenv("LLM_TEMPERATURE", "0.0"))
397
- )
398
-
399
- # Detect provider from environment variables
400
- provider = _detect_provider_from_env()
84
+ chat_model = (config or {}).get("chat_model") or os.getenv("CHAT_MODEL", "gpt-4o")
401
85
 
402
- # Verify required environment variables are set
403
- if provider == "azure":
404
- if not os.getenv("AZURE_OPENAI_API_KEY") or not os.getenv(
405
- "AZURE_OPENAI_ENDPOINT"
406
- ):
407
- raise Mem0MemoryServiceError(
408
- "Azure OpenAI provider requires AZURE_OPENAI_API_KEY and "
409
- "AZURE_OPENAI_ENDPOINT environment variables to be set."
410
- )
411
- else:
412
- if not os.getenv("OPENAI_API_KEY"):
413
- raise Mem0MemoryServiceError(
414
- "OpenAI provider requires OPENAI_API_KEY environment variable to be set."
415
- )
86
+ # 2. Build Mem0 Configuration
87
+ embedding_dims = (config or {}).get(
88
+ "embedding_model_dims"
89
+ ) or 1536 # Default for text-embedding-3-small
90
+ mem0_config = {
91
+ "vector_store": {
92
+ "provider": "mongodb",
93
+ "config": {
94
+ "db_name": db_name,
95
+ "collection_name": self.collection_name,
96
+ "mongo_uri": mongo_uri,
97
+ "embedding_model_dims": embedding_dims,
98
+ },
99
+ },
100
+ "embedder": self._build_provider_config("embedder", embedding_model),
101
+ "llm": self._build_provider_config("llm", chat_model) if self.infer else None,
102
+ }
416
103
 
104
+ # Add custom prompts to make fact extraction less restrictive (for document processing)
105
+ # The default mem0 prompts are too restrictive and filter out general facts
106
+ if self.infer:
107
+ # Long prompt string - using concatenation to avoid line length issues
108
+ fact_extraction_prompt = (
109
+ "You are a helpful assistant that extracts key facts, insights, "
110
+ "and information from documents and conversations.\n\n"
111
+ "Your task is to extract factual information, insights, and important details "
112
+ "from the provided content. Extract facts that would be useful for future "
113
+ "reference, including:\n"
114
+ "- Key concepts, definitions, and explanations\n"
115
+ "- Important dates, names, and entities\n"
116
+ "- Processes, procedures, and methodologies\n"
117
+ "- Insights, conclusions, and recommendations\n"
118
+ "- Relationships between concepts\n"
119
+ "- Any other factual information that would be valuable to remember\n\n"
120
+ 'Return your response as a JSON object with a "facts" array. '
121
+ "Each fact should be a clear, standalone statement.\n\n"
122
+ "Example:\n"
123
+ 'Input: "The Innovation Hub was established on August 14, 2024 by '
124
+ "David Vainchenker and Todd O'Brien. It focuses on experimental AI projects." + "\n"
125
+ 'Output: {{"facts": ["The Innovation Hub was established on August 14, 2024", '
126
+ '"The Innovation Hub was founded by David Vainchenker and Todd O\'Brien", '
127
+ '"The Innovation Hub focuses on experimental AI projects"]}}' + "\n\n"
128
+ "Now extract facts from the following content:"
129
+ )
130
+ mem0_config["prompts"] = {"fact_extraction": fact_extraction_prompt}
131
+
132
+ # Filter None
133
+ mem0_config = {k: v for k, v in mem0_config.items() if v is not None}
134
+
135
+ # 3. Initialize
417
136
  try:
418
- # Detect embedding dimensions using model name (fallback method)
419
- detected_dims = _detect_embedding_dimensions(embedding_model)
420
- self.embedding_model_dims = (
421
- detected_dims
422
- if detected_dims is not None
423
- else (config_embedding_dims or 1536)
424
- )
137
+ if hasattr(Memory, "from_config"):
138
+ self.memory = Memory.from_config(mem0_config)
139
+ else:
140
+ self.memory = Memory(mem0_config)
141
+ logger.info(f"✅ Mem0 Service active: {self.collection_name}")
142
+ except (
143
+ ValueError,
144
+ TypeError,
145
+ ConnectionError,
146
+ OSError,
147
+ AttributeError,
148
+ RuntimeError,
149
+ ) as e:
150
+ raise Mem0MemoryServiceError(f"Failed to init Mem0: {e}") from e
425
151
 
426
- # Build mem0 config with MongoDB as vector store
427
- mem0_config = _build_vector_store_config(
428
- self.db_name,
429
- self.collection_name,
430
- self.mongo_uri,
431
- self.embedding_model_dims,
432
- )
152
+ def _build_provider_config(self, component, model_name):
153
+ """
154
+ Build provider configuration for embeddings or LLM.
433
155
 
434
- # Configure mem0 embedder
435
- mem0_config["embedder"] = _build_embedder_config(
436
- provider, embedding_model, app_slug
437
- )
156
+ For embeddings: Always use Azure OpenAI if available, otherwise OpenAI
157
+ For LLM: Detect provider from model name (gemini/google -> google_ai, else Azure/OpenAI)
158
+ """
159
+ clean_model = (
160
+ model_name.replace("azure/", "")
161
+ .replace("openai/", "")
162
+ .replace("google/", "")
163
+ .replace("gemini/", "")
164
+ )
438
165
 
439
- # Configure LLM for inference (if infer: true)
440
- if self.infer:
441
- mem0_config["llm"] = _build_llm_config(
442
- provider, chat_model, temperature, app_slug
166
+ # For embeddings, always prefer Azure if available
167
+ if component == "embedder":
168
+ provider = "azure_openai" if os.getenv("AZURE_OPENAI_API_KEY") else "openai"
169
+ cfg = {"provider": provider, "config": {"model": clean_model}}
170
+
171
+ if provider == "azure_openai":
172
+ # Support both AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME and AZURE_EMBEDDING_DEPLOYMENT
173
+ deployment_name = (
174
+ os.getenv("AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME")
175
+ or os.getenv("AZURE_EMBEDDING_DEPLOYMENT")
176
+ or clean_model
443
177
  )
444
- except (ValueError, TypeError, KeyError, AttributeError, ImportError) as e:
445
- logger.error(
446
- f"Failed to configure mem0: {e}",
447
- extra={"app_slug": app_slug, "error": str(e)},
448
- )
449
- raise Mem0MemoryServiceError(f"Failed to configure mem0: {e}") from e
450
-
451
- # Add graph store configuration if enabled
452
- if self.enable_graph:
453
- # Note: Graph store requires separate configuration (neo4j, memgraph, etc.)
454
- # For now, we just enable it - actual graph store config should come from manifest
455
- graph_config = (config or {}).get("graph_store")
456
- if graph_config:
457
- mem0_config["graph_store"] = graph_config
458
- else:
459
- logger.warning(
460
- "Graph memory enabled but no graph_store config provided. "
461
- "Graph features will not work. Configure graph_store in manifest.json",
462
- extra={"app_slug": app_slug},
463
- )
464
-
465
- try:
466
- # Initialize Mem0 Memory instance
467
- self.memory, init_method = _initialize_memory_instance(
468
- mem0_config, app_slug
469
- )
470
-
471
- # Verify the memory instance has required methods
472
- if not hasattr(self.memory, "get_all"):
473
- logger.warning(
474
- f"Memory instance missing 'get_all' method for app '{app_slug}'",
475
- extra={"app_slug": app_slug, "init_method": init_method},
178
+ # Use API version from env or default
179
+ api_version = os.getenv("AZURE_OPENAI_API_VERSION", "2024-02-01")
180
+ cfg["config"]["azure_kwargs"] = {
181
+ "api_version": api_version,
182
+ "azure_deployment": deployment_name,
183
+ "azure_endpoint": os.getenv("AZURE_OPENAI_ENDPOINT"),
184
+ "api_key": os.getenv("AZURE_OPENAI_API_KEY"),
185
+ }
186
+ logger.info(
187
+ f"Using Azure OpenAI embedding provider with deployment: "
188
+ f"{deployment_name}, API version: {api_version}"
476
189
  )
477
- if not hasattr(self.memory, "add"):
478
- logger.warning(
479
- f"Memory instance missing 'add' method for app '{app_slug}'",
480
- extra={"app_slug": app_slug, "init_method": init_method},
190
+ return cfg
191
+
192
+ # For LLM, detect provider from model name or env vars
193
+ model_lower = model_name.lower()
194
+ # Mem0 uses "gemini" as provider name (not "google_ai" or "vertexai")
195
+ # GOOGLE_API_KEY should already be set in __init__ if GEMINI_API_KEY was provided
196
+ has_gemini_key = os.getenv("GOOGLE_API_KEY") or os.getenv("GEMINI_API_KEY")
197
+ if "gemini" in model_lower or "google" in model_lower or has_gemini_key:
198
+ # Use Gemini provider for Mem0 (direct Gemini API, NOT Vertex AI)
199
+ provider = "gemini"
200
+ # Explicitly set API key in config to ensure direct Gemini API usage
201
+ api_key = os.getenv("GOOGLE_API_KEY") or os.getenv("GEMINI_API_KEY")
202
+ cfg = {
203
+ "provider": provider,
204
+ "config": {
205
+ "model": clean_model,
206
+ "api_key": api_key, # Explicitly set to ensure direct API usage
207
+ },
208
+ }
209
+ logger.info(f"Using Gemini LLM provider (direct API) with model: {clean_model}")
210
+ return cfg
211
+ else:
212
+ # Use Azure OpenAI if available, otherwise OpenAI
213
+ provider = "azure_openai" if os.getenv("AZURE_OPENAI_API_KEY") else "openai"
214
+ cfg = {"provider": provider, "config": {"model": clean_model}}
215
+
216
+ if provider == "azure_openai":
217
+ deployment_name = os.getenv("AZURE_OPENAI_DEPLOYMENT_NAME", clean_model)
218
+ # Use API version from env or default (match .env default)
219
+ api_version = os.getenv("AZURE_OPENAI_API_VERSION", "2024-02-01")
220
+ cfg["config"]["azure_kwargs"] = {
221
+ "api_version": api_version,
222
+ "azure_deployment": deployment_name,
223
+ "azure_endpoint": os.getenv("AZURE_OPENAI_ENDPOINT"),
224
+ "api_key": os.getenv("AZURE_OPENAI_API_KEY"),
225
+ }
226
+ logger.info(
227
+ f"Using Azure OpenAI LLM provider with deployment: "
228
+ f"{deployment_name}, API version: {api_version}"
481
229
  )
230
+ else:
231
+ logger.info(f"Using OpenAI LLM provider with model: {clean_model}")
232
+ return cfg
482
233
 
483
- logger.info(
484
- f"Mem0 Memory Service initialized using {init_method} for app '{app_slug}'",
485
- extra={
486
- "app_slug": app_slug,
487
- "init_method": init_method,
488
- "collection_name": self.collection_name,
489
- "db_name": self.db_name,
490
- "enable_graph": self.enable_graph,
491
- "infer": self.infer,
492
- "has_get_all": hasattr(self.memory, "get_all"),
493
- "has_add": hasattr(self.memory, "add"),
494
- "embedder_provider": mem0_config.get("embedder", {}).get(
495
- "provider"
496
- ),
497
- "embedder_model": mem0_config.get("embedder", {})
498
- .get("config", {})
499
- .get("model"),
500
- "llm_provider": (
501
- mem0_config.get("llm", {}).get("provider")
502
- if self.infer
503
- else None
504
- ),
505
- "llm_model": (
506
- mem0_config.get("llm", {}).get("config", {}).get("model")
507
- if self.infer
508
- else None
509
- ),
510
- },
511
- )
512
- except (
513
- ImportError,
514
- AttributeError,
515
- TypeError,
516
- ValueError,
517
- RuntimeError,
518
- KeyError,
519
- ) as e:
520
- logger.error(
521
- f"Failed to initialize Mem0 Memory Service for app '{app_slug}': {e}",
522
- exc_info=True,
523
- extra={"app_slug": app_slug, "error": str(e)},
524
- )
525
- raise Mem0MemoryServiceError(
526
- f"Failed to initialize Mem0 Memory Service: {e}"
527
- ) from e
234
+ # --- Core Operations ---
528
235
 
529
236
  def add(
530
237
  self,
531
- messages: Union[str, List[Dict[str, str]]],
532
- user_id: Optional[str] = None,
533
- metadata: Optional[Dict[str, Any]] = None,
238
+ messages: str | list[dict[str, str]],
239
+ user_id: str | None = None,
240
+ metadata: dict[str, Any] | None = None,
241
+ bucket_id: str | None = None,
242
+ bucket_type: str | None = None,
243
+ raw_content: str | None = None,
534
244
  **kwargs,
535
- ) -> List[Dict[str, Any]]:
245
+ ) -> list[dict[str, Any]]:
536
246
  """
537
- Add memories from messages or text.
538
-
539
- This method intelligently extracts memories from conversations
540
- and stores them in MongoDB. Memories are processed asynchronously
541
- by default for better performance.
247
+ Add memories with user scoping and metadata convenience.
248
+ All operations are scoped per user_id for safety.
249
+ bucket_id and bucket_type are stored in metadata for filtering convenience.
250
+ """
251
+ if isinstance(messages, str):
252
+ messages = [{"role": "user", "content": messages}]
542
253
 
543
- Args:
544
- messages: Either a string or list of message dicts with 'role' and 'content'
545
- user_id: Optional user ID to associate memories with
546
- metadata: Optional metadata dict (e.g., {"category": "preferences"})
547
- **kwargs: Additional mem0.add() parameters:
548
- - infer: Whether to infer memories (default: True)
549
- Note: async_mode is not a valid parameter for Mem0's add()
550
- method.
551
- Mem0 processes memories asynchronously by default.
552
- Graph features are configured at initialization via
553
- enable_graph in config, not per-add call.
254
+ # Merge metadata
255
+ final_metadata = dict(metadata) if metadata else {}
554
256
 
555
- Returns:
556
- List of memory events (each with 'id', 'event', 'data')
257
+ # CRITICAL: Database indexing relies on these fields being in metadata
258
+ if bucket_id:
259
+ final_metadata["bucket_id"] = bucket_id
260
+ final_metadata["context_id"] = bucket_id # Backwards compatibility
557
261
 
558
- Example:
559
- ```python
560
- memories = memory_service.add(
561
- messages=[
562
- {"role": "user", "content": "I love sci-fi movies"},
563
- {"role": "assistant", "content": "Noted! I'll remember that."}
564
- ],
565
- user_id="alice",
566
- metadata={"category": "preferences"}
567
- )
568
- ```
569
- """
570
- try:
571
- # Normalize messages format
572
- if isinstance(messages, str):
573
- messages = [{"role": "user", "content": messages}]
262
+ if bucket_type:
263
+ final_metadata["bucket_type"] = bucket_type
574
264
 
575
- # Prepare kwargs with defaults from config
576
- # async_mode is not a valid parameter for Mem0's add() method
577
- add_kwargs = {"infer": kwargs.pop("infer", self.infer), **kwargs}
578
- add_kwargs.pop("async_mode", None)
265
+ # Store raw_content in metadata if provided (metadata convenience)
266
+ if raw_content:
267
+ final_metadata["raw_content"] = raw_content
579
268
 
580
- # enable_graph is configured at initialization, not per-add call
581
- # Mem0 processes asynchronously by default
582
- # Log message content preview for debugging
583
- message_preview = []
584
- for i, msg in enumerate(messages[:5]): # Show first 5 messages
585
- if isinstance(msg, dict):
586
- role = msg.get("role", "unknown")
587
- content = msg.get("content", "")
588
- preview = content[:150] + "..." if len(content) > 150 else content
589
- message_preview.append(f"{i+1}. {role}: {preview}")
269
+ # Infer defaults to configured value unless overridden
270
+ infer = kwargs.pop("infer", self.infer)
590
271
 
591
- logger.info(
592
- f"🔵 CALLING mem0.add() - app_slug='{self.app_slug}', "
593
- f"user_id='{user_id}', messages={len(messages)}, "
594
- f"infer={add_kwargs.get('infer', 'N/A')}",
595
- extra={
596
- "app_slug": self.app_slug,
597
- "user_id": user_id,
598
- "collection_name": self.collection_name,
599
- "message_count": len(messages),
600
- "message_preview": "\n".join(message_preview),
601
- "infer": add_kwargs.get("infer"),
602
- "metadata": metadata or {},
603
- "add_kwargs": add_kwargs,
604
- },
272
+ try:
273
+ logger.debug(
274
+ f"Calling mem0.add() with infer={infer}, user_id={user_id}, bucket_id={bucket_id}"
605
275
  )
606
-
607
276
  result = self.memory.add(
608
277
  messages=messages,
609
- user_id=str(user_id), # Ensure string - mem0 might be strict about this
610
- metadata=metadata or {},
611
- **add_kwargs,
278
+ user_id=str(user_id) if user_id else None,
279
+ metadata=final_metadata,
280
+ infer=infer,
281
+ **kwargs,
612
282
  )
613
-
614
- # Normalize result format - mem0.add() may return different formats
615
- if isinstance(result, dict):
616
- # Some versions return {"results": [...]} or {"data": [...]}
617
- if "results" in result:
618
- result = result["results"]
619
- elif "data" in result:
620
- result = result["data"] if isinstance(result["data"], list) else []
621
- elif "memory" in result:
622
- # Single memory object
623
- result = [result]
624
-
625
- # Ensure result is always a list
626
- if not isinstance(result, list):
627
- result = [result] if result else []
628
-
629
- result_length = len(result) if isinstance(result, list) else 0
283
+ # Log raw result before normalization
630
284
  logger.debug(
631
- f"Raw result from mem0.add(): type={type(result)}, "
632
- f"length={result_length}",
633
- extra={
634
- "app_slug": self.app_slug,
635
- "user_id": user_id,
636
- "result_type": str(type(result)),
637
- "is_list": isinstance(result, list),
638
- "result_length": len(result) if isinstance(result, list) else 0,
639
- "result_sample": (
640
- result[0]
641
- if result and isinstance(result, list) and len(result) > 0
642
- else None
643
- ),
644
- },
285
+ f"mem0.add() raw result: type={type(result)}, "
286
+ f"value={str(result)[:500] if result else 'None'}"
645
287
  )
646
-
288
+ normalized = self._normalize_result(result)
647
289
  logger.info(
648
- f"Added {len(result)} memories for user '{user_id}'",
649
- extra={
650
- "app_slug": self.app_slug,
651
- "user_id": user_id,
652
- "message_count": len(messages),
653
- "memory_count": len(result) if isinstance(result, list) else 0,
654
- "memory_ids": (
655
- [
656
- m.get("id") or m.get("_id")
657
- for m in result
658
- if isinstance(m, dict)
659
- ]
660
- if result
661
- else []
662
- ),
663
- "infer_enabled": add_kwargs.get("infer", False),
664
- "has_llm": (
665
- hasattr(self.memory, "llm") and self.memory.llm is not None
666
- if hasattr(self.memory, "llm")
667
- else False
668
- ),
669
- },
290
+ f"mem0.add() normalized to {len(normalized)} memories "
291
+ f"(raw result type: {type(result)})"
670
292
  )
671
-
672
- # If 0 memories and infer is enabled, log helpful info
673
- if len(result) == 0 and add_kwargs.get("infer", False):
674
- # Extract conversation content for analysis
675
- conversation_text = "\n".join(
676
- [
677
- f"{msg.get('role', 'unknown')}: {msg.get('content', '')[:100]}"
678
- for msg in messages[:5]
679
- ]
293
+ if not normalized and infer:
294
+ logger.warning(
295
+ f"⚠️ mem0.add() with infer=True returned empty result. Raw result: {result}"
680
296
  )
681
-
682
- logger.info(
683
- "ℹ️ mem0.add() returned 0 memories. This is normal if the "
684
- "conversation doesn't contain extractable facts. "
685
- "mem0 extracts personal preferences, facts, and details - "
686
- "not generic greetings or small talk. "
687
- "Try conversations like 'I love pizza' or 'I work as a "
688
- "software engineer' to see memories extracted.",
689
- extra={
690
- "app_slug": self.app_slug,
691
- "user_id": user_id,
692
- "message_count": len(messages),
693
- "infer": True,
694
- "has_llm": (
695
- hasattr(self.memory, "llm") and self.memory.llm is not None
696
- if hasattr(self.memory, "llm")
697
- else False
698
- ),
699
- "conversation_preview": conversation_text,
700
- },
297
+ # Try to understand why - check if it's a dict with empty results
298
+ if isinstance(result, dict):
299
+ logger.warning(f" Result dict keys: {list(result.keys())}")
300
+ if "results" in result:
301
+ logger.warning(f" result['results']: {result['results']}")
302
+ if "data" in result:
303
+ logger.warning(f" result['data']: {result['data']}")
304
+ return normalized
305
+ except (
306
+ ValueError,
307
+ TypeError,
308
+ ConnectionError,
309
+ OSError,
310
+ AttributeError,
311
+ RuntimeError,
312
+ KeyError,
313
+ ) as e:
314
+ error_msg = str(e)
315
+ # Handle rate limit errors gracefully - try storing without inference
316
+ if (
317
+ "429" in error_msg
318
+ or "RESOURCE_EXHAUSTED" in error_msg
319
+ or "rate limit" in error_msg.lower()
320
+ ):
321
+ logger.warning(
322
+ f"Rate limit hit during memory inference, storing without inference: "
323
+ f"{error_msg}"
701
324
  )
702
-
703
- return result
704
-
705
- except (AttributeError, TypeError, ValueError, RuntimeError, KeyError) as e:
706
- logger.error(
707
- f"Failed to add memories: {e}",
708
- exc_info=True,
709
- extra={"app_slug": self.app_slug, "user_id": user_id, "error": str(e)},
710
- )
711
- raise Mem0MemoryServiceError(f"Failed to add memories: {e}") from e
325
+ # Retry without inference to at least store the raw content
326
+ try:
327
+ result = self.memory.add(
328
+ messages=messages,
329
+ user_id=str(user_id) if user_id else None,
330
+ metadata=final_metadata,
331
+ infer=False, # Disable inference to avoid rate limits
332
+ **kwargs,
333
+ )
334
+ logger.info("Successfully stored memory without inference due to rate limit")
335
+ return self._normalize_result(result)
336
+ except (
337
+ ValueError,
338
+ TypeError,
339
+ ConnectionError,
340
+ OSError,
341
+ AttributeError,
342
+ RuntimeError,
343
+ KeyError,
344
+ ) as retry_error:
345
+ logger.exception("Failed to store memory even without inference")
346
+ raise Mem0MemoryServiceError(
347
+ f"Add failed (rate limited, retry also failed): {retry_error}"
348
+ ) from retry_error
349
+ else:
350
+ logger.exception("Mem0 Add Failed")
351
+ raise Mem0MemoryServiceError(f"Add failed: {e}") from e
712
352
 
713
353
  def get_all(
714
354
  self,
715
- user_id: Optional[str] = None,
716
- limit: Optional[int] = None,
717
- retry_on_empty: bool = True,
718
- max_retries: int = 2,
719
- retry_delay: float = 0.5,
355
+ user_id: str | None = None,
356
+ limit: int = 100,
357
+ filters: dict[str, Any] | None = None,
720
358
  **kwargs,
721
- ) -> List[Dict[str, Any]]:
359
+ ) -> list[dict[str, Any]]:
722
360
  """
723
- Get all memories for a user.
724
-
725
- Args:
726
- user_id: User ID to retrieve memories for
727
- limit: Optional limit on number of memories to return
728
- retry_on_empty: If True, retry if result is empty (handles async processing delay)
729
- max_retries: Maximum number of retries if result is empty
730
- retry_delay: Delay in seconds between retries
731
- **kwargs: Additional mem0.get_all() parameters
732
-
733
- Returns:
734
- List of memory dictionaries
361
+ Get all memories with direct database filtering.
735
362
  """
736
- import time
737
-
738
363
  try:
739
- # Verify memory instance is valid before calling
740
- if not hasattr(self, "memory") or self.memory is None:
741
- logger.error(
742
- f"Memory instance is None or missing for app '{self.app_slug}'",
743
- extra={"app_slug": self.app_slug, "user_id": user_id},
744
- )
745
- return []
746
-
747
- logger.info(
748
- f"🟢 CALLING mem0.get_all() - app_slug='{self.app_slug}', "
749
- f"user_id='{user_id}' (type: {type(user_id).__name__}), "
750
- f"collection='{self.collection_name}'",
751
- extra={
752
- "app_slug": self.app_slug,
753
- "user_id": user_id,
754
- "user_id_type": type(user_id).__name__,
755
- "user_id_repr": repr(user_id),
756
- "collection_name": self.collection_name,
757
- "limit": limit,
758
- "kwargs": kwargs,
759
- },
760
- )
761
-
762
- result = None
763
- attempt = 0
764
-
765
- while attempt <= max_retries:
766
- if attempt > 0:
767
- # Wait before retry to allow async processing to complete
768
- time.sleep(retry_delay * attempt) # Exponential backoff
769
- logger.debug(
770
- f"Retrying mem0.get_all (attempt {attempt + 1}/{max_retries + 1})",
771
- extra={
772
- "app_slug": self.app_slug,
773
- "user_id": user_id,
774
- "attempt": attempt + 1,
775
- },
776
- )
777
-
778
- # Call with safety - catch any exceptions from mem0
779
- try:
780
- logger.debug(
781
- f"🟢 EXECUTING: memory.get_all(user_id='{user_id}', "
782
- f"limit={limit}, kwargs={kwargs})",
783
- extra={
784
- "app_slug": self.app_slug,
785
- "user_id": user_id,
786
- "collection_name": self.collection_name,
787
- "attempt": attempt + 1,
788
- },
789
- )
790
- result = self.memory.get_all(
791
- user_id=str(user_id), limit=limit, **kwargs
792
- ) # Ensure string
793
- result_length = (
794
- len(result) if isinstance(result, (list, dict)) else "N/A"
795
- )
796
- logger.debug(
797
- f"🟢 RESULT RECEIVED: type={type(result).__name__}, "
798
- f"length={result_length}",
799
- extra={
800
- "app_slug": self.app_slug,
801
- "user_id": user_id,
802
- "result_type": type(result).__name__,
803
- "result_length": (
804
- len(result) if isinstance(result, (list, dict)) else 0
805
- ),
806
- "attempt": attempt + 1,
807
- },
808
- )
809
- except AttributeError as attr_error:
810
- logger.error(
811
- f"Memory.get_all method not available: {attr_error}",
812
- extra={
813
- "app_slug": self.app_slug,
814
- "user_id": user_id,
815
- "error": str(attr_error),
816
- "attempt": attempt + 1,
817
- },
818
- )
819
- return [] # Return empty list instead of retrying
820
- # Type 4: Let other exceptions bubble up to framework handler
821
-
822
- logger.debug(
823
- f"Raw result from mem0.get_all (attempt {attempt + 1}): type={type(result)}",
824
- extra={
825
- "app_slug": self.app_slug,
826
- "user_id": user_id,
827
- "attempt": attempt + 1,
828
- "result_type": str(type(result)),
829
- "is_dict": isinstance(result, dict),
830
- "is_list": isinstance(result, list),
831
- "result_length": (
832
- len(result) if isinstance(result, (list, dict)) else 0
833
- ),
834
- },
835
- )
836
-
837
- # Handle Mem0 v2 API response format: {"results": [...], "total": N}
838
- if isinstance(result, dict):
839
- if "results" in result:
840
- result = result["results"] # Extract results array
841
- logger.debug(
842
- "Extracted results from dict response",
843
- extra={
844
- "app_slug": self.app_slug,
845
- "user_id": user_id,
846
- "result_count": (
847
- len(result) if isinstance(result, list) else 0
848
- ),
849
- },
850
- )
851
- elif "data" in result:
852
- # Alternative format: {"data": [...]}
853
- result = (
854
- result["data"] if isinstance(result["data"], list) else []
855
- )
856
-
857
- # Ensure result is always a list for backward compatibility
858
- if not isinstance(result, list):
859
- result = [result] if result else []
860
-
861
- # If we got results or retries are disabled, break
862
- if not retry_on_empty or len(result) > 0 or attempt >= max_retries:
863
- break
864
-
865
- attempt += 1
866
-
867
- logger.info(
868
- f"Retrieved {len(result)} memories for user '{user_id}' "
869
- f"(after {attempt + 1} attempt(s))",
870
- extra={
871
- "app_slug": self.app_slug,
872
- "user_id": user_id,
873
- "memory_count": len(result) if isinstance(result, list) else 0,
874
- "attempts": attempt + 1,
875
- "sample_memory": (
876
- result[0]
877
- if result and isinstance(result, list) and len(result) > 0
878
- else None
879
- ),
880
- },
881
- )
364
+ call_kwargs = {"limit": limit}
365
+ if user_id:
366
+ call_kwargs["user_id"] = str(user_id)
367
+ if filters:
368
+ call_kwargs["filters"] = filters # Passed to MongoDB $match
882
369
 
883
- return result
370
+ call_kwargs.update(kwargs)
884
371
 
885
- except (AttributeError, TypeError, ValueError, RuntimeError, KeyError) as e:
886
- attempt_num = (
887
- attempt + 1 if "attempt" in locals() and attempt is not None else 1
888
- )
889
- logger.error(
890
- f"Failed to get memories: {e}",
891
- exc_info=True,
892
- extra={
893
- "app_slug": self.app_slug,
894
- "user_id": user_id,
895
- "error": str(e),
896
- "error_type": type(e).__name__,
897
- "attempt": attempt_num,
898
- },
899
- )
900
- raise Mem0MemoryServiceError(f"Failed to get memories: {e}") from e
372
+ return self._normalize_result(self.memory.get_all(**call_kwargs))
373
+ except (
374
+ ValueError,
375
+ TypeError,
376
+ ConnectionError,
377
+ OSError,
378
+ AttributeError,
379
+ RuntimeError,
380
+ KeyError,
381
+ ):
382
+ logger.exception("Mem0 get_all failed")
383
+ return []
901
384
 
902
385
  def search(
903
386
  self,
904
387
  query: str,
905
- user_id: Optional[str] = None,
906
- limit: Optional[int] = None,
907
- metadata: Optional[Dict[str, Any]] = None,
908
- filters: Optional[Dict[str, Any]] = None,
388
+ user_id: str | None = None,
389
+ limit: int = 5,
390
+ filters: dict[str, Any] | None = None,
909
391
  **kwargs,
910
- ) -> List[Dict[str, Any]]:
392
+ ) -> list[dict[str, Any]]:
911
393
  """
912
- Search for relevant memories using semantic search.
913
-
914
- Args:
915
- query: Search query string
916
- user_id: Optional user ID to scope search to
917
- limit: Optional limit on number of results
918
- metadata: Optional metadata dict to filter results
919
- (e.g., {"category": "travel"})
920
- Deprecated in favor of 'filters' parameter for Mem0 1.0.0+
921
- filters: Optional enhanced filters dict (Mem0 1.0.0+) with operators
922
- like {"category": {"eq": "travel"}}
923
- **kwargs: Additional mem0.search() parameters
924
-
925
- Returns:
926
- List of relevant memory dictionaries
927
-
928
- Example:
929
- ```python
930
- # Simple metadata filter (backward compatible)
931
- results = memory_service.search(
932
- query="What are my travel plans?",
933
- user_id="alice",
934
- metadata={"category": "travel"}
935
- )
936
-
937
- # Enhanced filters (Mem0 1.0.0+)
938
- results = memory_service.search(
939
- query="high priority tasks",
940
- user_id="alice",
941
- filters={
942
- "AND": [
943
- {"category": "work"},
944
- {"priority": {"gte": 5}}
945
- ]
946
- }
947
- )
948
- ```
949
- """
950
- try:
951
- # Build search kwargs
952
- search_kwargs = {"limit": limit, **kwargs}
953
-
954
- # Prefer 'filters' parameter (Mem0 1.0.0+) over 'metadata' (legacy)
955
- if filters is not None:
956
- search_kwargs["filters"] = filters
957
- elif metadata:
958
- # Backward compatibility: convert simple metadata to filters format
959
- # Try 'filters' first, fallback to 'metadata' if it fails
960
- search_kwargs["filters"] = metadata
961
-
962
- # Call search - try with filters first, fallback to metadata if needed
963
- try:
964
- result = self.memory.search(
965
- query=query, user_id=user_id, **search_kwargs
966
- )
967
- except (TypeError, ValueError) as e:
968
- # If filters parameter doesn't work, try with metadata (backward compatibility)
969
- if "filters" in search_kwargs and metadata:
970
- logger.debug(
971
- f"Filters parameter failed, trying metadata parameter: {e}",
972
- extra={"app_slug": self.app_slug, "user_id": user_id},
973
- )
974
- search_kwargs.pop("filters", None)
975
- search_kwargs["metadata"] = metadata
976
- result = self.memory.search(
977
- query=query, user_id=user_id, **search_kwargs
978
- )
979
- else:
980
- raise
981
-
982
- # Handle response format - search may return dict with "results" key
983
- if isinstance(result, dict):
984
- if "results" in result:
985
- result = result["results"]
986
- elif "data" in result:
987
- result = result["data"] if isinstance(result["data"], list) else []
988
-
989
- # Ensure result is always a list
990
- if not isinstance(result, list):
991
- result = [result] if result else []
992
-
993
- logger.debug(
994
- f"Searched memories for user '{user_id}'",
995
- extra={
996
- "app_slug": self.app_slug,
997
- "user_id": user_id,
998
- "query": query,
999
- "metadata_filter": metadata,
1000
- "filters": filters,
1001
- "result_count": len(result) if isinstance(result, list) else 0,
1002
- },
1003
- )
1004
-
1005
- return result
1006
-
1007
- except (AttributeError, TypeError, ValueError, RuntimeError, KeyError) as e:
1008
- logger.error(
1009
- f"Failed to search memories: {e}",
1010
- exc_info=True,
1011
- extra={
1012
- "app_slug": self.app_slug,
1013
- "user_id": user_id,
1014
- "query": query,
1015
- "metadata": metadata,
1016
- "filters": filters,
1017
- "error": str(e),
1018
- },
1019
- )
1020
- raise Mem0MemoryServiceError(f"Failed to search memories: {e}") from e
1021
-
1022
- def get(
1023
- self, memory_id: str, user_id: Optional[str] = None, **kwargs
1024
- ) -> Dict[str, Any]:
1025
- """
1026
- Get a single memory by ID.
1027
-
1028
- Args:
1029
- memory_id: Memory ID to retrieve
1030
- user_id: Optional user ID for scoping
1031
- **kwargs: Additional mem0.get() parameters
1032
-
1033
- Returns:
1034
- Memory dictionary
1035
-
1036
- Example:
1037
- ```python
1038
- memory = memory_service.get(memory_id="mem_123", user_id="alice")
1039
- ```
1040
- """
1041
- try:
1042
- # Mem0's get() method doesn't accept user_id as a parameter
1043
- # User scoping should be handled via metadata or filters if needed
1044
- # For now, we just get by memory_id
1045
- result = self.memory.get(memory_id=memory_id, **kwargs)
1046
-
1047
- # If user_id is provided, verify the memory belongs to that user
1048
- # by checking metadata or user_id field in the result
1049
- if user_id and isinstance(result, dict):
1050
- result_user_id = result.get("user_id") or result.get(
1051
- "metadata", {}
1052
- ).get("user_id")
1053
- if result_user_id and result_user_id != user_id:
1054
- logger.warning(
1055
- f"Memory {memory_id} does not belong to user {user_id}",
1056
- extra={
1057
- "memory_id": memory_id,
1058
- "user_id": user_id,
1059
- "result_user_id": result_user_id,
1060
- },
1061
- )
1062
- raise Mem0MemoryServiceError(
1063
- f"Memory {memory_id} does not belong to user {user_id}"
1064
- )
1065
-
1066
- logger.debug(
1067
- f"Retrieved memory '{memory_id}' for user '{user_id}'",
1068
- extra={
1069
- "app_slug": self.app_slug,
1070
- "user_id": user_id,
1071
- "memory_id": memory_id,
1072
- },
1073
- )
1074
-
1075
- return result
1076
-
1077
- except (AttributeError, TypeError, ValueError, RuntimeError, KeyError) as e:
1078
- logger.error(
1079
- f"Failed to get memory: {e}",
1080
- exc_info=True,
1081
- extra={
1082
- "app_slug": self.app_slug,
1083
- "user_id": user_id,
1084
- "memory_id": memory_id,
1085
- "error": str(e),
1086
- },
1087
- )
1088
- raise Mem0MemoryServiceError(f"Failed to get memory: {e}") from e
1089
-
1090
- def update(
1091
- self,
1092
- memory_id: str,
1093
- data: Union[str, List[Dict[str, str]]],
1094
- user_id: Optional[str] = None,
1095
- metadata: Optional[Dict[str, Any]] = None,
1096
- **kwargs,
1097
- ) -> Dict[str, Any]:
394
+ Semantic search with metadata filters, scoped per user.
1098
395
  """
1099
- Update a memory by ID with new data.
1100
-
1101
- Args:
1102
- memory_id: Memory ID to update
1103
- data: New data (string or list of message dicts with 'role' and 'content')
1104
- user_id: Optional user ID for scoping
1105
- metadata: Optional metadata dict to update
1106
- **kwargs: Additional mem0.update() parameters
396
+ final_filters = filters or {}
1107
397
 
1108
- Returns:
1109
- Updated memory dictionary
1110
-
1111
- Example:
1112
- ```python
1113
- updated = memory_service.update(
1114
- memory_id="mem_123",
1115
- data="I am a software engineer using Python and FastAPI.",
1116
- user_id="bob"
1117
- )
1118
- ```
1119
- """
1120
398
  try:
1121
- # Normalize data format
1122
- if isinstance(data, str):
1123
- data = [{"role": "user", "content": data}]
1124
-
1125
- # Mem0's update() may not accept user_id directly
1126
- # Pass it in metadata if user_id is provided
1127
- update_metadata = metadata or {}
1128
- if user_id:
1129
- update_metadata["user_id"] = user_id
399
+ call_kwargs = {"limit": limit}
400
+ if final_filters:
401
+ call_kwargs["filters"] = final_filters
1130
402
 
1131
- # Try with user_id first, fall back without it if it fails
1132
- try:
1133
- result = self.memory.update(
1134
- memory_id=memory_id,
1135
- data=data,
1136
- user_id=user_id,
1137
- metadata=update_metadata,
1138
- **kwargs,
403
+ return self._normalize_result(
404
+ self.memory.search(
405
+ query=query, user_id=str(user_id) if user_id else None, **call_kwargs, **kwargs
1139
406
  )
1140
- except TypeError as e:
1141
- if "unexpected keyword argument 'user_id'" in str(e):
1142
- # Mem0 doesn't accept user_id, try without it
1143
- result = self.memory.update(
1144
- memory_id=memory_id,
1145
- data=data,
1146
- metadata=update_metadata,
1147
- **kwargs,
1148
- )
1149
- else:
1150
- raise
1151
-
1152
- logger.info(
1153
- f"Updated memory '{memory_id}' for user '{user_id}'",
1154
- extra={
1155
- "app_slug": self.app_slug,
1156
- "user_id": user_id,
1157
- "memory_id": memory_id,
1158
- },
1159
- )
1160
-
1161
- return result
1162
-
1163
- except (AttributeError, TypeError, ValueError, RuntimeError, KeyError) as e:
1164
- logger.error(
1165
- f"Failed to update memory: {e}",
1166
- exc_info=True,
1167
- extra={
1168
- "app_slug": self.app_slug,
1169
- "user_id": user_id,
1170
- "memory_id": memory_id,
1171
- "error": str(e),
1172
- },
1173
407
  )
1174
- raise Mem0MemoryServiceError(f"Failed to update memory: {e}") from e
1175
-
1176
- def delete(self, memory_id: str, user_id: Optional[str] = None, **kwargs) -> bool:
1177
- """
1178
- Delete a memory by ID.
1179
-
1180
- Args:
1181
- memory_id: Memory ID to delete
1182
- user_id: Optional user ID for scoping
1183
- **kwargs: Additional mem0.delete() parameters
408
+ except (
409
+ ValueError,
410
+ TypeError,
411
+ ConnectionError,
412
+ OSError,
413
+ AttributeError,
414
+ RuntimeError,
415
+ KeyError,
416
+ ):
417
+ logger.exception("Mem0 search failed")
418
+ return []
1184
419
 
1185
- Returns:
1186
- True if deletion was successful
1187
- """
420
+ def get(self, memory_id: str, user_id: str | None = None, **kwargs) -> dict[str, Any]:
1188
421
  try:
1189
- # Mem0's delete() may not accept user_id directly
1190
- # Try with user_id first, fall back without it if it fails
1191
- try:
1192
- result = self.memory.delete(
1193
- memory_id=memory_id, user_id=user_id, **kwargs
1194
- )
1195
- except TypeError as e:
1196
- if "unexpected keyword argument 'user_id'" in str(e):
1197
- # Mem0 doesn't accept user_id, try without it
1198
- # User scoping should be handled via metadata or filters
1199
- result = self.memory.delete(memory_id=memory_id, **kwargs)
1200
- else:
1201
- raise
1202
-
1203
- logger.info(
1204
- f"Deleted memory '{memory_id}' for user '{user_id}'",
1205
- extra={
1206
- "app_slug": self.app_slug,
1207
- "user_id": user_id,
1208
- "memory_id": memory_id,
1209
- },
1210
- )
1211
-
1212
- return result
1213
-
1214
- except (AttributeError, TypeError, ValueError, RuntimeError, KeyError) as e:
1215
- logger.error(
1216
- f"Failed to delete memory: {e}",
1217
- exc_info=True,
1218
- extra={
1219
- "app_slug": self.app_slug,
1220
- "user_id": user_id,
1221
- "memory_id": memory_id,
1222
- "error": str(e),
1223
- },
1224
- )
1225
- raise Mem0MemoryServiceError(f"Failed to delete memory: {e}") from e
1226
-
1227
- def delete_all(self, user_id: Optional[str] = None, **kwargs) -> bool:
1228
- """
1229
- Delete all memories for a user.
1230
-
1231
- Args:
1232
- user_id: User ID to delete all memories for
1233
- **kwargs: Additional mem0.delete_all() parameters
1234
-
1235
- Returns:
1236
- True if deletion was successful
422
+ return self.memory.get(memory_id, **kwargs)
423
+ except (
424
+ ValueError,
425
+ TypeError,
426
+ ConnectionError,
427
+ OSError,
428
+ AttributeError,
429
+ RuntimeError,
430
+ KeyError,
431
+ ):
432
+ return None
1237
433
 
1238
- Example:
1239
- ```python
1240
- success = memory_service.delete_all(user_id="alice")
1241
- ```
1242
- """
434
+ def delete(self, memory_id: str, user_id: str | None = None, **kwargs) -> bool:
1243
435
  try:
1244
- result = self.memory.delete_all(user_id=user_id, **kwargs)
1245
-
1246
- logger.info(
1247
- f"Deleted all memories for user '{user_id}'",
1248
- extra={"app_slug": self.app_slug, "user_id": user_id},
1249
- )
436
+ self.memory.delete(memory_id, **kwargs)
437
+ return True
438
+ except (
439
+ AttributeError,
440
+ ValueError,
441
+ RuntimeError,
442
+ KeyError,
443
+ TypeError,
444
+ ConnectionError,
445
+ OSError,
446
+ ):
447
+ return False
1250
448
 
449
+ def delete_all(self, user_id: str | None = None, **kwargs) -> bool:
450
+ try:
451
+ self.memory.delete_all(user_id=user_id, **kwargs)
452
+ return True
453
+ except (
454
+ AttributeError,
455
+ ValueError,
456
+ RuntimeError,
457
+ KeyError,
458
+ TypeError,
459
+ ConnectionError,
460
+ OSError,
461
+ ):
462
+ return False
463
+
464
+ def _normalize_result(self, result: Any) -> list[dict[str, Any]]:
465
+ """Normalize Mem0's return type (dict vs list)."""
466
+ if result is None:
467
+ return []
468
+ if isinstance(result, dict):
469
+ if "results" in result:
470
+ return result["results"]
471
+ if "data" in result:
472
+ return result["data"]
473
+ return [result]
474
+ if isinstance(result, list):
1251
475
  return result
476
+ return []
1252
477
 
1253
- except (AttributeError, TypeError, ValueError, RuntimeError, KeyError) as e:
1254
- logger.error(
1255
- f"Failed to delete all memories: {e}",
1256
- exc_info=True,
1257
- extra={"app_slug": self.app_slug, "user_id": user_id, "error": str(e)},
1258
- )
1259
- raise Mem0MemoryServiceError(f"Failed to delete all memories: {e}") from e
1260
-
1261
-
1262
- def get_memory_service(
1263
- mongo_uri: str, db_name: str, app_slug: str, config: Optional[Dict[str, Any]] = None
1264
- ) -> Mem0MemoryService:
1265
- """
1266
- Get or create a Mem0MemoryService instance (cached).
1267
-
1268
- Args:
1269
- mongo_uri: MongoDB connection URI
1270
- db_name: Database name
1271
- app_slug: App slug
1272
- config: Optional memory configuration dict
1273
-
1274
- Returns:
1275
- Mem0MemoryService instance
1276
- """
1277
- # Lazy check for mem0 availability
1278
- if not _check_mem0_available():
1279
- raise Mem0MemoryServiceError(
1280
- "Mem0 dependencies not available. Install with: pip install mem0ai"
1281
- )
1282
478
 
1283
- return Mem0MemoryService(
1284
- mongo_uri=mongo_uri, db_name=db_name, app_slug=app_slug, config=config
1285
- )
479
+ def get_memory_service(mongo_uri, db_name, app_slug, config=None):
480
+ return Mem0MemoryService(mongo_uri, db_name, app_slug, config)