powermem 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (123)
  1. powermem/__init__.py +103 -0
  2. powermem/agent/__init__.py +35 -0
  3. powermem/agent/abstract/__init__.py +22 -0
  4. powermem/agent/abstract/collaboration.py +259 -0
  5. powermem/agent/abstract/context.py +187 -0
  6. powermem/agent/abstract/manager.py +232 -0
  7. powermem/agent/abstract/permission.py +217 -0
  8. powermem/agent/abstract/privacy.py +267 -0
  9. powermem/agent/abstract/scope.py +199 -0
  10. powermem/agent/agent.py +791 -0
  11. powermem/agent/components/__init__.py +18 -0
  12. powermem/agent/components/collaboration_coordinator.py +645 -0
  13. powermem/agent/components/permission_controller.py +586 -0
  14. powermem/agent/components/privacy_protector.py +767 -0
  15. powermem/agent/components/scope_controller.py +685 -0
  16. powermem/agent/factories/__init__.py +16 -0
  17. powermem/agent/factories/agent_factory.py +266 -0
  18. powermem/agent/factories/config_factory.py +308 -0
  19. powermem/agent/factories/memory_factory.py +229 -0
  20. powermem/agent/implementations/__init__.py +16 -0
  21. powermem/agent/implementations/hybrid.py +728 -0
  22. powermem/agent/implementations/multi_agent.py +1040 -0
  23. powermem/agent/implementations/multi_user.py +1020 -0
  24. powermem/agent/types.py +53 -0
  25. powermem/agent/wrappers/__init__.py +14 -0
  26. powermem/agent/wrappers/agent_memory_wrapper.py +427 -0
  27. powermem/agent/wrappers/compatibility_wrapper.py +520 -0
  28. powermem/config_loader.py +318 -0
  29. powermem/configs.py +249 -0
  30. powermem/core/__init__.py +19 -0
  31. powermem/core/async_memory.py +1493 -0
  32. powermem/core/audit.py +258 -0
  33. powermem/core/base.py +165 -0
  34. powermem/core/memory.py +1567 -0
  35. powermem/core/setup.py +162 -0
  36. powermem/core/telemetry.py +215 -0
  37. powermem/integrations/__init__.py +17 -0
  38. powermem/integrations/embeddings/__init__.py +13 -0
  39. powermem/integrations/embeddings/aws_bedrock.py +100 -0
  40. powermem/integrations/embeddings/azure_openai.py +55 -0
  41. powermem/integrations/embeddings/base.py +31 -0
  42. powermem/integrations/embeddings/config/base.py +132 -0
  43. powermem/integrations/embeddings/configs.py +31 -0
  44. powermem/integrations/embeddings/factory.py +48 -0
  45. powermem/integrations/embeddings/gemini.py +39 -0
  46. powermem/integrations/embeddings/huggingface.py +41 -0
  47. powermem/integrations/embeddings/langchain.py +35 -0
  48. powermem/integrations/embeddings/lmstudio.py +29 -0
  49. powermem/integrations/embeddings/mock.py +11 -0
  50. powermem/integrations/embeddings/ollama.py +53 -0
  51. powermem/integrations/embeddings/openai.py +49 -0
  52. powermem/integrations/embeddings/qwen.py +102 -0
  53. powermem/integrations/embeddings/together.py +31 -0
  54. powermem/integrations/embeddings/vertexai.py +54 -0
  55. powermem/integrations/llm/__init__.py +18 -0
  56. powermem/integrations/llm/anthropic.py +87 -0
  57. powermem/integrations/llm/base.py +132 -0
  58. powermem/integrations/llm/config/anthropic.py +56 -0
  59. powermem/integrations/llm/config/azure.py +56 -0
  60. powermem/integrations/llm/config/base.py +62 -0
  61. powermem/integrations/llm/config/deepseek.py +56 -0
  62. powermem/integrations/llm/config/ollama.py +56 -0
  63. powermem/integrations/llm/config/openai.py +79 -0
  64. powermem/integrations/llm/config/qwen.py +68 -0
  65. powermem/integrations/llm/config/qwen_asr.py +46 -0
  66. powermem/integrations/llm/config/vllm.py +56 -0
  67. powermem/integrations/llm/configs.py +26 -0
  68. powermem/integrations/llm/deepseek.py +106 -0
  69. powermem/integrations/llm/factory.py +118 -0
  70. powermem/integrations/llm/gemini.py +201 -0
  71. powermem/integrations/llm/langchain.py +65 -0
  72. powermem/integrations/llm/ollama.py +106 -0
  73. powermem/integrations/llm/openai.py +166 -0
  74. powermem/integrations/llm/openai_structured.py +80 -0
  75. powermem/integrations/llm/qwen.py +207 -0
  76. powermem/integrations/llm/qwen_asr.py +171 -0
  77. powermem/integrations/llm/vllm.py +106 -0
  78. powermem/integrations/rerank/__init__.py +20 -0
  79. powermem/integrations/rerank/base.py +43 -0
  80. powermem/integrations/rerank/config/__init__.py +7 -0
  81. powermem/integrations/rerank/config/base.py +27 -0
  82. powermem/integrations/rerank/configs.py +23 -0
  83. powermem/integrations/rerank/factory.py +68 -0
  84. powermem/integrations/rerank/qwen.py +159 -0
  85. powermem/intelligence/__init__.py +17 -0
  86. powermem/intelligence/ebbinghaus_algorithm.py +354 -0
  87. powermem/intelligence/importance_evaluator.py +361 -0
  88. powermem/intelligence/intelligent_memory_manager.py +284 -0
  89. powermem/intelligence/manager.py +148 -0
  90. powermem/intelligence/plugin.py +229 -0
  91. powermem/prompts/__init__.py +29 -0
  92. powermem/prompts/graph/graph_prompts.py +217 -0
  93. powermem/prompts/graph/graph_tools_prompts.py +469 -0
  94. powermem/prompts/importance_evaluation.py +246 -0
  95. powermem/prompts/intelligent_memory_prompts.py +163 -0
  96. powermem/prompts/templates.py +193 -0
  97. powermem/storage/__init__.py +14 -0
  98. powermem/storage/adapter.py +896 -0
  99. powermem/storage/base.py +109 -0
  100. powermem/storage/config/base.py +13 -0
  101. powermem/storage/config/oceanbase.py +58 -0
  102. powermem/storage/config/pgvector.py +52 -0
  103. powermem/storage/config/sqlite.py +27 -0
  104. powermem/storage/configs.py +159 -0
  105. powermem/storage/factory.py +59 -0
  106. powermem/storage/migration_manager.py +438 -0
  107. powermem/storage/oceanbase/__init__.py +8 -0
  108. powermem/storage/oceanbase/constants.py +162 -0
  109. powermem/storage/oceanbase/oceanbase.py +1384 -0
  110. powermem/storage/oceanbase/oceanbase_graph.py +1441 -0
  111. powermem/storage/pgvector/__init__.py +7 -0
  112. powermem/storage/pgvector/pgvector.py +420 -0
  113. powermem/storage/sqlite/__init__.py +0 -0
  114. powermem/storage/sqlite/sqlite.py +218 -0
  115. powermem/storage/sqlite/sqlite_vector_store.py +311 -0
  116. powermem/utils/__init__.py +35 -0
  117. powermem/utils/utils.py +605 -0
  118. powermem/version.py +23 -0
  119. powermem-0.1.0.dist-info/METADATA +187 -0
  120. powermem-0.1.0.dist-info/RECORD +123 -0
  121. powermem-0.1.0.dist-info/WHEEL +5 -0
  122. powermem-0.1.0.dist-info/licenses/LICENSE +206 -0
  123. powermem-0.1.0.dist-info/top_level.txt +1 -0
powermem/core/memory.py
@@ -0,0 +1,1567 @@
+ """
+ Synchronous memory management implementation
+ 
+ This module provides the synchronous memory management interface.
+ """
+ 
+ import logging
+ import hashlib
+ import json
+ from typing import Any, Dict, List, Optional, Union
+ from datetime import datetime
+ from copy import deepcopy
+ 
+ from .base import MemoryBase
+ from ..configs import MemoryConfig
+ from ..storage.factory import VectorStoreFactory, GraphStoreFactory
+ from ..storage.adapter import StorageAdapter, SubStorageAdapter
+ from ..intelligence.manager import IntelligenceManager
+ from ..integrations.llm.factory import LLMFactory
+ from ..integrations.embeddings.factory import EmbedderFactory
+ from ..integrations.rerank.factory import RerankFactory
+ from .telemetry import TelemetryManager
+ from .audit import AuditLogger
+ from ..intelligence.plugin import IntelligentMemoryPlugin, EbbinghausIntelligencePlugin
+ from ..utils.utils import remove_code_blocks, convert_config_object_to_dict, parse_vision_messages
+ from ..prompts.intelligent_memory_prompts import (
+     FACT_RETRIEVAL_PROMPT,
+     FACT_EXTRACTION_PROMPT,
+     get_memory_update_prompt,
+     parse_messages_for_facts
+ )
+ 
+ logger = logging.getLogger(__name__)
+ 
+ 
+ def _auto_convert_config(config: Dict[str, Any]) -> Dict[str, Any]:
+     """
+     Convert a legacy powermem config to the current format for compatibility.
+ 
+     powermem now uses the new field names directly.
+ 
+     Args:
+         config: Configuration dictionary (legacy format)
+ 
+     Returns:
+         Converted configuration dictionary
+     """
+     if not config:
+         return config
+ 
+     # First, convert any ConfigObject instances to dicts
+     config = convert_config_object_to_dict(config)
+ 
+     # Check for the legacy powermem format (has "database", or "llm" plus "embedding")
+     if "database" in config or ("llm" in config and "embedding" in config):
+         converted = config.copy()
+ 
+         # Keep llm as-is
+         if "llm" in config:
+             converted["llm"] = config["llm"]
+ 
+         # Convert embedding to embedder
+         if "embedding" in config:
+             converted["embedder"] = config["embedding"]
+             converted.pop("embedding", None)
+ 
+         # Convert database to vector_store
+         if "database" in config:
+             db_config = config["database"]
+             converted["vector_store"] = {
+                 "provider": db_config.get("provider", "oceanbase"),
+                 "config": db_config.get("config", {})
+             }
+             converted.pop("database", None)
+         elif "vector_store" not in converted:
+             converted["vector_store"] = {
+                 "provider": "oceanbase",
+                 "config": {}
+             }
+ 
+         logger.info("Converted legacy powermem config format")
+         return converted
+ 
+     return config
+ 
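For illustration, a minimal sketch of the mapping this helper performs on a legacy-style dict (keys other than `database` and `embedding` pass through unchanged):

```python
legacy = {
    "database": {"provider": "oceanbase", "config": {"host": "127.0.0.1"}},
    "llm": {"provider": "qwen", "config": {}},
    "embedding": {"provider": "qwen", "config": {}},
}
converted = _auto_convert_config(legacy)
# converted == {
#     "llm": {"provider": "qwen", "config": {}},
#     "embedder": {"provider": "qwen", "config": {}},
#     "vector_store": {"provider": "oceanbase", "config": {"host": "127.0.0.1"}},
# }
```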
+ 
+ class Memory(MemoryBase):
+     """
+     Synchronous memory management implementation.
+ 
+     This class provides the main interface for synchronous memory operations.
+     """
+ 
+     def __init__(
+         self,
+         config: Optional[Dict[str, Any] | MemoryConfig] = None,
+         storage_type: Optional[str] = None,
+         llm_provider: Optional[str] = None,
+         embedding_provider: Optional[str] = None,
+         agent_id: Optional[str] = None,
+     ):
+         """
+         Initialize the memory manager.
+ 
+         Compatible with both dict configs and MemoryConfig objects.
+ 
+         Args:
+             config: Configuration dictionary or MemoryConfig object containing all settings.
+                 Dict format supports the current style (llm, embedder, vector_store)
+                 and the legacy powermem style (database, llm, embedding)
+             storage_type: Type of storage backend to use (overrides config)
+             llm_provider: LLM provider to use (overrides config)
+             embedding_provider: Embedding provider to use (overrides config)
+             agent_id: Agent identifier for multi-agent scenarios
+ 
+         Example:
+             ```python
+             # Method 1: Using a MemoryConfig object (recommended)
+             config = MemoryConfig(
+                 vector_store=VectorStoreConfig(provider="oceanbase", config={...}),
+                 llm=LlmConfig(provider="qwen", config={...}),
+                 embedder=EmbedderConfig(provider="qwen", config={...})
+             )
+             memory = Memory(config)
+ 
+             # Method 2: Using a dict (backward compatible - legacy powermem style)
+             memory = Memory({
+                 "database": {"provider": "oceanbase", "config": {...}},
+                 "llm": {"provider": "qwen", "config": {...}},
+             })
+ 
+             # Method 3: Using a dict (current style)
+             memory = Memory({
+                 "llm": {"provider": "openai", "config": {...}},
+                 "embedder": {"provider": "openai", "config": {...}},
+                 "vector_store": {"provider": "chroma", "config": {...}},
+             })
+             ```
+         """
+         # Handle MemoryConfig object or dict
+         if isinstance(config, MemoryConfig):
+             # Use the MemoryConfig object directly
+             self.memory_config = config
+             # For backward compatibility, also store as dict
+             self.config = config.model_dump()
+         else:
+             # Convert dict config
+             dict_config = config or {}
+             dict_config = _auto_convert_config(dict_config)
+             self.config = dict_config
+             # Try to create a MemoryConfig from the dict; fall back to dict mode on failure
+             try:
+                 self.memory_config = MemoryConfig(**dict_config)
+             except Exception as e:
+                 logger.warning(f"Could not parse config as MemoryConfig: {e}, using dict mode")
+                 self.memory_config = None
+ 
+         self.agent_id = agent_id
+ 
+         # Extract providers from config with fallbacks
+         self.storage_type = storage_type or self._get_provider('vector_store', 'oceanbase')
+         self.llm_provider = llm_provider or self._get_provider('llm', 'mock')
+         self.embedding_provider = embedding_provider or self._get_provider('embedder', 'mock')
+ 
+         # Initialize the reranker if configured
+         reranker = None
+         if self.memory_config and hasattr(self.memory_config, 'reranker'):
+             rerank_obj = self.memory_config.reranker
+             if rerank_obj.enabled:
+                 try:
+                     provider = rerank_obj.provider
+                     reranker_params = rerank_obj.config if rerank_obj.config else {}
+                     reranker = RerankFactory.create(provider, reranker_params)
+                     logger.info(f"Reranker initialized from MemoryConfig: {provider}")
+                 except Exception as e:
+                     logger.warning(f"Failed to initialize reranker from MemoryConfig: {e}")
+                     reranker = None
+         else:
+             rerank_config = self.config.get('reranker', {})
+             if rerank_config is not None and rerank_config.get('enabled', False):
+                 try:
+                     provider = rerank_config.get('provider', 'qwen')
+                     reranker_params = rerank_config.get('config', {})
+                     reranker = RerankFactory.create(provider, reranker_params)
+                     logger.info(f"Reranker initialized from JSON config: {provider}")
+                 except Exception as e:
+                     logger.warning(f"Failed to initialize reranker from JSON config: {e}")
+                     reranker = None
+ 
+         # Initialize components
+         vector_store_config = self._get_component_config('vector_store')
+ 
+         # Pass the reranker to the vector store if it is OceanBase
+         if self.storage_type.lower() == 'oceanbase' and reranker:
+             vector_store_config['reranker'] = reranker
+             logger.debug("Reranker passed to OceanBase vector store")
+ 
+         vector_store = VectorStoreFactory.create(self.storage_type, vector_store_config)
+ 
+         # Extract graph_store config
+         self.enable_graph = self._get_graph_enabled()
+         self.graph_store = None
+         if self.enable_graph:
+             logger.debug("Graph store enabled")
+             graph_store_config = self.config.get("graph_store", {})
+             if graph_store_config:
+                 provider = graph_store_config.get("provider", "oceanbase")
+                 config_to_pass = self.memory_config if self.memory_config else self.config
+                 self.graph_store = GraphStoreFactory.create(provider, config_to_pass)
+ 
+         # Extract LLM config
+         llm_config = self._get_component_config('llm')
+         self.llm = LLMFactory.create(self.llm_provider, llm_config)
+ 
+         # Extract audio_llm config (optional, for audio transcription)
+         audio_llm_config = self._get_component_config('audio_llm')
+         audio_llm_provider = self._get_provider('audio_llm', None)
+         self.audio_llm = None
+         if audio_llm_provider and audio_llm_config:
+             try:
+                 self.audio_llm = LLMFactory.create(audio_llm_provider, audio_llm_config)
+                 logger.info(f"Audio LLM initialized: {audio_llm_provider}")
+             except Exception as e:
+                 logger.warning(f"Failed to initialize audio_llm: {e}")
+ 
+         # Extract embedder config
+         embedder_config = self._get_component_config('embedder')
+         self.embedding = EmbedderFactory.create(self.embedding_provider, embedder_config, None)
+ 
+         # Initialize the storage adapter with the embedding service.
+         # The adapter is selected automatically based on the sub_stores configuration.
+         sub_stores_list = self.config.get('sub_stores', [])
+         if sub_stores_list and self.storage_type.lower() == 'oceanbase':
+             # Use SubStorageAdapter if sub stores are configured and OceanBase is in use
+             self.storage = SubStorageAdapter(vector_store, self.embedding)
+             logger.info("Using SubStorageAdapter with sub-store support")
+         else:
+             if sub_stores_list:
+                 logger.warning("The sub_stores feature currently only supports OceanBase")
+             self.storage = StorageAdapter(vector_store, self.embedding)
+             logger.info("Using basic StorageAdapter")
+ 
+         self.intelligence = IntelligenceManager(self.config)
+         self.telemetry = TelemetryManager(self.config)
+         self.audit = AuditLogger(self.config)
+ 
+         # Save custom prompts from config
+         if self.memory_config:
+             self.custom_fact_extraction_prompt = self.memory_config.custom_fact_extraction_prompt
+             self.custom_update_memory_prompt = self.memory_config.custom_update_memory_prompt
+         else:
+             self.custom_fact_extraction_prompt = self.config.get('custom_fact_extraction_prompt')
+             self.custom_update_memory_prompt = self.config.get('custom_update_memory_prompt')
+ 
+         # Intelligent memory plugin (pluggable)
+         merged_cfg = self._get_intelligent_memory_config()
+ 
+         plugin_type = merged_cfg.get("plugin", "ebbinghaus")
+         self._intelligence_plugin: Optional[IntelligentMemoryPlugin] = None
+         if merged_cfg.get("enabled", False):
+             try:
+                 if plugin_type == "ebbinghaus":
+                     self._intelligence_plugin = EbbinghausIntelligencePlugin(merged_cfg)
+                 else:
+                     logger.warning(f"Unknown intelligence plugin: {plugin_type}")
+             except Exception as e:
+                 logger.warning(f"Failed to initialize intelligence plugin: {e}")
+                 self._intelligence_plugin = None
+ 
+         # Sub stores configuration (supports multiple stores)
+         self.sub_stores_config: List[Dict] = []
+ 
+         # Initialize sub stores
+         self._init_sub_stores()
+ 
+         logger.info(f"Memory initialized with storage: {self.storage_type}, LLM: {self.llm_provider}, agent: {self.agent_id or 'default'}")
+         self.telemetry.capture_event("memory.init", {"storage_type": self.storage_type, "llm_provider": self.llm_provider, "agent_id": self.agent_id})
+ 
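As a sketch of the optional branches above, a dict config that enables the reranker and the graph store; the top-level keys are the ones `__init__` reads, while the provider settings inside each `config` are placeholders, not a tested setup:

```python
memory = Memory({
    "llm": {"provider": "qwen", "config": {}},
    "embedder": {"provider": "qwen", "config": {}},
    "vector_store": {"provider": "oceanbase", "config": {"collection_name": "memories"}},
    "reranker": {"enabled": True, "provider": "qwen", "config": {}},  # passed into the OceanBase store
    "graph_store": {"enabled": True, "provider": "oceanbase"},        # turns on relation extraction
})
```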
+     def _get_provider(self, component: str, default: str) -> str:
+         """
+         Helper method to get a component's provider uniformly.
+ 
+         Args:
+             component: Component name ('vector_store', 'llm', 'embedder')
+             default: Default provider name
+ 
+         Returns:
+             Provider name string
+         """
+         if self.memory_config:
+             component_obj = getattr(self.memory_config, component, None)
+             return component_obj.provider if component_obj else default
+         else:
+             return self.config.get(component, {}).get('provider', default)
+ 
+     def _get_component_config(self, component: str) -> Dict[str, Any]:
+         """
+         Helper method to get a component's configuration uniformly.
+ 
+         Args:
+             component: Component name ('vector_store', 'llm', 'embedder', 'graph_store')
+ 
+         Returns:
+             Component configuration dictionary
+         """
+         if self.memory_config:
+             component_obj = getattr(self.memory_config, component, None)
+             return component_obj.config or {} if component_obj else {}
+         else:
+             return self.config.get(component, {}).get('config', {})
+ 
+     def _get_graph_enabled(self) -> bool:
+         """
+         Helper method to get the graph store enabled status.
+ 
+         Returns:
+             Boolean indicating whether the graph store is enabled
+         """
+         if self.memory_config:
+             return self.memory_config.graph_store.enabled if self.memory_config.graph_store else False
+         else:
+             graph_store_config = self.config.get('graph_store', {})
+             return graph_store_config.get('enabled', False) if graph_store_config else False
+ 
+     def _get_intelligent_memory_config(self) -> Dict[str, Any]:
+         """
+         Helper method to get the intelligent memory configuration.
+         Supports both "intelligence" and "intelligent_memory" config keys for backward compatibility.
+ 
+         Returns:
+             Merged intelligent memory configuration dictionary
+         """
+         if self.memory_config and self.memory_config.intelligent_memory:
+             # Use MemoryConfig's intelligent_memory
+             cfg = self.memory_config.intelligent_memory.model_dump()
+             # Merge custom_importance_evaluation_prompt from the top level if present
+             if self.memory_config.custom_importance_evaluation_prompt:
+                 cfg["custom_importance_evaluation_prompt"] = self.memory_config.custom_importance_evaluation_prompt
+             return cfg
+         else:
+             # Fall back to dict access; "intelligent_memory" keys take precedence over "intelligence"
+             intelligence_cfg = (self.config or {}).get("intelligence", {})
+             intelligent_memory_cfg = (self.config or {}).get("intelligent_memory", {})
+             merged_cfg = {**intelligence_cfg, **intelligent_memory_cfg}
+             # Merge custom_importance_evaluation_prompt from the top level if present
+             if "custom_importance_evaluation_prompt" in self.config:
+                 merged_cfg["custom_importance_evaluation_prompt"] = self.config["custom_importance_evaluation_prompt"]
+             return merged_cfg
+ 
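The dict-mode fallback above merges the two keys with `intelligent_memory` taking precedence; a quick sketch (the `decay` key is hypothetical, used only to show the override order):

```python
cfg = {
    "intelligence": {"enabled": True, "plugin": "ebbinghaus", "decay": 0.5},
    "intelligent_memory": {"decay": 0.8},  # hypothetical key, for illustration only
}
# Later keys win in a dict merge, so "intelligent_memory" overrides "intelligence"
merged = {**cfg["intelligence"], **cfg["intelligent_memory"]}
assert merged == {"enabled": True, "plugin": "ebbinghaus", "decay": 0.8}
```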
+     def _extract_facts(self, messages: Any) -> List[str]:
+         """
+         Extract facts from messages using the LLM.
+         Integrates with IntelligenceManager for enhanced processing.
+ 
+         Args:
+             messages: Messages (list of dicts, single dict, or str)
+ 
+         Returns:
+             List of extracted facts
+         """
+         try:
+             # Parse messages into conversation format
+             conversation = parse_messages_for_facts(messages)
+ 
+             # Use the custom prompt if provided, otherwise use the default
+             if self.custom_fact_extraction_prompt:
+                 system_prompt = self.custom_fact_extraction_prompt
+             else:
+                 system_prompt = FACT_RETRIEVAL_PROMPT
+             user_prompt = f"Input:\n{conversation}"
+ 
+             # Call the LLM to extract facts
+             try:
+                 response = self.llm.generate_response(
+                     messages=[
+                         {"role": "system", "content": system_prompt},
+                         {"role": "user", "content": user_prompt}
+                     ],
+                     response_format={"type": "json_object"}
+                 )
+             except Exception as e:
+                 logger.error(f"Error in fact extraction: {e}")
+                 response = ""
+ 
+             # Parse the response
+             try:
+                 # Remove code blocks if present (LLMs sometimes wrap JSON in code blocks)
+                 response = remove_code_blocks(response)
+                 facts_data = json.loads(response)
+                 facts = facts_data.get("facts", [])
+ 
+                 # Log for debugging
+                 logger.debug(f"Extracted {len(facts)} facts: {facts}")
+ 
+                 return facts
+             except Exception as e:
+                 logger.error(f"Failed to parse extracted facts: {e}")
+                 return []
+ 
+         except Exception as e:
+             logger.error(f"Error extracting facts: {e}")
+             return []
+ 
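The parsing step above expects the model to return a JSON object with a `facts` array; a sketch of a well-formed response (the fact strings are illustrative):

```python
import json

response = '{"facts": ["User lives in Berlin", "User prefers tea over coffee"]}'
facts = json.loads(response).get("facts", [])
# facts == ["User lives in Berlin", "User prefers tea over coffee"]
```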
+     def _decide_memory_actions(
+         self,
+         new_facts: List[str],
+         existing_memories: List[Dict[str, Any]],
+         user_id: Optional[str] = None,
+         agent_id: Optional[str] = None,
+     ) -> List[Dict[str, Any]]:
+         """
+         Use the LLM to decide memory actions (ADD/UPDATE/DELETE/NONE).
+ 
+         Args:
+             new_facts: List of newly extracted facts
+             existing_memories: List of existing memories with 'id' and 'text'
+             user_id: User identifier
+             agent_id: Agent identifier
+ 
+         Returns:
+             List of memory action dictionaries
+         """
+         try:
+             if not new_facts:
+                 logger.debug("No new facts to process")
+                 return []
+ 
+             # Format existing memories for the prompt
+             old_memory = []
+             for mem in existing_memories:
+                 # Support both "memory" and "content" field names for compatibility
+                 content = mem.get("memory", "") or mem.get("content", "")
+                 old_memory.append({
+                     "id": mem.get("id", "unknown"),
+                     "text": content
+                 })
+ 
+             # Generate the update prompt, using the custom prompt if provided
+             custom_prompt = None
+             if hasattr(self, 'custom_update_memory_prompt') and self.custom_update_memory_prompt:
+                 custom_prompt = self.custom_update_memory_prompt
+             update_prompt = get_memory_update_prompt(old_memory, new_facts, custom_prompt)
+ 
+             # Call the LLM
+             try:
+                 response = self.llm.generate_response(
+                     messages=[{"role": "user", "content": update_prompt}],
+                     response_format={"type": "json_object"}
+                 )
+             except Exception as e:
+                 logger.error(f"Error generating memory actions response: {e}")
+                 response = ""
+ 
+             # Parse the response
+             try:
+                 response = remove_code_blocks(response)
+                 actions_data = json.loads(response)
+                 actions = actions_data.get("memory", [])
+                 return actions
+             except Exception as e:
+                 logger.error(f"Invalid JSON response: {e}")
+                 return []
+ 
+         except Exception as e:
+             logger.error(f"Error deciding memory actions: {e}")
+             return []
+ 
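Downstream, `_intelligent_add` consumes the fields `id`, `text` (or `memory`), `event`, and `old_memory` from each action, so a well-formed decision payload looks roughly like this (values illustrative):

```python
actions_data = {
    "memory": [
        {"id": "0", "text": "User lives in Berlin", "event": "UPDATE",
         "old_memory": "User lives in Munich"},
        {"id": "", "text": "User prefers tea over coffee", "event": "ADD"},
        {"id": "1", "text": "", "event": "NONE"},  # duplicate detected, no action taken
    ]
}
actions = actions_data.get("memory", [])
```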
+     def add(
+         self,
+         messages,
+         user_id: Optional[str] = None,
+         agent_id: Optional[str] = None,
+         run_id: Optional[str] = None,
+         metadata: Optional[Dict[str, Any]] = None,
+         filters: Optional[Dict[str, Any]] = None,
+         scope: Optional[str] = None,
+         memory_type: Optional[str] = None,
+         prompt: Optional[str] = None,
+         infer: bool = True,
+     ) -> Dict[str, Any]:
+         """Add a new memory with optional intelligent processing."""
+         try:
+             # Handle the messages parameter
+             if messages is None:
+                 raise ValueError("messages must be provided (str, dict, or list[dict])")
+ 
+             # Normalize the input format
+             if isinstance(messages, str):
+                 messages = [{"role": "user", "content": messages}]
+             elif isinstance(messages, dict):
+                 messages = [messages]
+             elif not isinstance(messages, list):
+                 raise ValueError("messages must be str, dict, or list[dict]")
+ 
+             # Vision-aware message processing
+             llm_cfg = {}
+             try:
+                 llm_cfg = (self.config or {}).get("llm", {}).get("config", {})
+             except Exception:
+                 llm_cfg = {}
+             if llm_cfg.get("enable_vision"):
+                 messages = parse_vision_messages(messages, self.llm, llm_cfg.get("vision_details"), self.audio_llm)
+             else:
+                 messages = parse_vision_messages(messages, None, None, self.audio_llm)
+ 
+             # Use self.agent_id as a fallback if agent_id is not provided
+             agent_id = agent_id or self.agent_id
+ 
+             # Check whether intelligent memory should be used
+             use_infer = infer and isinstance(messages, list) and len(messages) > 0
+ 
+             # If not using intelligent memory, fall back to simple mode
+             if not use_infer:
+                 return self._simple_add(messages, user_id, agent_id, run_id, metadata, filters, scope, memory_type, prompt)
+ 
+             # Intelligent memory mode: extract facts, search similar memories, and consolidate
+             return self._intelligent_add(messages, user_id, agent_id, run_id, metadata, filters, scope, memory_type, prompt)
+ 
+         except Exception as e:
+             logger.error(f"Failed to add memory: {e}")
+             self.telemetry.capture_event("memory.add.error", {"error": str(e)})
+             raise
+ 
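A usage sketch of the two code paths above (assumes a configured `Memory` instance):

```python
# Intelligent mode (default): extract facts, reconcile with existing memories
memory.add(
    [{"role": "user", "content": "I just moved from Munich to Berlin."}],
    user_id="u1",
)

# Simple mode: skip LLM inference and store the raw content directly
memory.add("Team standup is at 9:30 on weekdays", user_id="u1", infer=False)
```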
+     def _simple_add(
+         self,
+         messages,
+         user_id: Optional[str] = None,
+         agent_id: Optional[str] = None,
+         run_id: Optional[str] = None,
+         metadata: Optional[Dict[str, Any]] = None,
+         filters: Optional[Dict[str, Any]] = None,
+         scope: Optional[str] = None,
+         memory_type: Optional[str] = None,
+         prompt: Optional[str] = None,
+     ) -> Dict[str, Any]:
+         """Simple add mode: direct storage without intelligence."""
+         # Parse messages into content
+         if isinstance(messages, str):
+             content = messages
+         elif isinstance(messages, dict):
+             content = messages.get("content", "")
+         elif isinstance(messages, list):
+             content = "\n".join([msg.get("content", "") for msg in messages if isinstance(msg, dict) and msg.get("content")])
+         else:
+             raise ValueError("messages must be str, dict, or list[dict]")
+ 
+         # Validate that content is not empty
+         if not content or not content.strip():
+             logger.error(f"Cannot store empty content. Messages: {messages}")
+             raise ValueError(f"Cannot create memory with empty content. Original messages: {messages}")
+ 
+         # Select the embedding service based on metadata (for sub-store routing)
+         embedding_service = self._get_embedding_service(metadata)
+ 
+         # Generate the embedding
+         embedding = embedding_service.embed(content, memory_action="add")
+ 
+         # LLM-based importance evaluation is disabled to save tokens:
+         # enhanced_metadata = self.intelligence.process_metadata(content, metadata)
+         enhanced_metadata = metadata  # Use the original metadata without LLM evaluation
+ 
+         # Intelligent plugin annotations
+         extra_fields = {}
+         if self._intelligence_plugin and self._intelligence_plugin.enabled:
+             extra_fields = self._intelligence_plugin.on_add(content=content, metadata=enhanced_metadata)
+ 
+         # Generate a content hash for deduplication
+         content_hash = hashlib.md5(content.encode('utf-8')).hexdigest()
+ 
+         # Extract category from the enhanced metadata if present
+         category = ""
+         if enhanced_metadata and isinstance(enhanced_metadata, dict):
+             category = enhanced_metadata.get("category", "")
+             # Remove category from metadata to avoid duplication
+             enhanced_metadata = {k: v for k, v in enhanced_metadata.items() if k != "category"}
+ 
+         # Final validation before storage
+         if not content or not content.strip():
+             raise ValueError(f"Refusing to store empty content. Original messages: {messages}")
+ 
+         # Use self.agent_id as a fallback if agent_id is not provided
+         agent_id = agent_id or self.agent_id
+ 
+         # Store in the database
+         memory_data = {
+             "content": content,
+             "embedding": embedding,
+             "user_id": user_id,
+             "agent_id": agent_id,
+             "run_id": run_id,
+             "hash": content_hash,
+             "category": category,
+             "metadata": enhanced_metadata or {},
+             "filters": filters or {},
+             "created_at": datetime.utcnow(),
+             "updated_at": datetime.utcnow(),
+         }
+ 
+         if extra_fields:
+             memory_data.update(extra_fields)
+ 
+         memory_id = self.storage.add_memory(memory_data)
+ 
+         # Log audit event
+         self.audit.log_event("memory.add", {
+             "memory_id": memory_id,
+             "user_id": user_id,
+             "agent_id": agent_id,
+             "content_length": len(content)
+         }, user_id=user_id, agent_id=agent_id)
+ 
+         # Capture telemetry
+         self.telemetry.capture_event("memory.add", {
+             "memory_id": memory_id,
+             "user_id": user_id,
+             "agent_id": agent_id
+         })
+ 
+         graph_result = self._add_to_graph(messages, filters, user_id, agent_id, run_id)
+ 
+         result: Dict[str, Any] = {
+             "results": [{
+                 "id": memory_id,
+                 "memory": content,
+                 "event": "ADD",
+                 "user_id": user_id,
+                 "agent_id": agent_id,
+                 "run_id": run_id,
+                 "metadata": metadata,
+                 "created_at": memory_data["created_at"].isoformat() if isinstance(memory_data["created_at"], datetime) else memory_data["created_at"],
+             }]
+         }
+         if graph_result:
+             result["relations"] = graph_result
+         return result
+ 
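Both add paths return the same envelope; in simple mode it carries a single ADD event, roughly (IDs and timestamps illustrative):

```python
{
    "results": [{
        "id": 7219384756,                 # integer Snowflake-style ID
        "memory": "Team standup is at 9:30 on weekdays",
        "event": "ADD",
        "user_id": "u1",
        "agent_id": None,
        "run_id": None,
        "metadata": None,
        "created_at": "2025-01-01T00:00:00",
    }]
    # plus a "relations" key when the graph store is enabled
}
```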
+     def _intelligent_add(
+         self,
+         messages,
+         user_id: Optional[str] = None,
+         agent_id: Optional[str] = None,
+         run_id: Optional[str] = None,
+         metadata: Optional[Dict[str, Any]] = None,
+         filters: Optional[Dict[str, Any]] = None,
+         scope: Optional[str] = None,
+         memory_type: Optional[str] = None,
+         prompt: Optional[str] = None,
+     ) -> Dict[str, Any]:
+         """Intelligent add mode: extract facts and consolidate them with existing memories."""
+         # Use self.agent_id as a fallback if agent_id is not provided
+         agent_id = agent_id or self.agent_id
+ 
+         # Step 1: Extract facts from messages
+         logger.info("Extracting facts from messages...")
+         facts = self._extract_facts(messages)
+ 
+         if not facts:
+             logger.debug("No facts extracted, skipping intelligent add")
+             return {"results": []}
+ 
+         logger.info(f"Extracted {len(facts)} facts: {facts}")
+ 
+         # Step 2: Search for similar memories for each fact
+         existing_memories = []
+         fact_embeddings = {}
+ 
+         # Select the embedding service based on metadata (for sub-store routing)
+         embedding_service = self._get_embedding_service(metadata)
+ 
+         for fact in facts:
+             fact_embedding = embedding_service.embed(fact, memory_action="add")
+             fact_embeddings[fact] = fact_embedding
+ 
+             # Merge metadata into filters for correct routing
+             search_filters = filters.copy() if filters else {}
+             if metadata:
+                 # Keep only simple metadata values (strings, numbers, booleans, None).
+                 # This prevents nested dicts like {'agent': {'agent_id': ...}} from causing issues
+                 # when OceanBase's build_condition tries to parse them as operators.
+                 simple_metadata = {
+                     k: v for k, v in metadata.items()
+                     if not isinstance(v, (dict, list)) and k not in ['agent_id', 'user_id', 'run_id']
+                 }
+                 search_filters.update(simple_metadata)
+ 
+             # Search for similar memories with a reduced limit to cut noise.
+             # Pass the fact text to enable hybrid search for better results.
+             similar = self.storage.search_memories(
+                 query_embedding=fact_embedding,
+                 user_id=user_id,
+                 agent_id=agent_id,
+                 run_id=run_id,
+                 filters=search_filters,
+                 limit=5,
+                 query=fact  # Enable hybrid search
+             )
+             existing_memories.extend(similar)
+ 
+         # Improved deduplication: prefer memories with better similarity scores
+         unique_memories = {}
+         for mem in existing_memories:
+             mem_id = mem.get("id")
+             if mem_id and mem_id not in unique_memories:
+                 unique_memories[mem_id] = mem
+             elif mem_id:
+                 # On a duplicate ID, keep the one with better similarity (lower distance)
+                 existing = unique_memories.get(mem_id)
+                 mem_distance = mem.get("distance", float('inf'))
+                 existing_distance = existing.get("distance", float('inf')) if existing else float('inf')
+                 if mem_distance < existing_distance:
+                     unique_memories[mem_id] = mem
+ 
+         # Limit candidates to avoid overloading the LLM prompt
+         existing_memories = list(unique_memories.values())[:10]  # Max 10 memories
+ 
+         logger.info(f"Found {len(existing_memories)} existing memories to consider (after dedup and limiting)")
+ 
+         # Map real IDs to integer indices to guard against LLM ID hallucinations.
+         # temp_uuid_mapping maps temporary string indices to the real Snowflake IDs (integers).
+         temp_uuid_mapping = {}
+         for idx, item in enumerate(existing_memories):
+             temp_uuid_mapping[str(idx)] = item["id"]
+             existing_memories[idx]["id"] = str(idx)
+ 
+         # Step 3: Let the LLM decide memory actions (only if we have new facts)
+         actions = []
+         if facts:
+             actions = self._decide_memory_actions(facts, existing_memories, user_id, agent_id)
+             logger.info(f"LLM decided on {len(actions)} memory actions")
+         else:
+             logger.debug("No new facts, skipping LLM decision step")
+ 
+         # Step 4: Execute the actions
+         results = []
+         action_counts = {"ADD": 0, "UPDATE": 0, "DELETE": 0, "NONE": 0}
+ 
+         if not actions:
+             logger.warning("No actions returned from LLM, skipping intelligent add")
+             return {"results": []}
+ 
+         for action in actions:
+             action_text = action.get("text", "") or action.get("memory", "")
+             event_type = action.get("event", "NONE")
+             action_id = action.get("id", "")
+ 
+             # Skip actions with empty text UNLESS it's a NONE event (duplicates may have empty text)
+             if not action_text and event_type != "NONE":
+                 logger.warning(f"Skipping action with empty text: {action}")
+                 continue
+ 
+             logger.debug(f"Processing action: {event_type} - '{action_text[:50] if action_text else 'NONE'}...' (id: {action_id})")
+ 
+             try:
+                 if event_type == "ADD":
+                     # Add a new memory
+                     memory_id = self._create_memory(
+                         content=action_text,
+                         user_id=user_id,
+                         agent_id=agent_id,
+                         run_id=run_id,
+                         metadata=metadata,
+                         filters=filters,
+                         existing_embeddings=fact_embeddings
+                     )
+                     results.append({
+                         "id": memory_id,
+                         "memory": action_text,
+                         "event": event_type
+                     })
+                     action_counts["ADD"] += 1
+ 
+                 elif event_type == "UPDATE":
+                     # Use the ID mapping to get the real memory ID (an integer Snowflake ID)
+                     real_memory_id = temp_uuid_mapping.get(str(action_id))
+                     if real_memory_id:
+                         self._update_memory(
+                             memory_id=real_memory_id,
+                             content=action_text,
+                             user_id=user_id,
+                             agent_id=agent_id,
+                             existing_embeddings=fact_embeddings
+                         )
+                         results.append({
+                             "id": real_memory_id,
+                             "memory": action_text,
+                             "event": event_type,
+                             "previous_memory": action.get("old_memory")
+                         })
+                         action_counts["UPDATE"] += 1
+                     else:
+                         logger.warning(f"Could not find real memory ID for action ID: {action_id}")
+ 
+                 elif event_type == "DELETE":
+                     # Use the ID mapping to get the real memory ID (an integer Snowflake ID)
+                     real_memory_id = temp_uuid_mapping.get(str(action_id))
+                     if real_memory_id:
+                         self.delete(real_memory_id, user_id, agent_id)
+                         results.append({
+                             "id": real_memory_id,
+                             "memory": action_text,
+                             "event": event_type
+                         })
+                         action_counts["DELETE"] += 1
+                     else:
+                         logger.warning(f"Could not find real memory ID for action ID: {action_id}")
+ 
+                 elif event_type == "NONE":
+                     logger.debug("No action needed for memory (duplicate detected)")
+                     action_counts["NONE"] += 1
+ 
+             except Exception as e:
+                 logger.error(f"Error executing memory action {event_type}: {e}")
+ 
+         # Log audit event for the intelligent add operation
+         self.audit.log_event("memory.intelligent_add", {
+             "user_id": user_id,
+             "agent_id": agent_id,
+             "facts_count": len(facts),
+             "action_counts": action_counts,
+             "results_count": len(results)
+         }, user_id=user_id, agent_id=agent_id)
+ 
+         # Add to the graph store and get relations
+         graph_result = self._add_to_graph(messages, filters, user_id, agent_id, run_id)
+ 
+         # If we have results, return them
+         if results:
+             result: Dict[str, Any] = {"results": results}
+             if graph_result:
+                 result["relations"] = graph_result
+             return result
+         # If all processed actions were NONE (duplicates detected), return empty results
+         elif action_counts.get("NONE", 0) > 0:
+             logger.info("All actions were NONE (duplicates detected), returning empty results")
+             result: Dict[str, Any] = {"results": []}
+             if graph_result:
+                 result["relations"] = graph_result
+             return result
+         # Otherwise no action produced a result
+         else:
+             logger.warning("No memory actions produced results, skipping intelligent add")
+             return {"results": []}
+ 
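The index-mapping step above is the key defense against ID hallucination: the LLM only ever sees small string indices, so an invented ID fails to resolve instead of touching a real record. A minimal sketch:

```python
memories = [{"id": 7219384756, "text": "..."}, {"id": 7219384999, "text": "..."}]

# Swap real Snowflake IDs for small string indices before prompting
temp_mapping = {}
for idx, item in enumerate(memories):
    temp_mapping[str(idx)] = item["id"]
    item["id"] = str(idx)

# After the LLM responds, translate back; hallucinated IDs resolve to None
real_id = temp_mapping.get("0")    # 7219384756
bad_id = temp_mapping.get("42")    # None -> the action is skipped with a warning
```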
+     def _add_to_graph(
+         self,
+         messages,
+         filters: Optional[Dict[str, Any]] = None,
+         user_id: Optional[str] = None,
+         agent_id: Optional[str] = None,
+         run_id: Optional[str] = None,
+     ) -> Optional[Dict[str, Any]]:
+         """
+         Add messages to the graph store and return relations.
+ 
+         Returns:
+             dict with added_entities and deleted_entities, or None if the graph store is disabled
+         """
+         if not self.enable_graph:
+             return None
+ 
+         # Extract content from messages for graph processing
+         if isinstance(messages, str):
+             data = messages
+         elif isinstance(messages, dict):
+             data = messages.get("content", "")
+         elif isinstance(messages, list):
+             data = "\n".join([
+                 msg.get("content", "")
+                 for msg in messages
+                 if isinstance(msg, dict) and msg.get("content") and msg.get("role") != "system"
+             ])
+         else:
+             data = ""
+ 
+         if not data:
+             return None
+ 
+         graph_filters = {**(filters or {}), "user_id": user_id, "agent_id": agent_id, "run_id": run_id}
+         if graph_filters.get("user_id") is None:
+             graph_filters["user_id"] = "user"
+ 
+         return self.graph_store.add(data, graph_filters)
+ 
+     def _create_memory(
+         self,
+         content: str,
+         user_id: Optional[str] = None,
+         agent_id: Optional[str] = None,
+         run_id: Optional[str] = None,
+         metadata: Optional[Dict[str, Any]] = None,
+         filters: Optional[Dict[str, Any]] = None,
+         existing_embeddings: Optional[Dict[str, Any]] = None,
+     ) -> int:
+         """Create a memory, reusing precomputed embeddings when available."""
+         # Validate that content is not empty
+         if not content or not content.strip():
+             raise ValueError(f"Cannot create memory with empty content: '{content}'")
+ 
+         # Select the embedding service based on metadata (for sub-store routing)
+         embedding_service = self._get_embedding_service(metadata)
+ 
+         # Generate an embedding, or reuse an existing one
+         if existing_embeddings and content in existing_embeddings:
+             embedding = existing_embeddings[content]
+         else:
+             embedding = embedding_service.embed(content, memory_action="add")
+ 
+         # LLM-based importance evaluation is disabled to save tokens:
+         # enhanced_metadata = self.intelligence.process_metadata(content, metadata)
+         enhanced_metadata = metadata  # Use the original metadata without LLM evaluation
+ 
+         # Generate a content hash
+         content_hash = hashlib.md5(content.encode('utf-8')).hexdigest()
+ 
+         # Extract category
+         category = ""
+         if enhanced_metadata and isinstance(enhanced_metadata, dict):
+             category = enhanced_metadata.get("category", "")
+             enhanced_metadata = {k: v for k, v in enhanced_metadata.items() if k != "category"}
+ 
+         # Use self.agent_id as a fallback if agent_id is not provided
+         agent_id = agent_id or self.agent_id
+ 
+         # Create memory data
+         memory_data = {
+             "content": content,
+             "embedding": embedding,
+             "user_id": user_id,
+             "agent_id": agent_id,
+             "run_id": run_id,
+             "hash": content_hash,
+             "category": category,
+             "metadata": enhanced_metadata or {},
+             "filters": filters or {},
+             "created_at": datetime.utcnow(),
+             "updated_at": datetime.utcnow(),
+         }
+ 
+         memory_id = self.storage.add_memory(memory_data)
+ 
+         return memory_id
+ 
+     def _update_memory(
+         self,
+         memory_id: int,
+         content: str,
+         user_id: Optional[str] = None,
+         agent_id: Optional[str] = None,
+         existing_embeddings: Optional[Dict[str, Any]] = None,
+         metadata: Optional[Dict[str, Any]] = None,
+     ):
+         """Update a memory, reusing precomputed embeddings when available."""
+         # Use self.agent_id as a fallback if agent_id is not provided
+         agent_id = agent_id or self.agent_id
+ 
+         # Validate that content is not empty
+         if not content or not content.strip():
+             raise ValueError(f"Cannot update memory with empty content: '{content}'")
+ 
+         # Generate an embedding, or reuse an existing one
+         if existing_embeddings and content in existing_embeddings:
+             embedding = existing_embeddings[content]
+         else:
+             # If no metadata is provided, try to use the existing memory's metadata
+             if metadata is None:
+                 existing = self.storage.get_memory(memory_id, user_id, agent_id)
+                 if existing:
+                     metadata = existing.get("metadata", {})
+ 
+             # Select the embedding service based on metadata (for sub-store routing)
+             embedding_service = self._get_embedding_service(metadata)
+ 
+             embedding = embedding_service.embed(content, memory_action="update")
+ 
+         # Generate the content hash
+         content_hash = hashlib.md5(content.encode('utf-8')).hexdigest()
+ 
+         update_data = {
+             "content": content,
+             "embedding": embedding,
+             "hash": content_hash,  # Update the hash
+             "updated_at": datetime.utcnow(),
+         }
+ 
+         logger.debug(f"Updating memory {memory_id} with content: '{content[:50]}...'")
+ 
+         self.storage.update_memory(memory_id, update_data, user_id, agent_id)
+ 
+     def search(
+         self,
+         query: str,
+         user_id: Optional[str] = None,
+         agent_id: Optional[str] = None,
+         run_id: Optional[str] = None,
+         filters: Optional[Dict[str, Any]] = None,
+         limit: int = 30,
+         threshold: Optional[float] = None,
+     ) -> Dict[str, Any]:
+         """Search for memories."""
+         try:
+             # Select the embedding service based on filters (for sub-store routing)
+             embedding_service = self._get_embedding_service(filters)
+ 
+             # Generate the query embedding
+             query_embedding = embedding_service.embed(query, memory_action="search")
+ 
+             # Search in storage, passing the query text to enable hybrid search
+             results = self.storage.search_memories(
+                 query_embedding=query_embedding,
+                 user_id=user_id,
+                 agent_id=agent_id,
+                 run_id=run_id,
+                 filters=filters,
+                 limit=limit,
+                 query=query  # Query text enables hybrid search (vector + full-text)
+             )
+ 
+             # Process results with the intelligence manager (only if enabled, to avoid unnecessary calls)
+             if self.intelligence.enabled:
+                 processed_results = self.intelligence.process_search_results(results, query)
+             else:
+                 processed_results = results
+ 
+             # Intelligent plugin lifecycle management on search
+             if self._intelligence_plugin and self._intelligence_plugin.enabled:
+                 updates, deletes = self._intelligence_plugin.on_search(processed_results)
+                 for mem_id, upd in updates:
+                     try:
+                         self.storage.update_memory(mem_id, {**upd}, user_id, agent_id)
+                     except Exception:
+                         continue
+                 for mem_id in deletes:
+                     try:
+                         self.storage.delete_memory(mem_id, user_id, agent_id)
+                     except Exception:
+                         continue
+ 
+             # Transform results to match the expected benchmark format:
+             # {"results": [{"memory": ..., "metadata": {...}, "score": ...}], "relations": [...]}
+             transformed_results = []
+             for result in processed_results:
+                 score = result.get("score", 0.0)
+                 # Apply threshold filtering: include a result only if threshold is None
+                 # or its score >= threshold
+                 if threshold is not None and score < threshold:
+                     continue
+ 
+                 transformed_result = {
+                     "memory": result.get("memory", ""),
+                     "metadata": result.get("metadata", {}),  # Keep metadata as-is from storage
+                     "score": score,
+                 }
+                 # Preserve other fields if present
+                 for key in ["id", "created_at", "updated_at", "user_id", "agent_id", "run_id"]:
+                     if key in result:
+                         transformed_result[key] = result[key]
+                 transformed_results.append(transformed_result)
+ 
+             # Log audit event
+             self.audit.log_event("memory.search", {
+                 "query": query,
+                 "user_id": user_id,
+                 "agent_id": agent_id,
+                 "results_count": len(transformed_results)
+             }, user_id=user_id, agent_id=agent_id)
+ 
+             # Capture telemetry
+             self.telemetry.capture_event("memory.search", {
+                 "user_id": user_id,
+                 "agent_id": agent_id,
+                 "results_count": len(transformed_results),
+                 "threshold": threshold
+             })
+ 
+             # Search in the graph store
+             if self.enable_graph:
+                 filters = {**(filters or {}), "user_id": user_id, "agent_id": agent_id, "run_id": run_id}
+                 graph_results = self.graph_store.search(query, filters, limit)
+                 return {"results": transformed_results, "relations": graph_results}
+ 
+             # Return in the expected benchmark format
+             return {"results": transformed_results}
+ 
+         except Exception as e:
+             logger.error(f"Failed to search memories: {e}")
+             self.telemetry.capture_event("memory.search.error", {"error": str(e)})
+             raise
+ 
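A usage sketch of threshold filtering and the return shape built above:

```python
out = memory.search("where does the user live?", user_id="u1", limit=10, threshold=0.6)
for hit in out["results"]:
    print(hit["score"], hit["memory"])    # only hits with score >= 0.6 survive the filter
relations = out.get("relations")          # present only when the graph store is enabled
```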
+     def get(
+         self,
+         memory_id: int,
+         user_id: Optional[str] = None,
+         agent_id: Optional[str] = None,
+     ) -> Optional[Dict[str, Any]]:
+         """Get a specific memory by ID."""
+         try:
+             result = self.storage.get_memory(memory_id, user_id, agent_id)
+ 
+             if result:
+                 # Intelligent plugin lifecycle on get
+                 if self._intelligence_plugin and self._intelligence_plugin.enabled:
+                     updates, delete_flag = self._intelligence_plugin.on_get(result)
+                     try:
+                         if delete_flag:
+                             self.storage.delete_memory(memory_id, user_id, agent_id)
+                             return None
+                         if updates:
+                             self.storage.update_memory(memory_id, {**updates}, user_id, agent_id)
+                     except Exception:
+                         pass
+                 self.audit.log_event("memory.get", {
+                     "memory_id": memory_id,
+                     "user_id": user_id,
+                     "agent_id": agent_id
+                 }, user_id=user_id, agent_id=agent_id)
+ 
+             return result
+ 
+         except Exception as e:
+             logger.error(f"Failed to get memory {memory_id}: {e}")
+             raise
+ 
+     def update(
+         self,
+         memory_id: int,
+         content: str,
+         user_id: Optional[str] = None,
+         agent_id: Optional[str] = None,
+         metadata: Optional[Dict[str, Any]] = None,
+     ) -> Dict[str, Any]:
+         """Update an existing memory."""
+         try:
+             # Validate that content is not empty
+             if not content or not content.strip():
+                 raise ValueError(f"Cannot update memory with empty content: '{content}'")
+ 
+             # If no metadata is provided, try to use the existing memory's metadata
+             if metadata is None:
+                 existing = self.storage.get_memory(memory_id, user_id, agent_id)
+                 if existing:
+                     metadata = existing.get("metadata", {})
+ 
+             # Select the embedding service based on metadata (for sub-store routing)
+             embedding_service = self._get_embedding_service(metadata)
+ 
+             # Generate a new embedding
+             embedding = embedding_service.embed(content, memory_action="update")
+ 
+             # LLM-based importance evaluation is disabled to save tokens (consistent with add):
+             # enhanced_metadata = self.intelligence.process_metadata(content, metadata)
+             enhanced_metadata = metadata  # Use the original metadata without LLM evaluation
+ 
+             # Intelligent plugin annotations
+             extra_fields = {}
+             if self._intelligence_plugin and self._intelligence_plugin.enabled:
+                 # Get the existing memory for context
+                 existing_memory = self.get(memory_id, user_id=user_id)
+                 if existing_memory:
+                     # The plugin can process the update event
+                     extra_fields = self._intelligence_plugin.on_add(content=content, metadata=enhanced_metadata)
+ 
+             # Generate a content hash for deduplication
+             content_hash = hashlib.md5(content.encode('utf-8')).hexdigest()
+ 
+             # Extract category from the enhanced metadata if present
+             category = ""
+             if enhanced_metadata and isinstance(enhanced_metadata, dict):
+                 category = enhanced_metadata.get("category", "")
+                 # Remove category from metadata to avoid duplication
+                 enhanced_metadata = {k: v for k, v in enhanced_metadata.items() if k != "category"}
+ 
+             # Merge extra fields from the intelligence plugin
+             if extra_fields and isinstance(extra_fields, dict):
+                 enhanced_metadata = {**(enhanced_metadata or {}), **extra_fields}
+ 
+             # Update in storage
+             update_data = {
+                 "content": content,
+                 "embedding": embedding,
+                 "metadata": enhanced_metadata,
+                 "hash": content_hash,  # Update the hash
+                 "category": category,
+                 "updated_at": datetime.utcnow(),
+             }
+ 
+             result = self.storage.update_memory(memory_id, update_data, user_id, agent_id)
+ 
+             # Log audit event
+             self.audit.log_event("memory.update", {
+                 "memory_id": memory_id,
+                 "user_id": user_id,
+                 "agent_id": agent_id
+             }, user_id=user_id, agent_id=agent_id)
+ 
+             return result
+ 
+         except Exception as e:
+             logger.error(f"Failed to update memory {memory_id}: {e}")
+             raise
+ 
+     def delete(
+         self,
+         memory_id: int,
+         user_id: Optional[str] = None,
+         agent_id: Optional[str] = None,
+     ) -> bool:
+         """Delete a memory."""
+         try:
+             result = self.storage.delete_memory(memory_id, user_id, agent_id)
+ 
+             if result:
+                 self.audit.log_event("memory.delete", {
+                     "memory_id": memory_id,
+                     "user_id": user_id,
+                     "agent_id": agent_id
+                 }, user_id=user_id, agent_id=agent_id)
+ 
+             return result
+ 
+         except Exception as e:
+             logger.error(f"Failed to delete memory {memory_id}: {e}")
+             raise
+ 
+     def delete_all(
+         self,
+         user_id: Optional[str] = None,
+         agent_id: Optional[str] = None,
+         run_id: Optional[str] = None,
+     ) -> bool:
+         """Delete all memories for the given identifiers."""
+         try:
+             result = self.storage.clear_memories(user_id, agent_id, run_id)
+ 
+             if result:
+                 self.audit.log_event("memory.delete_all", {
+                     "user_id": user_id,
+                     "agent_id": agent_id,
+                     "run_id": run_id
+                 }, user_id=user_id, agent_id=agent_id)
+ 
+                 self.telemetry.capture_event("memory.delete_all", {
+                     "user_id": user_id,
+                     "agent_id": agent_id,
+                     "run_id": run_id
+                 })
+ 
+             if self.enable_graph:
+                 filters = {"user_id": user_id, "agent_id": agent_id, "run_id": run_id}
+                 self.graph_store.delete_all(filters)
+ 
+             return result
+ 
+         except Exception as e:
+             logger.error(f"Failed to delete all memories: {e}")
+             raise
+ 
+     def get_all(
+         self,
+         user_id: Optional[str] = None,
+         agent_id: Optional[str] = None,
+         run_id: Optional[str] = None,
+         limit: int = 100,
+         offset: int = 0,
+         filters: Optional[Dict[str, Any]] = None,
+     ) -> dict[str, list[dict[str, Any]]]:
+         """Get all memories with optional filtering."""
+         try:
+             results = self.storage.get_all_memories(user_id, agent_id, run_id, limit, offset)
+ 
+             self.audit.log_event("memory.get_all", {
+                 "user_id": user_id,
+                 "agent_id": agent_id,
+                 "run_id": run_id,
+                 "limit": limit,
+                 "offset": offset,
+                 "results_count": len(results)
+             }, user_id=user_id, agent_id=agent_id)
+ 
+             # Get from the graph store; relations are returned separately from the
+             # vector results, matching the shape returned by search()
+             if self.enable_graph:
+                 filters = {**(filters or {}), "user_id": user_id, "agent_id": agent_id, "run_id": run_id}
+                 graph_results = self.graph_store.get_all(filters, limit + offset)
+                 return {"results": results, "relations": graph_results}
+ 
+             return {"results": results}
+ 
+         except Exception as e:
+             logger.error(f"Failed to get all memories: {e}")
+             raise
+ 
+     def reset(self):
+         """
+         Reset the memory store:
+         - deletes the vector store collection (or calls its reset hook)
+         - resets the database
+         - recreates the vector store with a new client
+         """
+         logger.warning("Resetting all memories")
+ 
+         try:
+             # Reset the vector store
+             if hasattr(self.storage.vector_store, "reset"):
+                 self.storage.vector_store.reset()
+             else:
+                 logger.warning("Vector store does not support reset; deleting and recreating the collection instead.")
+                 self.storage.vector_store.delete_col()
+                 # Recreate the vector store
+                 from ..storage.factory import VectorStoreFactory
+                 vector_store_config = self._get_component_config('vector_store')
+                 self.storage.vector_store = VectorStoreFactory.create(self.storage_type, vector_store_config)
+                 # Update the storage adapter
+                 self.storage = StorageAdapter(self.storage.vector_store, self.embedding)
+ 
+             # Reset the graph store if enabled
+             if self.enable_graph and hasattr(self.graph_store, "reset"):
+                 self.graph_store.reset()
+ 
+             # Log telemetry event
+             self.telemetry.capture_event("memory.reset", {"sync_type": "sync"})
+ 
+             logger.info("Memory store reset completed successfully")
+ 
+         except Exception as e:
+             logger.error(f"Failed to reset memory store: {e}")
+             raise
+ 
+     def _init_sub_stores(self):
+         """Initialize the configuration for multiple sub stores."""
+         sub_stores_list = self.config.get('sub_stores', [])
+ 
+         if not sub_stores_list:
+             logger.info("No sub stores configured")
+             return
+ 
+         # The sub store feature only supports OceanBase storage
+         if self.storage_type.lower() != 'oceanbase':
+             logger.warning(f"Sub store feature only supports OceanBase storage, current storage: {self.storage_type}")
+             logger.warning("Sub stores configuration will be ignored")
+             return
+ 
+         # Get main table information
+         main_collection_name = self.config.get('vector_store', {}).get('config', {}).get('collection_name', 'memories')
+         main_embedding_dims = self.config.get('vector_store', {}).get('config', {}).get('embedding_model_dims', 1536)
+ 
+         # Iterate through the configs and initialize each sub store
+         for index, sub_config in enumerate(sub_stores_list):
+             try:
+                 self._init_single_sub_store(index, sub_config, main_collection_name, main_embedding_dims)
+             except Exception as e:
+                 logger.error(f"Failed to initialize sub store {index}: {e}")
+                 continue
+ 
+         if self.sub_stores_config:
+             logger.info(f"Sub stores enabled: {len(self.sub_stores_config)} stores")
+ 
1369
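+    # Minimal sketch of the `sub_stores` configuration shape consumed above.
+    # The routing values and names are hypothetical examples:
+    #
+    #     config = {
+    #         "vector_store": {"provider": "oceanbase", "config": {"collection_name": "memories"}},
+    #         "sub_stores": [
+    #             {
+    #                 "routing_filter": {"agent_id": "support_bot"},  # required per store
+    #                 "collection_name": "memories_support",          # optional; defaults to memories_sub_0
+    #                 "embedding_model_dims": 1024,                   # optional; defaults to the main dims
+    #             },
+    #         ],
+    #     }
+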
+    def _init_single_sub_store(
+        self,
+        index: int,
+        sub_config: Dict,
+        main_collection_name: str,
+        main_embedding_dims: int
+    ):
+        """Initialize a single sub store"""
+
+        # 1. Determine sub store name (default: {main_table_name}_sub_{index})
+        sub_store_name = sub_config.get(
+            'collection_name',
+            f"{main_collection_name}_sub_{index}"
+        )
+
+        # 2. Get routing rules (required)
+        routing_filter = sub_config.get('routing_filter')
+        if not routing_filter:
+            logger.warning(f"Sub store {index} has no routing_filter, skipping")
+            return
+
+        # 3. Determine vector dimension (default: same as main table)
+        embedding_model_dims = sub_config.get('embedding_model_dims', main_embedding_dims)
+
+        # 4. Initialize sub store's embedding service
+        sub_embedding_config = sub_config.get('embedder', sub_config.get('embedding', {}))
+
+        if sub_embedding_config:
+            # Has an independent embedding configuration
+            sub_embedding_provider = sub_embedding_config.get('provider', self.embedding_provider)
+            sub_embedding_params = sub_embedding_config.get('config', {})
+
+            # Inherit api_key and other configs from main table
+            main_embedding_config = self.config.get('embedding', {}).get('config', {})
+            for key in ['api_key', 'openai_base_url', 'timeout']:
+                if key not in sub_embedding_params and key in main_embedding_config:
+                    sub_embedding_params[key] = main_embedding_config[key]
+
+            sub_embedding = EmbedderFactory.create(
+                sub_embedding_provider,
+                sub_embedding_params,
+                None
+            )
+            logger.info(f"Created sub embedding service for store {index}: {sub_embedding_provider}")
+        else:
+            # Reuse main table's embedding service
+            sub_embedding = self.embedding
+            logger.info(f"Sub store {index} using main embedding service")
+
+        # 5. Create sub store storage instance
+        db_config = self.config.get('vector_store', {}).get('config', {}).copy()
+
+        # Override with sub-store-specific vector_store config if provided
+        sub_vector_store_config = sub_config.get('vector_store', {})
+        if sub_vector_store_config:
+            db_config.update(sub_vector_store_config)
+            logger.info(f"Sub store {index} using custom vector_store config: {list(sub_vector_store_config.keys())}")
+
+        # Always override these critical fields
+        db_config['collection_name'] = sub_store_name
+        db_config['embedding_model_dims'] = embedding_model_dims
+
+        sub_vector_store = VectorStoreFactory.create(self.storage_type, db_config)
+
+        # 6. Register sub store in the adapter (with embedding service for migration)
+        if isinstance(self.storage, SubStorageAdapter):
+            self.storage.register_sub_store(
+                store_name=sub_store_name,
+                routing_filter=routing_filter,
+                vector_store=sub_vector_store,
+                embedding_service=sub_embedding,
+            )
+
+        # 7. Save sub store configuration
+        self.sub_stores_config.append({
+            'name': sub_store_name,
+            'routing_filter': routing_filter,
+            'embedding_service': sub_embedding,
+            'embedding_dims': embedding_model_dims,
+        })
+
+        logger.info(f"Registered sub store {index}: {sub_store_name} (dims={embedding_model_dims})")
+
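+    # Sketch of the per-store embedder override handled in step 4 above. The
+    # provider and filter values are hypothetical; a sub store inherits api_key,
+    # openai_base_url and timeout from the main `embedding.config` unless it
+    # sets them itself:
+    #
+    #     "sub_stores": [{
+    #         "routing_filter": {"user_id": "tenant_a"},
+    #         "embedder": {"provider": "qwen", "config": {"model": "text-embedding-v3"}},
+    #     }]
+    #     # -> effective embedder params: {"model": "text-embedding-v3",
+    #     #    "api_key": <inherited from main config>, ...}
+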
+    def _get_embedding_service(self, filters_or_metadata: Optional[Dict] = None):
+        """
+        Select the appropriate embedding service based on filters or metadata.
+
+        Args:
+            filters_or_metadata: Query filters (for search) or memory metadata (for add)
+
+        Returns:
+            The matching embedding service instance
+        """
+        if not filters_or_metadata or not self.sub_stores_config:
+            return self.embedding
+
+        # Iterate through all sub stores to find a match
+        if isinstance(self.storage, SubStorageAdapter):
+            for sub_config in self.sub_stores_config:
+                # Skip sub stores that are not ready
+                if not self.storage.is_sub_store_ready(sub_config['name']):
+                    continue
+
+                # Check whether filters_or_metadata matches the routing rules
+                routing_filter = sub_config['routing_filter']
+                if all(
+                    key in filters_or_metadata and filters_or_metadata[key] == value
+                    for key, value in routing_filter.items()
+                ):
+                    logger.debug(f"Using sub embedding for store: {sub_config['name']}")
+                    return sub_config['embedding_service']
+
+        logger.debug("Using main embedding service")
+        return self.embedding
+
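+    # Routing illustration for _get_embedding_service (hypothetical values,
+    # assuming a registered and ready sub store with routing_filter
+    # {"agent_id": "support_bot"}): the input matches when it contains every
+    # filter key with the same value, extra keys are allowed:
+    #
+    #     self._get_embedding_service({"agent_id": "support_bot", "run_id": "r1"})
+    #     # -> that sub store's embedding service
+    #     self._get_embedding_service({"agent_id": "other"})
+    #     # -> self.embedding (falls back to the main service)
+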
+    def migrate_to_sub_store(self, sub_store_index: int = 0, delete_source: bool = False) -> int:
+        """
+        Migrate data to the specified sub store.
+
+        Args:
+            sub_store_index: Sub store index (default 0, i.e. the first sub store)
+            delete_source: Whether to delete the source data after migration
+
+        Returns:
+            Number of migrated records
+        """
+        if not self.sub_stores_config:
+            raise ValueError("No sub stores configured.")
+
+        if sub_store_index >= len(self.sub_stores_config):
+            raise ValueError(f"Sub store index {sub_store_index} out of range")
+
+        sub_config = self.sub_stores_config[sub_store_index]
+
+        logger.info(f"Starting migration to sub store: {sub_config['name']}")
+
+        # Delegate to the adapter's migration method
+        if isinstance(self.storage, SubStorageAdapter):
+            migrated_count = self.storage.migrate_to_sub_store(
+                store_name=sub_config['name'],
+                delete_source=delete_source
+            )
+
+            logger.info(f"Migration completed: {migrated_count} records migrated")
+            return migrated_count
+        else:
+            raise ValueError("Storage adapter does not support migration")
+
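+    # Usage sketch (hypothetical instance `m`; assumes at least one sub store
+    # is configured and the storage adapter supports migration):
+    #
+    #     moved = m.migrate_to_sub_store(sub_store_index=0, delete_source=False)
+    #     # -> number of records copied into the first sub store
+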
+    def migrate_all_sub_stores(self, delete_source: bool = True) -> Dict[str, int]:
+        """
+        Migrate all sub stores.
+
+        Args:
+            delete_source: Whether to delete the source data after migration
+
+        Returns:
+            Migration record count for each sub store: {store_name: count}
+        """
+        results = {}
+        for index, sub_config in enumerate(self.sub_stores_config):
+            try:
+                count = self.migrate_to_sub_store(index, delete_source)
+                results[sub_config['name']] = count
+            except Exception as e:
+                logger.error(f"Failed to migrate sub store {index}: {e}")
+                results[sub_config['name']] = 0
+
+        return results
+
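+    # Usage sketch (hypothetical instance `m`; note that delete_source defaults
+    # to True here, unlike migrate_to_sub_store where it defaults to False):
+    #
+    #     counts = m.migrate_all_sub_stores(delete_source=False)
+    #     # -> e.g. {"memories_sub_0": 120, "memories_sub_1": 42}
+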
+    @classmethod
+    def from_config(cls, config: Optional[Dict[str, Any]] = None, **kwargs):
+        """
+        Create a Memory instance from a configuration dictionary.
+
+        Args:
+            config: Configuration dictionary; if omitted, configuration is
+                loaded automatically from the environment
+            **kwargs: Additional parameters passed to the constructor
+
+        Returns:
+            Memory instance
+
+        Example:
+            ```python
+            memory = Memory.from_config({
+                "llm": {"provider": "openai", "config": {"api_key": "..."}},
+                "embedder": {"provider": "openai", "config": {"api_key": "..."}},
+                "vector_store": {"provider": "oceanbase", "config": {...}},
+            })
+            ```
+        """
+        if config is None:
+            # Fall back to auto config built from the environment
+            from ..config_loader import auto_config
+            config = auto_config()
+
+        converted_config = _auto_convert_config(config)
+
+        return cls(config=converted_config, **kwargs)
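+    # Usage sketch: with no explicit config, from_config falls back to
+    # auto_config(), which builds the configuration from the environment.
+    # (Which environment variables it reads is determined by
+    # config_loader.auto_config, not shown here.)
+    #
+    #     memory = Memory.from_config()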