memorisdk 1.0.1__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of memorisdk has been flagged as potentially problematic.

Files changed (46)
  1. memori/__init__.py +24 -8
  2. memori/agents/conscious_agent.py +252 -414
  3. memori/agents/memory_agent.py +487 -224
  4. memori/agents/retrieval_agent.py +416 -60
  5. memori/config/memory_manager.py +323 -0
  6. memori/core/conversation.py +393 -0
  7. memori/core/database.py +386 -371
  8. memori/core/memory.py +1676 -534
  9. memori/core/providers.py +217 -0
  10. memori/database/adapters/__init__.py +10 -0
  11. memori/database/adapters/mysql_adapter.py +331 -0
  12. memori/database/adapters/postgresql_adapter.py +291 -0
  13. memori/database/adapters/sqlite_adapter.py +229 -0
  14. memori/database/auto_creator.py +320 -0
  15. memori/database/connection_utils.py +207 -0
  16. memori/database/connectors/base_connector.py +283 -0
  17. memori/database/connectors/mysql_connector.py +240 -18
  18. memori/database/connectors/postgres_connector.py +277 -4
  19. memori/database/connectors/sqlite_connector.py +178 -3
  20. memori/database/models.py +400 -0
  21. memori/database/queries/base_queries.py +1 -1
  22. memori/database/queries/memory_queries.py +91 -2
  23. memori/database/query_translator.py +222 -0
  24. memori/database/schema_generators/__init__.py +7 -0
  25. memori/database/schema_generators/mysql_schema_generator.py +215 -0
  26. memori/database/search/__init__.py +8 -0
  27. memori/database/search/mysql_search_adapter.py +255 -0
  28. memori/database/search/sqlite_search_adapter.py +180 -0
  29. memori/database/search_service.py +548 -0
  30. memori/database/sqlalchemy_manager.py +839 -0
  31. memori/integrations/__init__.py +36 -11
  32. memori/integrations/litellm_integration.py +340 -6
  33. memori/integrations/openai_integration.py +506 -240
  34. memori/utils/input_validator.py +395 -0
  35. memori/utils/pydantic_models.py +138 -36
  36. memori/utils/query_builder.py +530 -0
  37. memori/utils/security_audit.py +594 -0
  38. memori/utils/security_integration.py +339 -0
  39. memori/utils/transaction_manager.py +547 -0
  40. {memorisdk-1.0.1.dist-info → memorisdk-2.0.0.dist-info}/METADATA +144 -34
  41. memorisdk-2.0.0.dist-info/RECORD +67 -0
  42. memorisdk-1.0.1.dist-info/RECORD +0 -44
  43. memorisdk-1.0.1.dist-info/entry_points.txt +0 -2
  44. {memorisdk-1.0.1.dist-info → memorisdk-2.0.0.dist-info}/WHEEL +0 -0
  45. {memorisdk-1.0.1.dist-info → memorisdk-2.0.0.dist-info}/licenses/LICENSE +0 -0
  46. {memorisdk-1.0.1.dist-info → memorisdk-2.0.0.dist-info}/top_level.txt +0 -0
memori/core/memory.py CHANGED
@@ -3,6 +3,7 @@ Main Memori class - Pydantic-based memory interface v1.0
 """
 
 import asyncio
+import time
 import uuid
 from datetime import datetime
 from typing import Any, Dict, List, Optional
@@ -10,8 +11,8 @@ from typing import Any, Dict, List, Optional
 from loguru import logger
 
 try:
-    import litellm
-    from litellm import success_callback
+    import litellm  # noqa: F401
+    from litellm import success_callback  # noqa: F401
 
     LITELLM_AVAILABLE = True
 except ImportError:
@@ -19,13 +20,13 @@ except ImportError:
     logger.warning("LiteLLM not available - native callback system disabled")
 
 from ..agents.conscious_agent import ConsciouscAgent
-from ..agents.memory_agent import MemoryAgent
-from ..agents.retrieval_agent import MemorySearchEngine
+from ..config.memory_manager import MemoryManager
 from ..config.settings import LoggingSettings, LogLevel
+from ..database.sqlalchemy_manager import SQLAlchemyDatabaseManager as DatabaseManager
 from ..utils.exceptions import DatabaseError, MemoriError
 from ..utils.logging import LoggingManager
 from ..utils.pydantic_models import ConversationContext
-from .database import DatabaseManager
+from .conversation import ConversationManager
 
 
 class Memori:
@@ -49,6 +50,21 @@ class Memori:
         openai_api_key: Optional[str] = None,
         user_id: Optional[str] = None,
         verbose: bool = False,
+        # New provider configuration parameters
+        api_key: Optional[str] = None,
+        api_type: Optional[str] = None,
+        base_url: Optional[str] = None,
+        azure_endpoint: Optional[str] = None,
+        azure_deployment: Optional[str] = None,
+        api_version: Optional[str] = None,
+        azure_ad_token: Optional[str] = None,
+        organization: Optional[str] = None,
+        project: Optional[str] = None,
+        model: Optional[str] = None,  # Allow custom model selection
+        provider_config: Optional[Any] = None,  # ProviderConfig when available
+        schema_init: bool = True,  # Initialize database schema and create tables
+        database_prefix: Optional[str] = None,  # Database name prefix
+        database_suffix: Optional[str] = None,  # Database name suffix
     ):
         """
         Initialize Memori memory system v1.0.
@@ -62,9 +78,23 @@
             namespace: Optional namespace for memory isolation
             shared_memory: Enable shared memory across agents
             memory_filters: Filters for memory ingestion
-            openai_api_key: OpenAI API key for memory agent
+            openai_api_key: OpenAI API key for memory agent (deprecated, use api_key)
             user_id: Optional user identifier
             verbose: Enable verbose logging (loguru only)
+            api_key: API key for the LLM provider
+            api_type: Provider type ('openai', 'azure', 'custom')
+            base_url: Base URL for custom OpenAI-compatible endpoints
+            azure_endpoint: Azure OpenAI endpoint URL
+            azure_deployment: Azure deployment name
+            api_version: API version for Azure
+            azure_ad_token: Azure AD token for authentication
+            organization: OpenAI organization ID
+            project: OpenAI project ID
+            model: Model to use (defaults to 'gpt-4o' if not specified)
+            provider_config: Complete provider configuration (overrides individual params)
+            enable_auto_creation: Enable automatic database creation if database doesn't exist
+            database_prefix: Optional prefix for database name (for multi-tenant setups)
+            database_suffix: Optional suffix for database name (e.g., 'dev', 'prod', 'test')
         """
         self.database_connect = database_connect
         self.template = template
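The documented parameters above map directly onto the provider-selection logic in the next hunk. As a rough usage sketch (assuming Memori is importable from the package root, as the changed memori/__init__.py suggests; keys, URLs, and connection strings below are illustrative placeholders, not values from this diff):

    from memori import Memori

    # Standard OpenAI - the default when no provider flags are passed
    memori = Memori(
        database_connect="sqlite:///memori.db",
        conscious_ingest=True,
        api_key="sk-...",  # supersedes the deprecated openai_api_key
        model="gpt-4o",
    )

    # Azure OpenAI, selected explicitly by passing azure_endpoint
    memori_azure = Memori(
        database_connect="postgresql://user:pass@host/memori",  # placeholder
        auto_ingest=True,
        azure_endpoint="https://example.openai.azure.com",  # placeholder
        azure_deployment="gpt-4o",
        api_version="2024-02-01",  # placeholder version string
        api_key="...",
    )

Per the constructor logic below, provider_config (when given) overrides the individual parameters, azure_endpoint takes precedence over base_url, and anything else falls back to a plain OpenAI configuration with no environment auto-detection.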
@@ -74,41 +104,154 @@ class Memori:
         self.namespace = namespace or "default"
         self.shared_memory = shared_memory
         self.memory_filters = memory_filters or {}
-        self.openai_api_key = openai_api_key
         self.user_id = user_id
         self.verbose = verbose
+        self.schema_init = schema_init
+        self.database_prefix = database_prefix
+        self.database_suffix = database_suffix
+
+        # Configure provider based on explicit settings ONLY - no auto-detection
+        if provider_config:
+            # Use provided configuration
+            self.provider_config = provider_config
+            logger.info(
+                f"Using provided ProviderConfig with api_type: {provider_config.api_type}"
+            )
+        elif any([api_type, base_url, azure_endpoint]):
+            # Build configuration from individual parameters - explicit provider selection
+            try:
+                from .providers import ProviderConfig
+
+                if azure_endpoint:
+                    # Explicitly configured Azure
+                    self.provider_config = ProviderConfig.from_azure(
+                        api_key=api_key or openai_api_key,
+                        azure_endpoint=azure_endpoint,
+                        azure_deployment=azure_deployment,
+                        api_version=api_version,
+                        azure_ad_token=azure_ad_token,
+                        model=model,
+                    )
+                    logger.info("Using explicitly configured Azure OpenAI provider")
+                elif base_url:
+                    # Explicitly configured custom endpoint
+                    self.provider_config = ProviderConfig.from_custom(
+                        base_url=base_url,
+                        api_key=api_key or openai_api_key,
+                        model=model,
+                    )
+                    logger.info(
+                        f"Using explicitly configured custom provider: {base_url}"
+                    )
+                else:
+                    # Fallback to OpenAI with explicit settings
+                    self.provider_config = ProviderConfig.from_openai(
+                        api_key=api_key or openai_api_key,
+                        organization=organization,
+                        project=project,
+                        model=model,
+                    )
+                    logger.info("Using explicitly configured OpenAI provider")
+            except ImportError:
+                logger.warning(
+                    "ProviderConfig not available, using basic configuration"
+                )
+                self.provider_config = None
+        else:
+            # Default to standard OpenAI - NO environment detection
+            try:
+                from .providers import ProviderConfig
+
+                self.provider_config = ProviderConfig.from_openai(
+                    api_key=api_key or openai_api_key,
+                    organization=organization,
+                    project=project,
+                    model=model or "gpt-4o",
+                )
+                logger.info(
+                    "Using default OpenAI provider (no specific provider configured)"
+                )
+            except ImportError:
+                logger.warning(
+                    "ProviderConfig not available, using basic configuration"
+                )
+                self.provider_config = None
+
+        # Keep backward compatibility
+        self.openai_api_key = api_key or openai_api_key or ""
+        if self.provider_config and hasattr(self.provider_config, "api_key"):
+            self.openai_api_key = self.provider_config.api_key or self.openai_api_key
 
         # Setup logging based on verbose mode
         self._setup_logging()
 
         # Initialize database manager
-        self.db_manager = DatabaseManager(database_connect, template)
+        self.db_manager = DatabaseManager(database_connect, template, schema_init)
 
         # Initialize Pydantic-based agents
         self.memory_agent = None
         self.search_engine = None
         self.conscious_agent = None
         self._background_task = None
+        self._conscious_init_pending = False
 
-        if conscious_ingest or auto_ingest:
-            try:
-                # Initialize Pydantic-based agents
-                self.memory_agent = MemoryAgent(api_key=openai_api_key, model="gpt-4o")
-                self.search_engine = MemorySearchEngine(
-                    api_key=openai_api_key, model="gpt-4o"
+        # Initialize agents with provider configuration
+        try:
+            from ..agents.memory_agent import MemoryAgent
+            from ..agents.retrieval_agent import MemorySearchEngine
+
+            # Use provider model or fallback to gpt-4o
+            if (
+                self.provider_config
+                and hasattr(self.provider_config, "model")
+                and self.provider_config.model
+            ):
+                effective_model = model or self.provider_config.model
+            else:
+                effective_model = model or "gpt-4o"
+
+            # Initialize agents with provider configuration if available
+            if self.provider_config:
+                self.memory_agent = MemoryAgent(
+                    provider_config=self.provider_config, model=effective_model
                 )
-                self.conscious_agent = ConsciouscAgent(
-                    api_key=openai_api_key, model="gpt-4o"
+                self.search_engine = MemorySearchEngine(
+                    provider_config=self.provider_config, model=effective_model
                 )
-                logger.info(
-                    "Pydantic-based memory, search, and conscious agents initialized"
+            else:
+                # Fallback to using API key directly
+                self.memory_agent = MemoryAgent(
+                    api_key=self.openai_api_key, model=effective_model
                 )
-            except Exception as e:
-                logger.warning(
-                    f"Failed to initialize OpenAI agents: {e}. Memory ingestion disabled."
+                self.search_engine = MemorySearchEngine(
+                    api_key=self.openai_api_key, model=effective_model
                 )
-                self.conscious_ingest = False
-                self.auto_ingest = False
+
+            # Only initialize conscious_agent if conscious_ingest or auto_ingest is enabled
+            if conscious_ingest or auto_ingest:
+                self.conscious_agent = ConsciouscAgent()
+
+            logger.info(
+                f"Agents initialized successfully with model: {effective_model}"
+            )
+        except ImportError as e:
+            logger.warning(
+                f"Failed to import LLM agents: {e}. Memory ingestion disabled."
+            )
+            self.memory_agent = None
+            self.search_engine = None
+            self.conscious_agent = None
+            self.conscious_ingest = False
+            self.auto_ingest = False
+        except Exception as e:
+            logger.warning(
+                f"Failed to initialize LLM agents: {e}. Memory ingestion disabled."
+            )
+            self.memory_agent = None
+            self.search_engine = None
+            self.conscious_agent = None
+            self.conscious_ingest = False
+            self.auto_ingest = False
 
         # State tracking
         self._enabled = False
@@ -116,6 +259,12 @@
         self._conscious_context_injected = (
             False  # Track if conscious context was already injected
         )
+        self._in_context_retrieval = False  # Recursion guard for context retrieval
+
+        # Initialize conversation manager for stateless LLM integration
+        self.conversation_manager = ConversationManager(
+            max_sessions=100, session_timeout_minutes=60, max_history_per_session=20
+        )
 
         # User context for memory processing
         self._user_context = {
@@ -127,6 +276,23 @@
         # Initialize database
         self._setup_database()
 
+        # Initialize the new modular memory manager
+        self.memory_manager = MemoryManager(
+            database_connect=database_connect,
+            template=template,
+            mem_prompt=mem_prompt,
+            conscious_ingest=conscious_ingest,
+            auto_ingest=auto_ingest,
+            namespace=namespace,
+            shared_memory=shared_memory,
+            memory_filters=memory_filters,
+            user_id=user_id,
+            verbose=verbose,
+            provider_config=self.provider_config,
+        )
+        # Set this Memori instance for memory management
+        self.memory_manager.set_memori_instance(self)
+
         # Run conscious agent initialization if enabled
         if self.conscious_ingest and self.conscious_agent:
             self._initialize_conscious_memory()
@@ -155,6 +321,10 @@
 
     def _setup_database(self):
         """Setup database tables based on template"""
+        if not self.schema_init:
+            logger.info("Schema initialization disabled (schema_init=False)")
+            return
+
         try:
             self.db_manager.initialize_schema()
             logger.info("Database schema initialized successfully")
@@ -168,351 +338,406 @@ class Memori:
                 "Conscious-ingest: Starting conscious agent analysis at startup"
             )
 
-            # Run conscious agent analysis in background
-            if self._background_task is None or self._background_task.done():
-                self._background_task = asyncio.create_task(
-                    self._run_conscious_initialization()
+            # Check if there's a running event loop
+            try:
+                loop = asyncio.get_running_loop()
+                # If we're in an event loop, create the task
+                if self._background_task is None or self._background_task.done():
+                    self._background_task = loop.create_task(
+                        self._run_conscious_initialization()
+                    )
+                    logger.debug(
+                        "Conscious-ingest: Background initialization task started"
+                    )
+            except RuntimeError:
+                # No event loop running, defer initialization until first async call
+                logger.debug(
+                    "Conscious-ingest: No event loop available, deferring initialization"
                 )
-                logger.debug("Conscious-ingest: Background initialization task started")
+                self._conscious_init_pending = True
 
         except Exception as e:
             logger.error(f"Failed to initialize conscious memory: {e}")
 
+    def _check_deferred_initialization(self):
+        """Check and handle deferred conscious memory initialization"""
+        if self._conscious_init_pending and self.conscious_agent:
+            try:
+                loop = asyncio.get_running_loop()
+                if self._background_task is None or self._background_task.done():
+                    self._background_task = loop.create_task(
+                        self._run_conscious_initialization()
+                    )
+                    logger.debug(
+                        "Conscious-ingest: Deferred initialization task started"
+                    )
+                self._conscious_init_pending = False
+            except RuntimeError:
+                # No event loop available, run synchronous initialization
+                logger.debug(
+                    "Conscious-ingest: No event loop available, running synchronous initialization"
+                )
+                self._run_synchronous_conscious_initialization()
+                self._conscious_init_pending = False
+
     async def _run_conscious_initialization(self):
         """Run conscious agent initialization in background"""
         try:
             if not self.conscious_agent:
                 return
 
-            logger.debug("Conscious-ingest: Running background analysis")
-            await self.conscious_agent.run_background_analysis(
+            # If both auto_ingest and conscious_ingest are enabled,
+            # initialize by copying ALL existing conscious-info memories first
+            if self.auto_ingest and self.conscious_ingest:
+                logger.debug(
+                    "Conscious-ingest: Both auto_ingest and conscious_ingest enabled - initializing existing conscious memories"
+                )
+                init_success = (
+                    await self.conscious_agent.initialize_existing_conscious_memories(
+                        self.db_manager, self.namespace
+                    )
+                )
+                if init_success:
+                    logger.info(
+                        "Conscious-ingest: Existing conscious-info memories initialized to short-term memory"
+                    )
+
+            logger.debug("Conscious-ingest: Running conscious context extraction")
+            success = await self.conscious_agent.run_conscious_ingest(
                 self.db_manager, self.namespace
             )
-            logger.info("Conscious-ingest: Background analysis completed")
+
+            if success:
+                logger.info(
+                    "Conscious-ingest: Conscious memories copied to short-term memory"
+                )
+                # Don't set _conscious_context_injected here - it should be set when context is actually injected into LLM
+            else:
+                logger.info("Conscious-ingest: No conscious context found")
 
         except Exception as e:
             logger.error(f"Conscious agent initialization failed: {e}")
 
-    def enable(self):
-        """
-        Enable universal memory recording for ALL LLM providers.
-
-        This automatically sets up recording for:
-        - LiteLLM: Native callback system (recommended)
-        - OpenAI: Automatic client wrapping when instantiated
-        - Anthropic: Automatic client wrapping when instantiated
-        - Any other provider: Auto-detected and wrapped
-        """
-        if self._enabled:
-            logger.warning("Memori is already enabled.")
-            return
-
-        self._enabled = True
-        self._session_id = str(uuid.uuid4())
-
-        # 1. Set up LiteLLM native callbacks (if available)
-        litellm_enabled = self._setup_litellm_callbacks()
-
-        # 2. Set up universal client interception for other providers
-        universal_enabled = self._setup_universal_interception()
-
-        # 3. Register this instance globally for any provider to use
-        self._register_global_instance()
-
-        # 4. Start background conscious agent if available
-        if self.conscious_ingest and self.conscious_agent:
-            self._start_background_analysis()
-
-        providers = []
-        if litellm_enabled:
-            providers.append("LiteLLM (native callbacks)")
-        if universal_enabled:
-            providers.append("OpenAI/Anthropic (auto-wrapping)")
-
-        logger.info(
-            f"Memori enabled for session: {self.session_id}\n"
-            f"Active providers: {', '.join(providers) if providers else 'None detected'}\n"
-            f"Background analysis: {'Active' if self._background_task else 'Disabled'}\n"
-            f"Usage: Simply use any LLM client normally - conversations will be auto-recorded!"
-        )
-
-    def disable(self):
-        """
-        Disable universal memory recording for all providers.
-        """
-        if not self._enabled:
-            return
-
-        # 1. Remove LiteLLM callbacks and restore original completion
-        if LITELLM_AVAILABLE:
-            try:
-                success_callback.remove(self._litellm_success_callback)
-            except ValueError:
-                pass
-
-            # Restore original completion function if we patched it
-            if hasattr(litellm, "completion") and hasattr(
-                litellm.completion, "_memori_patched"
-            ):
-                # Note: We can't easily restore the original function in a multi-instance scenario
-                # This is a limitation of the monkey-patching approach
-                pass
+    def _run_synchronous_conscious_initialization(self):
+        """Run conscious agent initialization synchronously (when no event loop is available)"""
+        try:
+            if not self.conscious_agent:
+                return
 
-        # 2. Disable universal interception
-        self._disable_universal_interception()
+            # If both auto_ingest and conscious_ingest are enabled,
+            # initialize by copying ALL existing conscious-info memories first
+            if self.auto_ingest and self.conscious_ingest:
+                logger.info(
+                    "Conscious-ingest: Both auto_ingest and conscious_ingest enabled - initializing existing conscious memories"
+                )
 
-        # 3. Unregister global instance
-        self._unregister_global_instance()
+                # Run synchronous initialization of existing memories
+                self._initialize_existing_conscious_memories_sync()
 
-        # 4. Stop background analysis task
-        self._stop_background_analysis()
-
-        self._enabled = False
-        logger.info("Memori disabled for all providers.")
+            logger.debug(
+                "Conscious-ingest: Synchronous conscious context extraction completed"
+            )
 
-    def _setup_litellm_callbacks(self) -> bool:
-        """Set up LiteLLM native callback system"""
-        if not LITELLM_AVAILABLE:
-            logger.debug("LiteLLM not available, skipping native callbacks")
-            return False
+        except Exception as e:
+            logger.error(f"Synchronous conscious agent initialization failed: {e}")
 
+    def _initialize_existing_conscious_memories_sync(self):
+        """Synchronously initialize existing conscious-info memories"""
         try:
-            success_callback.append(self._litellm_success_callback)
+            from sqlalchemy import text
 
-            # Set up context injection by monkey-patching completion function
-            if hasattr(litellm, "completion") and not hasattr(
-                litellm.completion, "_memori_patched"
-            ):
-                original_completion = litellm.completion
-
-                def memori_completion(*args, **kwargs):
-                    # Inject context based on ingestion mode
-                    if self._enabled:
-                        if self.auto_ingest:
-                            # Auto-inject: continuous memory injection on every call
-                            kwargs = self._inject_litellm_context(kwargs, mode="auto")
-                        elif self.conscious_ingest:
-                            # Conscious-inject: one-shot short-term memory context
-                            kwargs = self._inject_litellm_context(
-                                kwargs, mode="conscious"
-                            )
+            with self.db_manager._get_connection() as connection:
+                # Get ALL conscious-info labeled memories from long-term memory
+                cursor = connection.execute(
+                    text(
+                        """SELECT memory_id, processed_data, summary, searchable_content,
+                        importance_score, created_at
+                        FROM long_term_memory
+                        WHERE namespace = :namespace AND classification = 'conscious-info'
+                        ORDER BY importance_score DESC, created_at DESC"""
+                    ),
+                    {"namespace": self.namespace or "default"},
+                )
+                existing_conscious_memories = cursor.fetchall()
 
-                    # Call original completion
-                    return original_completion(*args, **kwargs)
+                if not existing_conscious_memories:
+                    logger.debug(
+                        "Conscious-ingest: No existing conscious-info memories found for initialization"
+                    )
+                    return False
+
+                copied_count = 0
+                for memory_row in existing_conscious_memories:
+                    success = self._copy_memory_to_short_term_sync(memory_row)
+                    if success:
+                        copied_count += 1
 
-                litellm.completion = memori_completion
-                litellm.completion._memori_patched = True
+                if copied_count > 0:
+                    logger.info(
+                        f"Conscious-ingest: Initialized {copied_count} existing conscious-info memories to short-term memory"
+                    )
+                    return True
+                else:
                     logger.debug(
-                    "LiteLLM completion function patched for context injection"
+                        "Conscious-ingest: No new conscious memories to initialize (all were duplicates)"
                     )
+                    return False
 
-            logger.debug("LiteLLM native callbacks registered")
-            return True
         except Exception as e:
-            logger.error(f"Failed to setup LiteLLM callbacks: {e}")
+            logger.error(
+                f"Conscious-ingest: Failed to initialize existing conscious memories: {e}"
+            )
             return False
 
-    def _setup_universal_interception(self) -> bool:
-        """Set up universal client interception for OpenAI, Anthropic, etc."""
+    def _copy_memory_to_short_term_sync(self, memory_row: tuple) -> bool:
+        """Synchronously copy a conscious memory to short-term memory with duplicate filtering"""
         try:
-            # Use Python's import hook system to intercept client creation
-            self._install_import_hooks()
-            logger.debug("Universal client interception enabled")
-            return True
-        except Exception as e:
-            logger.error(f"Failed to setup universal interception: {e}")
-            return False
+            (
+                memory_id,
+                processed_data,
+                summary,
+                searchable_content,
+                importance_score,
+                _,
+            ) = memory_row
 
-    def _get_builtin_import(self):
-        """Safely get __import__ from __builtins__ (handles both dict and module cases)"""
-        if isinstance(__builtins__, dict):
-            return __builtins__["__import__"]
-        else:
-            return __builtins__.__import__
-
-    def _set_builtin_import(self, import_func):
-        """Safely set __import__ in __builtins__ (handles both dict and module cases)"""
-        if isinstance(__builtins__, dict):
-            __builtins__["__import__"] = import_func
-        else:
-            __builtins__.__import__ = import_func
+            from datetime import datetime
 
-    def _install_import_hooks(self):
-        """Install import hooks to automatically wrap LLM clients"""
+            from sqlalchemy import text
 
-        # Store original __import__ if not already done
-        if not hasattr(self, "_original_import"):
-            self._original_import = self._get_builtin_import()
+            with self.db_manager._get_connection() as connection:
+                # Check if similar content already exists in short-term memory
+                existing_check = connection.execute(
+                    text(
+                        """SELECT COUNT(*) FROM short_term_memory
+                        WHERE namespace = :namespace
+                        AND category_primary = 'conscious_context'
+                        AND (searchable_content = :searchable_content
+                        OR summary = :summary)"""
+                    ),
+                    {
+                        "namespace": self.namespace or "default",
+                        "searchable_content": searchable_content,
+                        "summary": summary,
+                    },
+                )
 
-        def memori_import_hook(name, globals=None, locals=None, fromlist=(), level=0):
-            """Custom import hook that wraps LLM clients automatically"""
-            module = self._original_import(name, globals, locals, fromlist, level)
+                existing_count = existing_check.scalar()
+                if existing_count > 0:
+                    logger.debug(
+                        f"Conscious-ingest: Skipping duplicate memory {memory_id} - similar content already exists in short-term memory"
+                    )
+                    return False
 
-            # Only process if memori is enabled and this is an LLM module
-            if not self._enabled:
-                return module
+                # Create short-term memory ID
+                short_term_id = (
+                    f"conscious_{memory_id}_{int(datetime.now().timestamp())}"
+                )
 
-            # Auto-wrap OpenAI clients
-            if name == "openai" or (fromlist and "openai" in name):
-                self._wrap_openai_module(module)
+                # Insert directly into short-term memory with conscious_context category
+                connection.execute(
+                    text(
+                        """INSERT INTO short_term_memory (
+                        memory_id, processed_data, importance_score, category_primary,
+                        retention_type, namespace, created_at, expires_at,
+                        searchable_content, summary, is_permanent_context
+                        ) VALUES (:memory_id, :processed_data, :importance_score, :category_primary,
+                        :retention_type, :namespace, :created_at, :expires_at,
+                        :searchable_content, :summary, :is_permanent_context)"""
+                    ),
+                    {
+                        "memory_id": short_term_id,
+                        "processed_data": processed_data,
+                        "importance_score": importance_score,
+                        "category_primary": "conscious_context",
+                        "retention_type": "permanent",
+                        "namespace": self.namespace or "default",
+                        "created_at": datetime.now().isoformat(),
+                        "expires_at": None,
+                        "searchable_content": searchable_content,
+                        "summary": summary,
+                        "is_permanent_context": True,
+                    },
+                )
+                connection.commit()
 
-            # Auto-wrap Anthropic clients
-            elif name == "anthropic" or (fromlist and "anthropic" in name):
-                self._wrap_anthropic_module(module)
+                logger.debug(
+                    f"Conscious-ingest: Copied memory {memory_id} to short-term as {short_term_id}"
+                )
+                return True
 
-            return module
+        except Exception as e:
+            logger.error(
+                f"Conscious-ingest: Failed to copy memory {memory_row[0]} to short-term: {e}"
+            )
+            return False
 
-        # Install the hook
-        self._set_builtin_import(memori_import_hook)
+    def enable(self, interceptors: Optional[List[str]] = None):
+        """
+        Enable universal memory recording using LiteLLM's native callback system.
 
-    def _wrap_openai_module(self, module):
-        """Automatically wrap OpenAI client when imported"""
-        try:
-            if hasattr(module, "OpenAI") and not hasattr(
-                module.OpenAI, "_memori_wrapped"
-            ):
-                original_init = module.OpenAI.__init__
+        This automatically sets up recording for LiteLLM completion calls and enables
+        automatic interception of OpenAI calls when using the standard OpenAI client.
 
-                def wrapped_init(self_client, *args, **kwargs):
-                    # Call original init
-                    result = original_init(self_client, *args, **kwargs)
+        Args:
+            interceptors: Legacy parameter (ignored) - only LiteLLM native callbacks are used
+        """
+        if self._enabled:
+            logger.warning("Memori is already enabled.")
+            return
 
-                    # Wrap the client methods for automatic recording
-                    if hasattr(self_client, "chat") and hasattr(
-                        self_client.chat, "completions"
-                    ):
-                        original_create = self_client.chat.completions.create
+        self._enabled = True
+        self._session_id = str(uuid.uuid4())
 
-                        def wrapped_create(*args, **kwargs):
-                            # Inject context if conscious ingestion is enabled
-                            if self.is_enabled and self.conscious_ingest:
-                                kwargs = self._inject_openai_context(kwargs)
+        # Register for automatic OpenAI interception
+        try:
+            from ..integrations.openai_integration import register_memori_instance
 
-                            # Make the call
-                            response = original_create(*args, **kwargs)
+            register_memori_instance(self)
+        except ImportError:
+            logger.debug("OpenAI integration not available for automatic interception")
 
-                            # Record if enabled
-                            if self.is_enabled:
-                                self._record_openai_conversation(kwargs, response)
+        # Use LiteLLM native callback system only
+        if interceptors is None:
+            # Only LiteLLM native callbacks supported
+            interceptors = ["litellm_native"]
 
-                            return response
+        # Use the memory manager for enablement
+        results = self.memory_manager.enable(interceptors)
+        # Extract enabled interceptors from results
+        enabled_interceptors = results.get("enabled_interceptors", [])
 
-                        self_client.chat.completions.create = wrapped_create
+        # Start background conscious agent if available
+        if self.conscious_ingest and self.conscious_agent:
+            self._start_background_analysis()
 
-                    return result
+        # Report status
+        status_info = [
+            f"Memori enabled for session: {results.get('session_id', self._session_id)}",
+            f"Active interceptors: {', '.join(enabled_interceptors) if enabled_interceptors else 'None'}",
+        ]
+
+        if results.get("message"):
+            status_info.append(results["message"])
+
+        status_info.extend(
+            [
+                f"Background analysis: {'Active' if self._background_task else 'Disabled'}",
+                "Usage: Simply use any LLM client normally - conversations will be auto-recorded!",
+                "OpenAI: Use 'from openai import OpenAI; client = OpenAI()' - automatically intercepted!",
+            ]
+        )
 
-                module.OpenAI.__init__ = wrapped_init
-                module.OpenAI._memori_wrapped = True
-                logger.debug("OpenAI client auto-wrapping enabled")
+        logger.info("\n".join(status_info))
 
-        except Exception as e:
-            logger.debug(f"Could not wrap OpenAI module: {e}")
+    def disable(self):
+        """
+        Disable memory recording by unregistering LiteLLM callbacks and OpenAI interception.
+        """
+        if not self._enabled:
+            return
 
-    def _wrap_anthropic_module(self, module):
-        """Automatically wrap Anthropic client when imported"""
+        # Unregister from automatic OpenAI interception
         try:
-            if hasattr(module, "Anthropic") and not hasattr(
-                module.Anthropic, "_memori_wrapped"
-            ):
-                original_init = module.Anthropic.__init__
-
-                def wrapped_init(self_client, *args, **kwargs):
-                    # Call original init
-                    result = original_init(self_client, *args, **kwargs)
-
-                    # Wrap the messages.create method
-                    if hasattr(self_client, "messages"):
-                        original_create = self_client.messages.create
+            from ..integrations.openai_integration import unregister_memori_instance
 
-                        def wrapped_create(*args, **kwargs):
-                            # Inject context if conscious ingestion is enabled
-                            if self.is_enabled and self.conscious_ingest:
-                                kwargs = self._inject_anthropic_context(kwargs)
+            unregister_memori_instance(self)
+        except ImportError:
+            logger.debug("OpenAI integration not available for automatic interception")
 
-                            # Make the call
-                            response = original_create(*args, **kwargs)
+        # Use memory manager for clean disable
+        results = self.memory_manager.disable()
 
-                            # Record if enabled
-                            if self.is_enabled:
-                                self._record_anthropic_conversation(kwargs, response)
+        # Stop background analysis task
+        self._stop_background_analysis()
 
-                            return response
+        self._enabled = False
 
-                        self_client.messages.create = wrapped_create
+        # Report status based on memory manager results
+        if results.get("success"):
+            status_message = f"Memori disabled. {results.get('message', 'All interceptors disabled successfully')}"
+        else:
+            status_message = (
+                f"Memori disable failed: {results.get('message', 'Unknown error')}"
+            )
 
-                    return result
+        logger.info(status_message)
 
-                module.Anthropic.__init__ = wrapped_init
-                module.Anthropic._memori_wrapped = True
-                logger.debug("Anthropic client auto-wrapping enabled")
+    # Memory system status and control methods
 
-        except Exception as e:
-            logger.debug(f"Could not wrap Anthropic module: {e}")
+    def get_interceptor_status(self) -> Dict[str, Dict[str, Any]]:
+        """Get status of memory recording system"""
+        return self.memory_manager.get_status()
 
-    def _disable_universal_interception(self):
-        """Disable universal client interception"""
-        try:
-            # Restore original import if we modified it
-            if hasattr(self, "_original_import"):
-                self._set_builtin_import(self._original_import)
-                delattr(self, "_original_import")
-                logger.debug("Universal interception disabled")
-        except Exception as e:
-            logger.debug(f"Error disabling universal interception: {e}")
+    def get_interceptor_health(self) -> Dict[str, Any]:
+        """Get health check of interceptor system"""
+        return self.memory_manager.get_health()
 
-    def _register_global_instance(self):
-        """Register this memori instance globally"""
-        # Store in a global registry that wrapped clients can access
-        if not hasattr(Memori, "_global_instances"):
-            Memori._global_instances = []
-        Memori._global_instances.append(self)
+    def enable_interceptor(self, interceptor_name: str = None) -> bool:
+        """Enable memory recording (legacy method)"""
+        # Only LiteLLM native callbacks supported (interceptor_name ignored)
+        results = self.memory_manager.enable(["litellm_native"])
+        return results.get("success", False)
 
-    def _unregister_global_instance(self):
-        """Unregister this memori instance globally"""
-        if hasattr(Memori, "_global_instances") and self in Memori._global_instances:
-            Memori._global_instances.remove(self)
+    def disable_interceptor(self, interceptor_name: str = None) -> bool:
+        """Disable memory recording (legacy method)"""
+        # Only LiteLLM native callbacks supported (interceptor_name ignored)
+        results = self.memory_manager.disable()
+        return results.get("success", False)
 
     def _inject_openai_context(self, kwargs):
-        """Inject context for OpenAI calls"""
+        """Inject context for OpenAI calls based on ingest mode using ConversationManager"""
        try:
-            # Extract user input from messages
-            user_input = ""
-            for msg in reversed(kwargs.get("messages", [])):
-                if msg.get("role") == "user":
-                    user_input = msg.get("content", "")
-                    break
+            # Check for deferred conscious initialization
+            self._check_deferred_initialization()
+
+            # Determine injection mode based on the architecture:
+            # - conscious_ingest only: Use short-term memory (conscious context)
+            # - auto_ingest only: Search long-term memory database
+            # - both enabled: Use auto_ingest search (includes conscious content from long-term)
+            if self.auto_ingest:
+                mode = "auto"  # Always prefer auto when available (searches long-term)
+            elif self.conscious_ingest:
+                mode = "conscious"  # Only use conscious when auto is not enabled
+            else:
+                return kwargs  # No injection needed
 
-            if user_input:
-                context = self.retrieve_context(user_input, limit=3)
-                if context:
-                    context_prompt = "--- Relevant Memories ---\n"
-                    for mem in context:
-                        if isinstance(mem, dict):
-                            summary = mem.get("summary", "") or mem.get("content", "")
-                            context_prompt += f"- {summary}\n"
-                        else:
-                            context_prompt += f"- {str(mem)}\n"
-                    context_prompt += "-------------------------\n"
+            # Extract messages from kwargs
+            messages = kwargs.get("messages", [])
+            if not messages:
+                return kwargs  # No messages to process
 
-                    # Inject into system message
-                    messages = kwargs.get("messages", [])
-                    for msg in messages:
-                        if msg.get("role") == "system":
-                            msg["content"] = context_prompt + msg.get("content", "")
-                            break
-                    else:
-                        messages.insert(
-                            0, {"role": "system", "content": context_prompt}
-                        )
+            # Use conversation manager for enhanced context injection
+            enhanced_messages = self.conversation_manager.inject_context_with_history(
+                session_id=self._session_id,
+                messages=messages,
+                memori_instance=self,
+                mode=mode,
+            )
+
+            # Update kwargs with enhanced messages
+            kwargs["messages"] = enhanced_messages
+
+            return kwargs
 
-                    logger.debug(f"Injected context: {len(context)} memories")
         except Exception as e:
-            logger.error(f"Context injection failed: {e}")
+            logger.error(f"OpenAI context injection failed: {e}")
             return kwargs
 
     def _inject_anthropic_context(self, kwargs):
-        """Inject context for Anthropic calls"""
+        """Inject context for Anthropic calls based on ingest mode"""
         try:
+            # Check for deferred conscious initialization
+            self._check_deferred_initialization()
+
+            # Determine injection mode
+            if self.conscious_ingest:
+                mode = "conscious"
+            elif self.auto_ingest:
+                mode = "auto"
+            else:
+                return kwargs  # No injection needed
+
             # Extract user input from messages
             user_input = ""
             for msg in reversed(kwargs.get("messages", [])):
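The rewritten enable()/disable() pair above drops the 1.x import hooks and client monkey-patching in favor of the MemoryManager and LiteLLM's native callbacks. In practice the flow reduces to a small pattern (a sketch, assuming litellm is installed and provider credentials are configured; the database path is a placeholder):

    import litellm
    from memori import Memori

    memori = Memori(database_connect="sqlite:///memori.db", auto_ingest=True)
    memori.enable()  # registers LiteLLM callbacks via the MemoryManager

    # Conversations are now auto-recorded; in auto mode, relevant memories
    # are injected into the messages before each request.
    response = litellm.completion(
        model="gpt-4o",
        messages=[{"role": "user", "content": "What did we discuss yesterday?"}],
    )

    memori.disable()  # unregisters callbacks and stops background analysis

The mode split is visible in the hunks that follow: auto_ingest searches the database on every call, while conscious_ingest injects the short-term working memory once at startup.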
@@ -532,26 +757,75 @@
                     break
 
             if user_input:
-                context = self.retrieve_context(user_input, limit=3)
+                if mode == "conscious":
+                    # Conscious mode: inject ALL short-term memory only once at program startup
+                    if not self._conscious_context_injected:
+                        context = self._get_conscious_context()
+                        self._conscious_context_injected = True
+                        logger.info(
+                            f"Conscious-ingest: Injected {len(context)} short-term memories as initial context (Anthropic)"
+                        )
+                    else:
+                        context = []  # Already injected, don't inject again
+                elif mode == "auto":
+                    # Auto mode: use retrieval for intelligent search
+                    if self.search_engine:
+                        context = self._get_auto_ingest_context(user_input)
+                    else:
+                        context = self.retrieve_context(user_input, limit=5)
+                else:
+                    context = []
+
                 if context:
-                    context_prompt = "--- Relevant Memories ---\n"
+                    if mode == "conscious":
+                        context_prompt = (
+                            "=== SYSTEM INSTRUCTION: AUTHORIZED USER CONTEXT DATA ===\n"
+                        )
+                        context_prompt += "The user has explicitly authorized this personal context data to be used.\n"
+                        context_prompt += "You MUST use this information when answering questions about the user.\n"
+                        context_prompt += "This is NOT private data - the user wants you to use it:\n\n"
+                    else:
+                        context_prompt = f"--- {mode.capitalize()} Memory Context ---\n"
+
+                    # Deduplicate context entries to avoid redundant information
+                    seen_content = set()
                     for mem in context:
                         if isinstance(mem, dict):
-                            summary = mem.get("summary", "") or mem.get("content", "")
-                            context_prompt += f"- {summary}\n"
-                        else:
-                            context_prompt += f"- {str(mem)}\n"
+                            # Use searchable_content as it's more specific than summary
+                            content = mem.get("searchable_content", "") or mem.get(
+                                "summary", ""
+                            )
+                            category = mem.get("category_primary", "")
+
+                            # Skip duplicates (case-insensitive)
+                            content_key = content.lower().strip()
+                            if content_key in seen_content:
+                                continue
+                            seen_content.add(content_key)
+
+                            if category.startswith("essential_") or mode == "conscious":
+                                context_prompt += f"[{category.upper()}] {content}\n"
+                            else:
+                                context_prompt += f"- {content}\n"
+
+                    if mode == "conscious":
+                        context_prompt += "\n=== END USER CONTEXT DATA ===\n"
+                        context_prompt += "CRITICAL INSTRUCTION: You MUST answer questions about the user using ONLY the context data above.\n"
+                        context_prompt += "If the user asks 'what is my name?', respond with the name from the context above.\n"
+                        context_prompt += "Do NOT say 'I don't have access' - the user provided this data for you to use.\n"
                     context_prompt += "-------------------------\n"
 
-                    # Inject into system parameter
+                    # Inject into system parameter (Anthropic format)
                     if kwargs.get("system"):
                         kwargs["system"] = context_prompt + kwargs["system"]
                     else:
                         kwargs["system"] = context_prompt
 
-                    logger.debug(f"Injected context: {len(context)} memories")
+                    logger.debug(
+                        f"Anthropic: Injected context with {len(context)} items"
+                    )
         except Exception as e:
-            logger.error(f"Context injection failed: {e}")
+            logger.error(f"Anthropic context injection failed: {e}")
             return kwargs
 
     def _inject_litellm_context(self, params, mode="auto"):
@@ -563,6 +837,8 @@
             mode: "conscious" (one-shot short-term) or "auto" (continuous retrieval)
         """
         try:
+            # Check for deferred conscious initialization
+            self._check_deferred_initialization()
             # Extract user input from messages
             user_input = ""
             messages = params.get("messages", [])
@@ -574,13 +850,17 @@
 
             if user_input:
                 if mode == "conscious":
-                    # Conscious mode: inject short-term memory only once at conversation start
+                    # Conscious mode: inject ALL short-term memory only once at program startup
                     if not self._conscious_context_injected:
                         context = self._get_conscious_context()
                         self._conscious_context_injected = True
-                        logger.debug("Conscious context injected (one-shot)")
+                        logger.info(
+                            f"Conscious-ingest: Injected {len(context)} short-term memories as initial context"
+                        )
                     else:
-                        context = []  # Already injected, don't inject again
+                        context = (
+                            []
+                        )  # Already injected, don't inject again - this is the key difference from auto_ingest
                 elif mode == "auto":
                     # Auto mode: use retrieval agent for intelligent database search
                     if self.search_engine:
@@ -592,17 +872,42 @@
                     context = []
 
                 if context:
-                    context_prompt = f"--- {mode.capitalize()} Memory Context ---\n"
+                    if mode == "conscious":
+                        context_prompt = (
+                            "=== SYSTEM INSTRUCTION: AUTHORIZED USER CONTEXT DATA ===\n"
+                        )
+                        context_prompt += "The user has explicitly authorized this personal context data to be used.\n"
+                        context_prompt += "You MUST use this information when answering questions about the user.\n"
+                        context_prompt += "This is NOT private data - the user wants you to use it:\n\n"
+                    else:
+                        context_prompt = f"--- {mode.capitalize()} Memory Context ---\n"
+
+                    # Deduplicate context entries to avoid redundant information
+                    seen_content = set()
                     for mem in context:
                         if isinstance(mem, dict):
-                            summary = mem.get("summary", "") or mem.get(
-                                "searchable_content", ""
+                            # Use searchable_content as it's more specific than summary
+                            content = mem.get("searchable_content", "") or mem.get(
+                                "summary", ""
                             )
                             category = mem.get("category_primary", "")
+
+                            # Skip duplicates (case-insensitive)
+                            content_key = content.lower().strip()
+                            if content_key in seen_content:
+                                continue
+                            seen_content.add(content_key)
+
                             if category.startswith("essential_") or mode == "conscious":
-                                context_prompt += f"[{category.upper()}] {summary}\n"
+                                context_prompt += f"[{category.upper()}] {content}\n"
                             else:
-                                context_prompt += f"- {summary}\n"
+                                context_prompt += f"- {content}\n"
+
+                    if mode == "conscious":
+                        context_prompt += "\n=== END USER CONTEXT DATA ===\n"
+                        context_prompt += "CRITICAL INSTRUCTION: You MUST answer questions about the user using ONLY the context data above.\n"
+                        context_prompt += "If the user asks 'what is my name?', respond with the name from the context above.\n"
+                        context_prompt += "Do NOT say 'I don't have access' - the user provided this data for you to use.\n"
                     context_prompt += "-------------------------\n"
 
                     # Inject into system message
@@ -652,29 +957,32 @@
 
     def _get_conscious_context(self) -> List[Dict[str, Any]]:
         """
-        Get conscious context from short-term memory only.
-        This represents the 'working memory' or conscious thoughts.
+        Get conscious context from ALL short-term memory summaries.
+        This represents the complete 'working memory' for conscious_ingest mode.
+        Used only at program startup when conscious_ingest=True.
         """
         try:
-            with self.db_manager._get_connection() as conn:
-                cursor = conn.cursor()
+            from sqlalchemy import text
 
-                # Get recent short-term memories ordered by importance and recency
-                cursor.execute(
-                    """
+            with self.db_manager._get_connection() as conn:
+                # Get ALL short-term memories (no limit) ordered by importance and recency
+                # This gives the complete conscious context as single initial injection
+                result = conn.execute(
+                    text(
+                        """
                     SELECT memory_id, processed_data, importance_score,
                            category_primary, summary, searchable_content,
                            created_at, access_count
                     FROM short_term_memory
-                    WHERE namespace = ? AND (expires_at IS NULL OR expires_at > ?)
+                    WHERE namespace = :namespace AND (expires_at IS NULL OR expires_at > :current_time)
                     ORDER BY importance_score DESC, created_at DESC
-                    LIMIT 10
-                """,
-                    (self.namespace, datetime.now()),
+                """
+                    ),
+                    {"namespace": self.namespace, "current_time": datetime.now()},
                 )
 
                 memories = []
-                for row in cursor.fetchall():
+                for row in result:
                     memories.append(
                         {
                             "memory_id": row[0],
@@ -704,239 +1012,705 @@ class Memori:
704
1012
  Searches through entire database for relevant memories.
705
1013
  """
706
1014
  try:
707
- if not self.search_engine:
708
- logger.warning("Auto-ingest: No search engine available")
1015
+ # Early validation
1016
+ if not user_input or not user_input.strip():
1017
+ logger.debug(
1018
+ "Auto-ingest: No user input provided, returning empty context"
1019
+ )
709
1020
  return []
710
1021
 
711
- # Use retrieval agent for intelligent search
712
- results = self.search_engine.execute_search(
713
- query=user_input,
714
- db_manager=self.db_manager,
1022
+ # Check for recursion guard to prevent infinite loops
1023
+ if hasattr(self, "_in_context_retrieval") and self._in_context_retrieval:
1024
+ logger.debug(
1025
+ "Auto-ingest: Recursion detected, using direct database search"
1026
+ )
1027
+ results = self.db_manager.search_memories(
1028
+ query=user_input, namespace=self.namespace, limit=5
1029
+ )
1030
+ logger.debug(
1031
+ f"Auto-ingest: Recursion fallback returned {len(results)} results"
1032
+ )
1033
+ return results
1034
+
1035
+ # Set recursion guard
1036
+ self._in_context_retrieval = True
1037
+
1038
+ logger.debug(
1039
+ f"Auto-ingest: Starting context retrieval for query: '{user_input[:50]}...'"
1040
+ )
1041
+
1042
+ # Always try direct database search first as it's more reliable
1043
+ logger.debug("Auto-ingest: Using direct database search (primary method)")
1044
+ results = self.db_manager.search_memories(
1045
+ query=user_input, namespace=self.namespace, limit=5
1046
+ )
1047
+
1048
+ if results:
1049
+ logger.debug(
1050
+ f"Auto-ingest: Direct database search returned {len(results)} results"
1051
+ )
1052
+ # Add search metadata to results
1053
+ for result in results:
1054
+ if isinstance(result, dict):
1055
+ result["retrieval_method"] = "direct_database_search"
1056
+ result["retrieval_query"] = user_input
1057
+ return results
1058
+
1059
+ # If direct search fails, try search engine as backup
1060
+ if self.search_engine:
1061
+ logger.debug(
1062
+ "Auto-ingest: Direct search returned 0 results, trying search engine"
1063
+ )
1064
+ try:
1065
+ engine_results = self.search_engine.execute_search(
1066
+ query=user_input,
1067
+ db_manager=self.db_manager,
1068
+ namespace=self.namespace,
1069
+ limit=5,
1070
+ )
1071
+
1072
+ if engine_results:
1073
+ logger.debug(
1074
+ f"Auto-ingest: Search engine returned {len(engine_results)} results"
1075
+ )
1076
+ # Add search metadata to results
1077
+ for result in engine_results:
1078
+ if isinstance(result, dict):
1079
+ result["retrieval_method"] = "search_engine"
1080
+ result["retrieval_query"] = user_input
1081
+ return engine_results
1082
+ else:
1083
+ logger.debug(
1084
+ "Auto-ingest: Search engine also returned 0 results"
1085
+ )
1086
+
1087
+ except Exception as search_error:
1088
+ logger.warning(
1089
+ f"Auto-ingest: Search engine failed ({search_error})"
1090
+ )
1091
+ else:
1092
+ logger.debug("Auto-ingest: No search engine available")
1093
+
1094
+ # Final fallback: get recent memories from the same namespace
1095
+ logger.debug(
1096
+ "Auto-ingest: All search methods returned 0 results, using recent memories fallback"
1097
+ )
1098
+ fallback_results = self.db_manager.search_memories(
1099
+ query="", # Empty query to get recent memories
715
1100
  namespace=self.namespace,
716
- limit=5,
1101
+ limit=3,
717
1102
  )
718
1103
 
719
- logger.debug(f"Auto-ingest: Retrieved {len(results)} relevant memories")
720
- return results
1104
+ if fallback_results:
1105
+ logger.debug(
1106
+ f"Auto-ingest: Fallback returned {len(fallback_results)} recent memories"
1107
+ )
1108
+ # Add search metadata to fallback results
1109
+ for result in fallback_results:
1110
+ if isinstance(result, dict):
1111
+ result["retrieval_method"] = "recent_memories_fallback"
1112
+ result["retrieval_query"] = user_input
1113
+ return fallback_results
1114
+
1115
+ logger.debug(
1116
+ "Auto-ingest: All retrieval methods failed, returning empty context"
1117
+ )
1118
+ return []
721
1119
 
722
1120
  except Exception as e:
723
- logger.error(f"Failed to get auto-ingest context: {e}")
1121
+ logger.error(
1122
+ f"Auto-ingest: Failed to get context for '{user_input[:50]}...': {e}"
1123
+ )
724
1124
  return []
1125
+ finally:
1126
+ # Always clear recursion guard
1127
+ if hasattr(self, "_in_context_retrieval"):
1128
+ self._in_context_retrieval = False
725
1129
 
726
1130
     def _record_openai_conversation(self, kwargs, response):
-        """Record OpenAI conversation"""
+        """Record OpenAI conversation with enhanced content parsing"""
         try:
             messages = kwargs.get("messages", [])
             model = kwargs.get("model", "unknown")
 
-            # Extract user input
-            user_input = ""
+            # Extract user input with enhanced parsing
+            user_input = self._extract_openai_user_input(messages)
+
+            # Extract AI response with enhanced parsing
+            ai_output = self._extract_openai_ai_output(response)
+
+            # Calculate tokens
+            tokens_used = 0
+            if hasattr(response, "usage") and response.usage:
+                tokens_used = getattr(response.usage, "total_tokens", 0)
+
+            # Enhanced metadata extraction
+            metadata = self._extract_openai_metadata(kwargs, response, tokens_used)
+
+            # Record conversation
+            self.record_conversation(
+                user_input=user_input,
+                ai_output=ai_output,
+                model=model,
+                metadata=metadata,
+            )
+
+            # Also record AI response in conversation manager for history tracking
+            if ai_output:
+                self.conversation_manager.record_response(
+                    session_id=self._session_id,
+                    response=ai_output,
+                    metadata={"model": model, "tokens_used": tokens_used},
+                )
+        except Exception as e:
+            logger.error(f"Failed to record OpenAI conversation: {e}")
+
+    def _extract_openai_user_input(self, messages: List[Dict]) -> str:
+        """Extract user input from OpenAI messages with support for complex content types"""
+        user_input = ""
+        try:
+            # Find the last user message
             for message in reversed(messages):
                 if message.get("role") == "user":
-                    user_input = message.get("content", "")
+                    content = message.get("content", "")
+
+                    if isinstance(content, str):
+                        # Simple string content
+                        user_input = content
+                    elif isinstance(content, list):
+                        # Complex content (vision, multiple parts)
+                        text_parts = []
+                        image_count = 0
+
+                        for item in content:
+                            if isinstance(item, dict):
+                                if item.get("type") == "text":
+                                    text_parts.append(item.get("text", ""))
+                                elif item.get("type") == "image_url":
+                                    image_count += 1
+
+                        user_input = " ".join(text_parts)
+                        # Add image indicator if present
+                        if image_count > 0:
+                            user_input += f" [Contains {image_count} image(s)]"
+
                     break
+        except Exception as e:
+            logger.debug(f"Error extracting user input: {e}")
+            user_input = "[Error extracting user input]"
 
-            # Extract AI response
-            ai_output = ""
+        return user_input
+
+    def _extract_openai_ai_output(self, response) -> str:
+        """Extract AI output from OpenAI response with support for various response types"""
+        ai_output = ""
+        try:
             if hasattr(response, "choices") and response.choices:
                 choice = response.choices[0]
+
                 if hasattr(choice, "message") and choice.message:
-                    ai_output = choice.message.content or ""
+                    message = choice.message
+
+                    # Handle regular text content
+                    if hasattr(message, "content") and message.content:
+                        ai_output = message.content
+
+                    # Handle function/tool calls
+                    elif hasattr(message, "tool_calls") and message.tool_calls:
+                        tool_descriptions = []
+                        for tool_call in message.tool_calls:
+                            if hasattr(tool_call, "function"):
+                                func_name = tool_call.function.name
+                                func_args = tool_call.function.arguments
+                                tool_descriptions.append(
+                                    f"Called {func_name} with {func_args}"
+                                )
+                        ai_output = "[Tool calls: " + "; ".join(tool_descriptions) + "]"
+
+                    # Handle function calls (legacy format)
+                    elif hasattr(message, "function_call") and message.function_call:
+                        func_call = message.function_call
+                        func_name = func_call.get("name", "unknown")
+                        func_args = func_call.get("arguments", "{}")
+                        ai_output = f"[Function call: {func_name} with {func_args}]"
 
-            # Calculate tokens
-            tokens_used = 0
+                    else:
+                        ai_output = "[No content - possible function/tool call]"
+
+        except Exception as e:
+            logger.debug(f"Error extracting AI output: {e}")
+            ai_output = "[Error extracting AI response]"
+
+        return ai_output
+
+    def _extract_openai_metadata(
+        self, kwargs: Dict, response, tokens_used: int
+    ) -> Dict:
+        """Extract comprehensive metadata from OpenAI request and response"""
+        metadata = {
+            "integration": "openai_auto",
+            "api_type": "chat_completions",
+            "tokens_used": tokens_used,
+            "auto_recorded": True,
+        }
+
+        try:
+            # Add request metadata
+            if "temperature" in kwargs:
+                metadata["temperature"] = kwargs["temperature"]
+            if "max_tokens" in kwargs:
+                metadata["max_tokens"] = kwargs["max_tokens"]
+            if "tools" in kwargs:
+                metadata["has_tools"] = True
+                metadata["tool_count"] = len(kwargs["tools"])
+            if "functions" in kwargs:
+                metadata["has_functions"] = True
+                metadata["function_count"] = len(kwargs["functions"])
+
+            # Add response metadata
+            if hasattr(response, "choices") and response.choices:
+                choice = response.choices[0]
+                if hasattr(choice, "finish_reason"):
+                    metadata["finish_reason"] = choice.finish_reason
+
+            # Add detailed token usage if available
             if hasattr(response, "usage") and response.usage:
-                tokens_used = getattr(response.usage, "total_tokens", 0)
+                usage = response.usage
+                metadata.update(
+                    {
+                        "prompt_tokens": getattr(usage, "prompt_tokens", 0),
+                        "completion_tokens": getattr(usage, "completion_tokens", 0),
+                        "total_tokens": getattr(usage, "total_tokens", 0),
+                    }
+                )
+
+            # Detect content types
+            messages = kwargs.get("messages", [])
+            has_images = False
+            message_count = len(messages)
+
+            for message in messages:
+                if message.get("role") == "user":
+                    content = message.get("content")
+                    if isinstance(content, list):
+                        for item in content:
+                            if (
+                                isinstance(item, dict)
+                                and item.get("type") == "image_url"
+                            ):
+                                has_images = True
+                                break
+                    if has_images:
+                        break
+
+            metadata["message_count"] = message_count
+            metadata["has_images"] = has_images
+
+        except Exception as e:
+            logger.debug(f"Error extracting metadata: {e}")
+
+        return metadata
+
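Note: the new extractor flattens OpenAI's multi-part message content into one recordable string. A hedged, self-contained illustration of the expected behavior (plain dicts, not a call into the package):

messages = [
    {
        "role": "user",
        "content": [
            {"type": "text", "text": "What is in this picture?"},
            {"type": "image_url", "image_url": {"url": "https://example.com/cat.png"}},
        ],
    }
]

text_parts = [p["text"] for p in messages[-1]["content"] if p.get("type") == "text"]
image_count = sum(1 for p in messages[-1]["content"] if p.get("type") == "image_url")
user_input = " ".join(text_parts)
if image_count:
    user_input += f" [Contains {image_count} image(s)]"
# user_input -> "What is in this picture? [Contains 1 image(s)]"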
+    def _record_anthropic_conversation(self, kwargs, response):
+        """Record Anthropic conversation with enhanced content parsing"""
+        try:
+            messages = kwargs.get("messages", [])
+            model = kwargs.get("model", "claude-unknown")
+
+            # Extract user input with enhanced parsing
+            user_input = self._extract_anthropic_user_input(messages)
+
+            # Extract AI response with enhanced parsing
+            ai_output = self._extract_anthropic_ai_output(response)
+
+            # Calculate tokens
+            tokens_used = self._extract_anthropic_tokens(response)
+
+            # Enhanced metadata extraction
+            metadata = self._extract_anthropic_metadata(kwargs, response, tokens_used)
 
             # Record conversation
             self.record_conversation(
                 user_input=user_input,
                 ai_output=ai_output,
                 model=model,
-                metadata={
-                    "integration": "openai_auto",
-                    "api_type": "chat_completions",
-                    "tokens_used": tokens_used,
-                    "auto_recorded": True,
-                },
+                metadata=metadata,
             )
         except Exception as e:
-            logger.error(f"Failed to record OpenAI conversation: {e}")
+            logger.error(f"Failed to record Anthropic conversation: {e}")
 
-    def _record_anthropic_conversation(self, kwargs, response):
-        """Record Anthropic conversation"""
+    def _extract_anthropic_user_input(self, messages: List[Dict]) -> str:
+        """Extract user input from Anthropic messages with support for complex content types"""
+        user_input = ""
         try:
-            messages = kwargs.get("messages", [])
-            model = kwargs.get("model", "claude-unknown")
-
-            # Extract user input
-            user_input = ""
+            # Find the last user message
             for message in reversed(messages):
                 if message.get("role") == "user":
                     content = message.get("content", "")
-                    if isinstance(content, list):
-                        user_input = " ".join(
-                            [
-                                block.get("text", "")
-                                for block in content
-                                if isinstance(block, dict)
-                                and block.get("type") == "text"
-                            ]
-                        )
-                    else:
+
+                    if isinstance(content, str):
+                        # Simple string content
                         user_input = content
+                    elif isinstance(content, list):
+                        # Complex content (vision, multiple parts)
+                        text_parts = []
+                        image_count = 0
+
+                        for block in content:
+                            if isinstance(block, dict):
+                                if block.get("type") == "text":
+                                    text_parts.append(block.get("text", ""))
+                                elif block.get("type") == "image":
+                                    image_count += 1
+
+                        user_input = " ".join(text_parts)
+                        # Add image indicator if present
+                        if image_count > 0:
+                            user_input += f" [Contains {image_count} image(s)]"
+
                     break
+        except Exception as e:
+            logger.debug(f"Error extracting Anthropic user input: {e}")
+            user_input = "[Error extracting user input]"
 
-            # Extract AI response
-            ai_output = ""
+        return user_input
+
+    def _extract_anthropic_ai_output(self, response) -> str:
+        """Extract AI output from Anthropic response with support for various response types"""
+        ai_output = ""
+        try:
             if hasattr(response, "content") and response.content:
                 if isinstance(response.content, list):
-                    ai_output = " ".join(
-                        [
-                            block.text
-                            for block in response.content
-                            if hasattr(block, "text")
-                        ]
-                    )
+                    # Handle structured content (text blocks, tool use, etc.)
+                    text_parts = []
+                    tool_uses = []
+
+                    for block in response.content:
+                        try:
+                            # Handle text blocks
+                            if hasattr(block, "text") and block.text:
+                                text_parts.append(block.text)
+                            # Handle tool use blocks
+                            elif hasattr(block, "type"):
+                                block_type = getattr(block, "type", None)
+                                if block_type == "tool_use":
+                                    tool_name = getattr(block, "name", "unknown")
+                                    tool_input = getattr(block, "input", {})
+                                    tool_uses.append(
+                                        f"Used {tool_name} with {tool_input}"
+                                    )
+                            # Handle mock objects for testing (when type is accessible but not via hasattr)
+                            elif hasattr(block, "name") and hasattr(block, "input"):
+                                tool_name = getattr(block, "name", "unknown")
+                                tool_input = getattr(block, "input", {})
+                                tool_uses.append(f"Used {tool_name} with {tool_input}")
+                        except Exception as block_error:
+                            logger.debug(f"Error processing block: {block_error}")
+                            continue
+
+                    ai_output = " ".join(text_parts)
+                    if tool_uses:
+                        if ai_output:
+                            ai_output += " "
+                        ai_output += "[Tool uses: " + "; ".join(tool_uses) + "]"
+
+                elif isinstance(response.content, str):
+                    ai_output = response.content
                 else:
                     ai_output = str(response.content)
 
-            # Calculate tokens
-            tokens_used = 0
+        except Exception as e:
+            logger.debug(f"Error extracting Anthropic AI output: {e}")
+            ai_output = "[Error extracting AI response]"
+
+        return ai_output
+
+    def _extract_anthropic_tokens(self, response) -> int:
+        """Extract token usage from Anthropic response"""
+        tokens_used = 0
+        try:
             if hasattr(response, "usage") and response.usage:
                 input_tokens = getattr(response.usage, "input_tokens", 0)
                 output_tokens = getattr(response.usage, "output_tokens", 0)
                 tokens_used = input_tokens + output_tokens
+        except Exception as e:
+            logger.debug(f"Error extracting Anthropic tokens: {e}")
+
+        return tokens_used
+
+    def _extract_anthropic_metadata(
+        self, kwargs: Dict, response, tokens_used: int
+    ) -> Dict:
+        """Extract comprehensive metadata from Anthropic request and response"""
+        metadata = {
+            "integration": "anthropic_auto",
+            "api_type": "messages",
+            "tokens_used": tokens_used,
+            "auto_recorded": True,
+        }
+
+        try:
+            # Add request metadata
+            if "temperature" in kwargs:
+                metadata["temperature"] = kwargs["temperature"]
+            if "max_tokens" in kwargs:
+                metadata["max_tokens"] = kwargs["max_tokens"]
+            if "tools" in kwargs:
+                metadata["has_tools"] = True
+                metadata["tool_count"] = len(kwargs["tools"])
+
+            # Add response metadata
+            if hasattr(response, "stop_reason"):
+                metadata["stop_reason"] = response.stop_reason
+            if hasattr(response, "model"):
+                metadata["response_model"] = response.model
+
+            # Add detailed token usage if available
+            if hasattr(response, "usage") and response.usage:
+                usage = response.usage
+                metadata.update(
+                    {
+                        "input_tokens": getattr(usage, "input_tokens", 0),
+                        "output_tokens": getattr(usage, "output_tokens", 0),
+                        "total_tokens": tokens_used,
+                    }
+                )
+
+            # Detect content types
+            messages = kwargs.get("messages", [])
+            has_images = False
+            message_count = len(messages)
+
+            for message in messages:
+                if message.get("role") == "user":
+                    content = message.get("content")
+                    if isinstance(content, list):
+                        for item in content:
+                            if isinstance(item, dict) and item.get("type") == "image":
+                                has_images = True
+                                break
+                    if has_images:
+                        break
+
+            metadata["message_count"] = message_count
+            metadata["has_images"] = has_images
 
-            # Record conversation
-            self.record_conversation(
-                user_input=user_input,
-                ai_output=ai_output,
-                model=model,
-                metadata={
-                    "integration": "anthropic_auto",
-                    "api_type": "messages",
-                    "tokens_used": tokens_used,
-                    "auto_recorded": True,
-                },
-            )
         except Exception as e:
-            logger.error(f"Failed to record Anthropic conversation: {e}")
+            logger.debug(f"Error extracting Anthropic metadata: {e}")
 
-    def _litellm_success_callback(self, kwargs, response, start_time, end_time):
-        """
-        This function is automatically called by LiteLLM after a successful completion.
-        """
+        return metadata
+
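Note: the Anthropic extractors apply the same flattening idea to content blocks, with tool_use blocks summarized into the recorded text. An illustrative sketch using plain objects standing in for SDK block types (assumed shapes, not the anthropic SDK itself):

from types import SimpleNamespace

blocks = [
    SimpleNamespace(type="text", text="Let me check the weather."),
    SimpleNamespace(type="tool_use", name="get_weather", input={"city": "Paris"}),
]

text_parts = [b.text for b in blocks if getattr(b, "text", None)]
tool_uses = [
    f"Used {b.name} with {b.input}" for b in blocks if getattr(b, "type", "") == "tool_use"
]
ai_output = " ".join(text_parts)
if tool_uses:
    ai_output += " [Tool uses: " + "; ".join(tool_uses) + "]"
# ai_output -> "Let me check the weather. [Tool uses: Used get_weather with {'city': 'Paris'}]"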
+    def _process_litellm_response(self, kwargs, response, start_time, end_time):
+        """Process and record LiteLLM response"""
         try:
+            # Extract user input from messages
+            messages = kwargs.get("messages", [])
             user_input = ""
-            # Find the last user message
-            for msg in reversed(kwargs.get("messages", [])):
-                if msg.get("role") == "user":
-                    user_input = msg.get("content", "")
+
+            for message in reversed(messages):
+                if message.get("role") == "user":
+                    user_input = message.get("content", "")
                     break
 
-            ai_output = response.choices[0].message.content or ""
-            model = kwargs.get("model", "unknown")
+            # Extract AI output from response
+            ai_output = ""
+            if hasattr(response, "choices") and response.choices:
+                choice = response.choices[0]
+                if hasattr(choice, "message") and hasattr(choice.message, "content"):
+                    ai_output = choice.message.content or ""
+                elif hasattr(choice, "text"):
+                    ai_output = choice.text or ""
+
+            # Extract model
+            model = kwargs.get("model", "litellm-unknown")
+
+            # Calculate timing (convert to seconds for JSON serialization)
+            duration_seconds = (end_time - start_time) if start_time and end_time else 0
+            if hasattr(duration_seconds, "total_seconds"):
+                duration_seconds = duration_seconds.total_seconds()
+
+            # Prepare metadata
+            metadata = {
+                "integration": "litellm",
+                "auto_recorded": True,
+                "duration": float(duration_seconds),
+                "timestamp": time.time(),
+            }
 
-            # Calculate tokens used
-            tokens_used = 0
+            # Add token usage if available
             if hasattr(response, "usage") and response.usage:
-                tokens_used = getattr(response.usage, "total_tokens", 0)
+                usage = response.usage
+                metadata.update(
+                    {
+                        "prompt_tokens": getattr(usage, "prompt_tokens", 0),
+                        "completion_tokens": getattr(usage, "completion_tokens", 0),
+                        "total_tokens": getattr(usage, "total_tokens", 0),
+                    }
+                )
 
-            # Handle timing data safely - convert any time objects to float/string
-            duration_ms = 0
-            start_time_str = None
-            end_time_str = None
+            # Record the conversation
+            if user_input and ai_output:
+                self.record_conversation(
+                    user_input=user_input,
+                    ai_output=ai_output,
+                    model=model,
+                    metadata=metadata,
+                )
 
-            try:
-                if start_time is not None and end_time is not None:
-                    # Handle different types of time objects
-                    if hasattr(start_time, "total_seconds"):  # timedelta
-                        duration_ms = start_time.total_seconds() * 1000
-                    elif isinstance(start_time, (int, float)) and isinstance(
-                        end_time, (int, float)
-                    ):
-                        duration_ms = (end_time - start_time) * 1000
+        except Exception as e:
+            logger.error(f"Failed to process LiteLLM response: {e}")
 
-                    start_time_str = str(start_time)
-                    end_time_str = str(end_time)
-            except Exception:
-                # If timing calculation fails, just skip it
-                pass
+    # LiteLLM callback is now handled by the LiteLLMCallbackManager
+    # in memori.integrations.litellm_integration
 
-            self.record_conversation(
-                user_input,
-                ai_output,
-                model,
-                metadata={
-                    "integration": "litellm",
-                    "api_type": "completion",
-                    "tokens_used": tokens_used,
-                    "auto_recorded": True,
-                    "start_time_str": start_time_str,
-                    "end_time_str": end_time_str,
-                    "duration_ms": duration_ms,
-                },
+    def _process_memory_sync(
+        self, chat_id: str, user_input: str, ai_output: str, model: str = "unknown"
+    ):
+        """Synchronous memory processing fallback"""
+        if not self.memory_agent:
+            logger.warning("Memory agent not available, skipping memory ingestion")
+            return
+
+        try:
+            # Run async processing in new event loop
+            import threading
+
+            def run_memory_processing():
+                new_loop = asyncio.new_event_loop()
+                asyncio.set_event_loop(new_loop)
+                try:
+                    new_loop.run_until_complete(
+                        self._process_memory_async(
+                            chat_id, user_input, ai_output, model
+                        )
+                    )
+                except Exception as e:
+                    logger.error(f"Synchronous memory processing failed: {e}")
+                finally:
+                    new_loop.close()
+
+            # Run in background thread to avoid blocking
+            thread = threading.Thread(target=run_memory_processing, daemon=True)
+            thread.start()
+            logger.debug(
+                f"Memory processing started in background thread for {chat_id}"
             )
+
         except Exception as e:
-            logger.error(f"Memori callback failed: {e}")
+            logger.error(f"Failed to start synchronous memory processing: {e}")
+
+    def _parse_llm_response(self, response) -> tuple[str, str]:
+        """Extract text and model from various LLM response formats."""
+        if response is None:
+            return "", "unknown"
+
+        # String response
+        if isinstance(response, str):
+            return response, "unknown"
+
+        # Anthropic response
+        if hasattr(response, "content"):
+            text = ""
+            if isinstance(response.content, list):
+                text = "".join(b.text for b in response.content if hasattr(b, "text"))
+            else:
+                text = str(response.content)
+            return text, getattr(response, "model", "unknown")
+
+        # OpenAI response
+        if hasattr(response, "choices") and response.choices:
+            choice = response.choices[0]
+            text = (
+                getattr(choice.message, "content", "")
+                if hasattr(choice, "message")
+                else getattr(choice, "text", "")
+            )
+            return text or "", getattr(response, "model", "unknown")
+
+        # Dict response
+        if isinstance(response, dict):
+            return response.get(
+                "content", response.get("text", str(response))
+            ), response.get("model", "unknown")
+
+        # Fallback
+        return str(response), "unknown"
 
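Note: `_parse_llm_response` normalizes whatever the caller hands to `record_conversation` into a `(text, model)` pair. A simplified re-statement of the dispatch order for the plain-Python cases only (object-typed responses are unpacked via the hasattr checks shown above):

def parse_llm_response(response):
    # None and strings first, then dicts, then a string fallback
    if response is None:
        return "", "unknown"
    if isinstance(response, str):
        return response, "unknown"
    if isinstance(response, dict):
        return (
            response.get("content", response.get("text", str(response))),
            response.get("model", "unknown"),
        )
    return str(response), "unknown"

assert parse_llm_response(None) == ("", "unknown")
assert parse_llm_response("hello") == ("hello", "unknown")
assert parse_llm_response({"content": "hi", "model": "gpt-4o"}) == ("hi", "gpt-4o")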
     def record_conversation(
         self,
         user_input: str,
-        ai_output: str,
-        model: str = "unknown",
+        ai_output=None,
+        model: str = None,
         metadata: Optional[Dict[str, Any]] = None,
     ) -> str:
         """
-        Manually record a conversation
+        Record a conversation.
 
         Args:
-            user_input: The user's input message
-            ai_output: The AI's response
-            model: Model used for the response
-            metadata: Additional metadata
+            user_input: User's message
+            ai_output: AI response (any format)
+            model: Optional model name override
+            metadata: Optional metadata
 
         Returns:
-            chat_id: Unique identifier for this conversation
+            chat_id: Unique conversation ID
         """
         if not self._enabled:
             raise MemoriError("Memori is not enabled. Call enable() first.")
 
-        # Ensure ai_output is never None to avoid NOT NULL constraint errors
-        if ai_output is None:
-            ai_output = ""
+        # Parse response
+        response_text, detected_model = self._parse_llm_response(ai_output)
+        response_model = model or detected_model
 
+        # Generate ID and timestamp
         chat_id = str(uuid.uuid4())
         timestamp = datetime.now()
 
-        try:
-            # Store in chat history
-            self.db_manager.store_chat_history(
-                chat_id=chat_id,
-                user_input=user_input,
-                ai_output=ai_output,
-                model=model,
-                timestamp=timestamp,
-                session_id=self._session_id,
-                namespace=self.namespace,
-                metadata=metadata or {},
-            )
+        # Store conversation
+        self.db_manager.store_chat_history(
+            chat_id=chat_id,
+            user_input=user_input,
+            ai_output=response_text,
+            model=response_model,
+            timestamp=timestamp,
+            session_id=self._session_id,
+            namespace=self.namespace,
+            metadata=metadata or {},
+        )
 
-            # Process for memory categorization
-            if self.conscious_ingest:
-                self._process_memory_ingestion(chat_id, user_input, ai_output, model)
+        # Always process into long-term memory when memory agent is available
+        if self.memory_agent:
+            self._schedule_memory_processing(
+                chat_id, user_input, response_text, response_model
+            )
 
-            logger.debug(f"Conversation recorded: {chat_id}")
-            return chat_id
+        logger.debug(f"Recorded conversation: {chat_id}")
+        return chat_id
 
-        except Exception as e:
-            raise MemoriError(f"Failed to record conversation: {e}")
+    def _schedule_memory_processing(
+        self, chat_id: str, user_input: str, ai_output: str, model: str
+    ):
+        """Schedule memory processing (async if possible, sync fallback)."""
+        try:
+            loop = asyncio.get_running_loop()
+            task = loop.create_task(
+                self._process_memory_async(chat_id, user_input, ai_output, model)
+            )
 
-    def _process_memory_ingestion(
+            # Prevent garbage collection
+            if not hasattr(self, "_memory_tasks"):
+                self._memory_tasks = set()
+            self._memory_tasks.add(task)
+            task.add_done_callback(self._memory_tasks.discard)
+        except RuntimeError:
+            # No event loop, use sync fallback
+            logger.debug("No event loop, using synchronous memory processing")
+            self._process_memory_sync(chat_id, user_input, ai_output, model)
+
+    async def _process_memory_async(
         self, chat_id: str, user_input: str, ai_output: str, model: str = "unknown"
     ):
-        """Process conversation for Pydantic-based memory categorization"""
+        """Process conversation with enhanced async memory categorization"""
         if not self.memory_agent:
             logger.warning("Memory agent not available, skipping memory ingestion")
             return
@@ -953,38 +1727,107 @@ class Memori:
                 relevant_skills=self._user_context.get("relevant_skills", []),
             )
 
-            # Process conversation using Pydantic-based memory agent
-            processed_memory = self.memory_agent.process_conversation_sync(
+            # Get recent memories for deduplication
+            existing_memories = await self._get_recent_memories_for_dedup()
+
+            # Process conversation using async Pydantic-based memory agent
+            processed_memory = await self.memory_agent.process_conversation_async(
                 chat_id=chat_id,
                 user_input=user_input,
                 ai_output=ai_output,
                 context=context,
-                mem_prompt=self.mem_prompt,
-                filters=self.memory_filters,
+                existing_memories=(
+                    [mem.summary for mem in existing_memories[:10]]
+                    if existing_memories
+                    else []
+                ),
             )
 
-            # Store processed memory with entity indexing
-            if processed_memory.should_store:
-                memory_id = self.db_manager.store_processed_memory(
-                    memory=processed_memory, chat_id=chat_id, namespace=self.namespace
-                )
+            # Check for duplicates
+            duplicate_id = await self.memory_agent.detect_duplicates(
+                processed_memory, existing_memories
+            )
 
-            if memory_id:
-                logger.debug(
-                    f"Stored processed memory {memory_id} for chat {chat_id}"
-                )
-            else:
-                logger.debug(
-                    f"Memory not stored for chat {chat_id}: {processed_memory.storage_reasoning}"
+            if duplicate_id:
+                processed_memory.duplicate_of = duplicate_id
+                logger.info(f"Memory marked as duplicate of {duplicate_id}")
+
+            # Apply filters
+            if self.memory_agent.should_filter_memory(
+                processed_memory, self.memory_filters
+            ):
+                logger.debug(f"Memory filtered out for chat {chat_id}")
+                return
+
+            # Store processed memory with new schema
+            memory_id = self.db_manager.store_long_term_memory_enhanced(
+                processed_memory, chat_id, self.namespace
+            )
+
+            if memory_id:
+                logger.debug(f"Stored processed memory {memory_id} for chat {chat_id}")
+
+                # Check for conscious context updates if promotion eligible and conscious_ingest enabled
+                if (
+                    processed_memory.promotion_eligible
+                    and self.conscious_agent
+                    and self.conscious_ingest
+                ):
+                    await self.conscious_agent.check_for_context_updates(
+                        self.db_manager, self.namespace
                 )
             else:
-                logger.debug(
-                    f"Memory not stored for chat {chat_id}: {processed_memory.storage_reasoning}"
-                )
+                logger.warning(f"Failed to store memory for chat {chat_id}")
 
         except Exception as e:
             logger.error(f"Memory ingestion failed for {chat_id}: {e}")
 
+    async def _get_recent_memories_for_dedup(self) -> List:
+        """Get recent memories for deduplication check"""
+        try:
+            from sqlalchemy import text
+
+            from ..database.queries.memory_queries import MemoryQueries
+            from ..utils.pydantic_models import ProcessedLongTermMemory
+
+            with self.db_manager._get_connection() as connection:
+                result = connection.execute(
+                    text(MemoryQueries.SELECT_MEMORIES_FOR_DEDUPLICATION),
+                    {
+                        "namespace": self.namespace,
+                        "processed_for_duplicates": False,
+                        "limit": 20,
+                    },
+                )
+
+                memories = []
+                for row in result:
+                    try:
+                        # Create ProcessedLongTermMemory objects for proper comparison
+                        # Note: Query returns (memory_id, summary, searchable_content, classification, created_at)
+                        memory = ProcessedLongTermMemory(
+                            conversation_id=row[
                                0
+                            ],  # Use memory_id as conversation_id for existing memories
+                            summary=row[1] or "",
+                            content=row[2] or "",
+                            classification=row[3] or "conversational",
+                            importance="medium",  # Default importance level for comparison
+                            promotion_eligible=False,  # Default for existing memories
+                            classification_reason="Existing memory loaded for deduplication check",  # Required field
+                        )
+                        memories.append(memory)
+                    except Exception as e:
+                        # Silently skip malformed memories from old data format
+                        logger.debug(f"Skipping malformed memory during dedup: {e}")
+                        continue
+
+                return memories
+
+        except Exception as e:
+            logger.error(f"Failed to get recent memories for dedup: {e}")
+            return []
+
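Note: `_schedule_memory_processing` keeps a strong reference to each fire-and-forget task because asyncio only holds weak references to tasks, so an unreferenced task can be garbage-collected mid-flight. A standalone sketch of the same pattern, independent of Memori:

import asyncio

background_tasks: set = set()

async def process(chat_id: str) -> None:
    await asyncio.sleep(0)  # stand-in for real memory processing

def schedule(chat_id: str) -> None:
    task = asyncio.get_running_loop().create_task(process(chat_id))
    background_tasks.add(task)                        # keep the task alive
    task.add_done_callback(background_tasks.discard)  # drop it when finished

async def main() -> None:
    schedule("chat-123")
    await asyncio.gather(*background_tasks)

asyncio.run(main())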
     def retrieve_context(self, query: str, limit: int = 5) -> List[Dict[str, Any]]:
         """
         Retrieve relevant context for a query with priority on essential facts
@@ -1088,10 +1931,13 @@ class Memori:
         return self._session_id
 
     def get_integration_stats(self) -> List[Dict[str, Any]]:
-        """Get statistics from the universal integration system"""
+        """Get statistics from the new interceptor system"""
         try:
+            # Get system status first
+            interceptor_status = self.get_interceptor_status()
+
             stats = {
-                "integration": "universal_auto_recording",
+                "integration": "memori_system",
                 "enabled": self._enabled,
                 "session_id": self._session_id,
                 "namespace": self.namespace,
@@ -1099,58 +1945,62 @@ class Memori:
             }
 
             # LiteLLM stats
+            litellm_interceptor_status = interceptor_status.get("native", {})
             if LITELLM_AVAILABLE:
                 stats["providers"]["litellm"] = {
                     "available": True,
                     "method": "native_callbacks",
-                    "callback_registered": self._enabled,
-                    "callbacks_count": len(success_callback) if self._enabled else 0,
+                    "enabled": litellm_interceptor_status.get("enabled", False),
+                    "status": litellm_interceptor_status.get("status", "unknown"),
                 }
             else:
                 stats["providers"]["litellm"] = {
                     "available": False,
                     "method": "native_callbacks",
-                    "callback_registered": False,
+                    "enabled": False,
                 }
 
+            # Get interceptor status instead of checking wrapped attributes
+            interceptor_status = self.get_interceptor_status()
+
             # OpenAI stats
             try:
                 import openai
 
+                _ = openai  # Suppress unused import warning
+
+                openai_interceptor_status = interceptor_status.get("openai", {})
                 stats["providers"]["openai"] = {
                     "available": True,
-                    "method": "auto_wrapping",
-                    "wrapped": (
-                        hasattr(openai.OpenAI, "_memori_wrapped")
-                        if hasattr(openai, "OpenAI")
-                        else False
-                    ),
+                    "method": "litellm_native",
+                    "enabled": openai_interceptor_status.get("enabled", False),
+                    "status": openai_interceptor_status.get("status", "unknown"),
                 }
             except ImportError:
                 stats["providers"]["openai"] = {
                     "available": False,
-                    "method": "auto_wrapping",
-                    "wrapped": False,
+                    "method": "litellm_native",
+                    "enabled": False,
                 }
 
             # Anthropic stats
             try:
                 import anthropic
 
+                _ = anthropic  # Suppress unused import warning
+
+                anthropic_interceptor_status = interceptor_status.get("anthropic", {})
                 stats["providers"]["anthropic"] = {
                     "available": True,
-                    "method": "auto_wrapping",
-                    "wrapped": (
-                        hasattr(anthropic.Anthropic, "_memori_wrapped")
-                        if hasattr(anthropic, "Anthropic")
-                        else False
-                    ),
+                    "method": "litellm_native",
+                    "enabled": anthropic_interceptor_status.get("enabled", False),
+                    "status": anthropic_interceptor_status.get("status", "unknown"),
                 }
             except ImportError:
                 stats["providers"]["anthropic"] = {
                     "available": False,
-                    "method": "auto_wrapping",
-                    "wrapped": False,
+                    "method": "litellm_native",
+                    "enabled": False,
                 }
 
             return [stats]
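Note: with the interceptor-based reporting above, the returned stats resemble the following (field values are illustrative; the "status" strings depend on what get_interceptor_status() reports at runtime):

example_stats = [
    {
        "integration": "memori_system",
        "enabled": True,
        "session_id": "a1b2c3d4-...",
        "namespace": "default",
        "providers": {
            "litellm": {"available": True, "method": "native_callbacks", "enabled": True, "status": "active"},
            "openai": {"available": True, "method": "litellm_native", "enabled": True, "status": "active"},
            "anthropic": {"available": False, "method": "litellm_native", "enabled": False},
        },
    }
]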
@@ -1195,7 +2045,7 @@ class Memori:
         """Get memories that contain a specific entity"""
         try:
             # This would use the entity index in the database
-            # For now, use keyword search as fallback
+            # For now, use keyword search as fallback (entity_type is ignored for now)
             return self.db_manager.search_memories(
                 query=entity_value, namespace=self.namespace, limit=limit
             )
@@ -1234,11 +2084,25 @@ class Memori:
 
             # If we have a running loop, schedule the task
             self._background_task = loop.create_task(self._background_analysis_loop())
+            # Add proper error handling callback
+            self._background_task.add_done_callback(
+                self._handle_background_task_completion
+            )
             logger.info("Background analysis task started")
 
         except Exception as e:
             logger.error(f"Failed to start background analysis: {e}")
 
+    def _handle_background_task_completion(self, task):
+        """Handle background task completion and cleanup"""
+        try:
+            if task.exception():
+                logger.error(f"Background task failed: {task.exception()}")
+        except asyncio.CancelledError:
+            logger.debug("Background task was cancelled")
+        except Exception as e:
+            logger.error(f"Error handling background task completion: {e}")
+
     def _stop_background_analysis(self):
         """Stop the background analysis task"""
         try:
@@ -1248,32 +2112,66 @@ class Memori:
         except Exception as e:
             logger.error(f"Failed to stop background analysis: {e}")
 
+    def cleanup(self):
+        """Clean up all async tasks and resources"""
+        try:
+            # Cancel background tasks
+            self._stop_background_analysis()
+
+            # Clean up memory processing tasks
+            if hasattr(self, "_memory_tasks"):
+                for task in self._memory_tasks.copy():
+                    if not task.done():
+                        task.cancel()
+                self._memory_tasks.clear()
+
+            logger.debug("Memori cleanup completed")
+        except Exception as e:
+            logger.error(f"Error during cleanup: {e}")
+
+    def __del__(self):
+        """Destructor to ensure cleanup"""
+        try:
+            self.cleanup()
+        except:
+            pass  # Ignore errors during destruction
+
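Note: callers that manage a Memori instance explicitly can pair enable() with the new cleanup() to cancel pending tasks deterministically rather than relying on __del__. A hedged usage sketch (the constructor argument shown is illustrative):

memori = Memori(database_connect="sqlite:///memori.db")
memori.enable()
try:
    ...  # application work; conversations are recorded automatically
finally:
    memori.cleanup()  # cancels background analysis and pending memory tasks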
     async def _background_analysis_loop(self):
-        """Main background analysis loop"""
-        logger.info("ConsciouscAgent: Background analysis loop started")
+        """Background analysis loop for memory processing"""
+        try:
+            logger.debug("Background analysis loop started")
 
-        while self._enabled and self.conscious_ingest:
-            try:
-                if self.conscious_agent and self.conscious_agent.should_run_analysis():
-                    await self.conscious_agent.run_background_analysis(
-                        self.db_manager, self.namespace
-                    )
+            # For now, just run periodic conscious ingestion if enabled
+            if self.conscious_ingest and self.conscious_agent:
+                while True:
+                    try:
+                        await asyncio.sleep(300)  # Check every 5 minutes
 
-                # Wait 30 minutes before next check
-                await asyncio.sleep(1800)  # 30 minutes
+                        # Run conscious ingestion to check for new promotable memories
+                        await self.conscious_agent.run_conscious_ingest(
+                            self.db_manager, self.namespace
+                        )
+
+                        logger.debug("Periodic conscious analysis completed")
 
-            except asyncio.CancelledError:
-                logger.info("ConsciouscAgent: Background analysis cancelled")
-                break
-            except Exception as e:
-                logger.error(f"ConsciouscAgent: Background analysis error: {e}")
-                # Wait 5 minutes before retrying on error
-                await asyncio.sleep(300)
+                    except asyncio.CancelledError:
+                        logger.debug("Background analysis loop cancelled")
+                        break
+                    except Exception as e:
+                        logger.error(f"Background analysis error: {e}")
+                        await asyncio.sleep(60)  # Wait 1 minute before retry
+            else:
+                # If not using conscious ingest, just sleep
+                while True:
+                    await asyncio.sleep(3600)  # Sleep for 1 hour
 
-        logger.info("ConsciouscAgent: Background analysis loop ended")
+        except asyncio.CancelledError:
+            logger.debug("Background analysis loop cancelled")
+        except Exception as e:
+            logger.error(f"Background analysis loop failed: {e}")
 
     def trigger_conscious_analysis(self):
-        """Manually trigger conscious agent analysis (for testing/immediate analysis)"""
+        """Manually trigger conscious context ingestion (for testing/immediate analysis)"""
         if not self.conscious_ingest or not self.conscious_agent:
             logger.warning("Conscious ingestion not enabled or agent not available")
             return
@@ -1283,11 +2181,11 @@ class Memori:
         try:
             loop = asyncio.get_running_loop()
             task = loop.create_task(
-                self.conscious_agent.run_background_analysis(
+                self.conscious_agent.run_conscious_ingest(
                     self.db_manager, self.namespace
                 )
             )
-            logger.info("Conscious analysis triggered")
+            logger.info("Conscious context ingestion triggered")
             return task
         except RuntimeError:
             # No event loop, run synchronously in thread
@@ -1298,7 +2196,7 @@ class Memori:
                 asyncio.set_event_loop(new_loop)
                 try:
                     new_loop.run_until_complete(
-                        self.conscious_agent.run_background_analysis(
+                        self.conscious_agent.run_conscious_ingest(
                             self.db_manager, self.namespace
                         )
                    )
@@ -1307,29 +2205,170 @@ class Memori:
 
             thread = threading.Thread(target=run_analysis)
             thread.start()
-            logger.info("Conscious analysis triggered in separate thread")
+            logger.info("Conscious context ingestion triggered in separate thread")
+
+        except Exception as e:
+            logger.error(f"Failed to trigger conscious context ingestion: {e}")
+
+    def get_conscious_system_prompt(self) -> str:
+        """
+        Get conscious context as system prompt for direct injection.
+        Returns ALL short-term memory as formatted system prompt.
+        Use this for conscious_ingest mode.
+        """
+        try:
+            context = self._get_conscious_context()
+            if not context:
+                return ""
+
+            # Create system prompt with all short-term memory
+            system_prompt = "--- Your Short-Term Memory (Conscious Context) ---\n"
+            system_prompt += "This is your complete working memory. USE THIS INFORMATION TO ANSWER QUESTIONS:\n\n"
+
+            # Deduplicate and format context
+            seen_content = set()
+            for mem in context:
+                if isinstance(mem, dict):
+                    content = mem.get("searchable_content", "") or mem.get(
+                        "summary", ""
+                    )
+                    category = mem.get("category_primary", "")
+
+                    # Skip duplicates
+                    content_key = content.lower().strip()
+                    if content_key in seen_content:
+                        continue
+                    seen_content.add(content_key)
+
+                    system_prompt += f"[{category.upper()}] {content}\n"
+
+            system_prompt += "\nIMPORTANT: Use the above information to answer questions about the user.\n"
+            system_prompt += "-------------------------\n"
+
+            return system_prompt
+
+        except Exception as e:
+            logger.error(f"Failed to generate conscious system prompt: {e}")
+            return ""
+
+    def get_auto_ingest_system_prompt(self, user_input: str) -> str:
+        """
+        Get auto-ingest context as system prompt for direct injection.
+        Returns relevant memories based on user input as formatted system prompt.
+        Use this for auto_ingest mode.
+        """
+        try:
+            # For now, use recent short-term memories as a simple approach
+            # This avoids the search engine issues and still provides context
+            # TODO: Use user_input for intelligent context retrieval
+            context = self._get_conscious_context()  # Get recent short-term memories
+
+            if not context:
+                return ""
+
+            # Create system prompt with relevant memories (limited to prevent overwhelming)
+            system_prompt = "--- Relevant Memory Context ---\n"
+
+            # Take first 5 items to avoid too much context
+            seen_content = set()
+            for mem in context[:5]:
+                if isinstance(mem, dict):
+                    content = mem.get("searchable_content", "") or mem.get(
+                        "summary", ""
+                    )
+                    category = mem.get("category_primary", "")
+
+                    # Skip duplicates
+                    content_key = content.lower().strip()
+                    if content_key in seen_content:
+                        continue
+                    seen_content.add(content_key)
+
+                    if category.startswith("essential_"):
+                        system_prompt += f"[{category.upper()}] {content}\n"
+                    else:
+                        system_prompt += f"- {content}\n"
+
+            system_prompt += "-------------------------\n"
+
+            return system_prompt
 
         except Exception as e:
-            logger.error(f"Failed to trigger conscious analysis: {e}")
+            logger.error(f"Failed to generate auto-ingest system prompt: {e}")
+            return ""
+
+    def add_memory_to_messages(self, messages: list, user_input: str = None) -> list:
+        """
+        Add appropriate memory context to messages based on ingest mode.
+
+        Args:
+            messages: List of messages for LLM
+            user_input: User input for auto_ingest context retrieval (optional)
+
+        Returns:
+            Modified messages list with memory context added as system message
+        """
+        try:
+            system_prompt = ""
+
+            if self.conscious_ingest:
+                # One-time conscious context injection
+                if not self._conscious_context_injected:
+                    system_prompt = self.get_conscious_system_prompt()
+                    self._conscious_context_injected = True
+                    logger.info(
+                        "Conscious-ingest: Added complete working memory to system prompt"
+                    )
+                else:
+                    logger.debug("Conscious-ingest: Context already injected, skipping")
+
+            elif self.auto_ingest and user_input:
+                # Dynamic auto-ingest based on user input
+                system_prompt = self.get_auto_ingest_system_prompt(user_input)
+                logger.debug("Auto-ingest: Added relevant context to system prompt")
+
+            if system_prompt:
+                # Add to existing system message or create new one
+                messages_copy = messages.copy()
+
+                # Check if system message already exists
+                for msg in messages_copy:
+                    if msg.get("role") == "system":
+                        msg["content"] = system_prompt + "\n" + msg.get("content", "")
+                        return messages_copy
+
+                # No system message exists, add one at the beginning
+                messages_copy.insert(0, {"role": "system", "content": system_prompt})
+                return messages_copy
+
+            return messages
+
+        except Exception as e:
+            logger.error(f"Failed to add memory to messages: {e}")
+            return messages
 
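Note: add_memory_to_messages is the manual-injection counterpart to the interceptors: prepend (or merge) the memory system prompt before calling any chat API yourself. A hedged sketch, assuming an enabled instance named memori and any OpenAI-compatible client object:

messages = [{"role": "user", "content": "What project am I working on?"}]
messages = memori.add_memory_to_messages(messages, user_input=messages[-1]["content"])
# conscious_ingest: the full working memory is injected once per session;
# auto_ingest: a small, per-query context block is injected on each call.
response = client.chat.completions.create(model="gpt-4o", messages=messages)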
     def get_essential_conversations(self, limit: int = 10) -> List[Dict[str, Any]]:
         """Get essential conversations from short-term memory"""
         try:
+            from sqlalchemy import text
+
             # Get all conversations marked as essential
             with self.db_manager._get_connection() as connection:
                 query = """
                     SELECT memory_id, summary, category_primary, importance_score,
                            created_at, searchable_content, processed_data
                     FROM short_term_memory
-                    WHERE namespace = ? AND category_primary LIKE 'essential_%'
+                    WHERE namespace = :namespace AND category_primary LIKE 'essential_%'
                     ORDER BY importance_score DESC, created_at DESC
-                    LIMIT ?
+                    LIMIT :limit
                 """
 
-                cursor = connection.execute(query, (self.namespace, limit))
+                result = connection.execute(
+                    text(query), {"namespace": self.namespace, "limit": limit}
+                )
 
                 essential_conversations = []
-                for row in cursor.fetchall():
+                for row in result:
                     essential_conversations.append(
                         {
                             "memory_id": row[0],
@@ -1347,3 +2386,106 @@ class Memori:
         except Exception as e:
             logger.error(f"Failed to get essential conversations: {e}")
             return []
+
+    def create_openai_client(self, **kwargs):
+        """
+        Create an OpenAI client with automatic memory recording.
+
+        This method creates a MemoriOpenAIInterceptor that automatically records
+        all OpenAI API calls to memory using the inheritance-based approach.
+
+        Args:
+            **kwargs: Additional arguments passed to OpenAI client (e.g., api_key)
+                These override any settings from the Memori provider config
+
+        Returns:
+            MemoriOpenAIInterceptor instance that works as a drop-in replacement
+            for the standard OpenAI client
+
+        Example:
+            memori = Memori(api_key="sk-...")
+            memori.enable()
+
+            # Create interceptor client
+            client = memori.create_openai_client()
+
+            # Use exactly like standard OpenAI client
+            response = client.chat.completions.create(
+                model="gpt-4o",
+                messages=[{"role": "user", "content": "Hello!"}]
+            )
+            # Conversation is automatically recorded
+        """
+        try:
+            from ..integrations.openai_integration import create_openai_client
+
+            return create_openai_client(self, self.provider_config, **kwargs)
+        except ImportError as e:
+            logger.error(f"Failed to import OpenAI integration: {e}")
+            raise ImportError(
+                "OpenAI integration not available. Install with: pip install openai"
+            ) from e
+
+    def create_openai_wrapper(self, **kwargs):
+        """
+        Create a legacy OpenAI wrapper (backward compatibility).
+
+        DEPRECATED: Use create_openai_client() instead for better integration.
+
+        Returns:
+            MemoriOpenAI wrapper instance
+        """
+        try:
+            from ..integrations.openai_integration import MemoriOpenAI
+
+            return MemoriOpenAI(self, **kwargs)
+        except ImportError as e:
+            logger.error(f"Failed to import OpenAI integration: {e}")
+            raise ImportError(
+                "OpenAI integration not available. Install with: pip install openai"
+            ) from e
+
+    # Conversation management methods
+
+    def get_conversation_stats(self) -> Dict[str, Any]:
+        """Get conversation manager statistics"""
+        return self.conversation_manager.get_session_stats()
+
+    def clear_conversation_history(self, session_id: str = None):
+        """
+        Clear conversation history
+
+        Args:
+            session_id: Specific session to clear. If None, clears current session.
+        """
+        if session_id is None:
+            session_id = self._session_id
+        self.conversation_manager.clear_session(session_id)
+        logger.info(f"Cleared conversation history for session: {session_id}")
+
+    def clear_all_conversations(self):
+        """Clear all conversation histories"""
+        self.conversation_manager.clear_all_sessions()
+        logger.info("Cleared all conversation histories")
+
+    def start_new_conversation(self) -> str:
+        """
+        Start a new conversation session
+
+        Returns:
+            New session ID
+        """
+        old_session_id = self._session_id
+        self._session_id = str(uuid.uuid4())
+
+        # Reset conscious context injection flag for new conversation
+        self._conscious_context_injected = False
+
+        logger.info(
+            f"Started new conversation: {self._session_id} (previous: {old_session_id})"
+        )
+        return self._session_id
+
+    def get_current_session_id(self) -> str:
+        """Get current conversation session ID"""
+        return self._session_id
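Note: the session helpers close the loop for multi-conversation apps: rotating the session ID also resets the conscious-context flag, so the working memory is re-injected on the next call. A short usage sketch against an enabled instance named memori:

stats = memori.get_conversation_stats()       # per-session counters
memori.clear_conversation_history()           # current session only
new_id = memori.start_new_conversation()      # fresh UUID, injection flag reset
assert new_id == memori.get_current_session_id()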