dao-ai 0.1.19__py3-none-any.whl → 0.1.21__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,22 +1,18 @@
 """
-In-memory semantic cache implementation for Genie SQL queries.
+In-memory context-aware Genie cache implementation.
 
-This module provides a semantic cache that stores embeddings and cache entries
+This module provides a context-aware cache that stores embeddings and cache entries
 entirely in memory, without requiring external database dependencies like PostgreSQL
 or Databricks Lakebase. It uses L2 distance for similarity search and supports
 dual embedding matching (question + conversation context).
 
-The cache supports conversation-aware embedding using a rolling window approach
-to capture context from recent conversation turns, improving accuracy for
-multi-turn conversations with anaphoric references.
-
 Use this when:
 - No external database access is available
 - Single-instance deployments (cache not shared across instances)
 - Cache persistence across restarts is not required
 - Cache sizes are moderate (hundreds to low thousands of entries)
 
-For multi-instance deployments or large cache sizes, use SemanticCacheService
+For multi-instance deployments or large cache sizes, use PostgresContextAwareGenieService
 with PostgreSQL backend instead.
 """
 
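The decision rule in the docstring above is applied at tool-construction time. A minimal sketch of the single-instance wiring, using only names that appear in this diff (warehouse_model and workspace_client are free variables here, exactly as in the class docstring's example; the PostgreSQL variant's constructor is not shown in this diff, so it is only referenced in a comment):

    from dao_ai.config import GenieInMemorySemanticCacheParametersModel
    from dao_ai.genie.cache.context_aware import InMemoryContextAwareGenieService

    # Single-instance deployment: the cache lives in process memory and is
    # lost on restart, per the trade-offs listed in the module docstring.
    cache_params = GenieInMemorySemanticCacheParametersModel(
        warehouse=warehouse_model,
        similarity_threshold=0.85,
        capacity=1000,  # LRU eviction kicks in beyond this many entries
    )
    genie = InMemoryContextAwareGenieService(
        impl=GenieService(Genie(space_id="my-space")),  # wrapped (decorated) service
        parameters=cache_params,
        workspace_client=workspace_client,
    )
    # Multi-instance or large caches: use PostgresContextAwareGenieService instead.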
@@ -29,25 +25,19 @@ from typing import Any
 
 import mlflow
 import numpy as np
-import pandas as pd
 from databricks.sdk import WorkspaceClient
-from databricks.sdk.service.sql import StatementResponse, StatementState
 from databricks_ai_bridge.genie import GenieResponse
 from loguru import logger
 
 from dao_ai.config import (
     GenieInMemorySemanticCacheParametersModel,
-    LLMModel,
     WarehouseModel,
 )
 from dao_ai.genie.cache.base import (
-    CacheResult,
     GenieServiceBase,
     SQLCacheEntry,
 )
-from dao_ai.genie.cache.semantic import (
-    get_conversation_history,
-)
+from dao_ai.genie.cache.context_aware.base import ContextAwareGenieService
 
 
 @dataclass
@@ -59,6 +49,19 @@ class InMemoryCacheEntry:
     dual embeddings (question + context) for high-precision semantic matching.
 
     Uses LRU (Least Recently Used) eviction strategy when capacity is reached.
+
+    Attributes:
+        genie_space_id: The Genie space ID this entry belongs to
+        question: The original question text
+        conversation_context: Previous conversation context for embedding
+        question_embedding: Embedding vector for the question
+        context_embedding: Embedding vector for the conversation context
+        sql_query: The SQL query to re-execute on cache hit
+        description: Description of the query
+        conversation_id: The conversation ID where this query originated
+        created_at: When the entry was created
+        last_accessed_at: Last access time for LRU eviction
+        message_id: The original Genie message ID (for feedback on cache hits)
     """
 
     genie_space_id: str
@@ -71,6 +74,7 @@ class InMemoryCacheEntry:
     conversation_id: str
     created_at: datetime
     last_accessed_at: datetime  # Track last access time for LRU eviction
+    message_id: str | None = None  # Original Genie message ID for feedback
 
 
 def l2_distance(a: list[float], b: list[float]) -> float:
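For reference, the two helpers named around this hunk are small. The body of l2_distance falls outside the hunks shown, but a standard NumPy implementation consistent with its signature would look as follows (the distance_to_similarity mapping is taken verbatim from the next hunk):

    import numpy as np

    def l2_distance(a: list[float], b: list[float]) -> float:
        # Euclidean (L2) distance between two embedding vectors.
        return float(np.linalg.norm(np.asarray(a) - np.asarray(b)))

    def distance_to_similarity(distance: float) -> float:
        # Monotone mapping into (0, 1]: identical vectors (distance 0) score 1.0.
        return 1.0 / (1.0 + distance)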
@@ -106,9 +110,9 @@ def distance_to_similarity(distance: float) -> float:
     return 1.0 / (1.0 + distance)
 
 
-class InMemorySemanticCacheService(GenieServiceBase):
+class InMemoryContextAwareGenieService(ContextAwareGenieService):
     """
-    In-memory semantic caching decorator using dual embeddings for similarity lookup.
+    In-memory context-aware caching decorator using dual embeddings for similarity lookup.
 
     This service caches the SQL query generated by Genie along with dual embeddings
     (question + conversation context) for high-precision semantic matching. On
@@ -123,7 +127,7 @@ class InMemorySemanticCacheService(GenieServiceBase):
 
     Example:
         from dao_ai.config import GenieInMemorySemanticCacheParametersModel
-        from dao_ai.genie.cache import InMemorySemanticCacheService
+        from dao_ai.genie.cache.context_aware import InMemoryContextAwareGenieService
 
         cache_params = GenieInMemorySemanticCacheParametersModel(
             warehouse=warehouse_model,
@@ -132,7 +136,7 @@ class InMemorySemanticCacheService(GenieServiceBase):
             similarity_threshold=0.85,
             capacity=1000,  # Limit to 1000 entries
         )
-        genie = InMemorySemanticCacheService(
+        genie = InMemoryContextAwareGenieService(
            impl=GenieService(Genie(space_id="my-space")),
            parameters=cache_params,
            workspace_client=workspace_client,
@@ -143,7 +147,7 @@ class InMemorySemanticCacheService(GenieServiceBase):
 
     impl: GenieServiceBase
     parameters: GenieInMemorySemanticCacheParametersModel
-    workspace_client: WorkspaceClient | None
+    _workspace_client: WorkspaceClient | None
     name: str
     _embeddings: Any  # DatabricksEmbeddings
     _cache: list[InMemoryCacheEntry]
@@ -159,7 +163,7 @@ class InMemorySemanticCacheService(GenieServiceBase):
         name: str | None = None,
     ) -> None:
         """
-        Initialize the in-memory semantic cache service.
+        Initialize the in-memory context-aware cache service.
 
         Args:
             impl: The underlying GenieServiceBase to delegate to on cache miss.
@@ -171,7 +175,7 @@ class InMemorySemanticCacheService(GenieServiceBase):
         """
         self.impl = impl
         self.parameters = parameters
-        self.workspace_client = workspace_client
+        self._workspace_client = workspace_client
         self.name = name if name is not None else self.__class__.__name__
         self._embeddings = None
         self._cache = []
@@ -179,56 +183,27 @@ class InMemorySemanticCacheService(GenieServiceBase):
         self._embedding_dims = None
         self._setup_complete = False
 
-    def initialize(self) -> "InMemorySemanticCacheService":
-        """
-        Eagerly initialize the cache service.
-
-        Call this during tool creation to:
-        - Validate configuration early (fail fast)
-        - Initialize embeddings model before any requests
-        - Avoid first-request latency from lazy initialization
-
-        Returns:
-            self for method chaining
-        """
-        self._setup()
-        return self
-
     def _setup(self) -> None:
         """Initialize embeddings model lazily."""
         if self._setup_complete:
             return
 
-        # Initialize embeddings
-        # Convert embedding_model to LLMModel if it's a string
-        embedding_model: LLMModel = (
-            LLMModel(name=self.parameters.embedding_model)
-            if isinstance(self.parameters.embedding_model, str)
-            else self.parameters.embedding_model
+        # Initialize embeddings using base class helper
+        self._initialize_embeddings(
+            self.parameters.embedding_model,
+            self.parameters.embedding_dims,
         )
-        self._embeddings = embedding_model.as_embeddings_model()
-
-        # Auto-detect embedding dimensions if not provided
-        if self.parameters.embedding_dims is None:
-            sample_embedding: list[float] = self._embeddings.embed_query("test")
-            self._embedding_dims = len(sample_embedding)
-            logger.debug(
-                "Auto-detected embedding dimensions",
-                layer=self.name,
-                dims=self._embedding_dims,
-            )
-        else:
-            self._embedding_dims = self.parameters.embedding_dims
 
         self._setup_complete = True
         logger.debug(
-            "In-memory semantic cache initialized",
+            "In-memory context-aware cache initialized",
             layer=self.name,
             space_id=self.space_id,
             dims=self._embedding_dims,
             capacity=self.parameters.capacity,
         )
 
+    # Property implementations
     @property
     def warehouse(self) -> WarehouseModel:
         """The warehouse used for executing cached SQL queries."""
@@ -248,23 +223,25 @@ class InMemorySemanticCacheService(GenieServiceBase):
         return self.parameters.similarity_threshold
 
     @property
-    def embedding_dims(self) -> int:
-        """Dimension size for embeddings (auto-detected if not configured)."""
-        if self._embedding_dims is None:
-            raise RuntimeError(
-                "Embedding dimensions not yet initialized. Call _setup() first."
-            )
-        return self._embedding_dims
+    def context_similarity_threshold(self) -> float:
+        """Minimum similarity for context matching."""
+        return self.parameters.context_similarity_threshold
+
+    @property
+    def question_weight(self) -> float:
+        """Weight for question similarity in combined score."""
+        return self.parameters.question_weight
+
+    @property
+    def context_weight(self) -> float:
+        """Weight for context similarity in combined score."""
+        return self.parameters.context_weight
 
     def _embed_question(
         self, question: str, conversation_id: str | None = None
     ) -> tuple[list[float], list[float], str]:
         """
-        Generate dual embeddings: one for the question, one for the conversation context.
-
-        This enables separate matching of question similarity vs context similarity,
-        improving precision by ensuring both the question AND the conversation context
-        are semantically similar before returning a cached result.
+        Generate dual embeddings using Genie API for conversation history.
 
         Args:
             question: The question to embed
@@ -272,84 +249,13 @@ class InMemorySemanticCacheService(GenieServiceBase):
 
         Returns:
             Tuple of (question_embedding, context_embedding, conversation_context_string)
-            - question_embedding: Vector embedding of just the question
-            - context_embedding: Vector embedding of the conversation context (or zero vector if no context)
-            - conversation_context_string: The conversation context string (empty if no context)
         """
-        conversation_context = ""
-
-        # If conversation context is enabled and available
-        if (
-            self.workspace_client is not None
-            and conversation_id is not None
-            and self.parameters.context_window_size > 0
-        ):
-            try:
-                # Retrieve conversation history
-                conversation_messages = get_conversation_history(
-                    workspace_client=self.workspace_client,
-                    space_id=self.space_id,
-                    conversation_id=conversation_id,
-                    max_messages=self.parameters.context_window_size
-                    * 2,  # Get extra for safety
-                )
-
-                # Build context string (just the "Previous:" messages, not the current question)
-                if conversation_messages:
-                    recent_messages = (
-                        conversation_messages[-self.parameters.context_window_size :]
-                        if len(conversation_messages)
-                        > self.parameters.context_window_size
-                        else conversation_messages
-                    )
-
-                    context_parts: list[str] = []
-                    for msg in recent_messages:
-                        if msg.content:
-                            content: str = msg.content
-                            if len(content) > 500:
-                                content = content[:500] + "..."
-                            context_parts.append(f"Previous: {content}")
-
-                    conversation_context = "\n".join(context_parts)
-
-                    # Truncate if too long
-                    estimated_tokens = len(conversation_context) / 4
-                    if estimated_tokens > self.parameters.max_context_tokens:
-                        target_chars = self.parameters.max_context_tokens * 4
-                        conversation_context = (
-                            conversation_context[:target_chars] + "..."
-                        )
-
-                logger.trace(
-                    "Using conversation context",
-                    layer=self.name,
-                    messages_count=len(conversation_messages),
-                    window_size=self.parameters.context_window_size,
-                )
-            except Exception as e:
-                logger.warning(
-                    "Failed to build conversation context, using question only",
-                    layer=self.name,
-                    error=str(e),
-                )
-                conversation_context = ""
-
-        # Generate dual embeddings
-        if conversation_context:
-            # Embed both question and context
-            embeddings: list[list[float]] = self._embeddings.embed_documents(
-                [question, conversation_context]
-            )
-            question_embedding = embeddings[0]
-            context_embedding = embeddings[1]
-        else:
-            # Only embed question, use zero vector for context
-            embeddings = self._embeddings.embed_documents([question])
-            question_embedding = embeddings[0]
-            context_embedding = [0.0] * len(question_embedding)  # Zero vector
-
-        return question_embedding, context_embedding, conversation_context
+        return self._embed_question_with_genie_history(
+            question,
+            conversation_id,
+            self.parameters.context_window_size,
+            self.parameters.max_context_tokens,
+        )
 
     @mlflow.trace(name="semantic_search_in_memory")
     def _find_similar(
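The removed body above is what makes the dual-embedding contract concrete; the new code delegates to _embed_question_with_genie_history on the base class, whose internals are not part of this diff. A condensed sketch of the contract the removed code implemented:

    def dual_embed(
        embeddings, question: str, context: str
    ) -> tuple[list[float], list[float]]:
        # From the removed body: embed question and context together when
        # context exists; otherwise fall back to a zero vector so context
        # similarity contributes nothing to the match.
        if context:
            q_vec, c_vec = embeddings.embed_documents([question, context])
        else:
            q_vec = embeddings.embed_documents([question])[0]
            c_vec = [0.0] * len(q_vec)
        return q_vec, c_vec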
@@ -363,9 +269,6 @@ class InMemorySemanticCacheService(GenieServiceBase):
         """
         Find a semantically similar cached entry using dual embedding matching.
 
-        This method matches BOTH the question AND the conversation context separately,
-        ensuring high precision by requiring both to be semantically similar.
-
         Performs linear scan through all cache entries, filtering by space_id and
         calculating L2 distances for similarity matching.
 
@@ -382,8 +285,8 @@ class InMemorySemanticCacheService(GenieServiceBase):
         ttl_seconds = self.parameters.time_to_live_seconds
         ttl_disabled = ttl_seconds is None or ttl_seconds < 0
 
-        question_weight: float = self.parameters.question_weight
-        context_weight: float = self.parameters.context_weight
+        question_weight = self.question_weight
+        context_weight = self.context_weight
 
         best_entry: InMemoryCacheEntry | None = None
         best_question_sim: float = 0.0
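The scoring line itself falls outside the hunks shown, but the names here imply a weighted blend of the two per-dimension similarities, gated by both thresholds. A sketch under that assumption:

    def combined_score(
        q_sim: float, c_sim: float, question_weight: float, context_weight: float
    ) -> float:
        # Presumed source of best_combined_sim: weighted blend of the two
        # similarities produced by distance_to_similarity(l2_distance(...)).
        return question_weight * q_sim + context_weight * c_sim

    def is_hit(q_sim: float, c_sim: float, q_threshold: float, c_threshold: float) -> bool:
        # BOTH thresholds must pass, matching the two "Cache MISS (... similarity
        # too low)" branches later in this diff.
        return q_sim >= q_threshold and c_sim >= c_threshold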
@@ -406,7 +309,6 @@ class InMemorySemanticCacheService(GenieServiceBase):
                 is_valid = age.total_seconds() <= ttl_seconds
 
             if not is_valid:
-                # Mark for deletion
                 entries_to_delete.append(idx)
                 continue
 
@@ -436,11 +338,7 @@ class InMemorySemanticCacheService(GenieServiceBase):
         # Delete expired entries
         for idx in reversed(entries_to_delete):
             del self._cache[idx]
-            logger.trace(
-                "Deleted expired entry",
-                layer=self.name,
-                index=idx,
-            )
+            logger.trace("Deleted expired entry", layer=self.name, index=idx)
 
         # No entries found
         if best_entry is None:
@@ -461,32 +359,28 @@ class InMemorySemanticCacheService(GenieServiceBase):
             context_sim=f"{best_context_sim:.4f}",
             combined_sim=f"{best_combined_sim:.4f}",
             cached_question=best_entry.question[:50],
-            cached_context=best_entry.conversation_context[:80],
         )
 
-        # Check BOTH similarity thresholds (dual embedding precision check)
-        if best_question_sim < self.parameters.similarity_threshold:
+        # Check BOTH similarity thresholds
+        if best_question_sim < self.similarity_threshold:
             logger.info(
                 "Cache MISS (question similarity too low)",
                 layer=self.name,
                 question_sim=f"{best_question_sim:.4f}",
-                threshold=self.parameters.similarity_threshold,
-                delegating_to=type(self.impl).__name__,
+                threshold=self.similarity_threshold,
             )
             return None
 
-        if best_context_sim < self.parameters.context_similarity_threshold:
+        if best_context_sim < self.context_similarity_threshold:
             logger.info(
                 "Cache MISS (context similarity too low)",
                 layer=self.name,
                 context_sim=f"{best_context_sim:.4f}",
-                threshold=self.parameters.context_similarity_threshold,
-                delegating_to=type(self.impl).__name__,
+                threshold=self.context_similarity_threshold,
             )
             return None
 
-        # Cache HIT!
-        # Update last accessed time for LRU eviction
+        # Cache HIT - Update last accessed time
         with self._lock:
             best_entry.last_accessed_at = datetime.now()
 
@@ -501,8 +395,6 @@ class InMemorySemanticCacheService(GenieServiceBase):
             question_similarity=f"{best_question_sim:.4f}",
             context_similarity=f"{best_context_sim:.4f}",
             combined_similarity=f"{best_combined_sim:.4f}",
-            cached_sql=best_entry.sql_query[:80] if best_entry.sql_query else None,
-            ttl_seconds=self.parameters.time_to_live_seconds,
         )
 
         cache_entry = SQLCacheEntry(
@@ -510,6 +402,9 @@ class InMemorySemanticCacheService(GenieServiceBase):
             description=best_entry.description,
             conversation_id=best_entry.conversation_id,
             created_at=best_entry.created_at,
+            message_id=best_entry.message_id,
+            # In-memory caches don't have database row IDs
+            cache_entry_id=None,
         )
         return cache_entry, best_combined_sim
 
@@ -520,9 +415,10 @@ class InMemorySemanticCacheService(GenieServiceBase):
         question_embedding: list[float],
         context_embedding: list[float],
         response: GenieResponse,
+        message_id: str | None = None,
     ) -> None:
         """
-        Store a new cache entry with dual embeddings for this Genie space.
+        Store a new cache entry with dual embeddings and message_id.
 
         If capacity is set and reached, evicts least recently used entry (LRU).
         """
@@ -537,19 +433,19 @@ class InMemorySemanticCacheService(GenieServiceBase):
             description=response.description,
             conversation_id=response.conversation_id,
             created_at=now,
-            last_accessed_at=now,  # Initialize to now; updated on cache hits (traditional LRU)
+            last_accessed_at=now,
+            message_id=message_id,
         )
 
         with self._lock:
             # Enforce capacity limit (LRU eviction)
             if self.parameters.capacity is not None:
-                # Count entries for this space_id
                 space_entries = [
                     e for e in self._cache if e.genie_space_id == self.space_id
                 ]
 
                 while len(space_entries) >= self.parameters.capacity:
-                    # Find and remove least recently used entry for this space
+                    # Find and remove least recently used entry
                     lru_idx = None
                     lru_time = None
 
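The hunk cuts off before the scan that fills lru_idx and lru_time. A sketch of the scan those variables imply (entry objects are the InMemoryCacheEntry dataclass defined earlier in this diff; the function framing is illustrative):

    from datetime import datetime

    def find_lru_index(cache: list, space_id: str) -> int | None:
        # Pick the entry for this space with the oldest last_accessed_at,
        # i.e. the least recently used one; None if the space has no entries.
        lru_idx: int | None = None
        lru_time: datetime | None = None
        for idx, entry in enumerate(cache):
            if entry.genie_space_id != space_id:
                continue
            if lru_time is None or entry.last_accessed_at < lru_time:
                lru_idx, lru_time = idx, entry.last_accessed_at
        return lru_idx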
@@ -578,8 +474,6 @@ class InMemorySemanticCacheService(GenieServiceBase):
             "Stored cache entry",
             layer=self.name,
             question=question[:50],
-            context=conversation_context[:80],
-            sql=response.query[:50] if response.query else None,
             space=self.space_id,
             cache_size=len(
                 [e for e in self._cache if e.genie_space_id == self.space_id]
@@ -587,189 +481,53 @@ class InMemorySemanticCacheService(GenieServiceBase):
             capacity=self.parameters.capacity,
         )
 
-    @mlflow.trace(name="execute_cached_sql_in_memory_semantic")
-    def _execute_sql(self, sql: str) -> pd.DataFrame | str:
-        """Execute SQL using the warehouse and return results."""
-        client: WorkspaceClient = self.warehouse.workspace_client
-        warehouse_id: str = str(self.warehouse.warehouse_id)
-
-        statement_response: StatementResponse = (
-            client.statement_execution.execute_statement(
-                warehouse_id=warehouse_id,
-                statement=sql,
-                wait_timeout="30s",
-            )
-        )
-
-        if (
-            statement_response.status is not None
-            and statement_response.status.state != StatementState.SUCCEEDED
-        ):
-            error_msg: str = (
-                f"SQL execution failed: {statement_response.status.error.message}"
-                if statement_response.status.error is not None
-                else f"SQL execution failed with state: {statement_response.status.state}"
-            )
-            logger.error("SQL execution failed", layer=self.name, error=error_msg)
-            return error_msg
-
-        if statement_response.result and statement_response.result.data_array:
-            columns: list[str] = []
-            if (
-                statement_response.manifest
-                and statement_response.manifest.schema
-                and statement_response.manifest.schema.columns
-            ):
-                columns = [
-                    col.name
-                    for col in statement_response.manifest.schema.columns
-                    if col.name is not None
-                ]
-
-            data: list[list[Any]] = statement_response.result.data_array
-            if columns:
-                return pd.DataFrame(data, columns=columns)
-            else:
-                return pd.DataFrame(data)
-
-        return pd.DataFrame()
-
-    def ask_question(
-        self, question: str, conversation_id: str | None = None
-    ) -> CacheResult:
-        """
-        Ask a question, using semantic cache if a similar query exists.
+    def _on_stale_cache_entry(self, question: str) -> None:
+        """Remove stale cache entry from memory."""
+        with self._lock:
+            for idx, entry in enumerate(self._cache):
+                if entry.genie_space_id == self.space_id and entry.question == question:
+                    del self._cache[idx]
+                    logger.info(
+                        "Deleted stale cache entry from memory",
+                        layer=self.name,
+                        question=question[:50],
+                    )
+                    break
 
-        On cache hit, re-executes the cached SQL to get fresh data.
-        Returns CacheResult with cache metadata.
+    def _invalidate_by_question(self, question: str) -> bool:
         """
-        return self.ask_question_with_cache_info(question, conversation_id)
+        Invalidate cache entries matching a specific question.
 
-    @mlflow.trace(name="genie_in_memory_semantic_cache_lookup")
-    def ask_question_with_cache_info(
-        self,
-        question: str,
-        conversation_id: str | None = None,
-    ) -> CacheResult:
-        """
-        Ask a question with detailed cache hit information.
-
-        On cache hit, the cached SQL is re-executed to return fresh data, but the
-        conversation_id returned is the current conversation_id (not the cached one).
+        This method is called when negative feedback is received to remove
+        the corresponding cache entry from the in-memory cache.
 
         Args:
-            question: The question to ask
-            conversation_id: Optional conversation ID for context and continuation
+            question: The question text to match and invalidate
 
         Returns:
-            CacheResult with fresh response and cache metadata
-        """
-        # Ensure initialization (lazy init if initialize() wasn't called)
-        self._setup()
-
-        # Generate dual embeddings for the question and conversation context
-        question_embedding: list[float]
-        context_embedding: list[float]
-        conversation_context: str
-        question_embedding, context_embedding, conversation_context = (
-            self._embed_question(question, conversation_id)
-        )
-
-        # Check cache using dual embedding similarity
-        cache_result: tuple[SQLCacheEntry, float] | None = self._find_similar(
-            question,
-            conversation_context,
-            question_embedding,
-            context_embedding,
-            conversation_id,
-        )
-
-        if cache_result is not None:
-            cached, combined_similarity = cache_result
-            logger.debug(
-                "In-memory semantic cache hit",
-                layer=self.name,
-                combined_similarity=f"{combined_similarity:.3f}",
-                question=question[:50],
-                conversation_id=conversation_id,
-            )
-
-            # Re-execute the cached SQL to get fresh data
-            result: pd.DataFrame | str = self._execute_sql(cached.query)
-
-            # IMPORTANT: Use the current conversation_id (from the request), not the cached one
-            # This ensures the conversation continues properly
-            response: GenieResponse = GenieResponse(
-                result=result,
-                query=cached.query,
-                description=cached.description,
-                conversation_id=conversation_id
-                if conversation_id
-                else cached.conversation_id,
-            )
-
-            return CacheResult(response=response, cache_hit=True, served_by=self.name)
-
-        # Cache miss - delegate to wrapped service
-        logger.info(
-            "Cache MISS",
-            layer=self.name,
-            question=question[:80],
-            conversation_id=conversation_id,
-            space_id=self.space_id,
-            similarity_threshold=self.similarity_threshold,
-            delegating_to=type(self.impl).__name__,
-        )
-
-        result: CacheResult = self.impl.ask_question(question, conversation_id)
-
-        # Store in cache if we got a SQL query
-        if result.response.query:
-            logger.debug(
-                "Storing new cache entry",
-                layer=self.name,
-                question=question[:50],
-                conversation_id=conversation_id,
-                space=self.space_id,
-            )
-            self._store_entry(
-                question,
-                conversation_context,
-                question_embedding,
-                context_embedding,
-                result.response,
-            )
-        elif not result.response.query:
-            logger.warning(
-                "Not caching: response has no SQL query",
-                layer=self.name,
-                question=question[:50],
-            )
-
-        return CacheResult(response=result.response, cache_hit=False, served_by=None)
-
-    @property
-    def space_id(self) -> str:
-        return self.impl.space_id
-
-    def invalidate_expired(self) -> int:
+            True if an entry was found and invalidated, False otherwise
         """
-        Remove expired entries from the cache for this Genie space.
+        with self._lock:
+            for idx, entry in enumerate(self._cache):
+                if entry.genie_space_id == self.space_id and entry.question == question:
+                    del self._cache[idx]
+                    logger.info(
+                        "Invalidated cache entry by question",
+                        layer=self.name,
+                        question=question[:50],
+                        space_id=self.space_id,
+                    )
+                    return True
+            return False
 
-        Returns 0 if TTL is disabled (entries never expire).
-        """
-        self._setup()
-        ttl_seconds = self.parameters.time_to_live_seconds
+    # Note: ask_question_with_cache_info is inherited from ContextAwareGenieService
+    # using the Template Method pattern. InMemoryContextAwareGenieService uses the
+    # default empty hook implementations since it doesn't track prompt history.
 
-        # If TTL is disabled, nothing can expire
-        if ttl_seconds is None or ttl_seconds < 0:
-            logger.trace(
-                "TTL disabled, no entries to expire",
-                layer=self.name,
-                space=self.space_id,
-            )
-            return 0
+    # Template Method implementations for invalidate_expired() and clear()
 
+    def _delete_expired_entries(self, ttl_seconds: int) -> int:
+        """Delete expired entries from the cache."""
         deleted = 0
         with self._lock:
             indices_to_delete: list[int] = []
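The note in this hunk describes the Template Method refactor, but ContextAwareGenieService itself is outside this diff. A rough shape, inferred from the hooks appearing in these hunks and from the removed invalidate_expired/clear bodies; everything here is an assumption about the base class, not its actual source:

    class ContextAwareGenieService(GenieServiceBase):
        # Inferred shape only; the real base class is not part of this diff.
        def invalidate_expired(self) -> int:
            self._setup()
            ttl = self.parameters.time_to_live_seconds
            if ttl is None or ttl < 0:
                return 0  # TTL disabled: entries never expire
            return self._delete_expired_entries(ttl)  # backend-specific hook

        def clear(self) -> int:
            self._setup()
            return self._delete_all_entries()  # backend-specific hook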
@@ -783,7 +541,7 @@ class InMemorySemanticCacheService(GenieServiceBase):
                 if age.total_seconds() > ttl_seconds:
                     indices_to_delete.append(idx)
 
-            # Delete in reverse order to preserve indices
+            # Delete in reverse order
             for idx in reversed(indices_to_delete):
                 del self._cache[idx]
                 deleted += 1
@@ -792,18 +550,15 @@ class InMemorySemanticCacheService(GenieServiceBase):
             "Deleted expired entries",
             layer=self.name,
             deleted_count=deleted,
-            space=self.space_id,
         )
 
         return deleted
 
-    def clear(self) -> int:
-        """Clear all entries from the cache for this Genie space."""
-        self._setup()
+    def _delete_all_entries(self) -> int:
+        """Delete all entries for this Genie space."""
         deleted = 0
 
         with self._lock:
-            # Find indices for this space
            indices_to_delete: list[int] = []
            for idx, entry in enumerate(self._cache):
                if entry.genie_space_id == self.space_id:
@@ -815,10 +570,7 @@ class InMemorySemanticCacheService(GenieServiceBase):
                 deleted += 1
 
         logger.debug(
-            "Cleared cache entries",
-            layer=self.name,
-            deleted_count=deleted,
-            space=self.space_id,
+            "Cleared cache entries", layer=self.name, deleted_count=deleted
         )
 
         return deleted
@@ -830,42 +582,140 @@ class InMemorySemanticCacheService(GenieServiceBase):
         with self._lock:
             return len([e for e in self._cache if e.genie_space_id == self.space_id])
 
-    def stats(self) -> dict[str, int | float | None]:
-        """Return cache statistics for this Genie space."""
-        self._setup()
-        ttl_seconds = self.parameters.time_to_live_seconds
-        ttl = self.time_to_live
+    # Template Method implementations for stats()
+
+    def _count_all_entries(self) -> int:
+        """Count all cache entries for this Genie space."""
+        with self._lock:
+            return len([e for e in self._cache if e.genie_space_id == self.space_id])
 
+    def _count_entries_with_ttl(self, ttl_seconds: int) -> tuple[int, int]:
+        """Count total and expired entries for this Genie space."""
+        now = datetime.now()
         with self._lock:
             space_entries = [
                 e for e in self._cache if e.genie_space_id == self.space_id
             ]
             total = len(space_entries)
-
-            # If TTL is disabled, all entries are valid
-            if ttl_seconds is None or ttl_seconds < 0:
-                return {
-                    "size": total,
-                    "capacity": self.parameters.capacity,
-                    "ttl_seconds": None,
-                    "similarity_threshold": self.similarity_threshold,
-                    "expired_entries": 0,
-                    "valid_entries": total,
-                }
-
-            # Count expired entries
-            now = datetime.now()
             expired = 0
             for entry in space_entries:
                 age = now - entry.created_at
                 if age.total_seconds() > ttl_seconds:
                     expired += 1
+            return total, expired
+
+    def _get_additional_stats(self) -> dict[str, Any]:
+        """Add capacity info to stats."""
+        return {"capacity": self.parameters.capacity}
+
+    def get_entries(
+        self,
+        limit: int | None = None,
+        offset: int | None = None,
+        include_embeddings: bool = False,
+        conversation_id: str | None = None,
+        created_after: datetime | None = None,
+        created_before: datetime | None = None,
+        question_contains: str | None = None,
+    ) -> list[dict[str, Any]]:
+        """
+        Get cache entries with optional filtering.
+
+        This method retrieves cache entries for inspection, debugging, or
+        generating evaluation datasets for threshold optimization.
+
+        Args:
+            limit: Maximum number of entries to return (None = no limit)
+            offset: Number of entries to skip for pagination (None = 0)
+            include_embeddings: Whether to include embedding vectors in results.
+                Embeddings are large, so set False for general inspection.
+            conversation_id: Filter by conversation ID (None = all conversations)
+            created_after: Only entries created after this time (None = no filter)
+            created_before: Only entries created before this time (None = no filter)
+            question_contains: Case-insensitive text search on question field
+
+        Returns:
+            List of cache entry dicts. See base class for full key documentation.
+
+        Example:
+            # Get entries with embeddings for evaluation dataset generation
+            entries = cache.get_entries(include_embeddings=True, limit=100)
+            eval_dataset = generate_eval_dataset_from_cache(entries)
+        """
+        self._setup()
+
+        with self._lock:
+            # Filter entries for this space
+            filtered_entries: list[InMemoryCacheEntry] = []
+
+            for entry in self._cache:
+                # Filter by space_id
+                if entry.genie_space_id != self.space_id:
+                    continue
+
+                # Filter by conversation_id
+                if (
+                    conversation_id is not None
+                    and entry.conversation_id != conversation_id
+                ):
+                    continue
+
+                # Filter by created_after
+                if created_after is not None and entry.created_at <= created_after:
+                    continue
+
+                # Filter by created_before
+                if created_before is not None and entry.created_at >= created_before:
+                    continue
+
+                # Filter by question_contains (case-insensitive)
+                if question_contains is not None:
+                    if question_contains.lower() not in entry.question.lower():
+                        continue
+
+                filtered_entries.append(entry)
+
+            # Sort by created_at descending (most recent first)
+            filtered_entries.sort(key=lambda e: e.created_at, reverse=True)
+
+            # Apply offset
+            if offset is not None and offset > 0:
+                filtered_entries = filtered_entries[offset:]
+
+            # Apply limit
+            if limit is not None:
+                filtered_entries = filtered_entries[:limit]
+
+            # Convert to dicts
+            entries: list[dict[str, Any]] = []
+            for entry in filtered_entries:
+                result: dict[str, Any] = {
+                    "id": None,  # In-memory caches don't have database IDs
+                    "question": entry.question,
+                    "conversation_context": entry.conversation_context,
+                    "sql_query": entry.sql_query,
+                    "description": entry.description,
+                    "conversation_id": entry.conversation_id,
+                    "created_at": entry.created_at,
+                }
+
+                if include_embeddings:
+                    result["question_embedding"] = entry.question_embedding
+                    result["context_embedding"] = entry.context_embedding
+
+                entries.append(result)
+
+        logger.debug(
+            "Retrieved cache entries",
+            layer=self.name,
+            count=len(entries),
+            include_embeddings=include_embeddings,
+            filters={
+                "conversation_id": conversation_id,
+                "created_after": str(created_after) if created_after else None,
+                "created_before": str(created_before) if created_before else None,
+                "question_contains": question_contains,
+            },
+        )
 
-        return {
-            "size": total,
-            "capacity": self.parameters.capacity,
-            "ttl_seconds": ttl.total_seconds() if ttl else None,
-            "similarity_threshold": self.similarity_threshold,
-            "expired_entries": expired,
-            "valid_entries": total - expired,
-        }
+        return entries
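
A short usage sketch of the new get_entries API; here cache stands for a constructed InMemoryContextAwareGenieService, and generate_eval_dataset_from_cache is the helper named in the docstring above, not defined in this diff:

    # Lightweight inspection: omit the large embedding vectors (the default).
    recent = cache.get_entries(limit=50, question_contains="revenue")
    for row in recent:
        print(row["created_at"], row["question"], "->", (row["sql_query"] or "")[:60])

    # Threshold-tuning workflow from the docstring: include embeddings.
    entries = cache.get_entries(include_embeddings=True, limit=100)
    eval_dataset = generate_eval_dataset_from_cache(entries)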