dao-ai 0.0.36__py3-none-any.whl → 0.1.0__py3-none-any.whl

This diff compares two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
Files changed (59)
  1. dao_ai/__init__.py +29 -0
  2. dao_ai/cli.py +195 -30
  3. dao_ai/config.py +770 -244
  4. dao_ai/genie/__init__.py +1 -22
  5. dao_ai/genie/cache/__init__.py +1 -2
  6. dao_ai/genie/cache/base.py +20 -70
  7. dao_ai/genie/cache/core.py +75 -0
  8. dao_ai/genie/cache/lru.py +44 -21
  9. dao_ai/genie/cache/semantic.py +390 -109
  10. dao_ai/genie/core.py +35 -0
  11. dao_ai/graph.py +27 -253
  12. dao_ai/hooks/__init__.py +9 -6
  13. dao_ai/hooks/core.py +22 -190
  14. dao_ai/memory/__init__.py +10 -0
  15. dao_ai/memory/core.py +23 -5
  16. dao_ai/memory/databricks.py +389 -0
  17. dao_ai/memory/postgres.py +2 -2
  18. dao_ai/messages.py +6 -4
  19. dao_ai/middleware/__init__.py +125 -0
  20. dao_ai/middleware/assertions.py +778 -0
  21. dao_ai/middleware/base.py +50 -0
  22. dao_ai/middleware/core.py +61 -0
  23. dao_ai/middleware/guardrails.py +415 -0
  24. dao_ai/middleware/human_in_the_loop.py +228 -0
  25. dao_ai/middleware/message_validation.py +554 -0
  26. dao_ai/middleware/summarization.py +192 -0
  27. dao_ai/models.py +1177 -108
  28. dao_ai/nodes.py +118 -161
  29. dao_ai/optimization.py +664 -0
  30. dao_ai/orchestration/__init__.py +52 -0
  31. dao_ai/orchestration/core.py +287 -0
  32. dao_ai/orchestration/supervisor.py +264 -0
  33. dao_ai/orchestration/swarm.py +226 -0
  34. dao_ai/prompts.py +126 -29
  35. dao_ai/providers/databricks.py +126 -381
  36. dao_ai/state.py +139 -21
  37. dao_ai/tools/__init__.py +8 -5
  38. dao_ai/tools/core.py +57 -4
  39. dao_ai/tools/email.py +280 -0
  40. dao_ai/tools/genie.py +47 -24
  41. dao_ai/tools/mcp.py +4 -3
  42. dao_ai/tools/memory.py +50 -0
  43. dao_ai/tools/python.py +4 -12
  44. dao_ai/tools/search.py +14 -0
  45. dao_ai/tools/slack.py +1 -1
  46. dao_ai/tools/unity_catalog.py +8 -6
  47. dao_ai/tools/vector_search.py +16 -9
  48. dao_ai/utils.py +72 -8
  49. dao_ai-0.1.0.dist-info/METADATA +1878 -0
  50. dao_ai-0.1.0.dist-info/RECORD +62 -0
  51. dao_ai/chat_models.py +0 -204
  52. dao_ai/guardrails.py +0 -112
  53. dao_ai/tools/genie/__init__.py +0 -236
  54. dao_ai/tools/human_in_the_loop.py +0 -100
  55. dao_ai-0.0.36.dist-info/METADATA +0 -951
  56. dao_ai-0.0.36.dist-info/RECORD +0 -47
  57. {dao_ai-0.0.36.dist-info → dao_ai-0.1.0.dist-info}/WHEEL +0 -0
  58. {dao_ai-0.0.36.dist-info → dao_ai-0.1.0.dist-info}/entry_points.txt +0 -0
  59. {dao_ai-0.0.36.dist-info → dao_ai-0.1.0.dist-info}/licenses/LICENSE +0 -0
dao_ai/genie/cache/semantic.py
@@ -4,6 +4,10 @@ Semantic cache implementation for Genie SQL queries using PostgreSQL pg_vector.
 This module provides a semantic cache that uses embeddings and similarity search
 to find cached queries that match the intent of new questions. Cache entries are
 partitioned by genie_space_id to ensure proper isolation between Genie spaces.
+
+The cache supports conversation-aware embedding using a rolling window approach
+to capture context from recent conversation turns, improving accuracy for
+multi-turn conversations with anaphoric references.
 """
 
 from datetime import timedelta
@@ -12,14 +16,18 @@ from typing import Any
 import mlflow
 import pandas as pd
 from databricks.sdk import WorkspaceClient
+from databricks.sdk.service.dashboards import (
+    GenieListConversationMessagesResponse,
+    GenieMessage,
+)
 from databricks.sdk.service.sql import StatementResponse, StatementState
 from databricks_ai_bridge.genie import GenieResponse
 from loguru import logger
-from mlflow.entities import SpanType
 
 from dao_ai.config import (
     DatabaseModel,
     GenieSemanticCacheParametersModel,
+    LLMModel,
     WarehouseModel,
 )
 from dao_ai.genie.cache.base import (
@@ -32,6 +40,112 @@ from dao_ai.genie.cache.base import (
 DbRow = dict[str, Any]
 
 
+def get_conversation_history(
+    workspace_client: WorkspaceClient,
+    space_id: str,
+    conversation_id: str,
+    max_messages: int = 10,
+) -> list[GenieMessage]:
+    """
+    Retrieve conversation history from Genie.
+
+    Args:
+        workspace_client: The Databricks workspace client
+        space_id: The Genie space ID
+        conversation_id: The conversation ID to retrieve
+        max_messages: Maximum number of messages to retrieve
+
+    Returns:
+        List of GenieMessage objects representing the conversation history
+    """
+    try:
+        # Use the Genie API to retrieve conversation messages
+        response: GenieListConversationMessagesResponse = (
+            workspace_client.genie.list_conversation_messages(
+                space_id=space_id,
+                conversation_id=conversation_id,
+            )
+        )
+
+        # Return the most recent messages up to max_messages
+        if response.messages is not None:
+            all_messages: list[GenieMessage] = list(response.messages)
+            return (
+                all_messages[-max_messages:]
+                if len(all_messages) > max_messages
+                else all_messages
+            )
+        return []
+    except Exception as e:
+        logger.warning(
+            f"Failed to retrieve conversation history for conversation_id={conversation_id}: {e}"
+        )
+        return []
+
+
+def build_context_string(
+    question: str,
+    conversation_messages: list[GenieMessage],
+    window_size: int,
+    max_tokens: int = 2000,
+) -> str:
+    """
+    Build a context-aware question string using a rolling window.
+
+    This function creates a concatenated string that includes recent conversation
+    turns to provide context for semantic similarity matching.
+
+    Args:
+        question: The current question
+        conversation_messages: List of previous conversation messages
+        window_size: Number of previous turns to include
+        max_tokens: Maximum estimated tokens (rough approximation: 4 chars = 1 token)
+
+    Returns:
+        Context-aware question string formatted for embedding
+    """
+    if window_size <= 0 or not conversation_messages:
+        return question
+
+    # Take the last window_size messages (most recent)
+    recent_messages = (
+        conversation_messages[-window_size:]
+        if len(conversation_messages) > window_size
+        else conversation_messages
+    )
+
+    # Build context parts
+    context_parts: list[str] = []
+
+    for msg in recent_messages:
+        # Only include messages with content from the history
+        if msg.content:
+            # Limit message length to prevent token overflow
+            content: str = msg.content
+            if len(content) > 500:  # Truncate very long messages
+                content = content[:500] + "..."
+            context_parts.append(f"Previous: {content}")
+
+    # Add current question
+    context_parts.append(f"Current: {question}")
+
+    # Join with newlines
+    context_string = "\n".join(context_parts)
+
+    # Rough token limit check (4 chars ≈ 1 token)
+    estimated_tokens = len(context_string) / 4
+    if estimated_tokens > max_tokens:
+        # Truncate to fit max_tokens
+        target_chars = max_tokens * 4
+        context_string = context_string[:target_chars] + "..."
+        logger.debug(
+            f"Truncated context string from {len(context_string)} to {target_chars} chars "
+            f"(estimated {max_tokens} tokens)"
+        )
+
+    return context_string
+
+
 class SemanticCacheService(GenieServiceBase):
     """
     Semantic caching decorator that uses PostgreSQL pg_vector for similarity lookup.
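The rolling-window format produced by the new build_context_string helper is easiest to see on a concrete input. A minimal sketch (the messages and question here are hypothetical, not from the package):

    # Sketch: embedding text for a two-turn window, per build_context_string above.
    # `messages` stands in for GenieMessage objects whose .content fields are
    # "Show me total sales" and "Break that down by region" (hypothetical).
    context = build_context_string(
        question="What about last month?",
        conversation_messages=messages,
        window_size=2,
    )
    # context is now:
    # Previous: Show me total sales
    # Previous: Break that down by region
    # Current: What about last month?

Embedding this concatenated string rather than the bare question is what lets an anaphoric follow-up like "What about last month?" match a cached entry from the same conversational context.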
@@ -59,8 +173,7 @@ class SemanticCacheService(GenieServiceBase):
         )
         genie = SemanticCacheService(
             impl=GenieService(Genie(space_id="my-space")),
-            parameters=cache_params,
-            genie_space_id="my-space"
+            parameters=cache_params
         )
 
     Thread-safe: Uses connection pooling from psycopg_pool.
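Under the new signature, enabling conversation-aware caching only requires passing a WorkspaceClient; the space ID is now taken from the wrapped service. A minimal sketch building on the docstring example above (cache_params as configured there; the client value is illustrative):

    from databricks.sdk import WorkspaceClient

    # workspace_client is optional: without it the cache still works, but
    # conversation history is never fetched and context embeddings fall
    # back to zero vectors.
    genie = SemanticCacheService(
        impl=GenieService(Genie(space_id="my-space")),
        parameters=cache_params,
        workspace_client=WorkspaceClient(),
    )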
@@ -68,7 +181,7 @@ class SemanticCacheService(GenieServiceBase):
 
     impl: GenieServiceBase
     parameters: GenieSemanticCacheParametersModel
-    genie_space_id: str
+    workspace_client: WorkspaceClient | None
     name: str
     _embeddings: Any  # DatabricksEmbeddings
     _pool: Any  # ConnectionPool
@@ -79,21 +192,23 @@ class SemanticCacheService(GenieServiceBase):
         self,
         impl: GenieServiceBase,
         parameters: GenieSemanticCacheParametersModel,
-        genie_space_id: str,
+        workspace_client: WorkspaceClient | None = None,
         name: str | None = None,
     ) -> None:
         """
         Initialize the semantic cache service.
 
         Args:
-            impl: The underlying GenieServiceBase to delegate to on cache miss
+            impl: The underlying GenieServiceBase to delegate to on cache miss.
+                The space_id will be obtained from impl.space_id.
             parameters: Cache configuration including database, warehouse, embedding model
-            genie_space_id: The Genie space ID for partitioning cache entries
+            workspace_client: Optional WorkspaceClient for retrieving conversation history.
+                If None, conversation context will not be used.
             name: Name for this cache layer (for logging). Defaults to class name.
         """
         self.impl = impl
         self.parameters = parameters
-        self.genie_space_id = genie_space_id
+        self.workspace_client = workspace_client
         self.name = name if name is not None else self.__class__.__name__
         self._embeddings = None
         self._pool = None
@@ -120,17 +235,16 @@ class SemanticCacheService(GenieServiceBase):
         if self._setup_complete:
             return
 
-        from databricks_langchain import DatabricksEmbeddings
-
         from dao_ai.memory.postgres import PostgresPoolManager
 
         # Initialize embeddings
-        embedding_model: str = (
-            self.parameters.embedding_model
+        # Convert embedding_model to LLMModel if it's a string
+        embedding_model: LLMModel = (
+            LLMModel(name=self.parameters.embedding_model)
             if isinstance(self.parameters.embedding_model, str)
-            else self.parameters.embedding_model.name
+            else self.parameters.embedding_model
         )
-        self._embeddings = DatabricksEmbeddings(endpoint=embedding_model)
+        self._embeddings = embedding_model.as_embeddings_model()
 
         # Auto-detect embedding dimensions if not provided
         if self.parameters.embedding_dims is None:
@@ -150,7 +264,7 @@ class SemanticCacheService(GenieServiceBase):
 
         self._setup_complete = True
         logger.debug(
-            f"[{self.name}] Semantic cache initialized for space '{self.genie_space_id}' "
+            f"[{self.name}] Semantic cache initialized for space '{self.space_id}' "
            f"with table '{self.table_name}' (dims={self._embedding_dims})"
         )
 
@@ -212,7 +326,10 @@ class SemanticCacheService(GenieServiceBase):
             id SERIAL PRIMARY KEY,
             genie_space_id TEXT NOT NULL,
             question TEXT NOT NULL,
+            conversation_context TEXT,
+            context_string TEXT,
             question_embedding vector({self.embedding_dims}),
+            context_embedding vector({self.embedding_dims}),
             sql_query TEXT NOT NULL,
             description TEXT,
             conversation_id TEXT,
@@ -221,12 +338,18 @@ class SemanticCacheService(GenieServiceBase):
         """
         # Index for efficient similarity search partitioned by genie_space_id
         # Use L2 (Euclidean) distance - optimal for Databricks GTE embeddings
-        create_embedding_index_sql: str = f"""
-            CREATE INDEX IF NOT EXISTS {self.table_name}_embedding_idx
+        create_question_embedding_index_sql: str = f"""
+            CREATE INDEX IF NOT EXISTS {self.table_name}_question_embedding_idx
             ON {self.table_name}
             USING ivfflat (question_embedding vector_l2_ops)
             WITH (lists = 100)
         """
+        create_context_embedding_index_sql: str = f"""
+            CREATE INDEX IF NOT EXISTS {self.table_name}_context_embedding_idx
+            ON {self.table_name}
+            USING ivfflat (context_embedding vector_l2_ops)
+            WITH (lists = 100)
+        """
         # Index for filtering by genie_space_id
         create_space_index_sql: str = f"""
             CREATE INDEX IF NOT EXISTS {self.table_name}_space_idx
@@ -257,35 +380,132 @@ class SemanticCacheService(GenieServiceBase):
 
                 cur.execute(create_table_sql)
                 cur.execute(create_space_index_sql)
-                cur.execute(create_embedding_index_sql)
+                cur.execute(create_question_embedding_index_sql)
+                cur.execute(create_context_embedding_index_sql)
+
+    def _embed_question(
+        self, question: str, conversation_id: str | None = None
+    ) -> tuple[list[float], list[float], str]:
+        """
+        Generate dual embeddings: one for the question, one for the conversation context.
+
+        This enables separate matching of question similarity vs context similarity,
+        improving precision by ensuring both the question AND the conversation context
+        are semantically similar before returning a cached result.
+
+        Args:
+            question: The question to embed
+            conversation_id: Optional conversation ID for retrieving context
+
+        Returns:
+            Tuple of (question_embedding, context_embedding, conversation_context_string)
+            - question_embedding: Vector embedding of just the question
+            - context_embedding: Vector embedding of the conversation context (or zero vector if no context)
+            - conversation_context_string: The conversation context string (empty if no context)
+        """
+        conversation_context = ""
+
+        # If conversation context is enabled and available
+        if (
+            self.workspace_client is not None
+            and conversation_id is not None
+            and self.parameters.context_window_size > 0
+        ):
+            try:
+                # Retrieve conversation history
+                conversation_messages = get_conversation_history(
+                    workspace_client=self.workspace_client,
+                    space_id=self.space_id,
+                    conversation_id=conversation_id,
+                    max_messages=self.parameters.context_window_size
+                    * 2,  # Get extra for safety
+                )
+
+                # Build context string (just the "Previous:" messages, not the current question)
+                if conversation_messages:
+                    recent_messages = (
+                        conversation_messages[-self.parameters.context_window_size :]
+                        if len(conversation_messages)
+                        > self.parameters.context_window_size
+                        else conversation_messages
+                    )
+
+                    context_parts: list[str] = []
+                    for msg in recent_messages:
+                        if msg.content:
+                            content: str = msg.content
+                            if len(content) > 500:
+                                content = content[:500] + "..."
+                            context_parts.append(f"Previous: {content}")
+
+                    conversation_context = "\n".join(context_parts)
 
-    def _embed_question(self, question: str) -> list[float]:
-        """Generate embedding for a question."""
-        embeddings: list[list[float]] = self._embeddings.embed_documents([question])
-        return embeddings[0]
+                    # Truncate if too long
+                    estimated_tokens = len(conversation_context) / 4
+                    if estimated_tokens > self.parameters.max_context_tokens:
+                        target_chars = self.parameters.max_context_tokens * 4
+                        conversation_context = (
+                            conversation_context[:target_chars] + "..."
+                        )
+
+                logger.debug(
+                    f"[{self.name}] Using conversation context: {len(conversation_messages)} messages "
+                    f"(window_size={self.parameters.context_window_size})"
+                )
+            except Exception as e:
+                logger.warning(
+                    f"[{self.name}] Failed to build conversation context, using question only: {e}"
+                )
+                conversation_context = ""
+
+        # Generate dual embeddings
+        if conversation_context:
+            # Embed both question and context
+            embeddings: list[list[float]] = self._embeddings.embed_documents(
+                [question, conversation_context]
+            )
+            question_embedding = embeddings[0]
+            context_embedding = embeddings[1]
+        else:
+            # Only embed question, use zero vector for context
+            embeddings = self._embeddings.embed_documents([question])
+            question_embedding = embeddings[0]
+            context_embedding = [0.0] * len(question_embedding)  # Zero vector
+
+        return question_embedding, context_embedding, conversation_context
 
     @mlflow.trace(name="semantic_search")
     def _find_similar(
-        self, question: str, embedding: list[float]
+        self,
+        question: str,
+        conversation_context: str,
+        question_embedding: list[float],
+        context_embedding: list[float],
     ) -> tuple[SQLCacheEntry, float] | None:
         """
-        Find a semantically similar cached entry for this Genie space.
+        Find a semantically similar cached entry using dual embedding matching.
+
+        This method matches BOTH the question AND the conversation context separately,
+        ensuring high precision by requiring both to be semantically similar.
 
         Args:
-            question: The question to search for
-            embedding: The embedding vector of the question
+            question: The original question (for logging)
+            conversation_context: The conversation context string
+            question_embedding: The embedding vector of just the question
+            context_embedding: The embedding vector of the conversation context
 
         Returns:
-            Tuple of (SQLCacheEntry, similarity_score) if found, None otherwise
+            Tuple of (SQLCacheEntry, combined_similarity_score) if found, None otherwise
         """
         # Use L2 (Euclidean) distance - optimal for Databricks GTE embeddings
         # pg_vector's <-> operator returns L2 distance (0 = identical)
         # Convert to similarity: 1 / (1 + distance) gives range [0, 1]
         #
-        # Refresh-on-hit strategy:
-        # 1. Search without TTL filter to find best semantic match
-        # 2. If match is within TTL (or TTL disabled) → cache hit
-        # 3. If match is expired → delete it, return miss (triggers refresh)
+        # Dual embedding strategy:
+        # 1. Calculate separate similarities for question and context
+        # 2. BOTH must exceed their respective thresholds
+        # 3. Combined score is weighted average
+        # 4. Refresh-on-hit: check TTL after similarity check
         ttl_seconds = self.parameters.time_to_live_seconds
         ttl_disabled = ttl_seconds is None or ttl_seconds < 0
 
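One behavior in _embed_question worth calling out: when no context is available, the context embedding is a zero vector rather than an embedding of an empty string. Under the L2-based similarity 1 / (1 + d) used below, two zero vectors sit at distance 0 and score a perfect 1.0, so context-free questions match best against entries that were also cached without context. A condensed sketch of the fallback (embeddings_model stands in for the service's embeddings instance):

    # Condensed from _embed_question: dual embedding with zero-vector fallback.
    if conversation_context:
        question_vec, context_vec = embeddings_model.embed_documents(
            [question, conversation_context]
        )
    else:
        (question_vec,) = embeddings_model.embed_documents([question])
        context_vec = [0.0] * len(question_vec)  # no context signal to match on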
@@ -295,63 +515,87 @@ class SemanticCacheService(GenieServiceBase):
         else:
             is_valid_expr = f"created_at > NOW() - INTERVAL '{ttl_seconds} seconds'"
 
+        # Weighted combined similarity for ordering
+        question_weight: float = self.parameters.question_weight
+        context_weight: float = self.parameters.context_weight
+
         search_sql: str = f"""
             SELECT
                 id,
                 question,
+                conversation_context,
                 sql_query,
                 description,
                 conversation_id,
                 created_at,
-                1.0 / (1.0 + (question_embedding <-> %s::vector)) as similarity,
+                1.0 / (1.0 + (question_embedding <-> %s::vector)) as question_similarity,
+                1.0 / (1.0 + (context_embedding <-> %s::vector)) as context_similarity,
+                ({question_weight} * (1.0 / (1.0 + (question_embedding <-> %s::vector)))) +
+                ({context_weight} * (1.0 / (1.0 + (context_embedding <-> %s::vector)))) as combined_similarity,
                 {is_valid_expr} as is_valid
             FROM {self.table_name}
             WHERE genie_space_id = %s
-            ORDER BY question_embedding <-> %s::vector
+            ORDER BY combined_similarity DESC
             LIMIT 1
         """
 
-        embedding_str: str = f"[{','.join(str(x) for x in embedding)}]"
+        question_emb_str: str = f"[{','.join(str(x) for x in question_embedding)}]"
+        context_emb_str: str = f"[{','.join(str(x) for x in context_embedding)}]"
 
         with self._pool.connection() as conn:
             with conn.cursor() as cur:
                 cur.execute(
                     search_sql,
-                    (embedding_str, self.genie_space_id, embedding_str),
+                    (
+                        question_emb_str,
+                        context_emb_str,
+                        question_emb_str,
+                        context_emb_str,
+                        self.space_id,
+                    ),
                 )
                 row: DbRow | None = cur.fetchone()
 
                 if row is None:
                     logger.info(
                         f"[{self.name}] MISS (no entries): "
-                        f"question='{question[:50]}...' space='{self.genie_space_id}'"
+                        f"question='{question[:50]}...' space='{self.space_id}'"
                     )
                     return None
 
                 # Extract values from dict row
-                entry_id = row.get("id")
-                cached_question = row.get("question", "")
-                sql_query = row["sql_query"]
-                description = row.get("description", "")
-                conversation_id = row.get("conversation_id", "")
-                created_at = row["created_at"]
-                similarity = row["similarity"]
-                is_valid = row.get("is_valid", False)
-
-                # Log best match info (L2 distance can be computed from similarity: d = 1/s - 1)
-                l2_distance = (
-                    (1.0 / similarity) - 1.0 if similarity > 0 else float("inf")
-                )
+                entry_id: Any = row.get("id")
+                cached_question: str = row.get("question", "")
+                cached_context: str = row.get("conversation_context", "")
+                sql_query: str = row["sql_query"]
+                description: str = row.get("description", "")
+                conversation_id_cached: str = row.get("conversation_id", "")
+                created_at: Any = row["created_at"]
+                question_similarity: float = row["question_similarity"]
+                context_similarity: float = row["context_similarity"]
+                combined_similarity: float = row["combined_similarity"]
+                is_valid: bool = row.get("is_valid", False)
+
+                # Log best match info
                 logger.info(
-                    f"[{self.name}] Best match: l2_distance={l2_distance:.4f}, similarity={similarity:.4f}, "
-                    f"is_valid={is_valid}, question='{cached_question[:50]}...'"
+                    f"[{self.name}] Best match: "
+                    f"question_sim={question_similarity:.4f}, context_sim={context_similarity:.4f}, "
+                    f"combined_sim={combined_similarity:.4f}, is_valid={is_valid}, "
+                    f"question='{cached_question[:50]}...', context='{cached_context[:80]}...'"
                 )
 
-                # Check similarity threshold
-                if similarity < self.similarity_threshold:
+                # Check BOTH similarity thresholds (dual embedding precision check)
+                if question_similarity < self.parameters.similarity_threshold:
+                    logger.info(
+                        f"[{self.name}] MISS (question similarity too low): "
+                        f"question_sim={question_similarity:.4f} < threshold={self.parameters.similarity_threshold}"
+                    )
+                    return None
+
+                if context_similarity < self.parameters.context_similarity_threshold:
                     logger.info(
-                        f"[{self.name}] MISS (below threshold): similarity={similarity:.4f} < threshold={self.similarity_threshold} "
-                        f"(cached_question='{cached_question[:50]}...')"
+                        f"[{self.name}] MISS (context similarity too low): "
+                        f"context_sim={context_similarity:.4f} < threshold={self.parameters.context_similarity_threshold}"
                    )
                     return None
 
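The weighted score computed in the SQL above reduces to simple arithmetic. A worked example with illustrative weights (actual values come from GenieSemanticCacheParametersModel, not shown here):

    # sim = 1 / (1 + L2 distance); the weights below are illustrative, not defaults.
    question_weight, context_weight = 0.7, 0.3

    question_sim = 1.0 / (1.0 + 0.25)  # question distance 0.25 -> 0.8
    context_sim = 1.0 / (1.0 + 1.0)    # context distance 1.0   -> 0.5

    combined = question_weight * question_sim + context_weight * context_sim
    # 0.7 * 0.8 + 0.3 * 0.5 = 0.71

Note that combined_similarity only orders the candidates; as the checks above show, a hit additionally requires question_similarity and context_similarity to each clear their own thresholds.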
@@ -361,43 +605,59 @@ class SemanticCacheService(GenieServiceBase):
                     delete_sql = f"DELETE FROM {self.table_name} WHERE id = %s"
                     cur.execute(delete_sql, (entry_id,))
                     logger.info(
-                        f"[{self.name}] MISS (expired, deleted for refresh): similarity={similarity:.4f}, "
-                        f"ttl={ttl_seconds}s, question='{cached_question[:50]}...'"
+                        f"[{self.name}] MISS (expired, deleted for refresh): "
+                        f"combined_sim={combined_similarity:.4f}, ttl={ttl_seconds}s, question='{cached_question[:50]}...'"
                     )
                     return None
 
                 logger.info(
-                    f"[{self.name}] HIT: similarity={similarity:.4f} >= threshold={self.similarity_threshold} "
-                    f"(cached_question='{cached_question[:50]}...')"
+                    f"[{self.name}] HIT: question_sim={question_similarity:.4f}, context_sim={context_similarity:.4f}, "
+                    f"combined_sim={combined_similarity:.4f} (cached_question='{cached_question[:50]}...')"
                 )
 
                 entry = SQLCacheEntry(
                     query=sql_query,
                     description=description,
-                    conversation_id=conversation_id,
+                    conversation_id=conversation_id_cached,
                     created_at=created_at,
                 )
-                return entry, similarity
+                return entry, combined_similarity
 
     def _store_entry(
-        self, question: str, embedding: list[float], response: GenieResponse
+        self,
+        question: str,
+        conversation_context: str,
+        question_embedding: list[float],
+        context_embedding: list[float],
+        response: GenieResponse,
     ) -> None:
-        """Store a new cache entry for this Genie space."""
+        """Store a new cache entry with dual embeddings for this Genie space."""
         insert_sql: str = f"""
             INSERT INTO {self.table_name}
-            (genie_space_id, question, question_embedding, sql_query, description, conversation_id)
-            VALUES (%s, %s, %s::vector, %s, %s, %s)
+            (genie_space_id, question, conversation_context, context_string,
+             question_embedding, context_embedding, sql_query, description, conversation_id)
+            VALUES (%s, %s, %s, %s, %s::vector, %s::vector, %s, %s, %s)
         """
-        embedding_str: str = f"[{','.join(str(x) for x in embedding)}]"
+        question_emb_str: str = f"[{','.join(str(x) for x in question_embedding)}]"
+        context_emb_str: str = f"[{','.join(str(x) for x in context_embedding)}]"
+
+        # Build full context string for backward compatibility (used in logging)
+        if conversation_context:
+            full_context_string = f"{conversation_context}\nCurrent: {question}"
+        else:
+            full_context_string = question
 
         with self._pool.connection() as conn:
             with conn.cursor() as cur:
                 cur.execute(
                     insert_sql,
                     (
-                        self.genie_space_id,
+                        self.space_id,
                         question,
-                        embedding_str,
+                        conversation_context,
+                        full_context_string,
+                        question_emb_str,
+                        context_emb_str,
                         response.query,
                         response.description,
                         response.conversation_id,
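Both the lookup and the insert bind embeddings as bracketed text literals that PostgreSQL casts server-side via %s::vector. The encoding is just the f-string seen above; as a standalone sketch:

    def to_pgvector_literal(embedding: list[float]) -> str:
        # Same encoding as question_emb_str / context_emb_str above.
        return f"[{','.join(str(x) for x in embedding)}]"

    assert to_pgvector_literal([0.1, 0.25, -0.3]) == "[0.1,0.25,-0.3]"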
@@ -405,14 +665,15 @@ class SemanticCacheService(GenieServiceBase):
                 )
                 logger.info(
                     f"[{self.name}] Stored cache entry: question='{question[:50]}...' "
-                    f"sql='{response.query[:50]}...' (space={self.genie_space_id}, table={self.table_name})"
+                    f"context='{conversation_context[:80]}...' "
+                    f"sql='{response.query[:50]}...' (space={self.space_id}, table={self.table_name})"
                 )
 
     @mlflow.trace(name="execute_cached_sql_semantic")
     def _execute_sql(self, sql: str) -> pd.DataFrame | str:
         """Execute SQL using the warehouse and return results."""
         client: WorkspaceClient = self.warehouse.workspace_client
-        warehouse_id: str = self.warehouse.warehouse_id
+        warehouse_id: str = str(self.warehouse.warehouse_id)
 
         statement_response: StatementResponse = (
             client.statement_execution.execute_statement(
@@ -422,10 +683,13 @@ class SemanticCacheService(GenieServiceBase):
             )
         )
 
-        if statement_response.status.state != StatementState.SUCCEEDED:
+        if (
+            statement_response.status is not None
+            and statement_response.status.state != StatementState.SUCCEEDED
+        ):
             error_msg: str = (
                 f"SQL execution failed: {statement_response.status.error.message}"
-                if statement_response.status.error
+                if statement_response.status.error is not None
                 else f"SQL execution failed with state: {statement_response.status.state}"
             )
             logger.error(f"[{self.name}] {error_msg}")
@@ -439,10 +703,10 @@ class SemanticCacheService(GenieServiceBase):
             and statement_response.manifest.schema.columns
         ):
             columns = [
-                col.name for col in statement_response.manifest.schema.columns
+                col.name
+                for col in statement_response.manifest.schema.columns
+                if col.name is not None
             ]
-        elif hasattr(statement_response.result, "schema"):
-            columns = [col.name for col in statement_response.result.schema.columns]
 
         data: list[list[Any]] = statement_response.result.data_array
         if columns:
@@ -454,19 +718,16 @@ class SemanticCacheService(GenieServiceBase):
 
     def ask_question(
         self, question: str, conversation_id: str | None = None
-    ) -> GenieResponse:
+    ) -> CacheResult:
         """
         Ask a question, using semantic cache if a similar query exists.
 
         On cache hit, re-executes the cached SQL to get fresh data.
-        Implements GenieServiceBase for seamless chaining.
+        Returns CacheResult with cache metadata.
         """
-        result: CacheResult = self.ask_question_with_cache_info(
-            question, conversation_id
-        )
-        return result.response
+        return self.ask_question_with_cache_info(question, conversation_id)
 
-    @mlflow.trace(name="genie_semantic_cache_lookup", span_type=SpanType.TOOL)
+    @mlflow.trace(name="genie_semantic_cache_lookup")
     def ask_question_with_cache_info(
         self,
         question: str,
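This is a breaking change in 0.1.0: ask_question now returns CacheResult instead of GenieResponse, so callers unwrap the response themselves. A minimal sketch (genie as constructed in the docstring example; field names as used throughout this file):

    result = genie.ask_question("What were total sales last week?")
    if result.cache_hit:
        print(f"served by cache layer: {result.served_by}")
    response = result.response  # GenieResponse: result, query, description, conversation_id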
@@ -475,11 +736,12 @@ class SemanticCacheService(GenieServiceBase):
         """
         Ask a question with detailed cache hit information.
 
-        On cache hit, the cached SQL is re-executed to return fresh data.
+        On cache hit, the cached SQL is re-executed to return fresh data, but the
+        conversation_id returned is the current conversation_id (not the cached one).
 
         Args:
             question: The question to ask
-            conversation_id: Optional conversation ID
+            conversation_id: Optional conversation ID for context and continuation
 
         Returns:
             CacheResult with fresh response and cache metadata
@@ -487,28 +749,37 @@ class SemanticCacheService(GenieServiceBase):
         # Ensure initialization (lazy init if initialize() wasn't called)
         self._setup()
 
-        # Generate embedding for the question
-        embedding: list[float] = self._embed_question(question)
+        # Generate dual embeddings for the question and conversation context
+        question_embedding: list[float]
+        context_embedding: list[float]
+        conversation_context: str
+        question_embedding, context_embedding, conversation_context = (
+            self._embed_question(question, conversation_id)
+        )
 
-        # Check cache
+        # Check cache using dual embedding similarity
         cache_result: tuple[SQLCacheEntry, float] | None = self._find_similar(
-            question, embedding
+            question, conversation_context, question_embedding, context_embedding
         )
 
         if cache_result is not None:
-            cached, similarity = cache_result
+            cached, combined_similarity = cache_result
             logger.debug(
-                f"[{self.name}] Semantic cache hit (similarity={similarity:.3f}): {question[:50]}..."
+                f"[{self.name}] Semantic cache hit (combined_similarity={combined_similarity:.3f}): {question[:50]}..."
             )
 
             # Re-execute the cached SQL to get fresh data
             result: pd.DataFrame | str = self._execute_sql(cached.query)
 
+            # IMPORTANT: Use the current conversation_id (from the request), not the cached one
+            # This ensures the conversation continues properly
             response: GenieResponse = GenieResponse(
                 result=result,
                 query=cached.query,
                 description=cached.description,
-                conversation_id=cached.conversation_id,
+                conversation_id=conversation_id
+                if conversation_id
+                else cached.conversation_id,
             )
 
             return CacheResult(response=response, cache_hit=True, served_by=self.name)
@@ -516,22 +787,32 @@ class SemanticCacheService(GenieServiceBase):
         # Cache miss - delegate to wrapped service
         logger.debug(f"[{self.name}] Miss: {question[:50]}...")
 
-        response = self.impl.ask_question(question, conversation_id)
+        result: CacheResult = self.impl.ask_question(question, conversation_id)
 
         # Store in cache if we got a SQL query
-        if response.query:
+        if result.response.query:
             logger.info(
                 f"[{self.name}] Storing new cache entry for question: '{question[:50]}...' "
-                f"(space={self.genie_space_id})"
+                f"(space={self.space_id})"
             )
-            self._store_entry(question, embedding, response)
-        elif not response.query:
+            self._store_entry(
+                question,
+                conversation_context,
+                question_embedding,
+                context_embedding,
+                result.response,
+            )
+        elif not result.response.query:
             logger.warning(
                 f"[{self.name}] Not caching: response has no SQL query "
                 f"(question='{question[:50]}...')"
             )
 
-        return CacheResult(response=response, cache_hit=False, served_by=None)
+        return CacheResult(response=result.response, cache_hit=False, served_by=None)
+
+    @property
+    def space_id(self) -> str:
+        return self.impl.space_id
 
     def invalidate_expired(self) -> int:
         """Remove expired entries from the cache for this Genie space.
@@ -544,7 +825,7 @@ class SemanticCacheService(GenieServiceBase):
         # If TTL is disabled, nothing can expire
         if ttl_seconds is None or ttl_seconds < 0:
             logger.debug(
-                f"[{self.name}] TTL disabled, no entries to expire for space {self.genie_space_id}"
+                f"[{self.name}] TTL disabled, no entries to expire for space {self.space_id}"
             )
             return 0
 
@@ -556,10 +837,10 @@ class SemanticCacheService(GenieServiceBase):
 
         with self._pool.connection() as conn:
             with conn.cursor() as cur:
-                cur.execute(delete_sql, (self.genie_space_id, ttl_seconds))
+                cur.execute(delete_sql, (self.space_id, ttl_seconds))
                 deleted: int = cur.rowcount
                 logger.debug(
-                    f"[{self.name}] Deleted {deleted} expired entries for space {self.genie_space_id}"
+                    f"[{self.name}] Deleted {deleted} expired entries for space {self.space_id}"
                 )
                 return deleted
 
@@ -570,10 +851,10 @@ class SemanticCacheService(GenieServiceBase):
 
         with self._pool.connection() as conn:
             with conn.cursor() as cur:
-                cur.execute(delete_sql, (self.genie_space_id,))
+                cur.execute(delete_sql, (self.space_id,))
                 deleted: int = cur.rowcount
                 logger.debug(
-                    f"[{self.name}] Cleared {deleted} entries for space {self.genie_space_id}"
+                    f"[{self.name}] Cleared {deleted} entries for space {self.space_id}"
                 )
                 return deleted
 
@@ -587,7 +868,7 @@ class SemanticCacheService(GenieServiceBase):
 
         with self._pool.connection() as conn:
             with conn.cursor() as cur:
-                cur.execute(count_sql, (self.genie_space_id,))
+                cur.execute(count_sql, (self.space_id,))
                 row: DbRow | None = cur.fetchone()
                 return row.get("count", 0) if row else 0
 
@@ -605,7 +886,7 @@ class SemanticCacheService(GenieServiceBase):
         """
         with self._pool.connection() as conn:
             with conn.cursor() as cur:
-                cur.execute(count_sql, (self.genie_space_id,))
+                cur.execute(count_sql, (self.space_id,))
                 row: DbRow | None = cur.fetchone()
                 total = row.get("total", 0) if row else 0
                 return {
@@ -627,12 +908,12 @@ class SemanticCacheService(GenieServiceBase):
 
         with self._pool.connection() as conn:
             with conn.cursor() as cur:
-                cur.execute(stats_sql, (ttl_seconds, ttl_seconds, self.genie_space_id))
-                row: DbRow | None = cur.fetchone()
+                cur.execute(stats_sql, (ttl_seconds, ttl_seconds, self.space_id))
+                stats_row: DbRow | None = cur.fetchone()
                 return {
-                    "size": row.get("total", 0) if row else 0,
+                    "size": stats_row.get("total", 0) if stats_row else 0,
                     "ttl_seconds": ttl.total_seconds() if ttl else None,
                     "similarity_threshold": self.similarity_threshold,
-                    "expired_entries": row.get("expired", 0) if row else 0,
-                    "valid_entries": row.get("valid", 0) if row else 0,
+                    "expired_entries": stats_row.get("expired", 0) if stats_row else 0,
+                    "valid_entries": stats_row.get("valid", 0) if stats_row else 0,
                 }