dao-ai 0.0.25__py3-none-any.whl → 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. dao_ai/__init__.py +29 -0
  2. dao_ai/agent_as_code.py +5 -5
  3. dao_ai/cli.py +245 -40
  4. dao_ai/config.py +1863 -338
  5. dao_ai/genie/__init__.py +38 -0
  6. dao_ai/genie/cache/__init__.py +43 -0
  7. dao_ai/genie/cache/base.py +72 -0
  8. dao_ai/genie/cache/core.py +79 -0
  9. dao_ai/genie/cache/lru.py +347 -0
  10. dao_ai/genie/cache/semantic.py +970 -0
  11. dao_ai/genie/core.py +35 -0
  12. dao_ai/graph.py +27 -228
  13. dao_ai/hooks/__init__.py +9 -6
  14. dao_ai/hooks/core.py +27 -195
  15. dao_ai/logging.py +56 -0
  16. dao_ai/memory/__init__.py +10 -0
  17. dao_ai/memory/core.py +65 -30
  18. dao_ai/memory/databricks.py +402 -0
  19. dao_ai/memory/postgres.py +79 -38
  20. dao_ai/messages.py +6 -4
  21. dao_ai/middleware/__init__.py +125 -0
  22. dao_ai/middleware/assertions.py +806 -0
  23. dao_ai/middleware/base.py +50 -0
  24. dao_ai/middleware/core.py +67 -0
  25. dao_ai/middleware/guardrails.py +420 -0
  26. dao_ai/middleware/human_in_the_loop.py +232 -0
  27. dao_ai/middleware/message_validation.py +586 -0
  28. dao_ai/middleware/summarization.py +197 -0
  29. dao_ai/models.py +1306 -114
  30. dao_ai/nodes.py +261 -166
  31. dao_ai/optimization.py +674 -0
  32. dao_ai/orchestration/__init__.py +52 -0
  33. dao_ai/orchestration/core.py +294 -0
  34. dao_ai/orchestration/supervisor.py +278 -0
  35. dao_ai/orchestration/swarm.py +271 -0
  36. dao_ai/prompts.py +128 -31
  37. dao_ai/providers/databricks.py +645 -172
  38. dao_ai/state.py +157 -21
  39. dao_ai/tools/__init__.py +13 -5
  40. dao_ai/tools/agent.py +1 -3
  41. dao_ai/tools/core.py +64 -11
  42. dao_ai/tools/email.py +232 -0
  43. dao_ai/tools/genie.py +144 -295
  44. dao_ai/tools/mcp.py +220 -133
  45. dao_ai/tools/memory.py +50 -0
  46. dao_ai/tools/python.py +9 -14
  47. dao_ai/tools/search.py +14 -0
  48. dao_ai/tools/slack.py +22 -10
  49. dao_ai/tools/sql.py +202 -0
  50. dao_ai/tools/time.py +30 -7
  51. dao_ai/tools/unity_catalog.py +165 -88
  52. dao_ai/tools/vector_search.py +360 -40
  53. dao_ai/utils.py +218 -16
  54. dao_ai-0.1.2.dist-info/METADATA +455 -0
  55. dao_ai-0.1.2.dist-info/RECORD +64 -0
  56. {dao_ai-0.0.25.dist-info → dao_ai-0.1.2.dist-info}/WHEEL +1 -1
  57. dao_ai/chat_models.py +0 -204
  58. dao_ai/guardrails.py +0 -112
  59. dao_ai/tools/human_in_the_loop.py +0 -100
  60. dao_ai-0.0.25.dist-info/METADATA +0 -1165
  61. dao_ai-0.0.25.dist-info/RECORD +0 -41
  62. {dao_ai-0.0.25.dist-info → dao_ai-0.1.2.dist-info}/entry_points.txt +0 -0
  63. {dao_ai-0.0.25.dist-info → dao_ai-0.1.2.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,970 @@
+ """
+ Semantic cache implementation for Genie SQL queries using PostgreSQL pgvector.
+
+ This module provides a semantic cache that uses embeddings and similarity search
+ to find cached queries that match the intent of new questions. Cache entries are
+ partitioned by genie_space_id to ensure proper isolation between Genie spaces.
+
+ The cache supports conversation-aware embedding using a rolling window approach
+ to capture context from recent conversation turns, improving accuracy for
+ multi-turn conversations with anaphoric references.
+ """
+
+ from datetime import timedelta
+ from typing import Any
+
+ import mlflow
+ import pandas as pd
+ from databricks.sdk import WorkspaceClient
+ from databricks.sdk.service.dashboards import (
+     GenieListConversationMessagesResponse,
+     GenieMessage,
+ )
+ from databricks.sdk.service.sql import StatementResponse, StatementState
+ from databricks_ai_bridge.genie import GenieResponse
+ from loguru import logger
+
+ from dao_ai.config import (
+     DatabaseModel,
+     GenieSemanticCacheParametersModel,
+     LLMModel,
+     WarehouseModel,
+ )
+ from dao_ai.genie.cache.base import (
+     CacheResult,
+     GenieServiceBase,
+     SQLCacheEntry,
+ )
+
+ # Type alias for database rows (dicts due to row_factory=dict_row)
+ DbRow = dict[str, Any]
+
+
+ def get_conversation_history(
+     workspace_client: WorkspaceClient,
+     space_id: str,
+     conversation_id: str,
+     max_messages: int = 10,
+ ) -> list[GenieMessage]:
+     """
+     Retrieve conversation history from Genie.
+
+     Args:
+         workspace_client: The Databricks workspace client
+         space_id: The Genie space ID
+         conversation_id: The conversation ID to retrieve
+         max_messages: Maximum number of messages to retrieve
+
+     Returns:
+         List of GenieMessage objects representing the conversation history
+     """
+     try:
+         # Use the Genie API to retrieve conversation messages
+         response: GenieListConversationMessagesResponse = (
+             workspace_client.genie.list_conversation_messages(
+                 space_id=space_id,
+                 conversation_id=conversation_id,
+             )
+         )
+
+         # Return the most recent messages, up to max_messages
+         if response.messages is not None:
+             all_messages: list[GenieMessage] = list(response.messages)
+             return (
+                 all_messages[-max_messages:]
+                 if len(all_messages) > max_messages
+                 else all_messages
+             )
+         return []
+     except Exception as e:
+         logger.warning(
+             "Failed to retrieve conversation history",
+             conversation_id=conversation_id,
+             error=str(e),
+         )
+         return []
+
+
+ def build_context_string(
+     question: str,
+     conversation_messages: list[GenieMessage],
+     window_size: int,
+     max_tokens: int = 2000,
+ ) -> str:
+     """
+     Build a context-aware question string using a rolling window.
+
+     This function creates a concatenated string that includes recent conversation
+     turns to provide context for semantic similarity matching.
+
+     Args:
+         question: The current question
+         conversation_messages: List of previous conversation messages
+         window_size: Number of previous turns to include
+         max_tokens: Maximum estimated tokens (rough approximation: 4 chars = 1 token)
+
+     Returns:
+         Context-aware question string formatted for embedding
+     """
+     if window_size <= 0 or not conversation_messages:
+         return question
+
+     # Take the last window_size messages (most recent)
+     recent_messages = (
+         conversation_messages[-window_size:]
+         if len(conversation_messages) > window_size
+         else conversation_messages
+     )
+
+     # Build context parts
+     context_parts: list[str] = []
+
+     for msg in recent_messages:
+         # Only include messages with content from the history
+         if msg.content:
+             # Limit message length to prevent token overflow
+             content: str = msg.content
+             if len(content) > 500:  # Truncate very long messages
+                 content = content[:500] + "..."
+             context_parts.append(f"Previous: {content}")
+
+     # Add the current question
+     context_parts.append(f"Current: {question}")
+
+     # Join with newlines
+     context_string = "\n".join(context_parts)
+
+     # Rough token limit check (4 chars ≈ 1 token)
+     estimated_tokens = len(context_string) / 4
+     if estimated_tokens > max_tokens:
+         # Truncate to fit max_tokens
+         target_chars = max_tokens * 4
+         original_length = len(context_string)
+         context_string = context_string[:target_chars] + "..."
+         logger.trace(
+             "Truncated context string",
+             original_chars=original_length,
+             target_chars=target_chars,
+             max_tokens=max_tokens,
+         )
+
+     return context_string
+
+
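For a concrete picture of what this helper emits, here is a small sketch; the SimpleNamespace stand-ins only mimic the .content attribute of real GenieMessage objects:

    from types import SimpleNamespace

    from dao_ai.genie.cache.semantic import build_context_string

    history = [
        SimpleNamespace(content="Show me total sales by region"),
        SimpleNamespace(content="Filter to 2024 only"),
    ]
    print(build_context_string("Break that down by month", history, window_size=2))
    # Previous: Show me total sales by region
    # Previous: Filter to 2024 only
    # Current: Break that down by month
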
+ class SemanticCacheService(GenieServiceBase):
+     """
+     Semantic caching decorator that uses PostgreSQL pgvector for similarity lookup.
+
+     This service caches the SQL query generated by Genie along with an embedding
+     of the original question. On subsequent queries, it performs a semantic
+     similarity search to find cached queries that match the intent of the new
+     question.
+
+     Cache entries are partitioned by genie_space_id to ensure queries from different
+     Genie spaces don't return incorrect cache hits.
+
+     On a cache hit, it re-executes the cached SQL using the provided warehouse
+     to return fresh data while avoiding the Genie NL-to-SQL translation cost.
+
+     Example:
+         from dao_ai.config import GenieSemanticCacheParametersModel, DatabaseModel
+         from dao_ai.genie.cache import SemanticCacheService
+
+         cache_params = GenieSemanticCacheParametersModel(
+             database=database_model,
+             warehouse=warehouse_model,
+             embedding_model="databricks-gte-large-en",
+             time_to_live_seconds=86400,  # 24 hours
+             similarity_threshold=0.85
+         )
+         genie = SemanticCacheService(
+             impl=GenieService(Genie(space_id="my-space")),
+             parameters=cache_params
+         )
+
+     Thread-safe: uses connection pooling from psycopg_pool.
+     """
+
+     impl: GenieServiceBase
+     parameters: GenieSemanticCacheParametersModel
+     workspace_client: WorkspaceClient | None
+     name: str
+     _embeddings: Any  # DatabricksEmbeddings
+     _pool: Any  # ConnectionPool
+     _embedding_dims: int | None
+     _setup_complete: bool
+
+     def __init__(
+         self,
+         impl: GenieServiceBase,
+         parameters: GenieSemanticCacheParametersModel,
+         workspace_client: WorkspaceClient | None = None,
+         name: str | None = None,
+     ) -> None:
+         """
+         Initialize the semantic cache service.
+
+         Args:
+             impl: The underlying GenieServiceBase to delegate to on cache miss.
+                 The space_id will be obtained from impl.space_id.
+             parameters: Cache configuration including database, warehouse, and
+                 embedding model
+             workspace_client: Optional WorkspaceClient for retrieving conversation
+                 history. If None, conversation context will not be used.
+             name: Name for this cache layer (for logging). Defaults to class name.
+         """
+         self.impl = impl
+         self.parameters = parameters
+         self.workspace_client = workspace_client
+         self.name = name if name is not None else self.__class__.__name__
+         self._embeddings = None
+         self._pool = None
+         self._embedding_dims = None
+         self._setup_complete = False
+
+     def initialize(self) -> "SemanticCacheService":
+         """
+         Eagerly initialize the cache service.
+
+         Call this during tool creation to:
+         - Validate configuration early (fail fast)
+         - Create the database table before any requests
+         - Avoid first-request latency from lazy initialization
+
+         Returns:
+             self, for method chaining
+         """
+         self._setup()
+         return self
+
+     def _setup(self) -> None:
+         """Initialize embeddings and the database connection pool lazily."""
+         if self._setup_complete:
+             return
+
+         from dao_ai.memory.postgres import PostgresPoolManager
+
+         # Initialize embeddings
+         # Convert embedding_model to LLMModel if it's a string
+         embedding_model: LLMModel = (
+             LLMModel(name=self.parameters.embedding_model)
+             if isinstance(self.parameters.embedding_model, str)
+             else self.parameters.embedding_model
+         )
+         self._embeddings = embedding_model.as_embeddings_model()
+
+         # Auto-detect embedding dimensions if not provided
+         if self.parameters.embedding_dims is None:
+             sample_embedding: list[float] = self._embeddings.embed_query("test")
+             self._embedding_dims = len(sample_embedding)
+             logger.debug(
+                 "Auto-detected embedding dimensions",
+                 layer=self.name,
+                 dims=self._embedding_dims,
+             )
+         else:
+             self._embedding_dims = self.parameters.embedding_dims
+
+         # Get connection pool
+         self._pool = PostgresPoolManager.get_pool(self.parameters.database)
+
+         # Ensure table exists
+         self._create_table_if_not_exists()
+
+         self._setup_complete = True
+         logger.debug(
+             "Semantic cache initialized",
+             layer=self.name,
+             space_id=self.space_id,
+             table_name=self.table_name,
+             dims=self._embedding_dims,
+         )
+
+     @property
+     def database(self) -> DatabaseModel:
+         """The database used for storing cache entries."""
+         return self.parameters.database
+
+     @property
+     def warehouse(self) -> WarehouseModel:
+         """The warehouse used for executing cached SQL queries."""
+         return self.parameters.warehouse
+
+     @property
+     def time_to_live(self) -> timedelta | None:
+         """Time-to-live for cache entries. None means entries never expire."""
+         ttl = self.parameters.time_to_live_seconds
+         if ttl is None or ttl < 0:
+             return None
+         return timedelta(seconds=ttl)
+
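The TTL convention here is easy to misread, so a standalone restatement of the rule with hypothetical values (mirroring the property above rather than importing it):

    from datetime import timedelta

    def ttl_from_seconds(ttl_seconds: int | None) -> timedelta | None:
        # None or a negative value disables expiry entirely
        if ttl_seconds is None or ttl_seconds < 0:
            return None
        return timedelta(seconds=ttl_seconds)

    assert ttl_from_seconds(None) is None            # never expires
    assert ttl_from_seconds(-1) is None              # never expires
    assert ttl_from_seconds(86400) == timedelta(days=1)
    # Note: 0 is not "disabled"; it yields timedelta(0), so entries expire immediately.
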
+     @property
+     def similarity_threshold(self) -> float:
+         """Minimum similarity for cache hit (using L2 distance converted to similarity)."""
+         return self.parameters.similarity_threshold
+
+     @property
+     def embedding_dims(self) -> int:
+         """Dimension size for embeddings (auto-detected if not configured)."""
+         if self._embedding_dims is None:
+             raise RuntimeError(
+                 "Embedding dimensions not yet initialized. Call _setup() first."
+             )
+         return self._embedding_dims
+
+     @property
+     def table_name(self) -> str:
+         """Name of the cache table."""
+         return self.parameters.table_name
+
+     def _create_table_if_not_exists(self) -> None:
+         """Create the cache table with the pgvector extension if it doesn't exist.
+
+         If the table exists but has a different embedding dimension, it will be
+         dropped and recreated with the new dimension size.
+         """
+         create_extension_sql: str = "CREATE EXTENSION IF NOT EXISTS vector"
+
+         # Check if the table exists and get its current embedding dimensions.
+         # to_regclass() returns NULL (no error) for a missing table, so a fresh
+         # database does not abort the transaction here.
+         check_dims_sql: str = """
+             SELECT atttypmod
+             FROM pg_attribute
+             WHERE attrelid = to_regclass(%s)
+             AND attname = 'question_embedding'
+         """
+
+         create_table_sql: str = f"""
+             CREATE TABLE IF NOT EXISTS {self.table_name} (
+                 id SERIAL PRIMARY KEY,
+                 genie_space_id TEXT NOT NULL,
+                 question TEXT NOT NULL,
+                 conversation_context TEXT,
+                 context_string TEXT,
+                 question_embedding vector({self.embedding_dims}),
+                 context_embedding vector({self.embedding_dims}),
+                 sql_query TEXT NOT NULL,
+                 description TEXT,
+                 conversation_id TEXT,
+                 created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP
+             )
+         """
+         # Indexes for efficient similarity search partitioned by genie_space_id.
+         # Use L2 (Euclidean) distance - optimal for Databricks GTE embeddings.
+         create_question_embedding_index_sql: str = f"""
+             CREATE INDEX IF NOT EXISTS {self.table_name}_question_embedding_idx
+             ON {self.table_name}
+             USING ivfflat (question_embedding vector_l2_ops)
+             WITH (lists = 100)
+         """
+         create_context_embedding_index_sql: str = f"""
+             CREATE INDEX IF NOT EXISTS {self.table_name}_context_embedding_idx
+             ON {self.table_name}
+             USING ivfflat (context_embedding vector_l2_ops)
+             WITH (lists = 100)
+         """
+         # Index for filtering by genie_space_id
+         create_space_index_sql: str = f"""
+             CREATE INDEX IF NOT EXISTS {self.table_name}_space_idx
+             ON {self.table_name} (genie_space_id)
+         """
+
+         with self._pool.connection() as conn:
+             with conn.cursor() as cur:
+                 cur.execute(create_extension_sql)
+
+                 # Check if the table exists and verify embedding dimensions
+                 try:
+                     cur.execute(check_dims_sql, (self.table_name,))
+                     row: DbRow | None = cur.fetchone()
+                     if row is not None:
+                         # atttypmod for the vector type contains the dimension
+                         current_dims = row.get("atttypmod", 0)
+                         if current_dims != self.embedding_dims:
+                             logger.warning(
+                                 "Embedding dimension mismatch, dropping and recreating table",
+                                 layer=self.name,
+                                 table_dims=current_dims,
+                                 expected_dims=self.embedding_dims,
+                                 table_name=self.table_name,
+                             )
+                             cur.execute(f"DROP TABLE {self.table_name}")
+                 except Exception:
+                     # Table doesn't exist (or the probe failed), which is fine
+                     pass
+
+                 cur.execute(create_table_sql)
+                 cur.execute(create_space_index_sql)
+                 cur.execute(create_question_embedding_index_sql)
+                 cur.execute(create_context_embedding_index_sql)
+
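If you want to verify the dimension probe by hand, a sketch using psycopg 3 with dict_row; the DSN and table name below are placeholders:

    import psycopg
    from psycopg.rows import dict_row

    with psycopg.connect("dbname=app", row_factory=dict_row) as conn:
        row = conn.execute(
            "SELECT atttypmod FROM pg_attribute "
            "WHERE attrelid = to_regclass(%s) AND attname = 'question_embedding'",
            ("genie_semantic_cache",),
        ).fetchone()
    # pgvector records the declared dimension in atttypmod,
    # e.g. {'atttypmod': 1024}; None means the table does not exist yet.
    print(row)
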
+     def _embed_question(
+         self, question: str, conversation_id: str | None = None
+     ) -> tuple[list[float], list[float], str]:
+         """
+         Generate dual embeddings: one for the question, one for the conversation context.
+
+         This enables separate matching of question similarity vs context similarity,
+         improving precision by ensuring both the question AND the conversation context
+         are semantically similar before returning a cached result.
+
+         Args:
+             question: The question to embed
+             conversation_id: Optional conversation ID for retrieving context
+
+         Returns:
+             Tuple of (question_embedding, context_embedding, conversation_context_string)
+             - question_embedding: Vector embedding of just the question
+             - context_embedding: Vector embedding of the conversation context (or zero vector if no context)
+             - conversation_context_string: The conversation context string (empty if no context)
+         """
+         conversation_context = ""
+
+         # If conversation context is enabled and available
+         if (
+             self.workspace_client is not None
+             and conversation_id is not None
+             and self.parameters.context_window_size > 0
+         ):
+             try:
+                 # Retrieve conversation history
+                 conversation_messages = get_conversation_history(
+                     workspace_client=self.workspace_client,
+                     space_id=self.space_id,
+                     conversation_id=conversation_id,
+                     max_messages=self.parameters.context_window_size
+                     * 2,  # Get extra for safety
+                 )
+
+                 # Build context string (just the "Previous:" messages, not the current question)
+                 if conversation_messages:
+                     recent_messages = (
+                         conversation_messages[-self.parameters.context_window_size :]
+                         if len(conversation_messages)
+                         > self.parameters.context_window_size
+                         else conversation_messages
+                     )
+
+                     context_parts: list[str] = []
+                     for msg in recent_messages:
+                         if msg.content:
+                             content: str = msg.content
+                             if len(content) > 500:
+                                 content = content[:500] + "..."
+                             context_parts.append(f"Previous: {content}")
+
+                     conversation_context = "\n".join(context_parts)
+
+                     # Truncate if too long
+                     estimated_tokens = len(conversation_context) / 4
+                     if estimated_tokens > self.parameters.max_context_tokens:
+                         target_chars = self.parameters.max_context_tokens * 4
+                         conversation_context = (
+                             conversation_context[:target_chars] + "..."
+                         )
+
+                     logger.trace(
+                         "Using conversation context",
+                         layer=self.name,
+                         messages_count=len(conversation_messages),
+                         window_size=self.parameters.context_window_size,
+                     )
+             except Exception as e:
+                 logger.warning(
+                     "Failed to build conversation context, using question only",
+                     layer=self.name,
+                     error=str(e),
+                 )
+                 conversation_context = ""
+
+         # Generate dual embeddings
+         if conversation_context:
+             # Embed both question and context
+             embeddings: list[list[float]] = self._embeddings.embed_documents(
+                 [question, conversation_context]
+             )
+             question_embedding = embeddings[0]
+             context_embedding = embeddings[1]
+         else:
+             # Only embed question, use zero vector for context
+             embeddings = self._embeddings.embed_documents([question])
+             question_embedding = embeddings[0]
+             context_embedding = [0.0] * len(question_embedding)  # Zero vector
+
+         return question_embedding, context_embedding, conversation_context
+
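A note on the zero-vector fallback: under the 1 / (1 + L2 distance) mapping used in the lookup below, two zero vectors are identical, so the context check passes automatically when neither the query nor the cached entry carried any context. An illustrative sketch (not part of the package):

    import math

    def l2_similarity(a: list[float], b: list[float]) -> float:
        # pgvector's <-> operator returns L2 distance; 1/(1+d) maps it into (0, 1]
        return 1.0 / (1.0 + math.dist(a, b))

    no_context_query = [0.0] * 4
    no_context_cached = [0.0] * 4
    assert l2_similarity(no_context_query, no_context_cached) == 1.0
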
+     @mlflow.trace(name="semantic_search")
+     def _find_similar(
+         self,
+         question: str,
+         conversation_context: str,
+         question_embedding: list[float],
+         context_embedding: list[float],
+     ) -> tuple[SQLCacheEntry, float] | None:
+         """
+         Find a semantically similar cached entry using dual embedding matching.
+
+         This method matches BOTH the question AND the conversation context separately,
+         ensuring high precision by requiring both to be semantically similar.
+
+         Args:
+             question: The original question (for logging)
+             conversation_context: The conversation context string
+             question_embedding: The embedding vector of just the question
+             context_embedding: The embedding vector of the conversation context
+
+         Returns:
+             Tuple of (SQLCacheEntry, combined_similarity_score) if found, None otherwise
+         """
+         # Use L2 (Euclidean) distance - optimal for Databricks GTE embeddings.
+         # pgvector's <-> operator returns L2 distance (0 = identical).
+         # Convert to similarity: 1 / (1 + distance) gives the range (0, 1].
+         #
+         # Dual embedding strategy:
+         # 1. Calculate separate similarities for question and context
+         # 2. BOTH must exceed their respective thresholds
+         # 3. Combined score is a weighted average
+         # 4. Refresh-on-hit: check TTL after the similarity check
+         ttl_seconds = self.parameters.time_to_live_seconds
+         ttl_disabled = ttl_seconds is None or ttl_seconds < 0
+
+         # When TTL is disabled, all entries are always valid
+         if ttl_disabled:
+             is_valid_expr = "TRUE"
+         else:
+             is_valid_expr = f"created_at > NOW() - INTERVAL '{ttl_seconds} seconds'"
+
+         # Weighted combined similarity for ordering
+         question_weight: float = self.parameters.question_weight
+         context_weight: float = self.parameters.context_weight
+
+         search_sql: str = f"""
+             SELECT
+                 id,
+                 question,
+                 conversation_context,
+                 sql_query,
+                 description,
+                 conversation_id,
+                 created_at,
+                 1.0 / (1.0 + (question_embedding <-> %s::vector)) as question_similarity,
+                 1.0 / (1.0 + (context_embedding <-> %s::vector)) as context_similarity,
+                 ({question_weight} * (1.0 / (1.0 + (question_embedding <-> %s::vector)))) +
+                 ({context_weight} * (1.0 / (1.0 + (context_embedding <-> %s::vector)))) as combined_similarity,
+                 {is_valid_expr} as is_valid
+             FROM {self.table_name}
+             WHERE genie_space_id = %s
+             ORDER BY combined_similarity DESC
+             LIMIT 1
+         """
+
+         question_emb_str: str = f"[{','.join(str(x) for x in question_embedding)}]"
+         context_emb_str: str = f"[{','.join(str(x) for x in context_embedding)}]"
+
+         with self._pool.connection() as conn:
+             with conn.cursor() as cur:
+                 cur.execute(
+                     search_sql,
+                     (
+                         question_emb_str,
+                         context_emb_str,
+                         question_emb_str,
+                         context_emb_str,
+                         self.space_id,
+                     ),
+                 )
+                 row: DbRow | None = cur.fetchone()
+
+                 if row is None:
+                     logger.info(
+                         "Cache MISS (no entries)",
+                         layer=self.name,
+                         question_prefix=question[:50],
+                         space=self.space_id,
+                     )
+                     return None
+
+                 # Extract values from the dict row
+                 entry_id: Any = row.get("id")
+                 cached_question: str = row.get("question", "")
+                 cached_context: str = row.get("conversation_context", "")
+                 sql_query: str = row["sql_query"]
+                 description: str = row.get("description", "")
+                 conversation_id_cached: str = row.get("conversation_id", "")
+                 created_at: Any = row["created_at"]
+                 question_similarity: float = row["question_similarity"]
+                 context_similarity: float = row["context_similarity"]
+                 combined_similarity: float = row["combined_similarity"]
+                 is_valid: bool = row.get("is_valid", False)
+
+                 # Log best-match info
+                 logger.debug(
+                     "Best match found",
+                     layer=self.name,
+                     question_sim=f"{question_similarity:.4f}",
+                     context_sim=f"{context_similarity:.4f}",
+                     combined_sim=f"{combined_similarity:.4f}",
+                     is_valid=is_valid,
+                     cached_question_prefix=cached_question[:50],
+                     cached_context_prefix=cached_context[:80],
+                 )
+
+                 # Check BOTH similarity thresholds (dual embedding precision check)
+                 if question_similarity < self.parameters.similarity_threshold:
+                     logger.info(
+                         "Cache MISS (question similarity too low)",
+                         layer=self.name,
+                         question_sim=f"{question_similarity:.4f}",
+                         threshold=self.parameters.similarity_threshold,
+                     )
+                     return None
+
+                 if context_similarity < self.parameters.context_similarity_threshold:
+                     logger.info(
+                         "Cache MISS (context similarity too low)",
+                         layer=self.name,
+                         context_sim=f"{context_similarity:.4f}",
+                         threshold=self.parameters.context_similarity_threshold,
+                     )
+                     return None
+
+                 # Check TTL - refresh-on-hit strategy
+                 if not is_valid:
+                     # Entry is expired - delete it and return a miss to trigger a refresh
+                     delete_sql = f"DELETE FROM {self.table_name} WHERE id = %s"
+                     cur.execute(delete_sql, (entry_id,))
+                     logger.info(
+                         "Cache MISS (expired, deleted for refresh)",
+                         layer=self.name,
+                         combined_sim=f"{combined_similarity:.4f}",
+                         ttl_seconds=ttl_seconds,
+                         cached_question_prefix=cached_question[:50],
+                     )
+                     return None
+
+                 logger.info(
+                     "Cache HIT",
+                     layer=self.name,
+                     question_sim=f"{question_similarity:.4f}",
+                     context_sim=f"{context_similarity:.4f}",
+                     combined_sim=f"{combined_similarity:.4f}",
+                     cached_question_prefix=cached_question[:50],
+                 )
+
+                 entry = SQLCacheEntry(
+                     query=sql_query,
+                     description=description,
+                     conversation_id=conversation_id_cached,
+                     created_at=created_at,
+                 )
+                 return entry, combined_similarity
+
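To make the gating concrete, a worked example with hypothetical weights and thresholds (the real values come from GenieSemanticCacheParametersModel): both component similarities must clear their own thresholds; the weighted score only decides which row wins the LIMIT 1 ordering.

    question_weight, context_weight = 0.7, 0.3   # hypothetical configuration
    question_sim, context_sim = 0.90, 0.60
    combined = question_weight * question_sim + context_weight * context_sim
    print(f"{combined:.2f}")  # 0.81
    # Even at combined = 0.81 this row is still a MISS if, say,
    # context_similarity_threshold = 0.70, because 0.60 < 0.70.
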
+     def _store_entry(
+         self,
+         question: str,
+         conversation_context: str,
+         question_embedding: list[float],
+         context_embedding: list[float],
+         response: GenieResponse,
+     ) -> None:
+         """Store a new cache entry with dual embeddings for this Genie space."""
+         insert_sql: str = f"""
+             INSERT INTO {self.table_name}
+             (genie_space_id, question, conversation_context, context_string,
+              question_embedding, context_embedding, sql_query, description, conversation_id)
+             VALUES (%s, %s, %s, %s, %s::vector, %s::vector, %s, %s, %s)
+         """
+         question_emb_str: str = f"[{','.join(str(x) for x in question_embedding)}]"
+         context_emb_str: str = f"[{','.join(str(x) for x in context_embedding)}]"
+
+         # Build full context string for backward compatibility (used in logging)
+         if conversation_context:
+             full_context_string = f"{conversation_context}\nCurrent: {question}"
+         else:
+             full_context_string = question
+
+         with self._pool.connection() as conn:
+             with conn.cursor() as cur:
+                 cur.execute(
+                     insert_sql,
+                     (
+                         self.space_id,
+                         question,
+                         conversation_context,
+                         full_context_string,
+                         question_emb_str,
+                         context_emb_str,
+                         response.query,
+                         response.description,
+                         response.conversation_id,
+                     ),
+                 )
+                 logger.info(
+                     "Stored cache entry",
+                     layer=self.name,
+                     question_prefix=question[:50],
+                     context_prefix=conversation_context[:80],
+                     sql_prefix=response.query[:50] if response.query else None,
+                     space=self.space_id,
+                     table=self.table_name,
+                 )
+
+     @mlflow.trace(name="execute_cached_sql_semantic")
+     def _execute_sql(self, sql: str) -> pd.DataFrame | str:
+         """Execute SQL using the warehouse and return results."""
+         client: WorkspaceClient = self.warehouse.workspace_client
+         warehouse_id: str = str(self.warehouse.warehouse_id)
+
+         statement_response: StatementResponse = (
+             client.statement_execution.execute_statement(
+                 warehouse_id=warehouse_id,
+                 statement=sql,
+                 wait_timeout="30s",
+             )
+         )
+
+         if (
+             statement_response.status is not None
+             and statement_response.status.state != StatementState.SUCCEEDED
+         ):
+             error_msg: str = (
+                 f"SQL execution failed: {statement_response.status.error.message}"
+                 if statement_response.status.error is not None
+                 else f"SQL execution failed with state: {statement_response.status.state}"
+             )
+             logger.error("SQL execution failed", layer=self.name, error=error_msg)
+             return error_msg
+
+         if statement_response.result and statement_response.result.data_array:
+             columns: list[str] = []
+             if (
+                 statement_response.manifest
+                 and statement_response.manifest.schema
+                 and statement_response.manifest.schema.columns
+             ):
+                 columns = [
+                     col.name
+                     for col in statement_response.manifest.schema.columns
+                     if col.name is not None
+                 ]
+
+             data: list[list[Any]] = statement_response.result.data_array
+             if columns:
+                 return pd.DataFrame(data, columns=columns)
+             else:
+                 return pd.DataFrame(data)
+
+         return pd.DataFrame()
+
+     def ask_question(
+         self, question: str, conversation_id: str | None = None
+     ) -> CacheResult:
+         """
+         Ask a question, using the semantic cache if a similar query exists.
+
+         On a cache hit, re-executes the cached SQL to get fresh data.
+         Returns a CacheResult with cache metadata.
+         """
+         return self.ask_question_with_cache_info(question, conversation_id)
+
+     @mlflow.trace(name="genie_semantic_cache_lookup")
+     def ask_question_with_cache_info(
+         self,
+         question: str,
+         conversation_id: str | None = None,
+     ) -> CacheResult:
+         """
+         Ask a question with detailed cache-hit information.
+
+         On a cache hit, the cached SQL is re-executed to return fresh data, but the
+         conversation_id returned is the current conversation_id (not the cached one).
+
+         Args:
+             question: The question to ask
+             conversation_id: Optional conversation ID for context and continuation
+
+         Returns:
+             CacheResult with a fresh response and cache metadata
+         """
+         # Ensure initialization (lazy init if initialize() wasn't called)
+         self._setup()
+
+         # Generate dual embeddings for the question and conversation context
+         question_embedding: list[float]
+         context_embedding: list[float]
+         conversation_context: str
+         question_embedding, context_embedding, conversation_context = (
+             self._embed_question(question, conversation_id)
+         )
+
+         # Check the cache using dual embedding similarity
+         cache_result: tuple[SQLCacheEntry, float] | None = self._find_similar(
+             question, conversation_context, question_embedding, context_embedding
+         )
+
+         if cache_result is not None:
+             cached, combined_similarity = cache_result
+             logger.debug(
+                 "Semantic cache hit",
+                 layer=self.name,
+                 combined_similarity=f"{combined_similarity:.3f}",
+                 question_prefix=question[:50],
+             )
+
+             # Re-execute the cached SQL to get fresh data
+             fresh_data: pd.DataFrame | str = self._execute_sql(cached.query)
+
+             # IMPORTANT: Use the current conversation_id (from the request), not the
+             # cached one. This ensures the conversation continues properly.
+             response: GenieResponse = GenieResponse(
+                 result=fresh_data,
+                 query=cached.query,
+                 description=cached.description,
+                 conversation_id=conversation_id
+                 if conversation_id
+                 else cached.conversation_id,
+             )
+
+             return CacheResult(response=response, cache_hit=True, served_by=self.name)
+
+         # Cache miss - delegate to the wrapped service
+         logger.trace("Cache miss", layer=self.name, question_prefix=question[:50])
+
+         result: CacheResult = self.impl.ask_question(question, conversation_id)
+
+         # Store in the cache if we got a SQL query
+         if result.response.query:
+             logger.info(
+                 "Storing new cache entry",
+                 layer=self.name,
+                 question_prefix=question[:50],
+                 space=self.space_id,
+             )
+             self._store_entry(
+                 question,
+                 conversation_context,
+                 question_embedding,
+                 context_embedding,
+                 result.response,
+             )
+         else:
+             logger.warning(
+                 "Not caching: response has no SQL query",
+                 layer=self.name,
+                 question_prefix=question[:50],
+             )
+
+         return CacheResult(response=result.response, cache_hit=False, served_by=None)
+
+     @property
+     def space_id(self) -> str:
+         return self.impl.space_id
+
+     def invalidate_expired(self) -> int:
+         """Remove expired entries from the cache for this Genie space.
+
+         Returns 0 if TTL is disabled (entries never expire).
+         """
+         self._setup()
+         ttl_seconds = self.parameters.time_to_live_seconds
+
+         # If TTL is disabled, nothing can expire
+         if ttl_seconds is None or ttl_seconds < 0:
+             logger.trace(
+                 "TTL disabled, no entries to expire",
+                 layer=self.name,
+                 space=self.space_id,
+             )
+             return 0
+
+         # Bind the TTL via interval multiplication: a placeholder cannot sit
+         # inside a quoted literal such as INTERVAL '%s seconds'.
+         delete_sql: str = f"""
+             DELETE FROM {self.table_name}
+             WHERE genie_space_id = %s
+             AND created_at < NOW() - (%s * INTERVAL '1 second')
+         """
+
+         with self._pool.connection() as conn:
+             with conn.cursor() as cur:
+                 cur.execute(delete_sql, (self.space_id, ttl_seconds))
+                 deleted: int = cur.rowcount
+                 logger.trace(
+                     "Deleted expired entries",
+                     layer=self.name,
+                     deleted_count=deleted,
+                     space=self.space_id,
+                 )
+                 return deleted
+
+     def clear(self) -> int:
+         """Clear all entries from the cache for this Genie space."""
+         self._setup()
+         delete_sql: str = f"DELETE FROM {self.table_name} WHERE genie_space_id = %s"
+
+         with self._pool.connection() as conn:
+             with conn.cursor() as cur:
+                 cur.execute(delete_sql, (self.space_id,))
+                 deleted: int = cur.rowcount
+                 logger.debug(
+                     "Cleared cache entries",
+                     layer=self.name,
+                     deleted_count=deleted,
+                     space=self.space_id,
+                 )
+                 return deleted
+
+     @property
+     def size(self) -> int:
+         """Current number of entries in the cache for this Genie space."""
+         self._setup()
+         count_sql: str = (
+             f"SELECT COUNT(*) as count FROM {self.table_name} WHERE genie_space_id = %s"
+         )
+
+         with self._pool.connection() as conn:
+             with conn.cursor() as cur:
+                 cur.execute(count_sql, (self.space_id,))
+                 row: DbRow | None = cur.fetchone()
+                 return row.get("count", 0) if row else 0
+
+     def stats(self) -> dict[str, int | float | None]:
+         """Return cache statistics for this Genie space."""
+         self._setup()
+         ttl_seconds = self.parameters.time_to_live_seconds
+         ttl = self.time_to_live
+
+         # If TTL is disabled, all entries are valid
+         if ttl_seconds is None or ttl_seconds < 0:
+             count_sql: str = f"""
+                 SELECT COUNT(*) as total FROM {self.table_name}
+                 WHERE genie_space_id = %s
+             """
+             with self._pool.connection() as conn:
+                 with conn.cursor() as cur:
+                     cur.execute(count_sql, (self.space_id,))
+                     row: DbRow | None = cur.fetchone()
+                     total = row.get("total", 0) if row else 0
+                     return {
+                         "size": total,
+                         "ttl_seconds": None,
+                         "similarity_threshold": self.similarity_threshold,
+                         "expired_entries": 0,
+                         "valid_entries": total,
+                     }
+
+         # As above, bind the TTL through interval multiplication rather than a
+         # placeholder inside a quoted INTERVAL literal.
+         stats_sql: str = f"""
+             SELECT
+                 COUNT(*) as total,
+                 COUNT(*) FILTER (WHERE created_at > NOW() - (%s * INTERVAL '1 second')) as valid,
+                 COUNT(*) FILTER (WHERE created_at <= NOW() - (%s * INTERVAL '1 second')) as expired
+             FROM {self.table_name}
+             WHERE genie_space_id = %s
+         """
+
+         with self._pool.connection() as conn:
+             with conn.cursor() as cur:
+                 cur.execute(stats_sql, (ttl_seconds, ttl_seconds, self.space_id))
+                 stats_row: DbRow | None = cur.fetchone()
+                 return {
+                     "size": stats_row.get("total", 0) if stats_row else 0,
+                     "ttl_seconds": ttl.total_seconds() if ttl else None,
+                     "similarity_threshold": self.similarity_threshold,
+                     "expired_entries": stats_row.get("expired", 0) if stats_row else 0,
+                     "valid_entries": stats_row.get("valid", 0) if stats_row else 0,
+                 }
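
Putting it together, a minimal end-to-end sketch; it assumes the GenieService/Genie wiring from the class docstring example (whose import paths are likewise not shown there) and a cache_params built as in that example:

    from dao_ai.genie.cache import SemanticCacheService

    cache = SemanticCacheService(
        impl=GenieService(Genie(space_id="my-space")),
        parameters=cache_params,
    ).initialize()  # fail fast: validate config and create the table up front

    first = cache.ask_question("What were sales last quarter?")   # miss: Genie runs, entry stored
    second = cache.ask_question("Show me last quarter's sales")   # near-duplicate: likely a hit
    print(second.cache_hit, second.served_by)
    print(cache.stats())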