dao-ai 0.0.35__py3-none-any.whl → 0.0.36__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dao_ai/config.py +29 -0
- dao_ai/genie/__init__.py +59 -0
- dao_ai/genie/cache/__init__.py +44 -0
- dao_ai/genie/cache/base.py +122 -0
- dao_ai/genie/cache/lru.py +306 -0
- dao_ai/genie/cache/semantic.py +638 -0
- dao_ai/tools/__init__.py +3 -0
- dao_ai/tools/genie/__init__.py +236 -0
- dao_ai/tools/genie.py +65 -15
- dao_ai-0.0.36.dist-info/METADATA +951 -0
- {dao_ai-0.0.35.dist-info → dao_ai-0.0.36.dist-info}/RECORD +14 -8
- dao_ai-0.0.35.dist-info/METADATA +0 -1169
- {dao_ai-0.0.35.dist-info → dao_ai-0.0.36.dist-info}/WHEEL +0 -0
- {dao_ai-0.0.35.dist-info → dao_ai-0.0.36.dist-info}/entry_points.txt +0 -0
- {dao_ai-0.0.35.dist-info → dao_ai-0.0.36.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,638 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Semantic cache implementation for Genie SQL queries using PostgreSQL pg_vector.
|
|
3
|
+
|
|
4
|
+
This module provides a semantic cache that uses embeddings and similarity search
|
|
5
|
+
to find cached queries that match the intent of new questions. Cache entries are
|
|
6
|
+
partitioned by genie_space_id to ensure proper isolation between Genie spaces.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from datetime import timedelta
|
|
10
|
+
from typing import Any
|
|
11
|
+
|
|
12
|
+
import mlflow
|
|
13
|
+
import pandas as pd
|
|
14
|
+
from databricks.sdk import WorkspaceClient
|
|
15
|
+
from databricks.sdk.service.sql import StatementResponse, StatementState
|
|
16
|
+
from databricks_ai_bridge.genie import GenieResponse
|
|
17
|
+
from loguru import logger
|
|
18
|
+
from mlflow.entities import SpanType
|
|
19
|
+
|
|
20
|
+
from dao_ai.config import (
|
|
21
|
+
DatabaseModel,
|
|
22
|
+
GenieSemanticCacheParametersModel,
|
|
23
|
+
WarehouseModel,
|
|
24
|
+
)
|
|
25
|
+
from dao_ai.genie.cache.base import (
|
|
26
|
+
CacheResult,
|
|
27
|
+
GenieServiceBase,
|
|
28
|
+
SQLCacheEntry,
|
|
29
|
+
)
|
|
30
|
+
|
|
31
|
+
# Type alias for database row (dict due to row_factory=dict_row)
DbRow = dict[str, Any]


class SemanticCacheService(GenieServiceBase):
    """
    Semantic caching decorator that uses PostgreSQL pg_vector for similarity lookup.

    This service caches the SQL query generated by Genie along with an embedding
    of the original question. On subsequent queries, it performs a semantic similarity
    search to find cached queries that match the intent of the new question.

    Cache entries are partitioned by genie_space_id to ensure queries from different
    Genie spaces don't return incorrect cache hits.

    On cache hit, it re-executes the cached SQL using the provided warehouse
    to return fresh data while avoiding the Genie NL-to-SQL translation cost.

    Example:
        from dao_ai.config import GenieSemanticCacheParametersModel, DatabaseModel
        from dao_ai.genie.cache import SemanticCacheService

        cache_params = GenieSemanticCacheParametersModel(
            database=database_model,
            warehouse=warehouse_model,
            embedding_model="databricks-gte-large-en",
            time_to_live_seconds=86400,  # 24 hours
            similarity_threshold=0.85
        )
        genie = SemanticCacheService(
            impl=GenieService(Genie(space_id="my-space")),
            parameters=cache_params,
            genie_space_id="my-space"
        )

    Thread-safe: Uses connection pooling from psycopg_pool.
    """

    # Wrapped service that performs the real NL-to-SQL translation on a cache miss.
    impl: GenieServiceBase
    # Cache configuration: database, warehouse, embedding model, TTL, threshold.
    parameters: GenieSemanticCacheParametersModel
    # Partition key: cache entries are isolated per Genie space.
    genie_space_id: str
    # Label used in log messages; defaults to the class name.
    name: str
    _embeddings: Any  # DatabricksEmbeddings (imported lazily in _setup)
    _pool: Any  # ConnectionPool (obtained from PostgresPoolManager)
    _embedding_dims: int | None  # resolved lazily; None until _setup() runs
    _setup_complete: bool  # guards one-time lazy initialization
|
|
77
|
+
|
|
78
|
+
def __init__(
|
|
79
|
+
self,
|
|
80
|
+
impl: GenieServiceBase,
|
|
81
|
+
parameters: GenieSemanticCacheParametersModel,
|
|
82
|
+
genie_space_id: str,
|
|
83
|
+
name: str | None = None,
|
|
84
|
+
) -> None:
|
|
85
|
+
"""
|
|
86
|
+
Initialize the semantic cache service.
|
|
87
|
+
|
|
88
|
+
Args:
|
|
89
|
+
impl: The underlying GenieServiceBase to delegate to on cache miss
|
|
90
|
+
parameters: Cache configuration including database, warehouse, embedding model
|
|
91
|
+
genie_space_id: The Genie space ID for partitioning cache entries
|
|
92
|
+
name: Name for this cache layer (for logging). Defaults to class name.
|
|
93
|
+
"""
|
|
94
|
+
self.impl = impl
|
|
95
|
+
self.parameters = parameters
|
|
96
|
+
self.genie_space_id = genie_space_id
|
|
97
|
+
self.name = name if name is not None else self.__class__.__name__
|
|
98
|
+
self._embeddings = None
|
|
99
|
+
self._pool = None
|
|
100
|
+
self._embedding_dims = None
|
|
101
|
+
self._setup_complete = False
|
|
102
|
+
|
|
103
|
+
def initialize(self) -> "SemanticCacheService":
|
|
104
|
+
"""
|
|
105
|
+
Eagerly initialize the cache service.
|
|
106
|
+
|
|
107
|
+
Call this during tool creation to:
|
|
108
|
+
- Validate configuration early (fail fast)
|
|
109
|
+
- Create the database table before any requests
|
|
110
|
+
- Avoid first-request latency from lazy initialization
|
|
111
|
+
|
|
112
|
+
Returns:
|
|
113
|
+
self for method chaining
|
|
114
|
+
"""
|
|
115
|
+
self._setup()
|
|
116
|
+
return self
|
|
117
|
+
|
|
118
|
+
def _setup(self) -> None:
|
|
119
|
+
"""Initialize embeddings and database connection pool lazily."""
|
|
120
|
+
if self._setup_complete:
|
|
121
|
+
return
|
|
122
|
+
|
|
123
|
+
from databricks_langchain import DatabricksEmbeddings
|
|
124
|
+
|
|
125
|
+
from dao_ai.memory.postgres import PostgresPoolManager
|
|
126
|
+
|
|
127
|
+
# Initialize embeddings
|
|
128
|
+
embedding_model: str = (
|
|
129
|
+
self.parameters.embedding_model
|
|
130
|
+
if isinstance(self.parameters.embedding_model, str)
|
|
131
|
+
else self.parameters.embedding_model.name
|
|
132
|
+
)
|
|
133
|
+
self._embeddings = DatabricksEmbeddings(endpoint=embedding_model)
|
|
134
|
+
|
|
135
|
+
# Auto-detect embedding dimensions if not provided
|
|
136
|
+
if self.parameters.embedding_dims is None:
|
|
137
|
+
sample_embedding: list[float] = self._embeddings.embed_query("test")
|
|
138
|
+
self._embedding_dims = len(sample_embedding)
|
|
139
|
+
logger.debug(
|
|
140
|
+
f"[{self.name}] Auto-detected embedding dimensions: {self._embedding_dims}"
|
|
141
|
+
)
|
|
142
|
+
else:
|
|
143
|
+
self._embedding_dims = self.parameters.embedding_dims
|
|
144
|
+
|
|
145
|
+
# Get connection pool
|
|
146
|
+
self._pool = PostgresPoolManager.get_pool(self.parameters.database)
|
|
147
|
+
|
|
148
|
+
# Ensure table exists
|
|
149
|
+
self._create_table_if_not_exists()
|
|
150
|
+
|
|
151
|
+
self._setup_complete = True
|
|
152
|
+
logger.debug(
|
|
153
|
+
f"[{self.name}] Semantic cache initialized for space '{self.genie_space_id}' "
|
|
154
|
+
f"with table '{self.table_name}' (dims={self._embedding_dims})"
|
|
155
|
+
)
|
|
156
|
+
|
|
157
|
+
@property
|
|
158
|
+
def database(self) -> DatabaseModel:
|
|
159
|
+
"""The database used for storing cache entries."""
|
|
160
|
+
return self.parameters.database
|
|
161
|
+
|
|
162
|
+
@property
|
|
163
|
+
def warehouse(self) -> WarehouseModel:
|
|
164
|
+
"""The warehouse used for executing cached SQL queries."""
|
|
165
|
+
return self.parameters.warehouse
|
|
166
|
+
|
|
167
|
+
@property
|
|
168
|
+
def time_to_live(self) -> timedelta | None:
|
|
169
|
+
"""Time-to-live for cache entries. None means never expires."""
|
|
170
|
+
ttl = self.parameters.time_to_live_seconds
|
|
171
|
+
if ttl is None or ttl < 0:
|
|
172
|
+
return None
|
|
173
|
+
return timedelta(seconds=ttl)
|
|
174
|
+
|
|
175
|
+
@property
|
|
176
|
+
def similarity_threshold(self) -> float:
|
|
177
|
+
"""Minimum similarity for cache hit (using L2 distance converted to similarity)."""
|
|
178
|
+
return self.parameters.similarity_threshold
|
|
179
|
+
|
|
180
|
+
@property
|
|
181
|
+
def embedding_dims(self) -> int:
|
|
182
|
+
"""Dimension size for embeddings (auto-detected if not configured)."""
|
|
183
|
+
if self._embedding_dims is None:
|
|
184
|
+
raise RuntimeError(
|
|
185
|
+
"Embedding dimensions not yet initialized. Call _setup() first."
|
|
186
|
+
)
|
|
187
|
+
return self._embedding_dims
|
|
188
|
+
|
|
189
|
+
@property
|
|
190
|
+
def table_name(self) -> str:
|
|
191
|
+
"""Name of the cache table."""
|
|
192
|
+
return self.parameters.table_name
|
|
193
|
+
|
|
194
|
+
def _create_table_if_not_exists(self) -> None:
|
|
195
|
+
"""Create the cache table with pg_vector extension if it doesn't exist.
|
|
196
|
+
|
|
197
|
+
If the table exists but has a different embedding dimension, it will be
|
|
198
|
+
dropped and recreated with the new dimension size.
|
|
199
|
+
"""
|
|
200
|
+
create_extension_sql: str = "CREATE EXTENSION IF NOT EXISTS vector"
|
|
201
|
+
|
|
202
|
+
# Check if table exists and get current embedding dimensions
|
|
203
|
+
check_dims_sql: str = """
|
|
204
|
+
SELECT atttypmod
|
|
205
|
+
FROM pg_attribute
|
|
206
|
+
WHERE attrelid = %s::regclass
|
|
207
|
+
AND attname = 'question_embedding'
|
|
208
|
+
"""
|
|
209
|
+
|
|
210
|
+
create_table_sql: str = f"""
|
|
211
|
+
CREATE TABLE IF NOT EXISTS {self.table_name} (
|
|
212
|
+
id SERIAL PRIMARY KEY,
|
|
213
|
+
genie_space_id TEXT NOT NULL,
|
|
214
|
+
question TEXT NOT NULL,
|
|
215
|
+
question_embedding vector({self.embedding_dims}),
|
|
216
|
+
sql_query TEXT NOT NULL,
|
|
217
|
+
description TEXT,
|
|
218
|
+
conversation_id TEXT,
|
|
219
|
+
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP
|
|
220
|
+
)
|
|
221
|
+
"""
|
|
222
|
+
# Index for efficient similarity search partitioned by genie_space_id
|
|
223
|
+
# Use L2 (Euclidean) distance - optimal for Databricks GTE embeddings
|
|
224
|
+
create_embedding_index_sql: str = f"""
|
|
225
|
+
CREATE INDEX IF NOT EXISTS {self.table_name}_embedding_idx
|
|
226
|
+
ON {self.table_name}
|
|
227
|
+
USING ivfflat (question_embedding vector_l2_ops)
|
|
228
|
+
WITH (lists = 100)
|
|
229
|
+
"""
|
|
230
|
+
# Index for filtering by genie_space_id
|
|
231
|
+
create_space_index_sql: str = f"""
|
|
232
|
+
CREATE INDEX IF NOT EXISTS {self.table_name}_space_idx
|
|
233
|
+
ON {self.table_name} (genie_space_id)
|
|
234
|
+
"""
|
|
235
|
+
|
|
236
|
+
with self._pool.connection() as conn:
|
|
237
|
+
with conn.cursor() as cur:
|
|
238
|
+
cur.execute(create_extension_sql)
|
|
239
|
+
|
|
240
|
+
# Check if table exists and verify embedding dimensions
|
|
241
|
+
try:
|
|
242
|
+
cur.execute(check_dims_sql, (self.table_name,))
|
|
243
|
+
row: DbRow | None = cur.fetchone()
|
|
244
|
+
if row is not None:
|
|
245
|
+
# atttypmod for vector type contains the dimension
|
|
246
|
+
current_dims = row.get("atttypmod", 0)
|
|
247
|
+
if current_dims != self.embedding_dims:
|
|
248
|
+
logger.warning(
|
|
249
|
+
f"[{self.name}] Embedding dimension mismatch: "
|
|
250
|
+
f"table has {current_dims}, expected {self.embedding_dims}. "
|
|
251
|
+
f"Dropping and recreating table '{self.table_name}'."
|
|
252
|
+
)
|
|
253
|
+
cur.execute(f"DROP TABLE {self.table_name}")
|
|
254
|
+
except Exception:
|
|
255
|
+
# Table doesn't exist, which is fine
|
|
256
|
+
pass
|
|
257
|
+
|
|
258
|
+
cur.execute(create_table_sql)
|
|
259
|
+
cur.execute(create_space_index_sql)
|
|
260
|
+
cur.execute(create_embedding_index_sql)
|
|
261
|
+
|
|
262
|
+
def _embed_question(self, question: str) -> list[float]:
|
|
263
|
+
"""Generate embedding for a question."""
|
|
264
|
+
embeddings: list[list[float]] = self._embeddings.embed_documents([question])
|
|
265
|
+
return embeddings[0]
|
|
266
|
+
|
|
267
|
+
    @mlflow.trace(name="semantic_search")
    def _find_similar(
        self, question: str, embedding: list[float]
    ) -> tuple[SQLCacheEntry, float] | None:
        """
        Find a semantically similar cached entry for this Genie space.

        Args:
            question: The question to search for (used only for logging here;
                the search itself runs on the embedding)
            embedding: The embedding vector of the question

        Returns:
            Tuple of (SQLCacheEntry, similarity_score) if found, None otherwise
        """
        # Use L2 (Euclidean) distance - optimal for Databricks GTE embeddings
        # pg_vector's <-> operator returns L2 distance (0 = identical)
        # Convert to similarity: 1 / (1 + distance) gives range [0, 1]
        #
        # Refresh-on-hit strategy:
        # 1. Search without TTL filter to find best semantic match
        # 2. If match is within TTL (or TTL disabled) → cache hit
        # 3. If match is expired → delete it, return miss (triggers refresh)
        ttl_seconds = self.parameters.time_to_live_seconds
        ttl_disabled = ttl_seconds is None or ttl_seconds < 0

        # When TTL is disabled, all entries are always valid
        if ttl_disabled:
            is_valid_expr = "TRUE"
        else:
            # ttl_seconds comes from validated config (a number), so inlining
            # it into the SQL text via f-string is not an injection vector.
            is_valid_expr = f"created_at > NOW() - INTERVAL '{ttl_seconds} seconds'"

        # Fetch only the single nearest neighbour; threshold/TTL checks happen
        # in Python so we can log why a candidate was rejected.
        search_sql: str = f"""
            SELECT
                id,
                question,
                sql_query,
                description,
                conversation_id,
                created_at,
                1.0 / (1.0 + (question_embedding <-> %s::vector)) as similarity,
                {is_valid_expr} as is_valid
            FROM {self.table_name}
            WHERE genie_space_id = %s
            ORDER BY question_embedding <-> %s::vector
            LIMIT 1
        """

        # pg_vector accepts the textual "[v1,v2,...]" literal form.
        embedding_str: str = f"[{','.join(str(x) for x in embedding)}]"

        with self._pool.connection() as conn:
            with conn.cursor() as cur:
                # Same embedding bound twice: once for the similarity column,
                # once for the ORDER BY distance expression.
                cur.execute(
                    search_sql,
                    (embedding_str, self.genie_space_id, embedding_str),
                )
                row: DbRow | None = cur.fetchone()

                if row is None:
                    logger.info(
                        f"[{self.name}] MISS (no entries): "
                        f"question='{question[:50]}...' space='{self.genie_space_id}'"
                    )
                    return None

                # Extract values from dict row
                entry_id = row.get("id")
                cached_question = row.get("question", "")
                sql_query = row["sql_query"]
                description = row.get("description", "")
                conversation_id = row.get("conversation_id", "")
                created_at = row["created_at"]
                similarity = row["similarity"]
                is_valid = row.get("is_valid", False)

                # Log best match info (L2 distance can be computed from similarity: d = 1/s - 1)
                l2_distance = (
                    (1.0 / similarity) - 1.0 if similarity > 0 else float("inf")
                )
                logger.info(
                    f"[{self.name}] Best match: l2_distance={l2_distance:.4f}, similarity={similarity:.4f}, "
                    f"is_valid={is_valid}, question='{cached_question[:50]}...'"
                )

                # Check similarity threshold
                if similarity < self.similarity_threshold:
                    logger.info(
                        f"[{self.name}] MISS (below threshold): similarity={similarity:.4f} < threshold={self.similarity_threshold} "
                        f"(cached_question='{cached_question[:50]}...')"
                    )
                    return None

                # Check TTL - refresh on hit strategy
                if not is_valid:
                    # Entry is expired - delete it and return miss to trigger refresh
                    delete_sql = f"DELETE FROM {self.table_name} WHERE id = %s"
                    cur.execute(delete_sql, (entry_id,))
                    logger.info(
                        f"[{self.name}] MISS (expired, deleted for refresh): similarity={similarity:.4f}, "
                        f"ttl={ttl_seconds}s, question='{cached_question[:50]}...'"
                    )
                    return None

                logger.info(
                    f"[{self.name}] HIT: similarity={similarity:.4f} >= threshold={self.similarity_threshold} "
                    f"(cached_question='{cached_question[:50]}...')"
                )

                entry = SQLCacheEntry(
                    query=sql_query,
                    description=description,
                    conversation_id=conversation_id,
                    created_at=created_at,
                )
                return entry, similarity
|
|
381
|
+
|
|
382
|
+
def _store_entry(
|
|
383
|
+
self, question: str, embedding: list[float], response: GenieResponse
|
|
384
|
+
) -> None:
|
|
385
|
+
"""Store a new cache entry for this Genie space."""
|
|
386
|
+
insert_sql: str = f"""
|
|
387
|
+
INSERT INTO {self.table_name}
|
|
388
|
+
(genie_space_id, question, question_embedding, sql_query, description, conversation_id)
|
|
389
|
+
VALUES (%s, %s, %s::vector, %s, %s, %s)
|
|
390
|
+
"""
|
|
391
|
+
embedding_str: str = f"[{','.join(str(x) for x in embedding)}]"
|
|
392
|
+
|
|
393
|
+
with self._pool.connection() as conn:
|
|
394
|
+
with conn.cursor() as cur:
|
|
395
|
+
cur.execute(
|
|
396
|
+
insert_sql,
|
|
397
|
+
(
|
|
398
|
+
self.genie_space_id,
|
|
399
|
+
question,
|
|
400
|
+
embedding_str,
|
|
401
|
+
response.query,
|
|
402
|
+
response.description,
|
|
403
|
+
response.conversation_id,
|
|
404
|
+
),
|
|
405
|
+
)
|
|
406
|
+
logger.info(
|
|
407
|
+
f"[{self.name}] Stored cache entry: question='{question[:50]}...' "
|
|
408
|
+
f"sql='{response.query[:50]}...' (space={self.genie_space_id}, table={self.table_name})"
|
|
409
|
+
)
|
|
410
|
+
|
|
411
|
+
    @mlflow.trace(name="execute_cached_sql_semantic")
    def _execute_sql(self, sql: str) -> pd.DataFrame | str:
        """Execute SQL using the warehouse and return results.

        Returns a DataFrame on success (empty when the statement produced no
        rows), or an error-message string when execution did not succeed.
        """
        client: WorkspaceClient = self.warehouse.workspace_client
        warehouse_id: str = self.warehouse.warehouse_id

        # NOTE(review): wait_timeout caps the synchronous wait; a statement
        # still PENDING/RUNNING after 30s is reported as a failure below
        # rather than polled to completion — confirm this is intended.
        statement_response: StatementResponse = (
            client.statement_execution.execute_statement(
                warehouse_id=warehouse_id,
                statement=sql,
                wait_timeout="30s",
            )
        )

        # Any non-SUCCEEDED terminal (or still-running) state is surfaced as a
        # string so callers can pass it through as the Genie result.
        if statement_response.status.state != StatementState.SUCCEEDED:
            error_msg: str = (
                f"SQL execution failed: {statement_response.status.error.message}"
                if statement_response.status.error
                else f"SQL execution failed with state: {statement_response.status.state}"
            )
            logger.error(f"[{self.name}] {error_msg}")
            return error_msg

        if statement_response.result and statement_response.result.data_array:
            # Column names come from the manifest schema when available,
            # falling back to a schema attached to the result chunk.
            columns: list[str] = []
            if (
                statement_response.manifest
                and statement_response.manifest.schema
                and statement_response.manifest.schema.columns
            ):
                columns = [
                    col.name for col in statement_response.manifest.schema.columns
                ]
            elif hasattr(statement_response.result, "schema"):
                columns = [col.name for col in statement_response.result.schema.columns]

            data: list[list[Any]] = statement_response.result.data_array
            # Without column names the DataFrame falls back to integer labels.
            if columns:
                return pd.DataFrame(data, columns=columns)
            else:
                return pd.DataFrame(data)

        # Succeeded but produced no rows.
        return pd.DataFrame()
|
|
454
|
+
|
|
455
|
+
def ask_question(
|
|
456
|
+
self, question: str, conversation_id: str | None = None
|
|
457
|
+
) -> GenieResponse:
|
|
458
|
+
"""
|
|
459
|
+
Ask a question, using semantic cache if a similar query exists.
|
|
460
|
+
|
|
461
|
+
On cache hit, re-executes the cached SQL to get fresh data.
|
|
462
|
+
Implements GenieServiceBase for seamless chaining.
|
|
463
|
+
"""
|
|
464
|
+
result: CacheResult = self.ask_question_with_cache_info(
|
|
465
|
+
question, conversation_id
|
|
466
|
+
)
|
|
467
|
+
return result.response
|
|
468
|
+
|
|
469
|
+
@mlflow.trace(name="genie_semantic_cache_lookup", span_type=SpanType.TOOL)
|
|
470
|
+
def ask_question_with_cache_info(
|
|
471
|
+
self,
|
|
472
|
+
question: str,
|
|
473
|
+
conversation_id: str | None = None,
|
|
474
|
+
) -> CacheResult:
|
|
475
|
+
"""
|
|
476
|
+
Ask a question with detailed cache hit information.
|
|
477
|
+
|
|
478
|
+
On cache hit, the cached SQL is re-executed to return fresh data.
|
|
479
|
+
|
|
480
|
+
Args:
|
|
481
|
+
question: The question to ask
|
|
482
|
+
conversation_id: Optional conversation ID
|
|
483
|
+
|
|
484
|
+
Returns:
|
|
485
|
+
CacheResult with fresh response and cache metadata
|
|
486
|
+
"""
|
|
487
|
+
# Ensure initialization (lazy init if initialize() wasn't called)
|
|
488
|
+
self._setup()
|
|
489
|
+
|
|
490
|
+
# Generate embedding for the question
|
|
491
|
+
embedding: list[float] = self._embed_question(question)
|
|
492
|
+
|
|
493
|
+
# Check cache
|
|
494
|
+
cache_result: tuple[SQLCacheEntry, float] | None = self._find_similar(
|
|
495
|
+
question, embedding
|
|
496
|
+
)
|
|
497
|
+
|
|
498
|
+
if cache_result is not None:
|
|
499
|
+
cached, similarity = cache_result
|
|
500
|
+
logger.debug(
|
|
501
|
+
f"[{self.name}] Semantic cache hit (similarity={similarity:.3f}): {question[:50]}..."
|
|
502
|
+
)
|
|
503
|
+
|
|
504
|
+
# Re-execute the cached SQL to get fresh data
|
|
505
|
+
result: pd.DataFrame | str = self._execute_sql(cached.query)
|
|
506
|
+
|
|
507
|
+
response: GenieResponse = GenieResponse(
|
|
508
|
+
result=result,
|
|
509
|
+
query=cached.query,
|
|
510
|
+
description=cached.description,
|
|
511
|
+
conversation_id=cached.conversation_id,
|
|
512
|
+
)
|
|
513
|
+
|
|
514
|
+
return CacheResult(response=response, cache_hit=True, served_by=self.name)
|
|
515
|
+
|
|
516
|
+
# Cache miss - delegate to wrapped service
|
|
517
|
+
logger.debug(f"[{self.name}] Miss: {question[:50]}...")
|
|
518
|
+
|
|
519
|
+
response = self.impl.ask_question(question, conversation_id)
|
|
520
|
+
|
|
521
|
+
# Store in cache if we got a SQL query
|
|
522
|
+
if response.query:
|
|
523
|
+
logger.info(
|
|
524
|
+
f"[{self.name}] Storing new cache entry for question: '{question[:50]}...' "
|
|
525
|
+
f"(space={self.genie_space_id})"
|
|
526
|
+
)
|
|
527
|
+
self._store_entry(question, embedding, response)
|
|
528
|
+
elif not response.query:
|
|
529
|
+
logger.warning(
|
|
530
|
+
f"[{self.name}] Not caching: response has no SQL query "
|
|
531
|
+
f"(question='{question[:50]}...')"
|
|
532
|
+
)
|
|
533
|
+
|
|
534
|
+
return CacheResult(response=response, cache_hit=False, served_by=None)
|
|
535
|
+
|
|
536
|
+
def invalidate_expired(self) -> int:
|
|
537
|
+
"""Remove expired entries from the cache for this Genie space.
|
|
538
|
+
|
|
539
|
+
Returns 0 if TTL is disabled (entries never expire).
|
|
540
|
+
"""
|
|
541
|
+
self._setup()
|
|
542
|
+
ttl_seconds = self.parameters.time_to_live_seconds
|
|
543
|
+
|
|
544
|
+
# If TTL is disabled, nothing can expire
|
|
545
|
+
if ttl_seconds is None or ttl_seconds < 0:
|
|
546
|
+
logger.debug(
|
|
547
|
+
f"[{self.name}] TTL disabled, no entries to expire for space {self.genie_space_id}"
|
|
548
|
+
)
|
|
549
|
+
return 0
|
|
550
|
+
|
|
551
|
+
delete_sql: str = f"""
|
|
552
|
+
DELETE FROM {self.table_name}
|
|
553
|
+
WHERE genie_space_id = %s
|
|
554
|
+
AND created_at < NOW() - INTERVAL '%s seconds'
|
|
555
|
+
"""
|
|
556
|
+
|
|
557
|
+
with self._pool.connection() as conn:
|
|
558
|
+
with conn.cursor() as cur:
|
|
559
|
+
cur.execute(delete_sql, (self.genie_space_id, ttl_seconds))
|
|
560
|
+
deleted: int = cur.rowcount
|
|
561
|
+
logger.debug(
|
|
562
|
+
f"[{self.name}] Deleted {deleted} expired entries for space {self.genie_space_id}"
|
|
563
|
+
)
|
|
564
|
+
return deleted
|
|
565
|
+
|
|
566
|
+
def clear(self) -> int:
|
|
567
|
+
"""Clear all entries from the cache for this Genie space."""
|
|
568
|
+
self._setup()
|
|
569
|
+
delete_sql: str = f"DELETE FROM {self.table_name} WHERE genie_space_id = %s"
|
|
570
|
+
|
|
571
|
+
with self._pool.connection() as conn:
|
|
572
|
+
with conn.cursor() as cur:
|
|
573
|
+
cur.execute(delete_sql, (self.genie_space_id,))
|
|
574
|
+
deleted: int = cur.rowcount
|
|
575
|
+
logger.debug(
|
|
576
|
+
f"[{self.name}] Cleared {deleted} entries for space {self.genie_space_id}"
|
|
577
|
+
)
|
|
578
|
+
return deleted
|
|
579
|
+
|
|
580
|
+
@property
|
|
581
|
+
def size(self) -> int:
|
|
582
|
+
"""Current number of entries in the cache for this Genie space."""
|
|
583
|
+
self._setup()
|
|
584
|
+
count_sql: str = (
|
|
585
|
+
f"SELECT COUNT(*) as count FROM {self.table_name} WHERE genie_space_id = %s"
|
|
586
|
+
)
|
|
587
|
+
|
|
588
|
+
with self._pool.connection() as conn:
|
|
589
|
+
with conn.cursor() as cur:
|
|
590
|
+
cur.execute(count_sql, (self.genie_space_id,))
|
|
591
|
+
row: DbRow | None = cur.fetchone()
|
|
592
|
+
return row.get("count", 0) if row else 0
|
|
593
|
+
|
|
594
|
+
def stats(self) -> dict[str, int | float | None]:
|
|
595
|
+
"""Return cache statistics for this Genie space."""
|
|
596
|
+
self._setup()
|
|
597
|
+
ttl_seconds = self.parameters.time_to_live_seconds
|
|
598
|
+
ttl = self.time_to_live
|
|
599
|
+
|
|
600
|
+
# If TTL is disabled, all entries are valid
|
|
601
|
+
if ttl_seconds is None or ttl_seconds < 0:
|
|
602
|
+
count_sql: str = f"""
|
|
603
|
+
SELECT COUNT(*) as total FROM {self.table_name}
|
|
604
|
+
WHERE genie_space_id = %s
|
|
605
|
+
"""
|
|
606
|
+
with self._pool.connection() as conn:
|
|
607
|
+
with conn.cursor() as cur:
|
|
608
|
+
cur.execute(count_sql, (self.genie_space_id,))
|
|
609
|
+
row: DbRow | None = cur.fetchone()
|
|
610
|
+
total = row.get("total", 0) if row else 0
|
|
611
|
+
return {
|
|
612
|
+
"size": total,
|
|
613
|
+
"ttl_seconds": None,
|
|
614
|
+
"similarity_threshold": self.similarity_threshold,
|
|
615
|
+
"expired_entries": 0,
|
|
616
|
+
"valid_entries": total,
|
|
617
|
+
}
|
|
618
|
+
|
|
619
|
+
stats_sql: str = f"""
|
|
620
|
+
SELECT
|
|
621
|
+
COUNT(*) as total,
|
|
622
|
+
COUNT(*) FILTER (WHERE created_at > NOW() - INTERVAL '%s seconds') as valid,
|
|
623
|
+
COUNT(*) FILTER (WHERE created_at <= NOW() - INTERVAL '%s seconds') as expired
|
|
624
|
+
FROM {self.table_name}
|
|
625
|
+
WHERE genie_space_id = %s
|
|
626
|
+
"""
|
|
627
|
+
|
|
628
|
+
with self._pool.connection() as conn:
|
|
629
|
+
with conn.cursor() as cur:
|
|
630
|
+
cur.execute(stats_sql, (ttl_seconds, ttl_seconds, self.genie_space_id))
|
|
631
|
+
row: DbRow | None = cur.fetchone()
|
|
632
|
+
return {
|
|
633
|
+
"size": row.get("total", 0) if row else 0,
|
|
634
|
+
"ttl_seconds": ttl.total_seconds() if ttl else None,
|
|
635
|
+
"similarity_threshold": self.similarity_threshold,
|
|
636
|
+
"expired_entries": row.get("expired", 0) if row else 0,
|
|
637
|
+
"valid_entries": row.get("valid", 0) if row else 0,
|
|
638
|
+
}
|
dao_ai/tools/__init__.py
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
from dao_ai.genie.cache import LRUCacheService, SemanticCacheService
|
|
1
2
|
from dao_ai.hooks.core import create_hooks
|
|
2
3
|
from dao_ai.tools.agent import create_agent_endpoint_tool
|
|
3
4
|
from dao_ai.tools.core import (
|
|
@@ -35,7 +36,9 @@ __all__ = [
|
|
|
35
36
|
"current_time_tool",
|
|
36
37
|
"format_time_tool",
|
|
37
38
|
"is_business_hours_tool",
|
|
39
|
+
"LRUCacheService",
|
|
38
40
|
"search_tool",
|
|
41
|
+
"SemanticCacheService",
|
|
39
42
|
"time_difference_tool",
|
|
40
43
|
"time_in_timezone_tool",
|
|
41
44
|
"time_until_tool",
|