PyPI - dao-ai - Versions diffs - 0.1.2__py3-none-any.whl → 0.1.20__py3-none-any.whl - Mend

dao-ai 0.1.2__py3-none-any.whl → 0.1.20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (69) hide show

dao_ai/apps/__init__.py +24 -0
dao_ai/apps/handlers.py +105 -0
dao_ai/apps/model_serving.py +29 -0
dao_ai/apps/resources.py +1122 -0
dao_ai/apps/server.py +39 -0
dao_ai/cli.py +546 -37
dao_ai/config.py +1179 -139
dao_ai/evaluation.py +543 -0
dao_ai/genie/__init__.py +55 -7
dao_ai/genie/cache/__init__.py +34 -7
dao_ai/genie/cache/base.py +143 -2
dao_ai/genie/cache/context_aware/__init__.py +31 -0
dao_ai/genie/cache/context_aware/base.py +1151 -0
dao_ai/genie/cache/context_aware/in_memory.py +609 -0
dao_ai/genie/cache/context_aware/persistent.py +802 -0
dao_ai/genie/cache/context_aware/postgres.py +1166 -0
dao_ai/genie/cache/core.py +1 -1
dao_ai/genie/cache/lru.py +257 -75
dao_ai/genie/cache/optimization.py +890 -0
dao_ai/genie/core.py +235 -11
dao_ai/memory/postgres.py +175 -39
dao_ai/middleware/__init__.py +38 -0
dao_ai/middleware/assertions.py +3 -3
dao_ai/middleware/context_editing.py +230 -0
dao_ai/middleware/core.py +4 -4
dao_ai/middleware/guardrails.py +3 -3
dao_ai/middleware/human_in_the_loop.py +3 -2
dao_ai/middleware/message_validation.py +4 -4
dao_ai/middleware/model_call_limit.py +77 -0
dao_ai/middleware/model_retry.py +121 -0
dao_ai/middleware/pii.py +157 -0
dao_ai/middleware/summarization.py +1 -1
dao_ai/middleware/tool_call_limit.py +210 -0
dao_ai/middleware/tool_retry.py +174 -0
dao_ai/middleware/tool_selector.py +129 -0
dao_ai/models.py +327 -370
dao_ai/nodes.py +9 -16
dao_ai/orchestration/core.py +33 -9
dao_ai/orchestration/supervisor.py +29 -13
dao_ai/orchestration/swarm.py +6 -1
dao_ai/{prompts.py → prompts/__init__.py} +12 -61
dao_ai/prompts/instructed_retriever_decomposition.yaml +58 -0
dao_ai/prompts/instruction_reranker.yaml +14 -0
dao_ai/prompts/router.yaml +37 -0
dao_ai/prompts/verifier.yaml +46 -0
dao_ai/providers/base.py +28 -2
dao_ai/providers/databricks.py +363 -33
dao_ai/state.py +1 -0
dao_ai/tools/__init__.py +5 -3
dao_ai/tools/genie.py +103 -26
dao_ai/tools/instructed_retriever.py +366 -0
dao_ai/tools/instruction_reranker.py +202 -0
dao_ai/tools/mcp.py +539 -97
dao_ai/tools/router.py +89 -0
dao_ai/tools/slack.py +13 -2
dao_ai/tools/sql.py +7 -3
dao_ai/tools/unity_catalog.py +32 -10
dao_ai/tools/vector_search.py +493 -160
dao_ai/tools/verifier.py +159 -0
dao_ai/utils.py +182 -2
dao_ai/vector_search.py +46 -1
{dao_ai-0.1.2.dist-info → dao_ai-0.1.20.dist-info}/METADATA +45 -9
dao_ai-0.1.20.dist-info/RECORD +89 -0
dao_ai/agent_as_code.py +0 -22
dao_ai/genie/cache/semantic.py +0 -970
dao_ai-0.1.2.dist-info/RECORD +0 -64
{dao_ai-0.1.2.dist-info → dao_ai-0.1.20.dist-info}/WHEEL +0 -0
{dao_ai-0.1.2.dist-info → dao_ai-0.1.20.dist-info}/entry_points.txt +0 -0
{dao_ai-0.1.2.dist-info → dao_ai-0.1.20.dist-info}/licenses/LICENSE +0 -0

dao_ai/genie/cache/core.py CHANGED Viewed

@@ -38,7 +38,7 @@ def execute_sql_via_warehouse(
     w: WorkspaceClient = warehouse.workspace_client
     warehouse_id: str = str(warehouse.warehouse_id)
-    logger.trace("Executing cached SQL", layer=layer_name, sql_prefix=sql[:100])
+    logger.trace("Executing cached SQL", layer=layer_name, sql=sql[:100])
     statement_response: StatementResponse = w.statement_execution.execute_statement(
         statement=sql,

dao_ai/genie/cache/lru.py CHANGED Viewed

@@ -6,15 +6,16 @@ by Genie. On cache hit, the cached SQL is re-executed against the warehouse
 to return fresh data while avoiding the Genie NL-to-SQL translation cost.
 """
+from __future__ import annotations
 from collections import OrderedDict
 from datetime import datetime, timedelta
 from threading import Lock
-from typing import Any
 import mlflow
 import pandas as pd
 from databricks.sdk import WorkspaceClient
-from databricks.sdk.service.sql import StatementResponse, StatementState
+from databricks.sdk.service.dashboards import GenieFeedbackRating
 from databricks_ai_bridge.genie import GenieResponse
 from loguru import logger
@@ -24,6 +25,7 @@ from dao_ai.genie.cache.base import (
     GenieServiceBase,
     SQLCacheEntry,
 )
+from dao_ai.genie.cache.core import execute_sql_via_warehouse
 class LRUCacheService(GenieServiceBase):
@@ -124,9 +126,7 @@ class LRUCacheService(GenieServiceBase):
         if self._cache:
             oldest_key: str = next(iter(self._cache))
             del self._cache[oldest_key]
-            logger.trace(
-                "Evicted cache entry", layer=self.name, key_prefix=oldest_key[:50]
-            )
+            logger.trace("Evicted cache entry", layer=self.name, key=oldest_key[:50])
     def _get(self, key: str) -> SQLCacheEntry | None:
         """Get from cache, returning None if not found or expired."""
@@ -137,14 +137,26 @@ class LRUCacheService(GenieServiceBase):
         if self._is_expired(entry):
             del self._cache[key]
-            logger.trace("Expired cache entry", layer=self.name, key_prefix=key[:50])
+            logger.trace("Expired cache entry", layer=self.name, key=key[:50])
             return None
         self._cache.move_to_end(key)
         return entry
-    def _put(self, key: str, response: GenieResponse) -> None:
+    def _put(
+        self, key: str, response: GenieResponse, message_id: str | None = None
+    ) -> None:
         """Store SQL query in cache, evicting if at capacity."""
+        # Skip caching if query is empty or whitespace
+        if not response.query or not response.query.strip():
+            logger.warning(
+                "Not caching: response has no SQL query",
+                layer=self.name,
+                key=key[:50],
+                description=response.description[:80] if response.description else None,
+            )
+            return
         if key in self._cache:
             del self._cache[key]
@@ -156,14 +168,18 @@ class LRUCacheService(GenieServiceBase):
             description=response.description,
             conversation_id=response.conversation_id,
             created_at=datetime.now(),
+            message_id=message_id,
+            # LRU cache is in-memory only, no database row ID
+            cache_entry_id=None,
         )
-        logger.info(
+        logger.debug(
             "Stored cache entry",
             layer=self.name,
-            key_prefix=key[:50],
-            sql_prefix=response.query[:50] if response.query else None,
+            key=key[:50],
+            sql=response.query[:50] if response.query else None,
             cache_size=len(self._cache),
             capacity=self.capacity,
+            message_id=message_id,
         )
     @mlflow.trace(name="execute_cached_sql")
@@ -177,50 +193,22 @@ class LRUCacheService(GenieServiceBase):
         Returns:
             DataFrame with results, or error message string
         """
-        w: WorkspaceClient = self.warehouse.workspace_client
-        warehouse_id: str = str(self.warehouse.warehouse_id)
-        logger.trace("Executing cached SQL", layer=self.name, sql_prefix=sql[:100])
-        statement_response: StatementResponse = w.statement_execution.execute_statement(
-            statement=sql,
-            warehouse_id=warehouse_id,
-            wait_timeout="30s",
-        )
-        # Poll for completion if still running
-        while statement_response.status.state in [
-            StatementState.PENDING,
-            StatementState.RUNNING,
-        ]:
-            statement_response = w.statement_execution.get_statement(
-                statement_response.statement_id
-            )
-        if statement_response.status.state != StatementState.SUCCEEDED:
-            error_msg: str = f"SQL execution failed: {statement_response.status}"
+        # Validate SQL is not empty
+        if not sql or not sql.strip():
+            error_msg: str = "Cannot execute empty SQL query"
             logger.error(
-                "SQL execution failed",
+                "SQL execution failed: empty query",
                 layer=self.name,
-                status=str(statement_response.status),
+                sql=repr(sql),
             )
             return error_msg
-        # Convert to DataFrame
-        if statement_response.result and statement_response.result.data_array:
-            columns: list[str] = []
-            if statement_response.manifest and statement_response.manifest.schema:
-                columns = [
-                    col.name for col in statement_response.manifest.schema.columns
-                ]
-            data: list[list[Any]] = statement_response.result.data_array
-            if columns:
-                return pd.DataFrame(data, columns=columns)
-            else:
-                return pd.DataFrame(data)
-        return pd.DataFrame()
+        # Use shared utility function for SQL execution
+        return execute_sql_via_warehouse(
+            warehouse=self.warehouse,
+            sql=sql,
+            layer_name=self.name,
+        )
     def ask_question(
         self, question: str, conversation_id: str | None = None
@@ -258,50 +246,159 @@ class LRUCacheService(GenieServiceBase):
             cached: SQLCacheEntry | None = self._get(key)
         if cached is not None:
-            logger.info(
-                "Cache HIT",
-                layer=self.name,
-                question_prefix=question[:50],
-                conversation_id=conversation_id,
-                cache_size=self.size,
-                capacity=self.capacity,
-            )
-            # Re-execute the cached SQL to get fresh data
-            result: pd.DataFrame | str = self._execute_sql(cached.query)
-            # Use current conversation_id, not the cached one
-            response: GenieResponse = GenieResponse(
-                result=result,
-                query=cached.query,
-                description=cached.description,
-                conversation_id=conversation_id
-                if conversation_id
-                else cached.conversation_id,
-            )
-            return CacheResult(response=response, cache_hit=True, served_by=self.name)
+            # Defensive check: if cached query is empty, treat as cache miss
+            if not cached.query or not cached.query.strip():
+                logger.warning(
+                    "Cache HIT but query is empty, treating as MISS",
+                    layer=self.name,
+                    question=question[:80],
+                    conversation_id=conversation_id,
+                    key=key[:50],
+                )
+                # Invalidate this bad cache entry
+                with self._lock:
+                    if key in self._cache:
+                        del self._cache[key]
+                # Fall through to cache miss logic below
+            else:
+                cache_age_seconds = (datetime.now() - cached.created_at).total_seconds()
+                logger.info(
+                    "Cache HIT",
+                    layer=self.name,
+                    question=question[:80],
+                    conversation_id=conversation_id,
+                    cached_sql=cached.query[:80] if cached.query else None,
+                    cache_age_seconds=round(cache_age_seconds, 1),
+                    cache_size=self.size,
+                    capacity=self.capacity,
+                    ttl_seconds=self.parameters.time_to_live_seconds,
+                )
+                # Re-execute the cached SQL to get fresh data
+                result: pd.DataFrame | str = self._execute_sql(cached.query)
+                # Check if SQL execution failed (returns error string instead of DataFrame)
+                if isinstance(result, str):
+                    logger.warning(
+                        "Cached SQL execution failed, falling back to Genie",
+                        layer=self.name,
+                        question=question[:80],
+                        conversation_id=conversation_id,
+                        cached_sql=cached.query[:80],
+                        error=result[:200],
+                        cache_key=key[:50],
+                    )
+                    # Invalidate the bad cache entry
+                    with self._lock:
+                        if key in self._cache:
+                            del self._cache[key]
+                            logger.info(
+                                "Invalidated stale cache entry",
+                                layer=self.name,
+                                cache_key=key[:50],
+                                cache_size=len(self._cache),
+                                capacity=self.capacity,
+                            )
+                    # Fall back to Genie to get fresh SQL
+                    logger.info(
+                        "Delegating to Genie for fresh SQL",
+                        layer=self.name,
+                        question=question[:80],
+                        delegating_to=type(self.impl).__name__,
+                    )
+                    fallback_result: CacheResult = self.impl.ask_question(
+                        question, conversation_id
+                    )
+                    # Store the fresh SQL in cache (including message_id for feedback)
+                    if fallback_result.response.query:
+                        with self._lock:
+                            self._put(
+                                key,
+                                fallback_result.response,
+                                message_id=fallback_result.message_id,
+                            )
+                        logger.info(
+                            "Stored fresh SQL from fallback",
+                            layer=self.name,
+                            fresh_sql=fallback_result.response.query[:80],
+                            cache_size=len(self._cache),
+                            capacity=self.capacity,
+                            message_id=fallback_result.message_id,
+                        )
+                    else:
+                        logger.warning(
+                            "Fallback response has no SQL query to cache",
+                            layer=self.name,
+                            question=question[:80],
+                        )
+                    logger.info(
+                        "Fallback completed successfully",
+                        layer=self.name,
+                        question=question[:80],
+                        fallback_from="stale_cache",
+                        has_result=fallback_result.response.result is not None,
+                    )
+                    # Return as cache miss (fallback scenario)
+                    # Propagate message_id from fallback result
+                    return CacheResult(
+                        response=fallback_result.response,
+                        cache_hit=False,
+                        served_by=None,
+                        message_id=fallback_result.message_id,
+                    )
+                # Use current conversation_id, not the cached one
+                response: GenieResponse = GenieResponse(
+                    result=result,
+                    query=cached.query,
+                    description=cached.description,
+                    conversation_id=conversation_id
+                    if conversation_id
+                    else cached.conversation_id,
+                )
+                # Cache hit - include message_id from original response for feedback support
+                return CacheResult(
+                    response=response,
+                    cache_hit=True,
+                    served_by=self.name,
+                    message_id=cached.message_id,
+                    # LRU cache is in-memory only, no cache_entry_id for traceability
+                    cache_entry_id=None,
+                )
         # Cache miss - delegate to wrapped service
         logger.info(
             "Cache MISS",
             layer=self.name,
-            question_prefix=question[:50],
+            question=question[:80],
             conversation_id=conversation_id,
             cache_size=self.size,
             capacity=self.capacity,
+            ttl_seconds=self.parameters.time_to_live_seconds,
             delegating_to=type(self.impl).__name__,
         )
         result: CacheResult = self.impl.ask_question(question, conversation_id)
         with self._lock:
-            self._put(key, result.response)
-        return CacheResult(response=result.response, cache_hit=False, served_by=None)
+            self._put(key, result.response, message_id=result.message_id)
+        # Propagate the inner cache's result - if it was a hit there, preserve that info
+        return result
     @property
     def space_id(self) -> str:
         return self.impl.space_id
+    @property
+    def workspace_client(self) -> WorkspaceClient | None:
+        """Get workspace client by delegating to impl."""
+        return self.impl.workspace_client
     def invalidate(self, question: str, conversation_id: str | None = None) -> bool:
         """
         Remove a specific entry from the cache.
@@ -345,3 +442,88 @@ class LRUCacheService(GenieServiceBase):
                 "expired_entries": expired,
                 "valid_entries": len(self._cache) - expired,
             }
+    @mlflow.trace(name="genie_lru_cache_send_feedback")
+    def send_feedback(
+        self,
+        conversation_id: str,
+        rating: GenieFeedbackRating,
+        message_id: str | None = None,
+        was_cache_hit: bool = False,
+    ) -> None:
+        """
+        Send feedback for a Genie message with cache invalidation.
+        For LRU cache, this method:
+        1. If was_cache_hit is False: forwards feedback to the underlying service
+        2. If rating is NEGATIVE: invalidates any matching cache entries
+        Args:
+            conversation_id: The conversation containing the message
+            rating: The feedback rating (POSITIVE, NEGATIVE, or NONE)
+            message_id: Optional message ID. If None, looks up the most recent message.
+            was_cache_hit: Whether the response being rated was served from cache.
+        Note:
+            For cached responses (was_cache_hit=True), only cache invalidation is
+            performed. No feedback is sent to the Genie API because cached responses
+            don't have a corresponding Genie message.
+            Future Enhancement: To enable full Genie feedback for cached responses,
+            the cache would need to store the original message_id. See GenieServiceBase
+            docstring for details on required changes.
+        """
+        # Handle cache invalidation on negative feedback
+        invalidated = False
+        if rating == GenieFeedbackRating.NEGATIVE:
+            # For LRU cache, we invalidate by conversation_id since that's part of the key
+            # Iterate through cache and remove entries matching the conversation_id
+            with self._lock:
+                keys_to_remove: list[str] = []
+                for key, entry in self._cache.items():
+                    if entry.conversation_id == conversation_id:
+                        keys_to_remove.append(key)
+                for key in keys_to_remove:
+                    del self._cache[key]
+                    invalidated = True
+                    logger.info(
+                        "Invalidated cache entry due to negative feedback",
+                        layer=self.name,
+                        cache_key=key[:50],
+                        conversation_id=conversation_id,
+                    )
+            if not keys_to_remove:
+                logger.debug(
+                    "No cache entries found to invalidate for negative feedback",
+                    layer=self.name,
+                    conversation_id=conversation_id,
+                )
+        # Forward feedback to underlying service if not a cache hit
+        # For cache hits, there's no Genie message to provide feedback on
+        if was_cache_hit:
+            logger.info(
+                "Skipping Genie API feedback - response was served from cache",
+                layer=self.name,
+                conversation_id=conversation_id,
+                rating=rating.value if rating else None,
+                cache_invalidated=invalidated,
+            )
+            return
+        # Forward to underlying service
+        logger.debug(
+            "Forwarding feedback to underlying service",
+            layer=self.name,
+            conversation_id=conversation_id,
+            rating=rating.value if rating else None,
+            delegating_to=type(self.impl).__name__,
+        )
+        self.impl.send_feedback(
+            conversation_id=conversation_id,
+            rating=rating,
+            message_id=message_id,
+            was_cache_hit=False,  # Already handled, so pass False
+        )

dao-ai 0.1.2__py3-none-any.whl → 0.1.20__py3-none-any.whl

dao-ai 0.1.2__py3-none-any.whl → 0.1.20__py3-none-any.whl