dao-ai 0.0.36__py3-none-any.whl → 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dao_ai/__init__.py +29 -0
- dao_ai/cli.py +195 -30
- dao_ai/config.py +770 -244
- dao_ai/genie/__init__.py +1 -22
- dao_ai/genie/cache/__init__.py +1 -2
- dao_ai/genie/cache/base.py +20 -70
- dao_ai/genie/cache/core.py +75 -0
- dao_ai/genie/cache/lru.py +44 -21
- dao_ai/genie/cache/semantic.py +390 -109
- dao_ai/genie/core.py +35 -0
- dao_ai/graph.py +27 -253
- dao_ai/hooks/__init__.py +9 -6
- dao_ai/hooks/core.py +22 -190
- dao_ai/memory/__init__.py +10 -0
- dao_ai/memory/core.py +23 -5
- dao_ai/memory/databricks.py +389 -0
- dao_ai/memory/postgres.py +2 -2
- dao_ai/messages.py +6 -4
- dao_ai/middleware/__init__.py +125 -0
- dao_ai/middleware/assertions.py +778 -0
- dao_ai/middleware/base.py +50 -0
- dao_ai/middleware/core.py +61 -0
- dao_ai/middleware/guardrails.py +415 -0
- dao_ai/middleware/human_in_the_loop.py +228 -0
- dao_ai/middleware/message_validation.py +554 -0
- dao_ai/middleware/summarization.py +192 -0
- dao_ai/models.py +1177 -108
- dao_ai/nodes.py +118 -161
- dao_ai/optimization.py +664 -0
- dao_ai/orchestration/__init__.py +52 -0
- dao_ai/orchestration/core.py +287 -0
- dao_ai/orchestration/supervisor.py +264 -0
- dao_ai/orchestration/swarm.py +226 -0
- dao_ai/prompts.py +126 -29
- dao_ai/providers/databricks.py +126 -381
- dao_ai/state.py +139 -21
- dao_ai/tools/__init__.py +8 -5
- dao_ai/tools/core.py +57 -4
- dao_ai/tools/email.py +280 -0
- dao_ai/tools/genie.py +47 -24
- dao_ai/tools/mcp.py +4 -3
- dao_ai/tools/memory.py +50 -0
- dao_ai/tools/python.py +4 -12
- dao_ai/tools/search.py +14 -0
- dao_ai/tools/slack.py +1 -1
- dao_ai/tools/unity_catalog.py +8 -6
- dao_ai/tools/vector_search.py +16 -9
- dao_ai/utils.py +72 -8
- dao_ai-0.1.0.dist-info/METADATA +1878 -0
- dao_ai-0.1.0.dist-info/RECORD +62 -0
- dao_ai/chat_models.py +0 -204
- dao_ai/guardrails.py +0 -112
- dao_ai/tools/genie/__init__.py +0 -236
- dao_ai/tools/human_in_the_loop.py +0 -100
- dao_ai-0.0.36.dist-info/METADATA +0 -951
- dao_ai-0.0.36.dist-info/RECORD +0 -47
- {dao_ai-0.0.36.dist-info → dao_ai-0.1.0.dist-info}/WHEEL +0 -0
- {dao_ai-0.0.36.dist-info → dao_ai-0.1.0.dist-info}/entry_points.txt +0 -0
- {dao_ai-0.0.36.dist-info → dao_ai-0.1.0.dist-info}/licenses/LICENSE +0 -0
dao_ai/genie/__init__.py
CHANGED
@@ -17,9 +17,6 @@ Example usage:
     from dao_ai.genie.cache import LRUCacheService, SemanticCacheService
 """
 
-import mlflow
-from databricks_ai_bridge.genie import Genie, GenieResponse
-
 from dao_ai.genie.cache import (
     CacheResult,
     GenieServiceBase,
@@ -27,25 +24,7 @@ from dao_ai.genie.cache import (
     SemanticCacheService,
     SQLCacheEntry,
 )
-
-
-class GenieService(GenieServiceBase):
-    """Concrete implementation of GenieServiceBase using the Genie SDK."""
-
-    genie: Genie
-
-    def __init__(self, genie: Genie) -> None:
-        self.genie = genie
-
-    @mlflow.trace(name="genie_ask_question")
-    def ask_question(
-        self, question: str, conversation_id: str | None = None
-    ) -> GenieResponse:
-        response: GenieResponse = self.genie.ask_question(
-            question, conversation_id=conversation_id
-        )
-        return response
-
+from dao_ai.genie.core import GenieService
 
 __all__ = [
     # Service classes
dao_ai/genie/cache/__init__.py
CHANGED
@@ -15,7 +15,6 @@ Example usage:
     genie_service = SemanticCacheService(
         impl=GenieService(genie),
         parameters=semantic_params,
-        genie_space_id=space_id,
     )
     genie_service = LRUCacheService(
         impl=genie_service,
@@ -27,8 +26,8 @@ from dao_ai.genie.cache.base import (
     CacheResult,
     GenieServiceBase,
     SQLCacheEntry,
-    execute_sql_via_warehouse,
 )
+from dao_ai.genie.cache.core import execute_sql_via_warehouse
 from dao_ai.genie.cache.lru import LRUCacheService
 from dao_ai.genie.cache.semantic import SemanticCacheService
 
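Two things to note in this file: the docstring example no longer passes genie_space_id to SemanticCacheService (the space ID is now obtained from the wrapped service; see the new abstract space_id property in base.py below), and execute_sql_via_warehouse now lives in cache/core.py while remaining re-exported here. A quick sketch of the unchanged public import path (assumes the package is installed):

    from dao_ai.genie.cache import execute_sql_via_warehouse  # re-exported
    from dao_ai.genie.cache.core import execute_sql_via_warehouse as core_fn

    # The re-export should make both names point at the same function object.
    assert execute_sql_via_warehouse is core_fn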
dao_ai/genie/cache/base.py
CHANGED
@@ -2,21 +2,21 @@
 Base classes and types for Genie cache implementations.
 
 This module provides the foundational types used across different cache
-implementations (LRU, Semantic, etc.).
+implementations (LRU, Semantic, etc.). It contains only abstract base classes
+and data structures - no concrete implementations.
 """
 
+from __future__ import annotations
+
 from abc import ABC, abstractmethod
 from dataclasses import dataclass
 from datetime import datetime
-from typing import Any
+from typing import TYPE_CHECKING
 
-import pandas as pd
-from databricks.sdk import WorkspaceClient
-from databricks.sdk.service.sql import StatementResponse, StatementState
 from databricks_ai_bridge.genie import GenieResponse
-from loguru import logger
 
-
+if TYPE_CHECKING:
+    from dao_ai.genie.cache.base import CacheResult
 
 
 class GenieServiceBase(ABC):
@@ -25,8 +25,19 @@ class GenieServiceBase(ABC):
     @abstractmethod
     def ask_question(
         self, question: str, conversation_id: str | None = None
-    ) -> GenieResponse:
-        """
+    ) -> "CacheResult":
+        """
+        Ask a question to Genie and return the response with cache metadata.
+
+        All implementations return CacheResult to provide consistent cache information,
+        even when caching is disabled (cache_hit=False, served_by=None).
+        """
+        pass
+
+    @property
+    @abstractmethod
+    def space_id(self) -> str:
+        """The space ID for the Genie service."""
         pass
 
 
@@ -59,64 +70,3 @@ class CacheResult:
     response: GenieResponse
     cache_hit: bool
     served_by: str | None = None
-
-
-def execute_sql_via_warehouse(
-    warehouse: WarehouseModel,
-    sql: str,
-    layer_name: str = "cache",
-) -> pd.DataFrame | str:
-    """
-    Execute SQL using a Databricks warehouse and return results as DataFrame.
-
-    This is a shared utility for cache implementations that need to re-execute
-    cached SQL queries.
-
-    Args:
-        warehouse: The warehouse configuration for SQL execution
-        sql: The SQL query to execute
-        layer_name: Name of the cache layer (for logging)
-
-    Returns:
-        DataFrame with results, or error message string
-    """
-    w: WorkspaceClient = warehouse.workspace_client
-    warehouse_id: str = str(warehouse.warehouse_id)
-
-    logger.debug(f"[{layer_name}] Executing cached SQL: {sql[:100]}...")
-
-    statement_response: StatementResponse = w.statement_execution.execute_statement(
-        statement=sql,
-        warehouse_id=warehouse_id,
-        wait_timeout="30s",
-    )
-
-    # Poll for completion if still running
-    while statement_response.status.state in [
-        StatementState.PENDING,
-        StatementState.RUNNING,
-    ]:
-        statement_response = w.statement_execution.get_statement(
-            statement_response.statement_id
-        )
-
-    if statement_response.status.state != StatementState.SUCCEEDED:
-        error_msg: str = f"SQL execution failed: {statement_response.status}"
-        logger.error(f"[{layer_name}] {error_msg}")
-        return error_msg
-
-    # Convert to DataFrame
-    if statement_response.result and statement_response.result.data_array:
-        columns: list[str] = []
-        if statement_response.manifest and statement_response.manifest.schema:
-            columns = [col.name for col in statement_response.manifest.schema.columns]
-        elif hasattr(statement_response.result, "schema"):
-            columns = [col.name for col in statement_response.result.schema.columns]
-
-        data: list[list[Any]] = statement_response.result.data_array
-        if columns:
-            return pd.DataFrame(data, columns=columns)
-        else:
-            return pd.DataFrame(data)
-
-    return pd.DataFrame()
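The interface change is the significant part of this file: ask_question now returns CacheResult in every implementation (with cache_hit=False and served_by=None when no cache is involved), and space_id becomes an abstract property. A minimal sketch of a conforming subclass; StaticGenieService is a toy stub invented for illustration, not anything shipped in the package, and it assumes GenieResponse accepts the keyword fields shown in the lru.py diff below:

    from databricks_ai_bridge.genie import GenieResponse

    from dao_ai.genie.cache import CacheResult, GenieServiceBase


    class StaticGenieService(GenieServiceBase):
        """Toy implementation that returns a canned response without calling Genie."""

        def __init__(self, space_id: str) -> None:
            self._space_id = space_id

        def ask_question(
            self, question: str, conversation_id: str | None = None
        ) -> CacheResult:
            response = GenieResponse(
                result="no data",
                query=None,
                description=f"echo: {question}",
                conversation_id=conversation_id,
            )
            # No caching here, so per the contract: cache_hit=False, served_by=None.
            return CacheResult(response=response, cache_hit=False, served_by=None)

        @property
        def space_id(self) -> str:
            return self._space_id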
dao_ai/genie/cache/core.py
ADDED
@@ -0,0 +1,75 @@
+"""
+Core utilities for Genie cache implementations.
+
+This module provides shared utility functions used by different cache
+implementations (LRU, Semantic, etc.). These are concrete implementations
+of common operations needed across cache types.
+"""
+
+from typing import Any
+
+import pandas as pd
+from databricks.sdk import WorkspaceClient
+from databricks.sdk.service.sql import StatementResponse, StatementState
+from loguru import logger
+
+from dao_ai.config import WarehouseModel
+
+
+def execute_sql_via_warehouse(
+    warehouse: WarehouseModel,
+    sql: str,
+    layer_name: str = "cache",
+) -> pd.DataFrame | str:
+    """
+    Execute SQL using a Databricks warehouse and return results as DataFrame.
+
+    This is a shared utility for cache implementations that need to re-execute
+    cached SQL queries.
+
+    Args:
+        warehouse: The warehouse configuration for SQL execution
+        sql: The SQL query to execute
+        layer_name: Name of the cache layer (for logging)
+
+    Returns:
+        DataFrame with results, or error message string
+    """
+    w: WorkspaceClient = warehouse.workspace_client
+    warehouse_id: str = str(warehouse.warehouse_id)
+
+    logger.debug(f"[{layer_name}] Executing cached SQL: {sql[:100]}...")
+
+    statement_response: StatementResponse = w.statement_execution.execute_statement(
+        statement=sql,
+        warehouse_id=warehouse_id,
+        wait_timeout="30s",
+    )
+
+    # Poll for completion if still running
+    while statement_response.status.state in [
+        StatementState.PENDING,
+        StatementState.RUNNING,
+    ]:
+        statement_response = w.statement_execution.get_statement(
+            statement_response.statement_id
+        )
+
+    if statement_response.status.state != StatementState.SUCCEEDED:
+        error_msg: str = f"SQL execution failed: {statement_response.status}"
+        logger.error(f"[{layer_name}] {error_msg}")
+        return error_msg
+
+    # Convert to DataFrame
+    if statement_response.result and statement_response.result.data_array:
+        columns: list[str] = []
+        if statement_response.manifest and statement_response.manifest.schema:
+            columns = [col.name for col in statement_response.manifest.schema.columns]
+
+        data: list[list[Any]] = statement_response.result.data_array
+        if columns:
+            return pd.DataFrame(data, columns=columns)
+        else:
+            return pd.DataFrame(data)
+
+    return pd.DataFrame()
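Moving the helper into its own module keeps base.py free of pandas, SDK, and loguru dependencies and gives it a proper import for WarehouseModel from dao_ai.config. A usage sketch; the warehouse object is left as a placeholder since its construction comes from dao_ai configuration not shown in this diff:

    import pandas as pd

    from dao_ai.config import WarehouseModel
    from dao_ai.genie.cache.core import execute_sql_via_warehouse

    warehouse: WarehouseModel = ...  # obtain from your dao_ai configuration

    result = execute_sql_via_warehouse(warehouse, "SELECT 1 AS probe", layer_name="lru")
    if isinstance(result, pd.DataFrame):
        print(result.head())
    else:
        # On failure the helper returns the error message as a string.
        print(f"SQL failed: {result}")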
dao_ai/genie/cache/lru.py
CHANGED
@@ -96,9 +96,21 @@ class LRUCacheService(GenieServiceBase):
         return timedelta(seconds=ttl)
 
     @staticmethod
-    def _normalize_key(question: str) -> str:
-        """
-
+    def _normalize_key(question: str, conversation_id: str | None = None) -> str:
+        """
+        Normalize the question and conversation_id to create a consistent cache key.
+
+        Args:
+            question: The question text
+            conversation_id: Optional conversation ID to include in the key
+
+        Returns:
+            A normalized cache key combining question and conversation_id
+        """
+        normalized_question = question.strip().lower()
+        if conversation_id:
+            return f"{conversation_id}::{normalized_question}"
+        return normalized_question
 
     def _is_expired(self, entry: SQLCacheEntry) -> bool:
         """Check if a cache entry has exceeded its TTL. Returns False if TTL is disabled."""
@@ -192,8 +204,6 @@ class LRUCacheService(GenieServiceBase):
                 columns = [
                     col.name for col in statement_response.manifest.schema.columns
                 ]
-            elif hasattr(statement_response.result, "schema"):
-                columns = [col.name for col in statement_response.result.schema.columns]
 
             data: list[list[Any]] = statement_response.result.data_array
             if columns:
@@ -205,17 +215,14 @@ class LRUCacheService(GenieServiceBase):
 
     def ask_question(
         self, question: str, conversation_id: str | None = None
-    ) -> GenieResponse:
+    ) -> CacheResult:
         """
         Ask a question, using cached SQL query if available.
 
         On cache hit, re-executes the cached SQL to get fresh data.
-
+        Returns CacheResult with cache metadata.
         """
-        result: CacheResult = self.ask_question_with_cache_info(
-            question, conversation_id
-        )
-        return result.response
+        return self.ask_question_with_cache_info(question, conversation_id)
 
     @mlflow.trace(name="genie_lru_cache_lookup")
     def ask_question_with_cache_info(
@@ -235,7 +242,7 @@ class LRUCacheService(GenieServiceBase):
         Returns:
             CacheResult with fresh response and cache metadata
         """
-        key: str = self._normalize_key(question)
+        key: str = self._normalize_key(question, conversation_id)
 
         # Check cache
         with self._lock:
@@ -244,17 +251,20 @@ class LRUCacheService(GenieServiceBase):
             if cached is not None:
                 logger.info(
                     f"[{self.name}] Cache HIT: '{question[:50]}...' "
-                    f"(cache_size={self.size}/{self.capacity})"
+                    f"(conversation_id={conversation_id}, cache_size={self.size}/{self.capacity})"
                 )
 
                 # Re-execute the cached SQL to get fresh data
                 result: pd.DataFrame | str = self._execute_sql(cached.query)
 
+                # Use current conversation_id, not the cached one
                 response: GenieResponse = GenieResponse(
                     result=result,
                     query=cached.query,
                     description=cached.description,
-                    conversation_id=cached.conversation_id,
+                    conversation_id=conversation_id
+                    if conversation_id
+                    else cached.conversation_id,
                 )
 
                 return CacheResult(response=response, cache_hit=True, served_by=self.name)
@@ -262,17 +272,30 @@ class LRUCacheService(GenieServiceBase):
         # Cache miss - delegate to wrapped service
         logger.info(
             f"[{self.name}] Cache MISS: '{question[:50]}...' "
-            f"(cache_size={self.size}/{self.capacity}, delegating to {type(self.impl).__name__})"
+            f"(conversation_id={conversation_id}, cache_size={self.size}/{self.capacity}, delegating to {type(self.impl).__name__})"
         )
 
-        response: GenieResponse = self.impl.ask_question(question, conversation_id)
+        result: CacheResult = self.impl.ask_question(question, conversation_id)
         with self._lock:
-            self._put(key, response)
-            return CacheResult(response=response, cache_hit=False, served_by=None)
+            self._put(key, result.response)
+            return CacheResult(response=result.response, cache_hit=False, served_by=None)
 
-
-
-
+    @property
+    def space_id(self) -> str:
+        return self.impl.space_id
+
+    def invalidate(self, question: str, conversation_id: str | None = None) -> bool:
+        """
+        Remove a specific entry from the cache.
+
+        Args:
+            question: The question text
+            conversation_id: Optional conversation ID to match
+
+        Returns:
+            True if the entry was found and removed, False otherwise
+        """
+        key: str = self._normalize_key(question, conversation_id)
         with self._lock:
             if key in self._cache:
                 del self._cache[key]