dao-ai 0.1.19__py3-none-any.whl → 0.1.21__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dao_ai/cli.py +37 -7
- dao_ai/config.py +265 -10
- dao_ai/genie/__init__.py +55 -7
- dao_ai/genie/cache/__init__.py +36 -9
- dao_ai/genie/cache/base.py +143 -2
- dao_ai/genie/cache/context_aware/__init__.py +52 -0
- dao_ai/genie/cache/context_aware/base.py +1204 -0
- dao_ai/genie/cache/{in_memory_semantic.py → context_aware/in_memory.py} +233 -383
- dao_ai/genie/cache/context_aware/optimization.py +930 -0
- dao_ai/genie/cache/context_aware/persistent.py +802 -0
- dao_ai/genie/cache/context_aware/postgres.py +1343 -0
- dao_ai/genie/cache/lru.py +248 -70
- dao_ai/genie/core.py +235 -11
- dao_ai/middleware/__init__.py +8 -1
- dao_ai/middleware/tool_call_observability.py +227 -0
- dao_ai/nodes.py +4 -4
- dao_ai/tools/__init__.py +2 -2
- dao_ai/tools/genie.py +10 -10
- dao_ai/utils.py +7 -3
- {dao_ai-0.1.19.dist-info → dao_ai-0.1.21.dist-info}/METADATA +1 -1
- {dao_ai-0.1.19.dist-info → dao_ai-0.1.21.dist-info}/RECORD +24 -19
- dao_ai/genie/cache/semantic.py +0 -1004
- {dao_ai-0.1.19.dist-info → dao_ai-0.1.21.dist-info}/WHEEL +0 -0
- {dao_ai-0.1.19.dist-info → dao_ai-0.1.21.dist-info}/entry_points.txt +0 -0
- {dao_ai-0.1.19.dist-info → dao_ai-0.1.21.dist-info}/licenses/LICENSE +0 -0
dao_ai/cli.py
CHANGED
@@ -2,6 +2,7 @@ import argparse
 import getpass
 import json
 import os
+import signal
 import subprocess
 import sys
 import traceback
@@ -454,6 +455,18 @@ def handle_chat_command(options: Namespace) -> None:
     """Interactive chat REPL with the DAO AI system with Human-in-the-Loop support."""
     logger.debug("Starting chat session with DAO AI system...")

+    # Set up signal handler for clean Ctrl+C handling
+    def signal_handler(sig: int, frame: Any) -> None:
+        try:
+            print("\n\n👋 Chat session interrupted. Goodbye!")
+            sys.stdout.flush()
+        except Exception:
+            pass
+        sys.exit(0)
+
+    # Store original handler and set our handler
+    original_handler = signal.signal(signal.SIGINT, signal_handler)
+
     try:
         # Set default user_id if not provided
         if options.user_id is None:
@@ -667,6 +680,12 @@ def handle_chat_command(options: Namespace) -> None:

             try:
                 result = loop.run_until_complete(_invoke_with_hitl())
+            except KeyboardInterrupt:
+                # Re-raise to be caught by outer handler
+                raise
+            except asyncio.CancelledError:
+                # Treat cancellation like KeyboardInterrupt
+                raise KeyboardInterrupt
             except Exception as e:
                 logger.error(f"Error invoking graph: {e}")
                 print(f"\n❌ Error: {e}")
@@ -732,23 +751,34 @@ def handle_chat_command(options: Namespace) -> None:
                 logger.error(f"Response processing error: {e}")
                 logger.error(f"Stack trace: {traceback.format_exc()}")

-            except EOFError:
-                # Handle Ctrl-D
-                print
-
-
-
-
+            except (EOFError, KeyboardInterrupt):
+                # Handle Ctrl-D (EOF) or Ctrl-C (interrupt)
+                # Use try/except for print in case stdout is closed
+                try:
+                    print("\n\n👋 Goodbye! Chat session ended.")
+                    sys.stdout.flush()
+                except Exception:
+                    pass
                 break
             except Exception as e:
                 print(f"\n❌ Error: {e}")
                 logger.error(f"Chat error: {e}")
                 traceback.print_exc()

+    except (EOFError, KeyboardInterrupt):
+        # Handle interrupts during initialization
+        try:
+            print("\n\n👋 Chat session interrupted. Goodbye!")
+            sys.stdout.flush()
+        except Exception:
+            pass
     except Exception as e:
         logger.error(f"Failed to initialize chat session: {e}")
         print(f"❌ Failed to start chat session: {e}")
         sys.exit(1)
+    finally:
+        # Restore original signal handler
+        signal.signal(signal.SIGINT, original_handler)


 def handle_schema_command(options: Namespace) -> None:
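The cli.py changes wrap the chat REPL in a SIGINT handler that prints a farewell message, exits cleanly, and restores the previous handler when the session ends. A minimal standalone sketch of that pattern (not dao-ai code; the prompt string and echo loop are placeholders):

    import signal
    import sys
    from types import FrameType
    from typing import Optional

    def _goodbye(sig: int, frame: Optional[FrameType]) -> None:
        # Mirror the diff: print a farewell, flush, and exit without a traceback
        try:
            print("\n\n👋 Chat session interrupted. Goodbye!")
            sys.stdout.flush()
        except Exception:
            pass
        sys.exit(0)

    original_handler = signal.signal(signal.SIGINT, _goodbye)
    try:
        while True:
            line = input("> ")      # Ctrl+C anywhere in here triggers _goodbye
            print(f"echo: {line}")
    except EOFError:                # Ctrl+D still ends the loop cleanly
        print("\n👋 Goodbye! Chat session ended.")
    finally:
        # Restore whatever handler was installed before the session
        signal.signal(signal.SIGINT, original_handler)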
dao_ai/config.py
CHANGED
@@ -20,6 +20,10 @@ from typing import (
 )

 if TYPE_CHECKING:
+    from dao_ai.genie.cache.context_aware.optimization import (
+        ContextAwareCacheEvalDataset,
+        ThresholdOptimizationResult,
+    )
     from dao_ai.state import Context

 from databricks.sdk import WorkspaceClient
@@ -1710,7 +1714,7 @@ class GenieLRUCacheParametersModel(BaseModel):
     warehouse: WarehouseModel


-class GenieSemanticCacheParametersModel(BaseModel):
+class GenieContextAwareCacheParametersModel(BaseModel):
     model_config = ConfigDict(use_enum_values=True, extra="forbid")
     time_to_live_seconds: int | None = (
         60 * 60 * 24
@@ -1728,10 +1732,21 @@ class GenieSemanticCacheParametersModel(BaseModel):
     database: DatabaseModel
     warehouse: WarehouseModel
     table_name: str = "genie_semantic_cache"
-    context_window_size: int =
+    context_window_size: int = 2 # Number of previous turns to include for context
     max_context_tokens: int = (
         2000 # Maximum context length to prevent extremely long embeddings
     )
+    # Prompt history configuration
+    # Prompt history is always enabled - it stores all user prompts to maintain
+    # conversation context for accurate semantic matching even when cache hits occur
+    prompt_history_table: str = "genie_prompt_history" # Table name for prompt history
+    max_prompt_history_length: int = 50 # Maximum prompts to keep per conversation
+    use_genie_api_for_history: bool = (
+        False # Fallback to Genie API if local history empty
+    )
+    prompt_history_ttl_seconds: int | None = (
+        None # TTL for prompts (None = use cache TTL)
+    )

     @model_validator(mode="after")
     def compute_and_validate_weights(self) -> Self:
@@ -1805,7 +1820,7 @@ class GenieInMemorySemanticCacheParametersModel(BaseModel):
     - Cache persistence across restarts is not required
     - Cache sizes are moderate (hundreds to low thousands of entries)

-    For multi-instance deployments or large cache sizes, use
+    For multi-instance deployments or large cache sizes, use GenieContextAwareCacheParametersModel
     with PostgreSQL backend instead.
     """

@@ -2301,6 +2316,7 @@ class FunctionType(str, Enum):
     FACTORY = "factory"
     UNITY_CATALOG = "unity_catalog"
     MCP = "mcp"
+    INLINE = "inline"


 class HumanInTheLoopModel(BaseModel):
@@ -2402,6 +2418,72 @@ class FactoryFunctionModel(BaseFunctionModel, HasFullName):
         return self


+class InlineFunctionModel(BaseFunctionModel):
+    """
+    Inline function model for defining tool code directly in YAML configuration.
+
+    This allows you to define simple tools without creating separate Python files.
+    The code should define a function decorated with @tool from langchain.tools.
+
+    Example YAML:
+        tools:
+          calculator:
+            name: calculator
+            function:
+              type: inline
+              code: |
+                from langchain.tools import tool
+
+                @tool
+                def calculator(expression: str) -> str:
+                    '''Evaluate a mathematical expression.'''
+                    return str(eval(expression))
+
+    The code block must:
+    - Import @tool from langchain.tools
+    - Define exactly one function decorated with @tool
+    - The function name becomes the tool name
+    """
+
+    model_config = ConfigDict(use_enum_values=True, extra="forbid")
+    type: Literal[FunctionType.INLINE] = FunctionType.INLINE
+    code: str = Field(
+        ...,
+        description="Python code defining a tool function decorated with @tool",
+    )
+
+    def as_tools(self, **kwargs: Any) -> Sequence[RunnableLike]:
+        """Execute the inline code and return the tool(s) defined in it."""
+        from langchain_core.tools import BaseTool
+
+        # Create a namespace for executing the code
+        namespace: dict[str, Any] = {}
+
+        # Execute the code in the namespace
+        try:
+            exec(self.code, namespace)
+        except Exception as e:
+            raise ValueError(f"Failed to execute inline tool code: {e}") from e
+
+        # Find all tools (functions decorated with @tool) in the namespace
+        tools: list[RunnableLike] = []
+        for name, obj in namespace.items():
+            if isinstance(obj, BaseTool):
+                tools.append(obj)
+
+        if not tools:
+            raise ValueError(
+                "Inline code must define at least one function decorated with @tool. "
+                "Make sure to import and use: from langchain.tools import tool"
+            )
+
+        logger.debug(
+            "Created inline tools",
+            tool_names=[t.name for t in tools if hasattr(t, "name")],
+        )
+        return tools
+
+
 class TransportType(str, Enum):
     STREAMABLE_HTTP = "streamable_http"
     STDIO = "stdio"
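The new InlineFunctionModel loads tools by exec-ing the YAML-supplied code string into a fresh namespace and collecting every BaseTool instance it finds. A standalone sketch of that mechanism (assumes langchain and langchain-core are installed; the calculator code string is the docstring's own example, not a recommended production tool):

    import textwrap

    from langchain_core.tools import BaseTool

    # The YAML `code:` block from the InlineFunctionModel docstring, as a Python string.
    code = textwrap.dedent("""
        from langchain.tools import tool

        @tool
        def calculator(expression: str) -> str:
            '''Evaluate a mathematical expression.'''
            return str(eval(expression))
    """)

    namespace: dict = {}
    exec(code, namespace)  # run the supplied code in an isolated namespace
    tools = [obj for obj in namespace.values() if isinstance(obj, BaseTool)]
    print([t.name for t in tools])  # -> ['calculator']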
@@ -2722,6 +2804,7 @@ AnyTool: TypeAlias = (
     Union[
         PythonFunctionModel,
         FactoryFunctionModel,
+        InlineFunctionModel,
         UnityCatalogFunctionModel,
         McpFunctionModel,
     ]
@@ -3644,20 +3727,25 @@ class OptimizationsModel(BaseModel):
     prompt_optimizations: dict[str, PromptOptimizationModel] = Field(
         default_factory=dict
     )
+    cache_threshold_optimizations: dict[str, "ContextAwareCacheOptimizationModel"] = (
+        Field(default_factory=dict)
+    )

-    def optimize(self, w: WorkspaceClient | None = None) -> dict[str,
+    def optimize(self, w: WorkspaceClient | None = None) -> dict[str, Any]:
         """
-        Optimize all prompts in this configuration.
+        Optimize all prompts and cache thresholds in this configuration.

         This method:
         1. Ensures all training datasets are created/registered in MLflow
         2. Runs each prompt optimization
+        3. Runs each cache threshold optimization

         Args:
             w: Optional WorkspaceClient for Databricks operations

         Returns:
-            dict[str,
+            dict[str, Any]: Dictionary with 'prompts' and 'cache_thresholds' keys
+            containing the respective optimization results
         """
         # First, ensure all training datasets are created/registered in MLflow
         logger.info(f"Ensuring {len(self.training_datasets)} training datasets exist")
@@ -3665,11 +3753,178 @@ class OptimizationsModel(BaseModel):
             logger.debug(f"Creating/updating dataset: {dataset_name}")
             dataset_model.as_dataset()

-        # Run optimizations
-
+        # Run prompt optimizations
+        prompt_results: dict[str, PromptModel] = {}
         for name, optimization in self.prompt_optimizations.items():
-
-
+            prompt_results[name] = optimization.optimize(w)
+
+        # Run cache threshold optimizations
+        cache_results: dict[str, Any] = {}
+        for name, optimization in self.cache_threshold_optimizations.items():
+            cache_results[name] = optimization.optimize(w)
+
+        return {"prompts": prompt_results, "cache_thresholds": cache_results}
+
+
+class ContextAwareCacheEvalEntryModel(BaseModel):
+    """Single evaluation entry for context-aware cache threshold optimization.
+
+    Represents a pair of question/context combinations to evaluate
+    whether the cache should return a hit or miss.
+
+    Example:
+        entry:
+          question: "What are total sales?"
+          question_embedding: [0.1, 0.2, ...] # Pre-computed
+          context: "Previous: Show me revenue"
+          context_embedding: [0.1, 0.2, ...]
+          cached_question: "Show total sales"
+          cached_question_embedding: [0.1, 0.2, ...]
+          cached_context: "Previous: Show me revenue"
+          cached_context_embedding: [0.1, 0.2, ...]
+          expected_match: true
+    """
+
+    model_config = ConfigDict(use_enum_values=True, extra="forbid")
+    question: str
+    question_embedding: list[float]
+    context: str = ""
+    context_embedding: list[float] = Field(default_factory=list)
+    cached_question: str
+    cached_question_embedding: list[float]
+    cached_context: str = ""
+    cached_context_embedding: list[float] = Field(default_factory=list)
+    expected_match: Optional[bool] = None # None = use LLM judge
+
+
+class ContextAwareCacheEvalDatasetModel(BaseModel):
+    """Dataset for context-aware cache threshold optimization.
+
+    Contains pairs of questions/contexts to evaluate whether thresholds
+    correctly identify semantic matches.
+
+    Example:
+        dataset:
+          name: my_cache_eval_dataset
+          description: "Evaluation data for cache tuning"
+          entries:
+            - question: "What are total sales?"
+              # ... entry fields
+    """
+
+    model_config = ConfigDict(use_enum_values=True, extra="forbid")
+    name: str
+    description: str = ""
+    entries: list[ContextAwareCacheEvalEntryModel] = Field(default_factory=list)
+
+    def as_eval_dataset(self) -> "ContextAwareCacheEvalDataset":
+        """Convert to internal evaluation dataset format."""
+        from dao_ai.genie.cache.context_aware.optimization import (
+            ContextAwareCacheEvalDataset,
+            ContextAwareCacheEvalEntry,
+        )
+
+        entries = [
+            ContextAwareCacheEvalEntry(
+                question=e.question,
+                question_embedding=e.question_embedding,
+                context=e.context,
+                context_embedding=e.context_embedding,
+                cached_question=e.cached_question,
+                cached_question_embedding=e.cached_question_embedding,
+                cached_context=e.cached_context,
+                cached_context_embedding=e.cached_context_embedding,
+                expected_match=e.expected_match,
+            )
+            for e in self.entries
+        ]
+
+        return ContextAwareCacheEvalDataset(
+            name=self.name,
+            entries=entries,
+            description=self.description,
+        )
+
+
+class ContextAwareCacheOptimizationModel(BaseModel):
+    """Configuration for context-aware cache threshold optimization.
+
+    Uses Optuna Bayesian optimization to find optimal threshold values
+    that maximize cache hit accuracy (F1 score by default).
+
+    Example:
+        optimizations:
+          cache_threshold_optimizations:
+            my_optimization:
+              name: optimize_cache_thresholds
+              cache_parameters: *my_cache_params
+              dataset: *my_eval_dataset
+              judge_model: databricks-meta-llama-3-3-70b-instruct
+              n_trials: 50
+              metric: f1
+    """
+
+    model_config = ConfigDict(use_enum_values=True, extra="forbid")
+    name: str
+    cache_parameters: Optional[GenieContextAwareCacheParametersModel] = None
+    dataset: ContextAwareCacheEvalDatasetModel
+    judge_model: Optional[LLMModel | str] = "databricks-meta-llama-3-3-70b-instruct"
+    n_trials: int = 50
+    metric: Literal["f1", "precision", "recall", "fbeta"] = "f1"
+    beta: float = 1.0 # For fbeta metric
+    seed: Optional[int] = None
+
+    def optimize(
+        self, w: WorkspaceClient | None = None
+    ) -> "ThresholdOptimizationResult":
+        """
+        Optimize semantic cache thresholds.
+
+        Args:
+            w: Optional WorkspaceClient (not used, kept for API compatibility)
+
+        Returns:
+            ThresholdOptimizationResult with optimized thresholds
+        """
+        from dao_ai.genie.cache.context_aware.optimization import (
+            ThresholdOptimizationResult,
+            optimize_context_aware_cache_thresholds,
+        )
+
+        # Convert dataset
+        eval_dataset = self.dataset.as_eval_dataset()
+
+        # Get original thresholds from cache_parameters
+        original_thresholds: dict[str, float] | None = None
+        if self.cache_parameters:
+            original_thresholds = {
+                "similarity_threshold": self.cache_parameters.similarity_threshold,
+                "context_similarity_threshold": self.cache_parameters.context_similarity_threshold,
+                "question_weight": self.cache_parameters.question_weight or 0.6,
+            }
+
+        # Get judge model
+        judge_model_name: str
+        if isinstance(self.judge_model, str):
+            judge_model_name = self.judge_model
+        elif self.judge_model:
+            judge_model_name = self.judge_model.uri
+        else:
+            judge_model_name = "databricks-meta-llama-3-3-70b-instruct"
+
+        result: ThresholdOptimizationResult = optimize_context_aware_cache_thresholds(
+            dataset=eval_dataset,
+            original_thresholds=original_thresholds,
+            judge_model=judge_model_name,
+            n_trials=self.n_trials,
+            metric=self.metric,
+            beta=self.beta,
+            register_if_improved=True,
+            study_name=self.name,
+            seed=self.seed,
+        )
+
+        return result


 class DatasetFormat(str, Enum):
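The eval-dataset models added above can also be built directly in Python instead of YAML. A hedged sketch using only fields shown in the diff (the three-element embeddings are toy placeholders for real embedding vectors, and converting with as_eval_dataset assumes the optional optimization dependencies are installed):

    from dao_ai.config import (
        ContextAwareCacheEvalDatasetModel,
        ContextAwareCacheEvalEntryModel,
    )

    entry = ContextAwareCacheEvalEntryModel(
        question="What are total sales?",
        question_embedding=[0.1, 0.2, 0.3],
        cached_question="Show total sales",
        cached_question_embedding=[0.1, 0.2, 0.3],
        expected_match=True,  # None would defer to the LLM judge instead
    )
    dataset = ContextAwareCacheEvalDatasetModel(
        name="my_cache_eval_dataset",
        description="Evaluation data for cache tuning",
        entries=[entry],
    )

    # Convert to the internal format consumed by the threshold optimizer
    eval_dataset = dataset.as_eval_dataset()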
dao_ai/genie/__init__.py
CHANGED
@@ -5,34 +5,82 @@ This package provides core Genie functionality that can be used across
 different contexts (tools, direct integration, etc.).

 Main exports:
--
+- Genie: Extended Genie class that captures message_id in responses
+- GenieResponse: Extended response class with message_id field
+- GenieService: Service implementation wrapping Genie
 - GenieServiceBase: Abstract base class for service implementations
+- GenieFeedbackRating: Enum for feedback ratings (POSITIVE, NEGATIVE, NONE)
+
+Original databricks_ai_bridge classes (aliased):
+- DatabricksGenie: Original Genie from databricks_ai_bridge
+- DatabricksGenieResponse: Original GenieResponse from databricks_ai_bridge

 Cache implementations are available in the cache subpackage:
 - dao_ai.genie.cache.lru: LRU (Least Recently Used) cache
-- dao_ai.genie.cache.
+- dao_ai.genie.cache.context_aware.postgres: PostgreSQL context-aware cache
+- dao_ai.genie.cache.context_aware.in_memory: In-memory context-aware cache

 Example usage:
-    from dao_ai.genie import GenieService
-
+    from dao_ai.genie import Genie, GenieService, GenieFeedbackRating
+
+    # Create Genie with message_id support
+    genie = Genie(space_id="my-space")
+    response = genie.ask_question("What are total sales?")
+    print(response.message_id) # Now available!
+
+    # Use with GenieService
+    service = GenieService(genie)
+    result = service.ask_question("What are total sales?")
+
+    # Send feedback using captured message_id
+    service.send_feedback(
+        conversation_id=result.response.conversation_id,
+        rating=GenieFeedbackRating.POSITIVE,
+        message_id=result.message_id, # Available from CacheResult
+        was_cache_hit=result.cache_hit,
+    )
 """

+from databricks.sdk.service.dashboards import GenieFeedbackRating
+
 from dao_ai.genie.cache import (
     CacheResult,
+    ContextAwareGenieService,
     GenieServiceBase,
+    InMemoryContextAwareGenieService,
     LRUCacheService,
-
+    PostgresContextAwareGenieService,
     SQLCacheEntry,
 )
-from dao_ai.genie.
+from dao_ai.genie.cache.base import get_latest_message_id, get_message_content
+from dao_ai.genie.core import (
+    DatabricksGenie,
+    DatabricksGenieResponse,
+    Genie,
+    GenieResponse,
+    GenieService,
+)

 __all__ = [
+    # Extended Genie classes (primary - use these)
+    "Genie",
+    "GenieResponse",
+    # Original databricks_ai_bridge classes (aliased)
+    "DatabricksGenie",
+    "DatabricksGenieResponse",
     # Service classes
     "GenieService",
     "GenieServiceBase",
+    # Feedback
+    "GenieFeedbackRating",
+    # Helper functions
+    "get_latest_message_id",
+    "get_message_content",
     # Cache types (from cache subpackage)
     "CacheResult",
+    "ContextAwareGenieService",
+    "InMemoryContextAwareGenieService",
     "LRUCacheService",
-    "
+    "PostgresContextAwareGenieService",
     "SQLCacheEntry",
 ]
dao_ai/genie/cache/__init__.py
CHANGED
@@ -6,15 +6,16 @@ chained together using the decorator pattern.

 Available cache implementations:
 - LRUCacheService: In-memory LRU cache with O(1) exact match lookup
--
+- PostgresContextAwareGenieService: PostgreSQL pg_vector-based context-aware cache
+- InMemoryContextAwareGenieService: In-memory context-aware cache

 Example usage:
-    from dao_ai.genie.cache import LRUCacheService,
+    from dao_ai.genie.cache import LRUCacheService, PostgresContextAwareGenieService

-    # Chain caches: LRU (checked first) ->
-    genie_service =
+    # Chain caches: LRU (checked first) -> Context-aware (checked second) -> Genie
+    genie_service = PostgresContextAwareGenieService(
         impl=GenieService(genie),
-        parameters=
+        parameters=context_aware_params,
     )
     genie_service = LRUCacheService(
         impl=genie_service,
@@ -27,10 +28,25 @@ from dao_ai.genie.cache.base import (
     GenieServiceBase,
     SQLCacheEntry,
 )
+from dao_ai.genie.cache.context_aware import (
+    ContextAwareGenieService,
+    InMemoryContextAwareGenieService,
+    PersistentContextAwareGenieCacheService,
+    PostgresContextAwareGenieService,
+)
+
+# Re-export optimization from context_aware for backwards compatibility
+from dao_ai.genie.cache.context_aware.optimization import (
+    ContextAwareCacheEvalDataset,
+    ContextAwareCacheEvalEntry,
+    ThresholdOptimizationResult,
+    clear_judge_cache,
+    generate_eval_dataset_from_cache,
+    optimize_context_aware_cache_thresholds,
+    semantic_match_judge,
+)
 from dao_ai.genie.cache.core import execute_sql_via_warehouse
-from dao_ai.genie.cache.in_memory_semantic import InMemorySemanticCacheService
 from dao_ai.genie.cache.lru import LRUCacheService
-from dao_ai.genie.cache.semantic import SemanticCacheService

 __all__ = [
     # Base types
@@ -38,8 +54,19 @@ __all__ = [
     "GenieServiceBase",
     "SQLCacheEntry",
     "execute_sql_via_warehouse",
+    # Context-aware base classes
+    "ContextAwareGenieService",
+    "PersistentContextAwareGenieCacheService",
     # Cache implementations
-    "
+    "InMemoryContextAwareGenieService",
     "LRUCacheService",
-    "
+    "PostgresContextAwareGenieService",
+    # Optimization
+    "ContextAwareCacheEvalDataset",
+    "ContextAwareCacheEvalEntry",
+    "ThresholdOptimizationResult",
+    "clear_judge_cache",
+    "generate_eval_dataset_from_cache",
+    "optimize_context_aware_cache_thresholds",
+    "semantic_match_judge",
 ]
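The optimization helpers re-exported here can also be driven without the YAML config layer. A sketch whose keyword arguments mirror the ContextAwareCacheOptimizationModel.optimize() call in dao_ai/config.py (it assumes a Databricks environment for the LLM judge and that the optional Optuna dependency is installed; the threshold values and toy embeddings are illustrative only):

    from dao_ai.genie.cache import (
        ContextAwareCacheEvalDataset,
        ContextAwareCacheEvalEntry,
        optimize_context_aware_cache_thresholds,
    )

    dataset = ContextAwareCacheEvalDataset(
        name="cache_eval",
        description="Evaluation data for cache tuning",
        entries=[
            ContextAwareCacheEvalEntry(
                question="What are total sales?",
                question_embedding=[0.1, 0.2, 0.3],
                context="Previous: Show me revenue",
                context_embedding=[0.1, 0.2, 0.3],
                cached_question="Show total sales",
                cached_question_embedding=[0.1, 0.2, 0.3],
                cached_context="Previous: Show me revenue",
                cached_context_embedding=[0.1, 0.2, 0.3],
                expected_match=True,  # labelled; None would fall back to the judge model
            ),
        ],
    )

    result = optimize_context_aware_cache_thresholds(
        dataset=dataset,
        original_thresholds={
            "similarity_threshold": 0.85,
            "context_similarity_threshold": 0.80,
            "question_weight": 0.6,
        },
        judge_model="databricks-meta-llama-3-3-70b-instruct",
        n_trials=50,
        metric="f1",
        beta=1.0,
        register_if_improved=False,
        study_name="optimize_cache_thresholds",
        seed=42,
    )
    print(result)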