dao-ai 0.1.20__py3-none-any.whl → 0.1.21__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
dao_ai/config.py CHANGED
@@ -20,8 +20,8 @@ from typing import (
 )
 
 if TYPE_CHECKING:
-    from dao_ai.genie.cache.optimization import (
-        SemanticCacheEvalDataset,
+    from dao_ai.genie.cache.context_aware.optimization import (
+        ContextAwareCacheEvalDataset,
         ThresholdOptimizationResult,
     )
     from dao_ai.state import Context
@@ -2316,6 +2316,7 @@ class FunctionType(str, Enum):
     FACTORY = "factory"
     UNITY_CATALOG = "unity_catalog"
     MCP = "mcp"
+    INLINE = "inline"
 
 
 class HumanInTheLoopModel(BaseModel):
@@ -2417,6 +2418,72 @@ class FactoryFunctionModel(BaseFunctionModel, HasFullName):
         return self
 
 
+class InlineFunctionModel(BaseFunctionModel):
+    """
+    Inline function model for defining tool code directly in YAML configuration.
+
+    This allows you to define simple tools without creating separate Python files.
+    The code should define a function decorated with @tool from langchain.tools.
+
+    Example YAML:
+        tools:
+          calculator:
+            name: calculator
+            function:
+              type: inline
+              code: |
+                from langchain.tools import tool
+
+                @tool
+                def calculator(expression: str) -> str:
+                    '''Evaluate a mathematical expression.'''
+                    return str(eval(expression))
+
+    The code block must:
+    - Import @tool from langchain.tools
+    - Define exactly one function decorated with @tool
+    - The function name becomes the tool name
+    """
+
+    model_config = ConfigDict(use_enum_values=True, extra="forbid")
+    type: Literal[FunctionType.INLINE] = FunctionType.INLINE
+    code: str = Field(
+        ...,
+        description="Python code defining a tool function decorated with @tool",
+    )
+
+    def as_tools(self, **kwargs: Any) -> Sequence[RunnableLike]:
+        """Execute the inline code and return the tool(s) defined in it."""
+        from langchain_core.tools import BaseTool
+
+        # Create a namespace for executing the code
+        namespace: dict[str, Any] = {}
+
+        # Execute the code in the namespace
+        try:
+            exec(self.code, namespace)
+        except Exception as e:
+            raise ValueError(f"Failed to execute inline tool code: {e}") from e
+
+        # Find all tools (functions decorated with @tool) in the namespace
+        tools: list[RunnableLike] = []
+        for name, obj in namespace.items():
+            if isinstance(obj, BaseTool):
+                tools.append(obj)
+
+        if not tools:
+            raise ValueError(
+                "Inline code must define at least one function decorated with @tool. "
+                "Make sure to import and use: from langchain.tools import tool"
+            )
+
+        logger.debug(
+            "Created inline tools",
+            tool_names=[t.name for t in tools if hasattr(t, "name")],
+        )
+        return tools
+
+
 class TransportType(str, Enum):
     STREAMABLE_HTTP = "streamable_http"
     STDIO = "stdio"
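For orientation, here is a minimal sketch of exercising the new inline function type directly in Python rather than through YAML. It assumes BaseFunctionModel adds no required fields beyond what this diff shows; in practice the model is normally built by dao-ai's config loader.

    # Hedged sketch (not from the diff): constructing InlineFunctionModel directly.
    from dao_ai.config import InlineFunctionModel

    model = InlineFunctionModel(
        code=(
            "from langchain.tools import tool\n"
            "\n"
            "@tool\n"
            "def add(a: int, b: int) -> int:\n"
            "    '''Add two integers.'''\n"
            "    return a + b\n"
        )
    )

    tools = model.as_tools()
    print(tools[0].name)                      # "add"
    print(tools[0].invoke({"a": 2, "b": 3}))  # 5

Note that as_tools() collects every BaseTool instance left in the exec namespace, so the "exactly one function" rule in the docstring is guidance rather than an enforced constraint.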
@@ -2737,6 +2804,7 @@ AnyTool: TypeAlias = (
     Union[
         PythonFunctionModel,
         FactoryFunctionModel,
+        InlineFunctionModel,
         UnityCatalogFunctionModel,
         McpFunctionModel,
     ]
@@ -3659,20 +3727,25 @@ class OptimizationsModel(BaseModel):
     prompt_optimizations: dict[str, PromptOptimizationModel] = Field(
         default_factory=dict
     )
+    cache_threshold_optimizations: dict[str, "ContextAwareCacheOptimizationModel"] = (
+        Field(default_factory=dict)
+    )
 
-    def optimize(self, w: WorkspaceClient | None = None) -> dict[str, PromptModel]:
+    def optimize(self, w: WorkspaceClient | None = None) -> dict[str, Any]:
         """
-        Optimize all prompts in this configuration.
+        Optimize all prompts and cache thresholds in this configuration.
 
         This method:
         1. Ensures all training datasets are created/registered in MLflow
         2. Runs each prompt optimization
+        3. Runs each cache threshold optimization
 
         Args:
            w: Optional WorkspaceClient for Databricks operations
 
         Returns:
-            dict[str, PromptModel]: Dictionary mapping optimization names to optimized prompts
+            dict[str, Any]: Dictionary with 'prompts' and 'cache_thresholds' keys
+                containing the respective optimization results
         """
         # First, ensure all training datasets are created/registered in MLflow
         logger.info(f"Ensuring {len(self.training_datasets)} training datasets exist")
@@ -3680,15 +3753,21 @@ class OptimizationsModel(BaseModel):
             logger.debug(f"Creating/updating dataset: {dataset_name}")
             dataset_model.as_dataset()
 
-        # Run optimizations
-        results: dict[str, PromptModel] = {}
+        # Run prompt optimizations
+        prompt_results: dict[str, PromptModel] = {}
         for name, optimization in self.prompt_optimizations.items():
-            results[name] = optimization.optimize(w)
-        return results
+            prompt_results[name] = optimization.optimize(w)
+
+        # Run cache threshold optimizations
+        cache_results: dict[str, Any] = {}
+        for name, optimization in self.cache_threshold_optimizations.items():
+            cache_results[name] = optimization.optimize(w)
+
+        return {"prompts": prompt_results, "cache_thresholds": cache_results}
 
 
-class SemanticCacheEvalEntryModel(BaseModel):
-    """Single evaluation entry for semantic cache threshold optimization.
+class ContextAwareCacheEvalEntryModel(BaseModel):
+    """Single evaluation entry for context-aware cache threshold optimization.
 
     Represents a pair of question/context combinations to evaluate
     whether the cache should return a hit or miss.
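Because optimize() (changed above) now returns a two-key dictionary instead of a flat prompt mapping, callers upgrading from 0.1.20 need to unpack it. A minimal sketch, assuming the parent config object exposes this OptimizationsModel as `config.optimizations` (an illustrative name, not confirmed by this diff):

    # Hedged sketch: consuming the new optimize() return shape in 0.1.21.
    results = config.optimizations.optimize()

    for name, prompt in results["prompts"].items():
        print(f"prompt optimization {name!r} -> {prompt}")

    for name, threshold_result in results["cache_thresholds"].items():
        print(f"cache threshold optimization {name!r} -> {threshold_result}")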
@@ -3718,8 +3797,8 @@ class SemanticCacheEvalEntryModel(BaseModel):
     expected_match: Optional[bool] = None  # None = use LLM judge
 
 
-class SemanticCacheEvalDatasetModel(BaseModel):
-    """Dataset for semantic cache threshold optimization.
+class ContextAwareCacheEvalDatasetModel(BaseModel):
+    """Dataset for context-aware cache threshold optimization.
 
     Contains pairs of questions/contexts to evaluate whether thresholds
     correctly identify semantic matches.
@@ -3736,17 +3815,17 @@ class SemanticCacheEvalDatasetModel(BaseModel):
     model_config = ConfigDict(use_enum_values=True, extra="forbid")
     name: str
     description: str = ""
-    entries: list[SemanticCacheEvalEntryModel] = Field(default_factory=list)
+    entries: list[ContextAwareCacheEvalEntryModel] = Field(default_factory=list)
 
-    def as_eval_dataset(self) -> "SemanticCacheEvalDataset":
+    def as_eval_dataset(self) -> "ContextAwareCacheEvalDataset":
         """Convert to internal evaluation dataset format."""
-        from dao_ai.genie.cache.optimization import (
-            SemanticCacheEvalDataset,
-            SemanticCacheEvalEntry,
+        from dao_ai.genie.cache.context_aware.optimization import (
+            ContextAwareCacheEvalDataset,
+            ContextAwareCacheEvalEntry,
         )
 
         entries = [
-            SemanticCacheEvalEntry(
+            ContextAwareCacheEvalEntry(
                 question=e.question,
                 question_embedding=e.question_embedding,
                 context=e.context,
@@ -3760,33 +3839,35 @@ class SemanticCacheEvalDatasetModel(BaseModel):
             for e in self.entries
         ]
 
-        return SemanticCacheEvalDataset(
+        return ContextAwareCacheEvalDataset(
             name=self.name,
             entries=entries,
             description=self.description,
         )
 
 
-class SemanticCacheThresholdOptimizationModel(BaseModel):
-    """Configuration for semantic cache threshold optimization.
+class ContextAwareCacheOptimizationModel(BaseModel):
+    """Configuration for context-aware cache threshold optimization.
 
     Uses Optuna Bayesian optimization to find optimal threshold values
     that maximize cache hit accuracy (F1 score by default).
 
     Example:
-        threshold_optimization:
-          name: optimize_cache_thresholds
-          cache_parameters: *my_cache_params
-          dataset: *my_eval_dataset
-          judge_model: databricks-meta-llama-3-3-70b-instruct
-          n_trials: 50
-          metric: f1
+        optimizations:
+          cache_threshold_optimizations:
+            my_optimization:
+              name: optimize_cache_thresholds
+              cache_parameters: *my_cache_params
+              dataset: *my_eval_dataset
+              judge_model: databricks-meta-llama-3-3-70b-instruct
+              n_trials: 50
+              metric: f1
     """
 
     model_config = ConfigDict(use_enum_values=True, extra="forbid")
     name: str
     cache_parameters: Optional[GenieContextAwareCacheParametersModel] = None
-    dataset: SemanticCacheEvalDatasetModel
+    dataset: ContextAwareCacheEvalDatasetModel
     judge_model: Optional[LLMModel | str] = "databricks-meta-llama-3-3-70b-instruct"
     n_trials: int = 50
     metric: Literal["f1", "precision", "recall", "fbeta"] = "f1"
@@ -3805,9 +3886,9 @@ class SemanticCacheThresholdOptimizationModel(BaseModel):
         Returns:
             ThresholdOptimizationResult with optimized thresholds
         """
-        from dao_ai.genie.cache.optimization import (
+        from dao_ai.genie.cache.context_aware.optimization import (
             ThresholdOptimizationResult,
-            optimize_semantic_cache_thresholds,
+            optimize_context_aware_cache_thresholds,
         )
 
         # Convert dataset
@@ -3831,7 +3912,7 @@ class SemanticCacheThresholdOptimizationModel(BaseModel):
         else:
             judge_model_name = "databricks-meta-llama-3-3-70b-instruct"
 
-        result: ThresholdOptimizationResult = optimize_semantic_cache_thresholds(
+        result: ThresholdOptimizationResult = optimize_context_aware_cache_thresholds(
             dataset=eval_dataset,
             original_thresholds=original_thresholds,
             judge_model=judge_model_name,
dao_ai/genie/cache/__init__.py CHANGED
@@ -34,17 +34,19 @@ from dao_ai.genie.cache.context_aware import (
     PersistentContextAwareGenieCacheService,
     PostgresContextAwareGenieService,
 )
-from dao_ai.genie.cache.core import execute_sql_via_warehouse
-from dao_ai.genie.cache.lru import LRUCacheService
-from dao_ai.genie.cache.optimization import (
-    SemanticCacheEvalDataset,
-    SemanticCacheEvalEntry,
+
+# Re-export optimization from context_aware for backwards compatibility
+from dao_ai.genie.cache.context_aware.optimization import (
+    ContextAwareCacheEvalDataset,
+    ContextAwareCacheEvalEntry,
     ThresholdOptimizationResult,
     clear_judge_cache,
     generate_eval_dataset_from_cache,
-    optimize_semantic_cache_thresholds,
+    optimize_context_aware_cache_thresholds,
     semantic_match_judge,
 )
+from dao_ai.genie.cache.core import execute_sql_via_warehouse
+from dao_ai.genie.cache.lru import LRUCacheService
 
 __all__ = [
     # Base types
@@ -60,11 +62,11 @@ __all__ = [
     "LRUCacheService",
     "PostgresContextAwareGenieService",
     # Optimization
-    "SemanticCacheEvalDataset",
-    "SemanticCacheEvalEntry",
+    "ContextAwareCacheEvalDataset",
+    "ContextAwareCacheEvalEntry",
     "ThresholdOptimizationResult",
     "clear_judge_cache",
     "generate_eval_dataset_from_cache",
-    "optimize_semantic_cache_thresholds",
+    "optimize_context_aware_cache_thresholds",
     "semantic_match_judge",
 ]
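The comment in the hunk above says the optimization symbols are re-exported from the package root for backwards compatibility, so both import paths should resolve to the same objects. A quick sanity-check sketch, assuming these are eager module-level imports (as the `__all__` entries suggest) rather than TYPE_CHECKING-only imports:

    # Hedged sketch: old-style (package root) and new-style (context_aware) imports.
    from dao_ai.genie.cache import optimize_context_aware_cache_thresholds as via_root
    from dao_ai.genie.cache.context_aware.optimization import (
        optimize_context_aware_cache_thresholds as via_module,
    )

    assert via_root is via_module  # same function object, just re-exported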
dao_ai/genie/cache/context_aware/__init__.py CHANGED
@@ -12,10 +12,23 @@ Available implementations:
 Base classes:
 - ContextAwareGenieService: Abstract base for all context-aware cache implementations
 - PersistentContextAwareGenieCacheService: Abstract base for database-backed implementations
+
+Optimization:
+- optimize_context_aware_cache_thresholds: Tune cache thresholds using Bayesian optimization
+- generate_eval_dataset_from_cache: Generate evaluation datasets from cache entries
 """
 
 from dao_ai.genie.cache.context_aware.base import ContextAwareGenieService
 from dao_ai.genie.cache.context_aware.in_memory import InMemoryContextAwareGenieService
+from dao_ai.genie.cache.context_aware.optimization import (
+    ContextAwareCacheEvalDataset,
+    ContextAwareCacheEvalEntry,
+    ThresholdOptimizationResult,
+    clear_judge_cache,
+    generate_eval_dataset_from_cache,
+    optimize_context_aware_cache_thresholds,
+    semantic_match_judge,
+)
 from dao_ai.genie.cache.context_aware.persistent import (
     PersistentContextAwareGenieCacheService,
 )
@@ -28,4 +41,12 @@ __all__ = [
     # Implementations
     "InMemoryContextAwareGenieService",
     "PostgresContextAwareGenieService",
+    # Optimization
+    "ContextAwareCacheEvalDataset",
+    "ContextAwareCacheEvalEntry",
+    "ThresholdOptimizationResult",
+    "clear_judge_cache",
+    "generate_eval_dataset_from_cache",
+    "optimize_context_aware_cache_thresholds",
+    "semantic_match_judge",
 ]
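Taken together, these exports support a workflow of harvesting cache entries into an evaluation dataset and tuning thresholds against it. A sketch under stated assumptions: `cache` is a concrete ContextAwareGenieService instance and `original_thresholds` holds the current threshold values (its exact type is not shown in this diff); the keyword arguments mirror the call made in config.py above.

    from dao_ai.genie.cache.context_aware import (
        generate_eval_dataset_from_cache,
        optimize_context_aware_cache_thresholds,
    )

    # Harvest cached question/context pairs, embeddings included ...
    entries = cache.get_entries(include_embeddings=True, limit=100)
    dataset = generate_eval_dataset_from_cache(entries)

    # ... then search for thresholds that maximize F1 against the dataset.
    result = optimize_context_aware_cache_thresholds(
        dataset=dataset,
        original_thresholds=original_thresholds,  # assumed: current thresholds
        judge_model="databricks-meta-llama-3-3-70b-instruct",
    )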
dao_ai/genie/cache/context_aware/base.py CHANGED
@@ -15,12 +15,13 @@ Subclasses must implement storage-specific methods:
 - invalidate_expired(): Remove expired entries
 - clear(): Clear all entries for space
 - stats(): Return cache statistics
+- get_entries(): Retrieve cache entries with filtering
 """
 
 from __future__ import annotations
 
 from abc import abstractmethod
-from datetime import timedelta
+from datetime import datetime, timedelta
 from typing import Any, Self, TypeVar
 
 import mlflow
@@ -315,6 +316,58 @@ class ContextAwareGenieService(GenieServiceBase):
         """
         pass
 
+    @abstractmethod
+    def get_entries(
+        self,
+        limit: int | None = None,
+        offset: int | None = None,
+        include_embeddings: bool = False,
+        conversation_id: str | None = None,
+        created_after: datetime | None = None,
+        created_before: datetime | None = None,
+        question_contains: str | None = None,
+    ) -> list[dict[str, Any]]:
+        """
+        Get cache entries with optional filtering.
+
+        This method retrieves cache entries for inspection, debugging, or
+        generating evaluation datasets for threshold optimization.
+
+        Args:
+            limit: Maximum number of entries to return (None = no limit)
+            offset: Number of entries to skip for pagination (None = 0)
+            include_embeddings: Whether to include embedding vectors in results.
+                Embeddings are large, so set False for general inspection.
+            conversation_id: Filter by conversation ID (None = all conversations)
+            created_after: Only entries created after this time (None = no filter)
+            created_before: Only entries created before this time (None = no filter)
+            question_contains: Case-insensitive text search on question field
+
+        Returns:
+            List of cache entry dicts with keys:
+            - id: Cache entry ID (int for persistent caches, None for in-memory)
+            - question: The cached question text
+            - conversation_context: Prior conversation context string
+            - sql_query: The cached SQL query
+            - description: Query description
+            - conversation_id: The conversation ID
+            - created_at: Entry creation timestamp (datetime)
+            - question_embedding: (only if include_embeddings=True)
+            - context_embedding: (only if include_embeddings=True)
+
+        Example:
+            # Get recent entries for inspection
+            entries = cache.get_entries(limit=10)
+
+            # Get entries with embeddings for evaluation dataset
+            entries = cache.get_entries(include_embeddings=True, limit=100)
+            eval_dataset = generate_eval_dataset_from_cache(entries)
+
+            # Search for specific questions
+            entries = cache.get_entries(question_contains="sales")
+        """
+        pass
+
     def stats(self) -> dict[str, Any]:
         """
         Template method for returning cache statistics.
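Beyond the docstring examples, the filter parameters compose. A hedged sketch pulling one conversation's recent entries, assuming `cache` is any concrete subclass instance and that created_at timestamps are naive datetimes comparable with datetime.now():

    from datetime import datetime, timedelta

    recent = cache.get_entries(
        conversation_id="conv-123",  # hypothetical conversation ID
        created_after=datetime.now() - timedelta(days=1),
        question_contains="revenue",
        limit=50,
    )
    for e in recent:
        print(e["created_at"], e["question"], "->", e["sql_query"])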
dao_ai/genie/cache/context_aware/in_memory.py CHANGED
@@ -607,3 +607,115 @@ class InMemoryContextAwareGenieService(ContextAwareGenieService):
     def _get_additional_stats(self) -> dict[str, Any]:
         """Add capacity info to stats."""
         return {"capacity": self.parameters.capacity}
+
+    def get_entries(
+        self,
+        limit: int | None = None,
+        offset: int | None = None,
+        include_embeddings: bool = False,
+        conversation_id: str | None = None,
+        created_after: datetime | None = None,
+        created_before: datetime | None = None,
+        question_contains: str | None = None,
+    ) -> list[dict[str, Any]]:
+        """
+        Get cache entries with optional filtering.
+
+        This method retrieves cache entries for inspection, debugging, or
+        generating evaluation datasets for threshold optimization.
+
+        Args:
+            limit: Maximum number of entries to return (None = no limit)
+            offset: Number of entries to skip for pagination (None = 0)
+            include_embeddings: Whether to include embedding vectors in results.
+                Embeddings are large, so set False for general inspection.
+            conversation_id: Filter by conversation ID (None = all conversations)
+            created_after: Only entries created after this time (None = no filter)
+            created_before: Only entries created before this time (None = no filter)
+            question_contains: Case-insensitive text search on question field
+
+        Returns:
+            List of cache entry dicts. See base class for full key documentation.
+
+        Example:
+            # Get entries with embeddings for evaluation dataset generation
+            entries = cache.get_entries(include_embeddings=True, limit=100)
+            eval_dataset = generate_eval_dataset_from_cache(entries)
+        """
+        self._setup()
+
+        with self._lock:
+            # Filter entries for this space
+            filtered_entries: list[InMemoryCacheEntry] = []
+
+            for entry in self._cache:
+                # Filter by space_id
+                if entry.genie_space_id != self.space_id:
+                    continue
+
+                # Filter by conversation_id
+                if (
+                    conversation_id is not None
+                    and entry.conversation_id != conversation_id
+                ):
+                    continue
+
+                # Filter by created_after
+                if created_after is not None and entry.created_at <= created_after:
+                    continue
+
+                # Filter by created_before
+                if created_before is not None and entry.created_at >= created_before:
+                    continue
+
+                # Filter by question_contains (case-insensitive)
+                if question_contains is not None:
+                    if question_contains.lower() not in entry.question.lower():
+                        continue
+
+                filtered_entries.append(entry)
+
+            # Sort by created_at descending (most recent first)
+            filtered_entries.sort(key=lambda e: e.created_at, reverse=True)
+
+            # Apply offset
+            if offset is not None and offset > 0:
+                filtered_entries = filtered_entries[offset:]
+
+            # Apply limit
+            if limit is not None:
+                filtered_entries = filtered_entries[:limit]
+
+            # Convert to dicts
+            entries: list[dict[str, Any]] = []
+            for entry in filtered_entries:
+                result: dict[str, Any] = {
+                    "id": None,  # In-memory caches don't have database IDs
+                    "question": entry.question,
+                    "conversation_context": entry.conversation_context,
+                    "sql_query": entry.sql_query,
+                    "description": entry.description,
+                    "conversation_id": entry.conversation_id,
+                    "created_at": entry.created_at,
+                }
+
+                if include_embeddings:
+                    result["question_embedding"] = entry.question_embedding
+                    result["context_embedding"] = entry.context_embedding
+
+                entries.append(result)
+
+            logger.debug(
+                "Retrieved cache entries",
+                layer=self.name,
+                count=len(entries),
+                include_embeddings=include_embeddings,
+                filters={
+                    "conversation_id": conversation_id,
+                    "created_after": str(created_after) if created_after else None,
+                    "created_before": str(created_before) if created_before else None,
+                    "question_contains": question_contains,
+                },
+            )
+
+        return entries
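Because this implementation sorts by created_at descending before applying offset and limit, the two parameters give simple most-recent-first pagination. A small sketch; note that inserts between calls can shift page boundaries, since each call re-snapshots the cache under the lock:

    # Hedged sketch: paging through entries 25 at a time.
    page_size = 25
    page = 0
    while True:
        batch = cache.get_entries(limit=page_size, offset=page * page_size)
        if not batch:
            break
        for e in batch:
            print(e["question"])
        page += 1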