dao-ai 0.1.20__py3-none-any.whl → 0.1.21__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,7 @@
  """
- Semantic cache threshold optimization using Optuna Bayesian optimization.
+ Context-aware semantic cache threshold optimization using Optuna Bayesian optimization.
 
- This module provides optimization for Genie semantic cache thresholds using
+ This module provides optimization for context-aware Genie cache thresholds using
  Optuna's Tree-structured Parzen Estimator (TPE) algorithm with LLM-as-Judge
  evaluation for semantic match validation.
 
@@ -11,10 +11,23 @@ The optimizer tunes these thresholds:
  - question_weight: Weight for question similarity in combined score
 
  Usage:
-     from dao_ai.genie.cache.optimization import optimize_semantic_cache_thresholds
+     from dao_ai.genie.cache.context_aware.optimization import (
+         optimize_context_aware_cache_thresholds,
+         generate_eval_dataset_from_cache,
+     )
+
+     # Get entries from your cache
+     entries = cache_service.get_entries(include_embeddings=True, limit=100)
+
+     # Generate evaluation dataset
+     eval_dataset = generate_eval_dataset_from_cache(
+         cache_entries=entries,
+         dataset_name="my_cache_eval",
+     )
 
-     result = optimize_semantic_cache_thresholds(
-         dataset=my_eval_dataset,
+     # Optimize thresholds
+     result = optimize_context_aware_cache_thresholds(
+         dataset=eval_dataset,
          judge_model="databricks-meta-llama-3-3-70b-instruct",
          n_trials=50,
          metric="f1",
@@ -29,37 +42,31 @@ import hashlib
  import math
  from dataclasses import dataclass, field
  from datetime import datetime, timezone
- from typing import Any, Callable, Literal, Sequence
+ from typing import TYPE_CHECKING, Any, Callable, Iterator, Literal, Sequence
 
  import mlflow
- import optuna
  from loguru import logger
- from optuna.samplers import TPESampler
-
- # Optional MLflow integration - requires optuna-integration[mlflow]
- try:
-     from optuna.integration import MLflowCallback
-
-     MLFLOW_CALLBACK_AVAILABLE = True
- except ModuleNotFoundError:
-     MLFLOW_CALLBACK_AVAILABLE = False
-     MLflowCallback = None  # type: ignore
 
  from dao_ai.config import GenieContextAwareCacheParametersModel, LLMModel
  from dao_ai.utils import dao_ai_version
 
+ # Type-only import for optuna.Trial to support type hints without runtime dependency
+ if TYPE_CHECKING:
+     import optuna
+
  __all__ = [
-     "SemanticCacheEvalEntry",
-     "SemanticCacheEvalDataset",
+     "ContextAwareCacheEvalEntry",
+     "ContextAwareCacheEvalDataset",
      "ThresholdOptimizationResult",
-     "optimize_semantic_cache_thresholds",
+     "optimize_context_aware_cache_thresholds",
      "generate_eval_dataset_from_cache",
      "semantic_match_judge",
+     "clear_judge_cache",
  ]
 
 
  @dataclass
- class SemanticCacheEvalEntry:
+ class ContextAwareCacheEvalEntry:
      """Single evaluation entry for threshold optimization.
 
      Represents a pair of question/context combinations to evaluate
@@ -90,7 +97,7 @@ class SemanticCacheEvalEntry:
 
 
  @dataclass
- class SemanticCacheEvalDataset:
+ class ContextAwareCacheEvalDataset:
      """Dataset for semantic cache threshold optimization.
 
      Attributes:
@@ -100,13 +107,13 @@ class SemanticCacheEvalDataset:
      """
 
      name: str
-     entries: list[SemanticCacheEvalEntry]
+     entries: list[ContextAwareCacheEvalEntry]
      description: str = ""
 
      def __len__(self) -> int:
          return len(self.entries)
 
-     def __iter__(self):
+     def __iter__(self) -> Iterator[ContextAwareCacheEvalEntry]:
          return iter(self.entries)
 
 
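The renamed dataset keeps its container behavior: __len__ counts entries, and __iter__ now advertises that it yields ContextAwareCacheEvalEntry values. A minimal sketch of consuming a dataset; the summarize helper is illustrative, while name, entries, and expected_match are fields visible elsewhere in this diff:

    from dao_ai.genie.cache.context_aware.optimization import (
        ContextAwareCacheEvalDataset,
    )

    def summarize(dataset: ContextAwareCacheEvalDataset) -> None:
        # __len__ and __iter__ delegate to the entries list
        print(f"{dataset.name}: {len(dataset)} entries")
        positives = sum(1 for entry in dataset if entry.expected_match is True)
        negatives = sum(1 for entry in dataset if entry.expected_match is False)
        print(f"positive pairs: {positives}, negative pairs: {negatives}")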
@@ -272,7 +279,7 @@ def _compute_l2_similarity(embedding1: list[float], embedding2: list[float]) ->
 
 
  def _evaluate_thresholds(
-     dataset: SemanticCacheEvalDataset,
+     dataset: ContextAwareCacheEvalDataset,
      similarity_threshold: float,
      context_similarity_threshold: float,
      question_weight: float,
@@ -370,14 +377,14 @@ def _evaluate_thresholds(
 
 
  def _create_objective(
-     dataset: SemanticCacheEvalDataset,
+     dataset: ContextAwareCacheEvalDataset,
      judge_model: LLMModel | str | None,
      metric: Literal["f1", "precision", "recall", "fbeta"],
      beta: float = 1.0,
- ) -> Callable[[optuna.Trial], float]:
+ ) -> Callable[["optuna.Trial"], float]:
      """Create the Optuna objective function."""
 
-     def objective(trial: optuna.Trial) -> float:
+     def objective(trial: "optuna.Trial") -> float:
          # Sample parameters
          similarity_threshold = trial.suggest_float(
              "similarity_threshold", 0.5, 0.99, log=False
@@ -423,8 +430,8 @@ def _create_objective(
      return objective
 
 
- def optimize_semantic_cache_thresholds(
-     dataset: SemanticCacheEvalDataset,
+ def optimize_context_aware_cache_thresholds(
+     dataset: ContextAwareCacheEvalDataset,
      original_thresholds: dict[str, float]
      | GenieContextAwareCacheParametersModel
      | None = None,
@@ -461,12 +468,12 @@ def optimize_semantic_cache_thresholds(
          ThresholdOptimizationResult with optimized thresholds and metrics
 
      Example:
-         from dao_ai.genie.cache.optimization import (
-             optimize_semantic_cache_thresholds,
-             SemanticCacheEvalDataset,
+         from dao_ai.genie.cache.context_aware.optimization import (
+             optimize_context_aware_cache_thresholds,
+             ContextAwareCacheEvalDataset,
          )
 
-         result = optimize_semantic_cache_thresholds(
+         result = optimize_context_aware_cache_thresholds(
              dataset=my_dataset,
              judge_model="databricks-meta-llama-3-3-70b-instruct",
              n_trials=50,
@@ -476,6 +483,26 @@ def optimize_semantic_cache_thresholds(
          if result.improved:
              print(f"New thresholds: {result.optimized_thresholds}")
      """
+     # Lazy import optuna - only loaded when optimization is actually called
+     # This allows the cache module to be imported without optuna installed
+     try:
+         import optuna
+         from optuna.samplers import TPESampler
+     except ImportError as e:
+         raise ImportError(
+             "optuna is required for cache threshold optimization. "
+             "Install it with: pip install optuna"
+         ) from e
+
+     # Optional MLflow integration - requires optuna-integration[mlflow]
+     try:
+         from optuna.integration import MLflowCallback
+
+         mlflow_callback_available = True
+     except ModuleNotFoundError:
+         mlflow_callback_available = False
+         MLflowCallback = None  # type: ignore
+
      logger.info(
          "Starting semantic cache threshold optimization",
          dataset_name=dataset.name,
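Net effect of this hunk plus the TYPE_CHECKING block earlier: optuna becomes an optional dependency that is imported only when optimization runs, with a pointed error message otherwise. The same pattern in isolation, where heavy_lib is a hypothetical optional package:

    from typing import TYPE_CHECKING

    if TYPE_CHECKING:
        import heavy_lib  # hypothetical; seen only by type checkers

    def run(n_trials: int) -> "heavy_lib.Result":
        try:
            import heavy_lib  # deferred until the feature is actually used
        except ImportError as e:
            raise ImportError("heavy_lib is required: pip install heavy_lib") from e
        return heavy_lib.optimize(n_trials)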
@@ -539,7 +566,7 @@ def optimize_semantic_cache_thresholds(
      # Create study name if not provided
      if study_name is None:
          timestamp = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S")
-         study_name = f"semantic_cache_threshold_optimization_{timestamp}"
+         study_name = f"context_aware_cache_threshold_optimization_{timestamp}"
 
      # Create Optuna study
      sampler = TPESampler(seed=seed)
@@ -562,7 +589,7 @@ def optimize_semantic_cache_thresholds(
 
      # Set up MLflow callback if available
      callbacks = []
-     if MLFLOW_CALLBACK_AVAILABLE and MLflowCallback is not None:
+     if mlflow_callback_available and MLflowCallback is not None:
          try:
              mlflow_callback = MLflowCallback(
                  tracking_uri=mlflow.get_tracking_uri(),
@@ -743,7 +770,7 @@ def generate_eval_dataset_from_cache(
      num_negative_pairs: int = 50,
      paraphrase_model: LLMModel | str | None = None,
      dataset_name: str = "generated_eval_dataset",
- ) -> SemanticCacheEvalDataset:
+ ) -> ContextAwareCacheEvalDataset:
      """
      Generate an evaluation dataset from existing cache entries.
 
@@ -752,7 +779,8 @@ def generate_eval_dataset_from_cache(
      Args:
          cache_entries: List of cache entries with 'question', 'conversation_context',
-             'question_embedding', and 'context_embedding' keys
+             'question_embedding', and 'context_embedding' keys. Use cache.get_entries()
+             with include_embeddings=True to retrieve these.
          embedding_model: Model for generating embeddings for paraphrased questions
          num_positive_pairs: Number of positive (matching) pairs to generate
          num_negative_pairs: Number of negative (non-matching) pairs to generate
@@ -760,7 +788,19 @@ def generate_eval_dataset_from_cache(
          dataset_name: Name for the generated dataset
 
      Returns:
-         SemanticCacheEvalDataset with generated entries
+         ContextAwareCacheEvalDataset with generated entries
+
+     Example:
+         # Get entries from cache with embeddings
+         entries = cache_service.get_entries(include_embeddings=True, limit=100)
+
+         # Generate evaluation dataset
+         eval_dataset = generate_eval_dataset_from_cache(
+             cache_entries=entries,
+             num_positive_pairs=50,
+             num_negative_pairs=50,
+             dataset_name="my_cache_eval",
+         )
      """
      import random
 
@@ -787,7 +827,7 @@ def generate_eval_dataset_from_cache(
      )
      chat = para_model.as_chat_model()
 
-     entries: list[SemanticCacheEvalEntry] = []
+     entries: list[ContextAwareCacheEvalEntry] = []
 
      # Generate positive pairs (paraphrases)
      logger.info(
@@ -824,7 +864,7 @@ Rephrased question:"""
          )
 
          entries.append(
-             SemanticCacheEvalEntry(
+             ContextAwareCacheEvalEntry(
                  question=paraphrased_question,
                  question_embedding=para_q_emb,
                  context=original_context,
@@ -855,7 +895,7 @@ Rephrased question:"""
 
          # Use entry1 as the "question" and entry2 as the "cached" entry
          entries.append(
-             SemanticCacheEvalEntry(
+             ContextAwareCacheEvalEntry(
                  question=entry1.get("question", ""),
                  question_embedding=entry1.get("question_embedding", []),
                  context=entry1.get("conversation_context", ""),
@@ -876,7 +916,7 @@ Rephrased question:"""
          negative_pairs=sum(1 for e in entries if e.expected_match is False),
      )
 
-     return SemanticCacheEvalDataset(
+     return ContextAwareCacheEvalDataset(
          name=dataset_name,
          entries=entries,
          description=f"Generated from {len(cache_entries)} cache entries",
@@ -849,6 +849,144 @@ class PostgresContextAwareGenieService(PersistentContextAwareGenieCacheService):
          }
          return {}
 
+     def get_entries(
+         self,
+         limit: int | None = None,
+         offset: int | None = None,
+         include_embeddings: bool = False,
+         conversation_id: str | None = None,
+         created_after: datetime | None = None,
+         created_before: datetime | None = None,
+         question_contains: str | None = None,
+     ) -> list[dict[str, Any]]:
+         """
+         Get cache entries with optional filtering.
+
+         This method retrieves cache entries for inspection, debugging, or
+         generating evaluation datasets for threshold optimization.
+
+         Args:
+             limit: Maximum number of entries to return (None = no limit)
+             offset: Number of entries to skip for pagination (None = 0)
+             include_embeddings: Whether to include embedding vectors in results.
+                 Embeddings are large, so set False for general inspection.
+             conversation_id: Filter by conversation ID (None = all conversations)
+             created_after: Only entries created after this time (None = no filter)
+             created_before: Only entries created before this time (None = no filter)
+             question_contains: Case-insensitive text search on question field
+
+         Returns:
+             List of cache entry dicts. See base class for full key documentation.
+
+         Example:
+             # Get entries with embeddings for evaluation dataset generation
+             entries = cache.get_entries(include_embeddings=True, limit=100)
+             eval_dataset = generate_eval_dataset_from_cache(entries)
+         """
+         self._setup()
+
+         # Build column list
+         base_columns = [
+             "id",
+             "question",
+             "conversation_context",
+             "sql_query",
+             "description",
+             "conversation_id",
+             "created_at",
+         ]
+
+         if include_embeddings:
+             columns = base_columns + ["question_embedding", "context_embedding"]
+         else:
+             columns = base_columns
+
+         columns_str = ", ".join(columns)
+
+         # Build WHERE clause with parameters
+         where_clauses = ["genie_space_id = %s"]
+         params: list[Any] = [self.space_id]
+
+         if conversation_id is not None:
+             where_clauses.append("conversation_id = %s")
+             params.append(conversation_id)
+
+         if created_after is not None:
+             where_clauses.append("created_at > %s")
+             params.append(created_after)
+
+         if created_before is not None:
+             where_clauses.append("created_at < %s")
+             params.append(created_before)
+
+         if question_contains is not None:
+             where_clauses.append("question ILIKE %s")
+             params.append(f"%{question_contains}%")
+
+         where_str = " AND ".join(where_clauses)
+
+         # Build full query
+         query = f"""
+             SELECT {columns_str}
+             FROM {self.table_name}
+             WHERE {where_str}
+             ORDER BY created_at DESC
+         """
+
+         if limit is not None:
+             query += f" LIMIT {int(limit)}"
+
+         if offset is not None:
+             query += f" OFFSET {int(offset)}"
+
+         # Execute query
+         with self._pool.connection() as conn:
+             with conn.cursor() as cur:
+                 cur.execute(query, params)
+                 rows = cur.fetchall()
+
+         entries: list[dict[str, Any]] = []
+         for row in rows:
+             entry: dict[str, Any] = {
+                 "id": row.get("id"),
+                 "question": row.get("question"),
+                 "conversation_context": row.get("conversation_context"),
+                 "sql_query": row.get("sql_query"),
+                 "description": row.get("description"),
+                 "conversation_id": row.get("conversation_id"),
+                 "created_at": row.get("created_at"),
+             }
+
+             if include_embeddings:
+                 # Convert pgvector to list
+                 q_emb = row.get("question_embedding")
+                 c_emb = row.get("context_embedding")
+                 entry["question_embedding"] = (
+                     list(q_emb) if q_emb is not None else []
+                 )
+                 entry["context_embedding"] = (
+                     list(c_emb) if c_emb is not None else []
+                 )
+
+             entries.append(entry)
+
+         logger.debug(
+             "Retrieved cache entries",
+             layer=self.name,
+             count=len(entries),
+             include_embeddings=include_embeddings,
+             filters={
+                 "conversation_id": conversation_id,
+                 "created_after": str(created_after) if created_after else None,
+                 "created_before": (
+                     str(created_before) if created_before else None
+                 ),
+                 "question_contains": question_contains,
+             },
+         )
+
+         return entries
+
      def from_space(
          self,
          space_id: str | None = None,
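With get_entries in place, this release supports an end-to-end tuning loop from a live cache. A sketch using only names and parameters shown in this diff; cache_service stands in for a configured PostgresContextAwareGenieService instance:

    from dao_ai.genie.cache.context_aware.optimization import (
        generate_eval_dataset_from_cache,
        optimize_context_aware_cache_thresholds,
    )

    # Pull recent entries with embeddings included
    entries = cache_service.get_entries(include_embeddings=True, limit=100)

    eval_dataset = generate_eval_dataset_from_cache(
        cache_entries=entries,
        num_positive_pairs=50,
        num_negative_pairs=50,
        dataset_name="my_cache_eval",
    )

    result = optimize_context_aware_cache_thresholds(
        dataset=eval_dataset,
        judge_model="databricks-meta-llama-3-3-70b-instruct",
        n_trials=50,
        metric="f1",
    )
    if result.improved:
        print(f"New thresholds: {result.optimized_thresholds}")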
@@ -857,6 +995,7 @@ class PostgresContextAwareGenieService(PersistentContextAwareGenieCacheService):
          from_datetime: datetime | None = None,
          to_datetime: datetime | None = None,
          max_messages: int | None = None,
+         max_conversations: int | None = None,
      ) -> Self:
          """Populate cache from existing Genie space conversations.
 
@@ -872,6 +1011,7 @@ class PostgresContextAwareGenieService(PersistentContextAwareGenieCacheService):
              from_datetime: Only include messages after this time
              to_datetime: Only include messages before this time
              max_messages: Limit to last N messages (most recent first)
+             max_conversations: Limit to N conversations (stops pagination after reaching limit)
 
          Returns:
              self for method chaining
@@ -916,8 +1056,21 @@ class PostgresContextAwareGenieService(PersistentContextAwareGenieCacheService):
                  break
 
              if response.conversations is None:
+                 logger.debug(
+                     "No conversations in response",
+                     layer=self.name,
+                     space_id=target_space_id,
+                 )
                  break
 
+             logger.debug(
+                 "Fetched conversations page",
+                 layer=self.name,
+                 conversations_in_page=len(response.conversations),
+                 total_conversations_so_far=stats["conversations_processed"],
+                 has_next_page=response.next_page_token is not None,
+             )
+
              for conversation in response.conversations:
                  if conversation.conversation_id is None:
                      continue
@@ -945,11 +1098,35 @@ class PostgresContextAwareGenieService(PersistentContextAwareGenieCacheService):
                      if max_messages and len(all_messages) >= max_messages:
                          break
 
+                 if (
+                     max_conversations
+                     and stats["conversations_processed"] >= max_conversations
+                 ):
+                     break
+
              if max_messages and len(all_messages) >= max_messages:
                  break
 
+             if (
+                 max_conversations
+                 and stats["conversations_processed"] >= max_conversations
+             ):
+                 logger.debug(
+                     "Reached max_conversations limit",
+                     layer=self.name,
+                     max_conversations=max_conversations,
+                     total_conversations=stats["conversations_processed"],
+                 )
+                 break
+
              page_token = response.next_page_token
              if page_token is None:
+                 logger.debug(
+                     "No more pages to fetch",
+                     layer=self.name,
+                     total_conversations=stats["conversations_processed"],
+                     total_messages=len(all_messages),
+                 )
                  break
 
          # Sort and limit
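The new max_conversations limit bounds cache warm-up independently of max_messages: pagination stops at whichever cap is reached first, and the added debug logs record why. A sketch of the call using the from_space keywords shown in this diff; the space_id value is illustrative:

    from datetime import datetime, timedelta, timezone

    cache_service.from_space(
        space_id="my-genie-space-id",  # illustrative value
        from_datetime=datetime.now(timezone.utc) - timedelta(days=30),
        max_messages=500,       # cap on total messages ingested
        max_conversations=50,   # new in 0.1.21: cap on conversations paged through
    )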
@@ -1,5 +1,5 @@
  # DAO AI Middleware Module
- # This module provides middleware implementations compatible with LangChain v1's create_agent
+ # Middleware implementations compatible with LangChain v1's create_agent
 
  # Re-export LangChain built-in middleware
  from langchain.agents.middleware import (
@@ -82,6 +82,10 @@ from dao_ai.middleware.summarization import (
      create_summarization_middleware,
  )
  from dao_ai.middleware.tool_call_limit import create_tool_call_limit_middleware
+ from dao_ai.middleware.tool_call_observability import (
+     ToolCallObservabilityMiddleware,
+     create_tool_call_observability_middleware,
+ )
  from dao_ai.middleware.tool_retry import create_tool_retry_middleware
  from dao_ai.middleware.tool_selector import create_llm_tool_selector_middleware
 
@@ -160,4 +164,7 @@ __all__ = [
      "create_clear_tool_uses_edit",
      # PII middleware factory functions
      "create_pii_middleware",
+     # Tool call observability middleware
+     "ToolCallObservabilityMiddleware",
+     "create_tool_call_observability_middleware",
  ]
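The new names are re-exported from dao_ai.middleware, following the create_*_middleware factory convention used by the other entries in this module. A minimal import sketch; the factory's parameters are not shown in this diff, so the no-argument call is an assumption:

    from dao_ai.middleware import (
        ToolCallObservabilityMiddleware,
        create_tool_call_observability_middleware,
    )

    # Assumed default construction; check the factory signature for real options
    middleware = create_tool_call_observability_middleware()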