dao-ai 0.1.12-py3-none-any.whl → 0.1.14-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
dao_ai/apps/handlers.py CHANGED
@@ -14,7 +14,7 @@ from typing import AsyncGenerator
 
 import mlflow
 from dotenv import load_dotenv
-from mlflow.genai.agent_server import invoke, stream
+from mlflow.genai.agent_server import get_request_headers, invoke, stream
 from mlflow.types.responses import (
     ResponsesAgentRequest,
     ResponsesAgentResponse,
@@ -25,6 +25,23 @@ from dao_ai.config import AppConfig
 from dao_ai.logging import configure_logging
 from dao_ai.models import LanggraphResponsesAgent
 
+
+def _inject_headers_into_request(request: ResponsesAgentRequest) -> None:
+    """Inject request headers into custom_inputs for Context propagation.
+
+    Captures headers from the MLflow AgentServer context (where they're available)
+    and injects them into request.custom_inputs.configurable.headers so they
+    flow through to Context and can be used for OBO authentication.
+    """
+    headers: dict[str, str] = get_request_headers()
+    if headers:
+        if request.custom_inputs is None:
+            request.custom_inputs = {}
+        if "configurable" not in request.custom_inputs:
+            request.custom_inputs["configurable"] = {}
+        request.custom_inputs["configurable"]["headers"] = headers
+
+
 # Load environment variables from .env.local if it exists
 load_dotenv(dotenv_path=".env.local", override=True)
 
@@ -61,6 +78,8 @@ async def non_streaming(request: ResponsesAgentRequest) -> ResponsesAgentRespons
     Returns:
         ResponsesAgentResponse with the complete output
     """
+    # Capture headers while in the AgentServer async context (before they're lost)
+    _inject_headers_into_request(request)
     return await _responses_agent.apredict(request)
 
 
@@ -80,5 +99,7 @@ async def streaming(
     Yields:
         ResponsesAgentStreamEvent objects as they are generated
     """
+    # Capture headers while in the AgentServer async context (before they're lost)
+    _inject_headers_into_request(request)
     async for event in _responses_agent.apredict_stream(request):
         yield event
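As a note on the change above: a minimal, self-contained sketch of the same injection logic applied to a plain object. `SimpleNamespace` stands in for `ResponsesAgentRequest`; the header names follow the Databricks Apps forwarding convention and the values are fake.

```python
# Illustrative only: mirrors _inject_headers_into_request on a stand-in object.
from types import SimpleNamespace

request = SimpleNamespace(custom_inputs=None)  # stand-in for ResponsesAgentRequest
headers = {
    "x-forwarded-access-token": "<per-user token>",   # fake value
    "x-forwarded-user": "jane.doe@example.com",       # fake value
}

if headers:
    if request.custom_inputs is None:
        request.custom_inputs = {}
    # Nest headers under custom_inputs["configurable"]["headers"]
    request.custom_inputs.setdefault("configurable", {})["headers"] = headers

assert request.custom_inputs["configurable"]["headers"]["x-forwarded-user"] == (
    "jane.doe@example.com"
)
```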
dao_ai/config.py CHANGED
@@ -7,6 +7,7 @@ from enum import Enum
 from os import PathLike
 from pathlib import Path
 from typing import (
+    TYPE_CHECKING,
     Any,
     Callable,
     Iterator,
@@ -18,6 +19,9 @@ from typing import (
     Union,
 )
 
+if TYPE_CHECKING:
+    from dao_ai.state import Context
+
 from databricks.sdk import WorkspaceClient
 from databricks.sdk.credentials_provider import (
     CredentialsStrategy,
@@ -284,8 +288,8 @@ class IsDatabricksResource(ABC, BaseModel):
 
         Authentication priority:
         1. On-Behalf-Of User (on_behalf_of_user=True):
-           - Forwarded headers (Databricks Apps)
-           - ModelServingUserCredentials (Model Serving)
+           - Uses ModelServingUserCredentials (Model Serving)
+           - For Databricks Apps with headers, use workspace_client_from(context)
         2. Service Principal (client_id + client_secret + workspace_host)
         3. PAT (pat + workspace_host)
         4. Ambient/default authentication
@@ -294,36 +298,6 @@
 
         # Check for OBO first (highest priority)
         if self.on_behalf_of_user:
-            # NEW: In Databricks Apps, use forwarded headers for per-user auth
-            try:
-                from mlflow.genai.agent_server import get_request_headers
-
-                headers = get_request_headers()
-                forwarded_token = headers.get("x-forwarded-access-token")
-
-                if forwarded_token:
-                    forwarded_user = headers.get("x-forwarded-user", "unknown")
-                    logger.debug(
-                        f"Creating WorkspaceClient for {self.__class__.__name__} "
-                        f"with OBO using forwarded token from Databricks Apps",
-                        forwarded_user=forwarded_user,
-                    )
-                    # Use workspace_host if configured, otherwise SDK will auto-detect
-                    workspace_host_value: str | None = (
-                        normalize_host(value_of(self.workspace_host))
-                        if self.workspace_host
-                        else None
-                    )
-                    return WorkspaceClient(
-                        host=workspace_host_value,
-                        token=forwarded_token,
-                        auth_type="pat",
-                    )
-            except (ImportError, LookupError):
-                # mlflow not available or headers not set - fall through to Model Serving
-                pass
-
-            # Fall back to Model Serving OBO (existing behavior)
             credentials_strategy: CredentialsStrategy = ModelServingUserCredentials()
             logger.debug(
                 f"Creating WorkspaceClient for {self.__class__.__name__} "
@@ -382,6 +356,55 @@
             )
         return WorkspaceClient()
 
+    def workspace_client_from(self, context: "Context | None") -> WorkspaceClient:
+        """
+        Get a WorkspaceClient using headers from the provided Context.
+
+        Use this method from tools that have access to ToolRuntime[Context].
+        This allows OBO authentication to work in Databricks Apps where headers
+        are captured at request entry and passed through the Context.
+
+        Args:
+            context: Runtime context containing headers for OBO auth.
+                If None or no headers, falls back to workspace_client property.
+
+        Returns:
+            WorkspaceClient configured with appropriate authentication.
+        """
+        from dao_ai.utils import normalize_host
+
+        # Check if we have headers in context for OBO
+        if context and context.headers and self.on_behalf_of_user:
+            headers = context.headers
+            # Try both lowercase and title-case header names (HTTP headers are case-insensitive)
+            forwarded_token = headers.get("x-forwarded-access-token") or headers.get(
+                "X-Forwarded-Access-Token"
+            )
+
+            if forwarded_token:
+                forwarded_user = headers.get("x-forwarded-user") or headers.get(
+                    "X-Forwarded-User", "unknown"
+                )
+                logger.debug(
+                    f"Creating WorkspaceClient for {self.__class__.__name__} "
+                    f"with OBO using forwarded token from Context",
+                    forwarded_user=forwarded_user,
+                )
+                # Use workspace_host if configured, otherwise SDK will auto-detect
+                workspace_host_value: str | None = (
+                    normalize_host(value_of(self.workspace_host))
+                    if self.workspace_host
+                    else None
+                )
+                return WorkspaceClient(
+                    host=workspace_host_value,
+                    token=forwarded_token,
+                    auth_type="pat",
+                )
+
+        # Fall back to existing workspace_client property
+        return self.workspace_client
+
 
 class DeploymentTarget(str, Enum):
     """Target platform for agent deployment."""
dao_ai/tools/genie.py CHANGED
@@ -139,29 +139,53 @@ Returns:
     GenieResponse: A response object containing the conversation ID and result from Genie."""
     tool_description = tool_description + function_docs
 
-    genie: Genie = Genie(
-        space_id=space_id,
-        client=genie_room.workspace_client,
-        truncate_results=truncate_results,
-    )
+    # Cache for genie service - created lazily on first call
+    # This allows us to use workspace_client_from with runtime context for OBO
+    _cached_genie_service: GenieServiceBase | None = None
+
+    def _get_genie_service(context: Context | None) -> GenieServiceBase:
+        """Get or create the Genie service, using context for OBO auth if available."""
+        nonlocal _cached_genie_service
+
+        # Use cached service if available (for non-OBO or after first call)
+        # For OBO, we need fresh workspace client each time to use the user's token
+        if _cached_genie_service is not None and not genie_room.on_behalf_of_user:
+            return _cached_genie_service
+
+        # Get workspace client using context for OBO support
+        from databricks.sdk import WorkspaceClient
 
-    genie_service: GenieServiceBase = GenieService(genie)
-
-    # Wrap with semantic cache first (checked second due to decorator pattern)
-    if semantic_cache_parameters is not None:
-        genie_service = SemanticCacheService(
-            impl=genie_service,
-            parameters=semantic_cache_parameters,
-            workspace_client=genie_room.workspace_client,  # Pass workspace client for conversation history
-        ).initialize()  # Eagerly initialize to fail fast and create table
-
-    # Wrap with LRU cache last (checked first - fast O(1) exact match)
-    if lru_cache_parameters is not None:
-        genie_service = LRUCacheService(
-            impl=genie_service,
-            parameters=lru_cache_parameters,
+        workspace_client: WorkspaceClient = genie_room.workspace_client_from(context)
+
+        genie: Genie = Genie(
+            space_id=space_id,
+            client=workspace_client,
+            truncate_results=truncate_results,
         )
 
+        genie_service: GenieServiceBase = GenieService(genie)
+
+        # Wrap with semantic cache first (checked second due to decorator pattern)
+        if semantic_cache_parameters is not None:
+            genie_service = SemanticCacheService(
+                impl=genie_service,
+                parameters=semantic_cache_parameters,
+                workspace_client=workspace_client,
+            ).initialize()
+
+        # Wrap with LRU cache last (checked first - fast O(1) exact match)
+        if lru_cache_parameters is not None:
+            genie_service = LRUCacheService(
+                impl=genie_service,
+                parameters=lru_cache_parameters,
+            )
+
+        # Cache for non-OBO scenarios
+        if not genie_room.on_behalf_of_user:
+            _cached_genie_service = genie_service
+
+        return genie_service
+
     @tool(
         name_or_callable=tool_name,
         description=tool_description,
@@ -177,6 +201,10 @@ GenieResponse: A response object containing the conversation ID and result from
         # Access state through runtime
         state: AgentState = runtime.state
         tool_call_id: str = runtime.tool_call_id
+        context: Context | None = runtime.context
+
+        # Get genie service with OBO support via context
+        genie_service: GenieServiceBase = _get_genie_service(context)
 
         # Ensure space_id is a string for state keys
         space_id_str: str = str(space_id)
@@ -194,6 +222,14 @@ GenieResponse: A response object containing the conversation ID and result from
                 conversation_id=existing_conversation_id,
             )
 
+        # Log the prompt being sent to Genie
+        logger.trace(
+            "Sending prompt to Genie",
+            space_id=space_id_str,
+            conversation_id=existing_conversation_id,
+            prompt=question[:500] + "..." if len(question) > 500 else question,
+        )
+
         # Call ask_question which always returns CacheResult with cache metadata
         cache_result: CacheResult = genie_service.ask_question(
             question, conversation_id=existing_conversation_id
@@ -211,6 +247,22 @@ GenieResponse: A response object containing the conversation ID and result from
             cache_key=cache_key,
         )
 
+        # Log truncated response for debugging
+        result_preview: str = str(genie_response.result)
+        if len(result_preview) > 500:
+            result_preview = result_preview[:500] + "..."
+        logger.trace(
+            "Genie response content",
+            question=question[:100] + "..." if len(question) > 100 else question,
+            query=genie_response.query,
+            description=(
+                genie_response.description[:200] + "..."
+                if genie_response.description and len(genie_response.description) > 200
+                else genie_response.description
+            ),
+            result_preview=result_preview,
+        )
+
         # Update session state with cache information
         if persist_conversation:
             session.genie.update_space(
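The lazy construction above is an instance of a general closure-cache pattern: reuse the expensive service when credentials are shared, rebuild it per call when each caller brings their own token. A generic, package-independent sketch:

```python
from typing import Any, Callable, Optional


def make_service_getter(
    build: Callable[[Any], Any], per_user: bool
) -> Callable[[Any], Any]:
    """Return a getter that caches build(...) only when auth is shared."""
    cached: Optional[Any] = None

    def get_service(context: Any) -> Any:
        nonlocal cached
        if cached is not None and not per_user:
            return cached  # shared credentials: reuse is safe
        service = build(context)  # per-user auth: fresh client every call
        if not per_user:
            cached = service
        return service

    return get_service
```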
dao_ai/tools/mcp.py CHANGED
@@ -30,6 +30,7 @@ from dao_ai.config import (
     McpFunctionModel,
     TransportType,
 )
+from dao_ai.state import Context
 
 
 @dataclass
@@ -173,6 +174,7 @@ def _get_auth_resource(function: McpFunctionModel) -> IsDatabricksResource:
 
 def _build_connection_config(
     function: McpFunctionModel,
+    context: Context | None = None,
 ) -> dict[str, Any]:
     """
     Build the connection configuration dictionary for MultiServerMCPClient.
@@ -193,6 +195,7 @@
 
     Args:
         function: The MCP function model configuration.
+        context: Optional runtime context with headers for OBO auth.
 
     Returns:
         A dictionary containing the transport-specific connection settings.
@@ -205,14 +208,17 @@
         }
 
     # For HTTP transport, use DatabricksOAuthClientProvider with unified auth
+    from databricks.sdk import WorkspaceClient
     from databricks_mcp import DatabricksOAuthClientProvider
 
     # Get the resource to use for authentication
-    auth_resource = _get_auth_resource(function)
+    auth_resource: IsDatabricksResource = _get_auth_resource(function)
 
-    # Get workspace client from the auth resource
-    workspace_client = auth_resource.workspace_client
-    auth_provider = DatabricksOAuthClientProvider(workspace_client)
+    # Get workspace client from the auth resource with OBO support via context
+    workspace_client: WorkspaceClient = auth_resource.workspace_client_from(context)
+    auth_provider: DatabricksOAuthClientProvider = DatabricksOAuthClientProvider(
+        workspace_client
+    )
 
     # Log which resource is providing auth
     resource_name = (
@@ -509,19 +515,28 @@ async def acreate_mcp_tools(
     def _create_tool_wrapper(mcp_tool: Tool) -> RunnableLike:
         """
         Create a LangChain tool wrapper for an MCP tool.
+
+        Supports OBO authentication via context headers.
         """
+        from langchain.tools import ToolRuntime
 
         @create_tool(
             mcp_tool.name,
             description=mcp_tool.description or f"MCP tool: {mcp_tool.name}",
             args_schema=mcp_tool.inputSchema,
         )
-        async def tool_wrapper(**kwargs: Any) -> str:
+        async def tool_wrapper(
+            runtime: ToolRuntime[Context] = None,
+            **kwargs: Any,
+        ) -> str:
             """Execute MCP tool with fresh session."""
             logger.trace("Invoking MCP tool", tool_name=mcp_tool.name, args=kwargs)
 
-            invocation_client = MultiServerMCPClient(
-                {"mcp_function": _build_connection_config(function)}
+            # Get context for OBO support
+            context: Context | None = runtime.context if runtime else None
+
+            invocation_client: MultiServerMCPClient = MultiServerMCPClient(
+                {"mcp_function": _build_connection_config(function, context)}
             )
 
             try:
@@ -530,7 +545,7 @@
                     mcp_tool.name, kwargs
                 )
 
-                text_result = _extract_text_content(result)
+                text_result: str = _extract_text_content(result)
 
                 logger.trace(
                     "MCP tool completed",
@@ -625,20 +640,28 @@ def create_mcp_tools(
         This wrapper handles:
         - Fresh session creation per invocation (stateless)
         - Content extraction to plain text (avoiding extra fields)
+        - OBO authentication via context headers
         """
+        from langchain.tools import ToolRuntime
 
         @create_tool(
             mcp_tool.name,
             description=mcp_tool.description or f"MCP tool: {mcp_tool.name}",
             args_schema=mcp_tool.inputSchema,
         )
-        async def tool_wrapper(**kwargs: Any) -> str:
+        async def tool_wrapper(
+            runtime: ToolRuntime[Context] = None,
+            **kwargs: Any,
+        ) -> str:
             """Execute MCP tool with fresh session."""
             logger.trace("Invoking MCP tool", tool_name=mcp_tool.name, args=kwargs)
 
-            # Create a fresh client/session for each invocation
-            invocation_client = MultiServerMCPClient(
-                {"mcp_function": _build_connection_config(function)}
+            # Get context for OBO support
+            context: Context | None = runtime.context if runtime else None
+
+            # Create a fresh client/session for each invocation with OBO support
+            invocation_client: MultiServerMCPClient = MultiServerMCPClient(
+                {"mcp_function": _build_connection_config(function, context)}
             )
 
             try:
@@ -648,7 +671,7 @@
                 )
 
                 # Extract text content, avoiding extra fields
-                text_result = _extract_text_content(result)
+                text_result: str = _extract_text_content(result)
 
                 logger.trace(
                     "MCP tool completed",
dao_ai/tools/slack.py CHANGED
@@ -1,11 +1,13 @@
 from typing import Any, Callable, Optional
 
 from databricks.sdk.service.serving import ExternalFunctionRequestHttpMethod
+from langchain.tools import ToolRuntime
 from langchain_core.tools import tool
 from loguru import logger
 from requests import Response
 
 from dao_ai.config import ConnectionModel
+from dao_ai.state import Context
 
 
 def _find_channel_id_by_name(
@@ -129,8 +131,17 @@ def create_send_slack_message_tool(
         name_or_callable=name,
         description=description,
     )
-    def send_slack_message(text: str) -> str:
-        response: Response = connection.workspace_client.serving_endpoints.http_request(
+    def send_slack_message(
+        text: str,
+        runtime: ToolRuntime[Context] = None,
+    ) -> str:
+        from databricks.sdk import WorkspaceClient
+
+        # Get workspace client with OBO support via context
+        context: Context | None = runtime.context if runtime else None
+        workspace_client: WorkspaceClient = connection.workspace_client_from(context)
+
+        response: Response = workspace_client.serving_endpoints.http_request(
             conn=connection.name,
             method=ExternalFunctionRequestHttpMethod.POST,
             path="/api/chat.postMessage",
dao_ai/tools/sql.py CHANGED
@@ -7,10 +7,11 @@ pre-configured SQL statements against a Databricks SQL warehouse.
 
 from databricks.sdk import WorkspaceClient
 from databricks.sdk.service.sql import StatementResponse, StatementState
-from langchain.tools import tool
+from langchain.tools import ToolRuntime, tool
 from loguru import logger
 
 from dao_ai.config import WarehouseModel, value_of
+from dao_ai.state import Context
 
 
 def create_execute_statement_tool(
@@ -63,7 +64,6 @@
     description = f"Execute a pre-configured SQL query against the {warehouse.name} warehouse and return the results."
 
     warehouse_id: str = value_of(warehouse.warehouse_id)
-    workspace_client: WorkspaceClient = warehouse.workspace_client
 
     logger.debug(
         "Creating SQL execution tool",
@@ -74,7 +74,7 @@
     )
 
     @tool(name_or_callable=name, description=description)
-    def execute_statement_tool() -> str:
+    def execute_statement_tool(runtime: ToolRuntime[Context] = None) -> str:
         """
         Execute the pre-configured SQL statement against the Databricks SQL warehouse.
 
@@ -88,6 +88,10 @@
             sql_preview=statement[:100] + "..." if len(statement) > 100 else statement,
         )
 
+        # Get workspace client with OBO support via context
+        context: Context | None = runtime.context if runtime else None
+        workspace_client: WorkspaceClient = warehouse.workspace_client_from(context)
+
         try:
             # Execute the SQL statement
             statement_response: StatementResponse = (
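The truncated call above continues into the Databricks SDK's statement execution API; a hedged sketch of the typical shape (the statement and warehouse ID are placeholders, and this mirrors rather than reproduces the package's code):

```python
from databricks.sdk import WorkspaceClient
from databricks.sdk.service.sql import StatementResponse, StatementState

w = WorkspaceClient()  # in the tool: warehouse.workspace_client_from(context)
resp: StatementResponse = w.statement_execution.execute_statement(
    statement="SELECT 1",           # placeholder statement
    warehouse_id="<warehouse id>",  # placeholder warehouse
)
if resp.status and resp.status.state == StatementState.SUCCEEDED:
    rows = resp.result.data_array if resp.result else []
```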
dao_ai/tools/unity_catalog.py CHANGED
@@ -1,10 +1,11 @@
-from typing import Any, Dict, Optional, Sequence, Set
+from typing import Annotated, Any, Dict, Optional, Sequence, Set
 
 from databricks.sdk import WorkspaceClient
 from databricks.sdk.service.catalog import FunctionInfo, PermissionsChange, Privilege
 from databricks_langchain import DatabricksFunctionClient, UCFunctionToolkit
+from langchain.tools import ToolRuntime
 from langchain_core.runnables.base import RunnableLike
-from langchain_core.tools import StructuredTool
+from langchain_core.tools import InjectedToolArg, StructuredTool
 from loguru import logger
 from pydantic import BaseModel
 from unitycatalog.ai.core.base import FunctionExecutionResult
@@ -15,6 +16,7 @@ from dao_ai.config import (
     UnityCatalogFunctionModel,
     value_of,
 )
+from dao_ai.state import Context
 from dao_ai.utils import normalize_host
 
 
@@ -35,13 +37,11 @@ def create_uc_tools(
         A sequence of BaseTool objects that wrap the specified UC functions
     """
     original_function_model: UnityCatalogFunctionModel | None = None
-    workspace_client: WorkspaceClient | None = None
     function_name: str
 
     if isinstance(function, UnityCatalogFunctionModel):
         original_function_model = function
         function_name = function.resource.full_name
-        workspace_client = function.resource.workspace_client
     else:
         function_name = function
 
@@ -56,6 +56,12 @@
         # Use with_partial_args directly with UnityCatalogFunctionModel
         tools = [with_partial_args(original_function_model)]
     else:
+        # For standard UC toolkit, we need workspace_client at creation time
+        # Use the resource's workspace_client (will use ambient auth if no OBO)
+        workspace_client: WorkspaceClient | None = None
+        if original_function_model:
+            workspace_client = original_function_model.resource.workspace_client
+
         # Fallback to standard UC toolkit approach
         client: DatabricksFunctionClient = DatabricksFunctionClient(
             client=workspace_client
@@ -356,7 +362,6 @@ def with_partial_args(
     # Get function info from the resource
     function_name: str = uc_function.resource.full_name
     tool_name: str = uc_function.resource.name or function_name.replace(".", "_")
-    workspace_client: WorkspaceClient = uc_function.resource.workspace_client
 
     logger.debug(
         "Creating UC tool with partial args",
@@ -365,7 +370,7 @@
         partial_args=list(resolved_args.keys()),
    )
 
-    # Grant permissions if we have credentials
+    # Grant permissions if we have credentials (using ambient auth for setup)
     if "client_id" in resolved_args:
         client_id: str = resolved_args["client_id"]
         host: Optional[str] = resolved_args.get("host")
@@ -376,14 +381,18 @@
             "Failed to grant permissions", function_name=function_name, error=str(e)
         )
 
-    # Create the client for function execution using the resource's workspace client
-    client: DatabricksFunctionClient = DatabricksFunctionClient(client=workspace_client)
+    # Get workspace client for schema introspection (uses ambient auth at definition time)
+    # Actual execution will use OBO via context
+    setup_workspace_client: WorkspaceClient = uc_function.resource.workspace_client
+    setup_client: DatabricksFunctionClient = DatabricksFunctionClient(
+        client=setup_workspace_client
+    )
 
     # Try to get the function schema for better tool definition
     schema_model: type[BaseModel]
     tool_description: str
     try:
-        function_info: FunctionInfo = client.get_function(function_name)
+        function_info: FunctionInfo = setup_client.get_function(function_name)
         schema_info = generate_function_input_params_schema(function_info)
         tool_description = (
             function_info.comment or f"Unity Catalog function: {function_name}"
@@ -419,8 +428,21 @@
         tool_description = f"Unity Catalog function: {function_name}"
 
     # Create a wrapper function that calls _execute_uc_function with partial args
-    def uc_function_wrapper(**kwargs) -> str:
+    # Uses InjectedToolArg to ensure runtime is injected but hidden from the LLM
+    def uc_function_wrapper(
+        runtime: Annotated[ToolRuntime[Context], InjectedToolArg] = None,
+        **kwargs: Any,
+    ) -> str:
         """Wrapper function that executes Unity Catalog function with partial args."""
+        # Get workspace client with OBO support via context
+        context: Context | None = runtime.context if runtime else None
+        workspace_client: WorkspaceClient = uc_function.resource.workspace_client_from(
+            context
+        )
+        client: DatabricksFunctionClient = DatabricksFunctionClient(
+            client=workspace_client
+        )
+
         return _execute_uc_function(
             client=client,
             function_name=function_name,
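The `InjectedToolArg` annotation used above is LangChain's mechanism for keeping a parameter out of the schema the model sees while still accepting it at invocation time. A minimal standalone sketch (not from the package):

```python
from typing import Annotated

from langchain_core.tools import InjectedToolArg, tool


@tool
def lookup(query: str, user_token: Annotated[str, InjectedToolArg] = "") -> str:
    """Look up a record; user_token is supplied by the framework, not the model."""
    return f"query={query}, token_present={bool(user_token)}"


# The injected argument is excluded from the model-facing schema but can still
# be passed when invoking programmatically:
print(lookup.invoke({"query": "orders", "user_token": "secret"}))
```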
dao_ai/tools/vector_search.py CHANGED
@@ -7,13 +7,14 @@ with dynamic filter schemas based on table columns and FlashRank reranking suppo
 
 import json
 import os
-from typing import Any, Optional
+from typing import Annotated, Any, Optional
 
 import mlflow
 from databricks.sdk import WorkspaceClient
 from databricks.vector_search.reranker import DatabricksReranker
 from databricks_langchain import DatabricksVectorSearch
 from flashrank import Ranker, RerankRequest
+from langchain.tools import ToolRuntime, tool
 from langchain_core.documents import Document
 from langchain_core.tools import StructuredTool
 from loguru import logger
@@ -27,6 +28,7 @@ from dao_ai.config import (
     VectorStoreModel,
     value_of,
 )
+from dao_ai.state import Context
 from dao_ai.utils import normalize_host
 
 # Create FilterItem model at module level so it can be used in type hints
@@ -299,35 +301,67 @@ def create_vector_search_tool(
         client_args_keys=list(client_args.keys()) if client_args else [],
     )
 
-    # Create DatabricksVectorSearch
-    # Note: text_column should be None for Databricks-managed embeddings
-    # (it's automatically determined from the index)
-    vector_search: DatabricksVectorSearch = DatabricksVectorSearch(
-        index_name=index_name,
-        text_column=None,
-        columns=columns,
-        workspace_client=vector_store.workspace_client,
-        client_args=client_args if client_args else None,
-        primary_key=vector_store.primary_key,
-        doc_uri=vector_store.doc_uri,
-        include_score=True,
-        reranker=(
-            DatabricksReranker(columns_to_rerank=rerank_config.columns)
-            if rerank_config and rerank_config.columns
-            else None
-        ),
-    )
+    # Cache for DatabricksVectorSearch - created lazily for OBO support
+    _cached_vector_search: DatabricksVectorSearch | None = None
+
+    def _get_vector_search(context: Context | None) -> DatabricksVectorSearch:
+        """Get or create DatabricksVectorSearch, using context for OBO auth if available."""
+        nonlocal _cached_vector_search
+
+        # Use cached instance if available and not OBO
+        if _cached_vector_search is not None and not vector_store.on_behalf_of_user:
+            return _cached_vector_search
+
+        # Get workspace client with OBO support via context
+        workspace_client: WorkspaceClient = vector_store.workspace_client_from(context)
+
+        # Create DatabricksVectorSearch
+        # Note: text_column should be None for Databricks-managed embeddings
+        # (it's automatically determined from the index)
+        vs: DatabricksVectorSearch = DatabricksVectorSearch(
+            index_name=index_name,
+            text_column=None,
+            columns=columns,
+            workspace_client=workspace_client,
+            client_args=client_args if client_args else None,
+            primary_key=vector_store.primary_key,
+            doc_uri=vector_store.doc_uri,
+            include_score=True,
+            reranker=(
+                DatabricksReranker(columns_to_rerank=rerank_config.columns)
+                if rerank_config and rerank_config.columns
+                else None
+            ),
+        )
 
-    # Create dynamic input schema
-    input_schema: type[BaseModel] = _create_dynamic_input_schema(
-        index_name, vector_store.workspace_client
-    )
+        # Cache for non-OBO scenarios
+        if not vector_store.on_behalf_of_user:
+            _cached_vector_search = vs
+
+        return vs
+
+    # Determine tool name and description
+    tool_name: str = name or f"vector_search_{vector_store.index.name}"
+    tool_description: str = description or f"Search documents in {index_name}"
 
-    # Define the tool function
+    # Use @tool decorator for proper ToolRuntime injection
+    # The decorator ensures runtime is automatically injected and hidden from the LLM
+    @tool(name_or_callable=tool_name, description=tool_description)
     def vector_search_func(
-        query: str, filters: Optional[list[FilterItem]] = None
+        query: Annotated[str, "The search query to find relevant documents"],
+        filters: Annotated[
+            Optional[list[FilterItem]],
+            "Optional filters to apply to the search results",
+        ] = None,
+        runtime: ToolRuntime[Context] = None,
     ) -> str:
         """Search for relevant documents using vector similarity."""
+        # Get context for OBO support
+        context: Context | None = runtime.context if runtime else None
+
+        # Get vector search instance with OBO support
+        vector_search: DatabricksVectorSearch = _get_vector_search(context)
+
         # Convert FilterItem Pydantic models to dict format for DatabricksVectorSearch
         filters_dict: dict[str, Any] = {}
         if filters:
@@ -379,14 +413,6 @@ def create_vector_search_tool(
         # Return as JSON string
         return json.dumps(serialized_docs)
 
-    # Create the StructuredTool
-    tool: StructuredTool = StructuredTool.from_function(
-        func=vector_search_func,
-        name=name or f"vector_search_{vector_store.index.name}",
-        description=description or f"Search documents in {index_name}",
-        args_schema=input_schema,
-    )
-
-    logger.success("Vector search tool created", name=tool.name, index=index_name)
+    logger.success("Vector search tool created", name=tool_name, index=index_name)
 
-    return tool
+    return vector_search_func
dao_ai-0.1.14.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dao-ai
-Version: 0.1.12
+Version: 0.1.14
 Summary: DAO AI: A modular, multi-agent orchestration framework for complex AI workflows. Supports agent handoff, tool integration, and dynamic configuration via YAML.
 Project-URL: Homepage, https://github.com/natefleming/dao-ai
 Project-URL: Documentation, https://natefleming.github.io/dao-ai
@@ -125,7 +125,7 @@ DAO AI Builder generates valid YAML configurations that work seamlessly with thi
 - **[Architecture](docs/architecture.md)** - Understand how DAO works under the hood
 
 ### Core Concepts
-- **[Key Capabilities](docs/key-capabilities.md)** - Explore 14 powerful features for production agents
+- **[Key Capabilities](docs/key-capabilities.md)** - Explore 15 powerful features for production agents
 - **[Configuration Reference](docs/configuration-reference.md)** - Complete YAML configuration guide
 - **[Examples](docs/examples.md)** - Ready-to-use example configurations
 
@@ -148,7 +148,7 @@ Before you begin, you'll need:
 - **Python 3.11 or newer** installed on your computer ([download here](https://www.python.org/downloads/))
 - **A Databricks workspace** (ask your IT team or see [Databricks docs](https://docs.databricks.com/))
 - Access to **Unity Catalog** (your organization's data catalog)
-- **Model Serving** enabled (for deploying AI agents)
+- **Model Serving** or **Databricks Apps** enabled (for deploying AI agents)
 - *Optional*: Vector Search, Genie (for advanced features)
 
 **Not sure if you have access?** Your Databricks administrator can grant you permissions.
@@ -345,6 +345,7 @@ DAO provides powerful capabilities for building production-ready AI agents:
 
 | Feature | Description |
 |---------|-------------|
+| **Dual Deployment Targets** | Deploy to Databricks Model Serving or Databricks Apps with a single config |
 | **Multi-Tool Support** | Python functions, Unity Catalog, MCP, Agent Endpoints |
 | **On-Behalf-Of User** | Per-user permissions and governance |
 | **Advanced Caching** | Two-tier (LRU + Semantic) caching for cost optimization |
dao_ai-0.1.14.dist-info/RECORD CHANGED
@@ -1,7 +1,7 @@
 dao_ai/__init__.py,sha256=18P98ExEgUaJ1Byw440Ct1ty59v6nxyWtc5S6Uq2m9Q,1062
 dao_ai/catalog.py,sha256=sPZpHTD3lPx4EZUtIWeQV7VQM89WJ6YH__wluk1v2lE,4947
 dao_ai/cli.py,sha256=1Ox8qjLKRlrKu2YXozm0lWoeZnDCouECeZSGVPkQgIQ,50923
-dao_ai/config.py,sha256=9G_JiPbr_ihUCaqYPvnMbzLKtyppXTjraQfVOxnqeBA,129323
+dao_ai/config.py,sha256=7MDuX7xGSyDuBpdFZbKNDUPuTiuVe9onnUEGFtDI0jc,130123
 dao_ai/graph.py,sha256=1-uQlo7iXZQTT3uU8aYu0N5rnhw5_g_2YLwVsAs6M-U,1119
 dao_ai/logging.py,sha256=lYy4BmucCHvwW7aI3YQkQXKJtMvtTnPDu9Hnd7_O4oc,1556
 dao_ai/messages.py,sha256=4ZBzO4iFdktGSLrmhHzFjzMIt2tpaL-aQLHOQJysGnY,6959
@@ -14,7 +14,7 @@ dao_ai/types.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 dao_ai/utils.py,sha256=_Urd7Nj2VzrgPKf3NS4E6vt0lWRhEUddBqWN9BksqeE,11543
 dao_ai/vector_search.py,sha256=8d3xROg9zSIYNXjRRl6rSexsJTlufjRl5Fy1ZA8daKA,4019
 dao_ai/apps/__init__.py,sha256=RLuhZf4gQ4pemwKDz1183aXib8UfaRhwfKvRx68GRlM,661
-dao_ai/apps/handlers.py,sha256=nbJZOgmnHG5icR4Pb56jxIWsm_AGnsURgViMJX2_LTU,2608
+dao_ai/apps/handlers.py,sha256=6-IhhklHSPnS8aqKp155wPaSnYWTU1BSOPwbdWYBkFU,3594
 dao_ai/apps/model_serving.py,sha256=XLt3_0pGSRceMK6YtOrND9Jnh7mKLPCtwjVDLIaptQU,847
 dao_ai/apps/resources.py,sha256=5l6UxfMq6uspOql-HNDyUikfqRAa9eH_TiJHrGgMb6s,40029
 dao_ai/apps/server.py,sha256=neWbVnC2z9f-tJZBnho70FytNDEVOdOM1YngoGc5KHI,1264
@@ -58,18 +58,18 @@ dao_ai/tools/__init__.py,sha256=NfRpAKds_taHbx6gzLPWgtPXve-YpwzkoOAUflwxceM,1734
 dao_ai/tools/agent.py,sha256=plIWALywRjaDSnot13nYehBsrHRpBUpsVZakoGeajOE,1858
 dao_ai/tools/core.py,sha256=bRIN3BZhRQX8-Kpu3HPomliodyskCqjxynQmYbk6Vjs,3783
 dao_ai/tools/email.py,sha256=A3TsCoQgJR7UUWR0g45OPRGDpVoYwctFs1MOZMTt_d4,7389
-dao_ai/tools/genie.py,sha256=4e_5MeAe7kDzHbYeXuNPFbY5z8ci3ouj8l5254CZ2lA,8874
-dao_ai/tools/mcp.py,sha256=tfn-sdKwfNY31RsDFlafdGyN4XlKGfniXG_mO-Meh4E,21030
+dao_ai/tools/genie.py,sha256=b0R51N5D58H1vpOCUCA88ALjLs58KSMn6nl80ap8_c0,11009
+dao_ai/tools/mcp.py,sha256=K1yMQ39UgJ0Q4xhMpNWV3AVNx929w9vxZlLoCq_jrws,22016
 dao_ai/tools/memory.py,sha256=lwObKimAand22Nq3Y63tsv-AXQ5SXUigN9PqRjoWKes,1836
 dao_ai/tools/python.py,sha256=jWFnZPni2sCdtd8D1CqXnZIPHnWkdK27bCJnBXpzhvo,1879
 dao_ai/tools/search.py,sha256=cJ3D9FKr1GAR6xz55dLtRkjtQsI0WRueGt9TPDFpOxc,433
-dao_ai/tools/slack.py,sha256=QpLMXDApjPKyRpEanLp0tOhCp9WXaEBa615p4t0pucs,5040
-dao_ai/tools/sql.py,sha256=tKd1gjpLuKdQDyfmyYYtMiNRHDW6MGRbdEVaeqyB8Ok,7632
+dao_ai/tools/slack.py,sha256=QnMsA7cYD1MnEcqGqqSr6bKIhV0RgDpkyaiPmDqnAts,5433
+dao_ai/tools/sql.py,sha256=FG-Aa0FAUAnhCuZvao1J-y-cMM6bU5eCujNbsYn0xDw,7864
 dao_ai/tools/time.py,sha256=tufJniwivq29y0LIffbgeBTIDE6VgrLpmVf8Qr90qjw,9224
-dao_ai/tools/unity_catalog.py,sha256=AjQfW7bvV8NurqDLIyntYRv2eJuTwNdbvex1L5CRjOk,15534
-dao_ai/tools/vector_search.py,sha256=oe2uBwl2TfeJIXPpwiS6Rmz7wcHczSxNyqS9P3hE6co,14542
-dao_ai-0.1.12.dist-info/METADATA,sha256=BhkwtDjbzohpk86ICfQP2qAeNLsvo9kBbgwzpnB_WZQ,16698
-dao_ai-0.1.12.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
-dao_ai-0.1.12.dist-info/entry_points.txt,sha256=Xa-UFyc6gWGwMqMJOt06ZOog2vAfygV_DSwg1AiP46g,43
-dao_ai-0.1.12.dist-info/licenses/LICENSE,sha256=YZt3W32LtPYruuvHE9lGk2bw6ZPMMJD8yLrjgHybyz4,1069
-dao_ai-0.1.12.dist-info/RECORD,,
+dao_ai/tools/unity_catalog.py,sha256=oBlW6pH-Ne08g60QW9wVi_tyeVYDiecuNoxQbIIFmN8,16515
+dao_ai/tools/vector_search.py,sha256=LF_72vlEF6TwUjKVv6nkUetLK766l9Kl6DQQTc9ebJI,15888
+dao_ai-0.1.14.dist-info/METADATA,sha256=3cgCatKya02uIxRs9fP-P2R_GbV3DfrQ-_JsknH0kkg,16830
+dao_ai-0.1.14.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+dao_ai-0.1.14.dist-info/entry_points.txt,sha256=Xa-UFyc6gWGwMqMJOt06ZOog2vAfygV_DSwg1AiP46g,43
+dao_ai-0.1.14.dist-info/licenses/LICENSE,sha256=YZt3W32LtPYruuvHE9lGk2bw6ZPMMJD8yLrjgHybyz4,1069
+dao_ai-0.1.14.dist-info/RECORD,,