PyPI - dao-ai - Versions diffs - 0.1.5__py3-none-any.whl → 0.1.7__py3-none-any.whl - Mend

dao-ai 0.1.5__py3-none-any.whl → 0.1.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

dao_ai/cli.py +320 -1
dao_ai/config.py +119 -19
dao_ai/middleware/__init__.py +5 -0
dao_ai/middleware/tool_selector.py +129 -0
dao_ai/prompts.py +2 -60
dao_ai/tools/__init__.py +3 -1
dao_ai/tools/mcp.py +338 -47
{dao_ai-0.1.5.dist-info → dao_ai-0.1.7.dist-info}/METADATA +1 -1
{dao_ai-0.1.5.dist-info → dao_ai-0.1.7.dist-info}/RECORD +12 -11
{dao_ai-0.1.5.dist-info → dao_ai-0.1.7.dist-info}/WHEEL +0 -0
{dao_ai-0.1.5.dist-info → dao_ai-0.1.7.dist-info}/entry_points.txt +0 -0
{dao_ai-0.1.5.dist-info → dao_ai-0.1.7.dist-info}/licenses/LICENSE +0 -0

dao_ai/middleware/tool_selector.py ADDED Viewed

@@ -0,0 +1,129 @@
+"""
+Tool selector middleware for intelligently filtering tools before LLM calls.
+This middleware uses an LLM to select relevant tools from a large set, improving
+performance and accuracy by reducing context size and improving focus.
+"""
+from __future__ import annotations
+from typing import Any
+from langchain.agents.middleware import LLMToolSelectorMiddleware
+from langchain_core.language_models import LanguageModelLike
+from loguru import logger
+from dao_ai.config import ToolModel
+def create_llm_tool_selector_middleware(
+    model: LanguageModelLike,
+    max_tools: int = 3,
+    always_include: list[str | ToolModel | dict[str, Any]] | None = None,
+) -> LLMToolSelectorMiddleware:
+    """
+    Create an LLMToolSelectorMiddleware for intelligent tool selection.
+    Uses an LLM to analyze the current query and select the most relevant tools
+    before calling the main model. This is particularly useful for agents with
+    many tools (10+) where most aren't relevant for any given query.
+    Benefits:
+    - Reduces token usage by filtering irrelevant tools
+    - Improves model focus and accuracy
+    - Optimizes cost for agents with large tool sets
+    - Maintains context window efficiency
+    Args:
+        model: The LLM to use for tool selection. Typically a smaller, faster
+            model like "gpt-4o-mini" or similar.
+        max_tools: Maximum number of tools to select for each query.
+            Default 3. Adjust based on your use case - higher values
+            increase context but improve tool coverage.
+        always_include: List of tools that should always be included regardless
+            of the LLM's selection. Can be:
+            - str: Tool name
+            - ToolModel: Full tool configuration
+            - dict: Tool configuration dictionary
+            Use this for critical tools that should always be available.
+    Returns:
+        LLMToolSelectorMiddleware configured with the specified parameters
+    Example:
+        from dao_ai.middleware import create_llm_tool_selector_middleware
+        from dao_ai.llms import create_llm
+        # Use a fast, cheap model for tool selection
+        selector_llm = create_llm("databricks-gpt-4o-mini")
+        middleware = create_llm_tool_selector_middleware(
+            model=selector_llm,
+            max_tools=3,
+            always_include=["search_web"],  # Always include search
+        )
+    Use Cases:
+        - Large tool sets (10+ tools) where most are specialized
+        - Cost optimization by reducing tokens in main model calls
+        - Improved accuracy by reducing tool confusion
+        - Dynamic tool filtering based on query relevance
+    Note:
+        The selector model makes an additional LLM call for each agent turn.
+        Choose a fast, inexpensive model to minimize latency and cost overhead.
+    """
+    # Extract tool names from always_include
+    always_include_names: list[str] = []
+    if always_include:
+        always_include_names = _resolve_tool_names(always_include)
+    logger.debug(
+        "Creating LLM tool selector middleware",
+        max_tools=max_tools,
+        always_include_count=len(always_include_names),
+        always_include=always_include_names,
+    )
+    return LLMToolSelectorMiddleware(
+        model=model,
+        max_tools=max_tools,
+        always_include=always_include_names if always_include_names else None,
+    )
+def _resolve_tool_names(tools: list[str | ToolModel | dict[str, Any]]) -> list[str]:
+    """
+    Extract tool names from a list of tool specifications.
+    Args:
+        tools: List of tool specifications (strings, ToolModels, or dicts)
+    Returns:
+        List of tool names as strings
+    """
+    names: list[str] = []
+    for tool_spec in tools:
+        if isinstance(tool_spec, str):
+            # Simple string tool name
+            names.append(tool_spec)
+        elif isinstance(tool_spec, ToolModel):
+            # ToolModel - use its name
+            names.append(tool_spec.name)
+        elif isinstance(tool_spec, dict):
+            # Dictionary - try to extract name
+            if "name" in tool_spec:
+                names.append(tool_spec["name"])
+            else:
+                logger.warning(
+                    "Tool dict missing 'name' field, skipping",
+                    tool_spec=tool_spec,
+                )
+        else:
+            logger.warning(
+                "Unknown tool specification type, skipping",
+                tool_spec_type=type(tool_spec).__name__,
+            )
+    return names

dao_ai/prompts.py CHANGED Viewed

@@ -61,19 +61,14 @@ def make_prompt(
     @dynamic_prompt
     def dynamic_system_prompt(request: ModelRequest) -> str:
         """Generate dynamic system prompt based on runtime context."""
-        # Get parameters from runtime context
+        # Initialize parameters for template variables
         params: dict[str, Any] = {
             input_variable: "" for input_variable in prompt_template.input_variables
         }
-        # Access context from runtime
+        # Apply context fields as template parameters
         context: Context = request.runtime.context
         if context:
-            if context.user_id and "user_id" in params:
-                params["user_id"] = context.user_id
-            if context.thread_id and "thread_id" in params:
-                params["thread_id"] = context.thread_id
-            # Apply all context fields as template parameters
             context_dict = context.model_dump()
             for key, value in context_dict.items():
                 if key in params and value is not None:
@@ -89,56 +84,3 @@ def make_prompt(
         return formatted_prompt
     return dynamic_system_prompt
-def create_prompt_middleware(
-    base_system_prompt: Optional[str | PromptModel],
-) -> AgentMiddleware | None:
-    """
-    Create a dynamic prompt middleware from configuration.
-    This always returns an AgentMiddleware suitable for use with
-    LangChain v1's middleware system.
-    Args:
-        base_system_prompt: The system prompt string or PromptModel
-    Returns:
-        An AgentMiddleware created by @dynamic_prompt, or None if no prompt
-    """
-    if not base_system_prompt:
-        return None
-    # Extract template string from PromptModel or use string directly
-    template_str: str
-    if isinstance(base_system_prompt, PromptModel):
-        template_str = base_system_prompt.template
-    else:
-        template_str = base_system_prompt
-    prompt_template: PromptTemplate = PromptTemplate.from_template(template_str)
-    @dynamic_prompt
-    def prompt_middleware(request: ModelRequest) -> str:
-        """Generate system prompt based on runtime context."""
-        # Get parameters from runtime context
-        params: dict[str, Any] = {
-            input_variable: "" for input_variable in prompt_template.input_variables
-        }
-        # Access context from runtime
-        context: Context = request.runtime.context
-        if context:
-            # Apply all context fields as template parameters
-            context_dict = context.model_dump()
-            for key, value in context_dict.items():
-                if key in params and value is not None:
-                    params[key] = value
-        # Format the prompt
-        formatted_prompt: str = prompt_template.format(**params)
-        logger.trace("Formatted dynamic prompt with context")
-        return formatted_prompt
-    return prompt_middleware

dao_ai/tools/__init__.py CHANGED Viewed

@@ -4,7 +4,7 @@ from dao_ai.tools.agent import create_agent_endpoint_tool
 from dao_ai.tools.core import create_tools, say_hello_tool
 from dao_ai.tools.email import create_send_email_tool
 from dao_ai.tools.genie import create_genie_tool
-from dao_ai.tools.mcp import create_mcp_tools
+from dao_ai.tools.mcp import MCPToolInfo, create_mcp_tools, list_mcp_tools
 from dao_ai.tools.memory import create_search_memory_tool
 from dao_ai.tools.python import create_factory_tool, create_python_tool
 from dao_ai.tools.search import create_search_tool
@@ -30,6 +30,8 @@ __all__ = [
     "create_genie_tool",
     "create_hooks",
     "create_mcp_tools",
+    "list_mcp_tools",
+    "MCPToolInfo",
     "create_python_tool",
     "create_search_memory_tool",
     "create_search_tool",

dao_ai/tools/mcp.py CHANGED Viewed

@@ -7,10 +7,16 @@ MCP SDK and langchain-mcp-adapters library.
 For compatibility with Databricks APIs, we use manual tool wrappers
 that give us full control over the response format.
+Public API:
+- list_mcp_tools(): List available tools from an MCP server (for discovery/UI)
+- create_mcp_tools(): Create LangChain tools for agent execution
 Reference: https://docs.langchain.com/oss/python/langchain/mcp
 """
 import asyncio
+import fnmatch
+from dataclasses import dataclass
 from typing import Any, Sequence
 from langchain_core.runnables.base import RunnableLike
@@ -26,6 +32,117 @@ from dao_ai.config import (
 )
+@dataclass
+class MCPToolInfo:
+    """
+    Information about an MCP tool for display and selection.
+    This is a simplified representation of an MCP tool that contains
+    only the information needed for UI display and tool selection.
+    It's designed to be easily serializable for use in web UIs.
+    Attributes:
+        name: The unique identifier/name of the tool
+        description: Human-readable description of what the tool does
+        input_schema: JSON Schema describing the tool's input parameters
+    """
+    name: str
+    description: str | None
+    input_schema: dict[str, Any]
+    def to_dict(self) -> dict[str, Any]:
+        """Convert to dictionary for JSON serialization."""
+        return {
+            "name": self.name,
+            "description": self.description,
+            "input_schema": self.input_schema,
+        }
+def _matches_pattern(tool_name: str, patterns: list[str]) -> bool:
+    """
+    Check if tool name matches any of the provided patterns.
+    Supports glob patterns:
+    - * matches any characters
+    - ? matches single character
+    - [abc] matches any char in set
+    - [!abc] matches any char NOT in set
+    Args:
+        tool_name: Name of the tool to check
+        patterns: List of exact names or glob patterns
+    Returns:
+        True if tool name matches any pattern
+    Examples:
+        >>> _matches_pattern("query_sales", ["query_*"])
+        True
+        >>> _matches_pattern("list_tables", ["query_*"])
+        False
+        >>> _matches_pattern("tool_a", ["tool_?"])
+        True
+    """
+    for pattern in patterns:
+        if fnmatch.fnmatch(tool_name, pattern):
+            return True
+    return False
+def _should_include_tool(
+    tool_name: str,
+    include_tools: list[str] | None,
+    exclude_tools: list[str] | None,
+) -> bool:
+    """
+    Determine if a tool should be included based on include/exclude filters.
+    Logic:
+    1. If exclude_tools specified and tool matches: EXCLUDE (highest priority)
+    2. If include_tools specified and tool matches: INCLUDE
+    3. If include_tools specified and tool doesn't match: EXCLUDE
+    4. If no filters specified: INCLUDE (default)
+    Args:
+        tool_name: Name of the tool
+        include_tools: Optional list of tools/patterns to include
+        exclude_tools: Optional list of tools/patterns to exclude
+    Returns:
+        True if tool should be included
+    Examples:
+        >>> _should_include_tool("query_sales", ["query_*"], None)
+        True
+        >>> _should_include_tool("drop_table", None, ["drop_*"])
+        False
+        >>> _should_include_tool("query_sales", ["query_*"], ["*_sales"])
+        False  # exclude takes precedence
+    """
+    # Exclude has highest priority
+    if exclude_tools and _matches_pattern(tool_name, exclude_tools):
+        logger.debug("Tool excluded by exclude_tools", tool_name=tool_name)
+        return False
+    # If include list exists, tool must match it
+    if include_tools:
+        if _matches_pattern(tool_name, include_tools):
+            logger.debug("Tool included by include_tools", tool_name=tool_name)
+            return True
+        else:
+            logger.debug(
+                "Tool not in include_tools",
+                tool_name=tool_name,
+                include_patterns=include_tools,
+            )
+            return False
+    # Default: include all tools
+    return True
 def _build_connection_config(
     function: McpFunctionModel,
 ) -> dict[str, Any]:
@@ -124,69 +241,33 @@ def _extract_text_content(result: CallToolResult) -> str:
     return "\n".join(text_parts)
-def create_mcp_tools(
-    function: McpFunctionModel,
-) -> Sequence[RunnableLike]:
+def _fetch_tools_from_server(function: McpFunctionModel) -> list[Tool]:
     """
-    Create tools for invoking Databricks MCP functions.
+    Fetch raw MCP tools from the server.
-    Supports both direct MCP connections and UC Connection-based MCP access.
-    Uses manual tool wrappers to ensure response format compatibility with
-    Databricks APIs (which reject extra fields in tool results).
-    Based on: https://docs.databricks.com/aws/en/generative-ai/mcp/external-mcp
+    This is the core async operation that connects to the MCP server
+    and retrieves the list of available tools.
     Args:
         function: The MCP function model configuration.
     Returns:
-        A sequence of LangChain tools that can be used by agents.
-    """
-    mcp_url = function.mcp_url
-    logger.debug("Creating MCP tools", mcp_url=mcp_url)
+        List of raw MCP Tool objects from the server.
+    Raises:
+        RuntimeError: If connection to MCP server fails.
+    """
     connection_config = _build_connection_config(function)
-    if function.connection:
-        logger.debug(
-            "Using UC Connection for MCP",
-            connection_name=function.connection.name,
-            mcp_url=mcp_url,
-        )
-    else:
-        logger.debug(
-            "Using direct connection for MCP",
-            transport=function.transport,
-            mcp_url=mcp_url,
-        )
-    # Create client to list available tools
     client = MultiServerMCPClient({"mcp_function": connection_config})
-    async def _list_tools() -> list[Tool]:
-        """List available MCP tools from the server."""
+    async def _list_tools_async() -> list[Tool]:
+        """Async helper to list tools from MCP server."""
         async with client.session("mcp_function") as session:
             result = await session.list_tools()
             return result.tools if hasattr(result, "tools") else list(result)
     try:
-        mcp_tools: list[Tool] = asyncio.run(_list_tools())
-        # Log discovered tools
-        logger.info(
-            "Discovered MCP tools",
-            tools_count=len(mcp_tools),
-            mcp_url=mcp_url,
-        )
-        for mcp_tool in mcp_tools:
-            logger.debug(
-                "MCP tool discovered",
-                tool_name=mcp_tool.name,
-                tool_description=(
-                    mcp_tool.description[:100] if mcp_tool.description else None
-                ),
-            )
+        return asyncio.run(_list_tools_async())
     except Exception as e:
         if function.connection:
             logger.error(
@@ -210,6 +291,216 @@ def create_mcp_tools(
                 f"and URL '{function.url}': {e}"
             ) from e
+def list_mcp_tools(
+    function: McpFunctionModel,
+    apply_filters: bool = True,
+) -> list[MCPToolInfo]:
+    """
+    List available tools from an MCP server.
+    This function connects to an MCP server and returns information about
+    all available tools. It's designed for:
+    - Tool discovery and exploration
+    - UI-based tool selection (e.g., in DAO AI Builder)
+    - Debugging and validation of MCP configurations
+    The returned MCPToolInfo objects contain all information needed to
+    display tools in a UI and allow users to select which tools to use.
+    Args:
+        function: The MCP function model configuration containing:
+            - Connection details (url, connection, headers, etc.)
+            - Optional filtering (include_tools, exclude_tools)
+        apply_filters: Whether to apply include_tools/exclude_tools filters.
+            Set to False to get the complete list of available tools
+            regardless of filter configuration. Default True.
+    Returns:
+        List of MCPToolInfo objects describing available tools.
+        Each contains name, description, and input_schema.
+    Raises:
+        RuntimeError: If connection to MCP server fails.
+    Example:
+        # List all tools from a DBSQL MCP server
+        from dao_ai.config import McpFunctionModel
+        from dao_ai.tools.mcp import list_mcp_tools
+        function = McpFunctionModel(sql=True)
+        tools = list_mcp_tools(function)
+        for tool in tools:
+            print(f"{tool.name}: {tool.description}")
+        # Get unfiltered list (ignore include_tools/exclude_tools)
+        all_tools = list_mcp_tools(function, apply_filters=False)
+    Note:
+        For creating executable LangChain tools, use create_mcp_tools() instead.
+        This function is for discovery/display purposes only.
+    """
+    mcp_url = function.mcp_url
+    logger.debug("Listing MCP tools", mcp_url=mcp_url, apply_filters=apply_filters)
+    # Log connection type
+    if function.connection:
+        logger.debug(
+            "Using UC Connection for MCP",
+            connection_name=function.connection.name,
+            mcp_url=mcp_url,
+        )
+    else:
+        logger.debug(
+            "Using direct connection for MCP",
+            transport=function.transport,
+            mcp_url=mcp_url,
+        )
+    # Fetch tools from server
+    mcp_tools: list[Tool] = _fetch_tools_from_server(function)
+    # Log discovered tools
+    logger.info(
+        "Discovered MCP tools from server",
+        tools_count=len(mcp_tools),
+        tool_names=[t.name for t in mcp_tools],
+        mcp_url=mcp_url,
+    )
+    # Apply filtering if requested and configured
+    if apply_filters and (function.include_tools or function.exclude_tools):
+        original_count = len(mcp_tools)
+        mcp_tools = [
+            tool
+            for tool in mcp_tools
+            if _should_include_tool(
+                tool.name,
+                function.include_tools,
+                function.exclude_tools,
+            )
+        ]
+        filtered_count = original_count - len(mcp_tools)
+        logger.info(
+            "Filtered MCP tools",
+            original_count=original_count,
+            filtered_count=filtered_count,
+            final_count=len(mcp_tools),
+            include_patterns=function.include_tools,
+            exclude_patterns=function.exclude_tools,
+        )
+    # Convert to MCPToolInfo for cleaner API
+    tool_infos: list[MCPToolInfo] = []
+    for mcp_tool in mcp_tools:
+        tool_info = MCPToolInfo(
+            name=mcp_tool.name,
+            description=mcp_tool.description,
+            input_schema=mcp_tool.inputSchema or {},
+        )
+        tool_infos.append(tool_info)
+        logger.debug(
+            "MCP tool available",
+            tool_name=mcp_tool.name,
+            tool_description=(
+                mcp_tool.description[:100] if mcp_tool.description else None
+            ),
+        )
+    return tool_infos
+def create_mcp_tools(
+    function: McpFunctionModel,
+) -> Sequence[RunnableLike]:
+    """
+    Create executable LangChain tools for invoking Databricks MCP functions.
+    Supports both direct MCP connections and UC Connection-based MCP access.
+    Uses manual tool wrappers to ensure response format compatibility with
+    Databricks APIs (which reject extra fields in tool results).
+    This function:
+    1. Fetches available tools from the MCP server
+    2. Applies include_tools/exclude_tools filters
+    3. Wraps each tool for LangChain agent execution
+    For tool discovery without creating executable tools, use list_mcp_tools().
+    Based on: https://docs.databricks.com/aws/en/generative-ai/mcp/external-mcp
+    Args:
+        function: The MCP function model configuration containing:
+            - Connection details (url, connection, headers, etc.)
+            - Optional filtering (include_tools, exclude_tools)
+    Returns:
+        A sequence of LangChain tools that can be used by agents.
+    Raises:
+        RuntimeError: If connection to MCP server fails.
+    Example:
+        from dao_ai.config import McpFunctionModel
+        from dao_ai.tools.mcp import create_mcp_tools
+        function = McpFunctionModel(sql=True)
+        tools = create_mcp_tools(function)
+        # Use tools in an agent
+        agent = create_agent(model=model, tools=tools)
+    """
+    mcp_url = function.mcp_url
+    logger.debug("Creating MCP tools", mcp_url=mcp_url)
+    # Fetch and filter tools using shared logic
+    # We need the raw Tool objects here, not MCPToolInfo
+    mcp_tools: list[Tool] = _fetch_tools_from_server(function)
+    # Log discovered tools
+    logger.info(
+        "Discovered MCP tools from server",
+        tools_count=len(mcp_tools),
+        tool_names=[t.name for t in mcp_tools],
+        mcp_url=mcp_url,
+    )
+    # Apply filtering if configured
+    if function.include_tools or function.exclude_tools:
+        original_count = len(mcp_tools)
+        mcp_tools = [
+            tool
+            for tool in mcp_tools
+            if _should_include_tool(
+                tool.name,
+                function.include_tools,
+                function.exclude_tools,
+            )
+        ]
+        filtered_count = original_count - len(mcp_tools)
+        logger.info(
+            "Filtered MCP tools",
+            original_count=original_count,
+            filtered_count=filtered_count,
+            final_count=len(mcp_tools),
+            include_patterns=function.include_tools,
+            exclude_patterns=function.exclude_tools,
+        )
+    # Log final tool list
+    for mcp_tool in mcp_tools:
+        logger.debug(
+            "MCP tool available",
+            tool_name=mcp_tool.name,
+            tool_description=(
+                mcp_tool.description[:100] if mcp_tool.description else None
+            ),
+        )
     def _create_tool_wrapper(mcp_tool: Tool) -> RunnableLike:
         """
         Create a LangChain tool wrapper for an MCP tool.

{dao_ai-0.1.5.dist-info → dao_ai-0.1.7.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dao-ai
-Version: 0.1.5
+Version: 0.1.7
 Summary: DAO AI: A modular, multi-agent orchestration framework for complex AI workflows. Supports agent handoff, tool integration, and dynamic configuration via YAML.
 Project-URL: Homepage, https://github.com/natefleming/dao-ai
 Project-URL: Documentation, https://natefleming.github.io/dao-ai

dao-ai 0.1.5__py3-none-any.whl → 0.1.7__py3-none-any.whl

dao-ai 0.1.5__py3-none-any.whl → 0.1.7__py3-none-any.whl