dao-ai 0.0.36__py3-none-any.whl → 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dao_ai/__init__.py +29 -0
- dao_ai/cli.py +195 -30
- dao_ai/config.py +770 -244
- dao_ai/genie/__init__.py +1 -22
- dao_ai/genie/cache/__init__.py +1 -2
- dao_ai/genie/cache/base.py +20 -70
- dao_ai/genie/cache/core.py +75 -0
- dao_ai/genie/cache/lru.py +44 -21
- dao_ai/genie/cache/semantic.py +390 -109
- dao_ai/genie/core.py +35 -0
- dao_ai/graph.py +27 -253
- dao_ai/hooks/__init__.py +9 -6
- dao_ai/hooks/core.py +22 -190
- dao_ai/memory/__init__.py +10 -0
- dao_ai/memory/core.py +23 -5
- dao_ai/memory/databricks.py +389 -0
- dao_ai/memory/postgres.py +2 -2
- dao_ai/messages.py +6 -4
- dao_ai/middleware/__init__.py +125 -0
- dao_ai/middleware/assertions.py +778 -0
- dao_ai/middleware/base.py +50 -0
- dao_ai/middleware/core.py +61 -0
- dao_ai/middleware/guardrails.py +415 -0
- dao_ai/middleware/human_in_the_loop.py +228 -0
- dao_ai/middleware/message_validation.py +554 -0
- dao_ai/middleware/summarization.py +192 -0
- dao_ai/models.py +1177 -108
- dao_ai/nodes.py +118 -161
- dao_ai/optimization.py +664 -0
- dao_ai/orchestration/__init__.py +52 -0
- dao_ai/orchestration/core.py +287 -0
- dao_ai/orchestration/supervisor.py +264 -0
- dao_ai/orchestration/swarm.py +226 -0
- dao_ai/prompts.py +126 -29
- dao_ai/providers/databricks.py +126 -381
- dao_ai/state.py +139 -21
- dao_ai/tools/__init__.py +8 -5
- dao_ai/tools/core.py +57 -4
- dao_ai/tools/email.py +280 -0
- dao_ai/tools/genie.py +47 -24
- dao_ai/tools/mcp.py +4 -3
- dao_ai/tools/memory.py +50 -0
- dao_ai/tools/python.py +4 -12
- dao_ai/tools/search.py +14 -0
- dao_ai/tools/slack.py +1 -1
- dao_ai/tools/unity_catalog.py +8 -6
- dao_ai/tools/vector_search.py +16 -9
- dao_ai/utils.py +72 -8
- dao_ai-0.1.1.dist-info/METADATA +1878 -0
- dao_ai-0.1.1.dist-info/RECORD +62 -0
- dao_ai/chat_models.py +0 -204
- dao_ai/guardrails.py +0 -112
- dao_ai/tools/genie/__init__.py +0 -236
- dao_ai/tools/human_in_the_loop.py +0 -100
- dao_ai-0.0.36.dist-info/METADATA +0 -951
- dao_ai-0.0.36.dist-info/RECORD +0 -47
- {dao_ai-0.0.36.dist-info → dao_ai-0.1.1.dist-info}/WHEEL +0 -0
- {dao_ai-0.0.36.dist-info → dao_ai-0.1.1.dist-info}/entry_points.txt +0 -0
- {dao_ai-0.0.36.dist-info → dao_ai-0.1.1.dist-info}/licenses/LICENSE +0 -0
dao_ai/tools/genie.py
CHANGED
@@ -16,10 +16,8 @@ from typing import Annotated, Any, Callable
 
 import pandas as pd
 from databricks_ai_bridge.genie import Genie, GenieResponse
-from langchain.tools import tool
+from langchain.tools import ToolRuntime, tool
 from langchain_core.messages import ToolMessage
-from langchain_core.tools import InjectedToolCallId
-from langgraph.prebuilt import InjectedState
 from langgraph.types import Command
 from loguru import logger
 from pydantic import BaseModel
@@ -33,7 +31,8 @@ from dao_ai.config import (
     value_of,
 )
 from dao_ai.genie import GenieService, GenieServiceBase
-from dao_ai.genie.cache import LRUCacheService, SemanticCacheService
+from dao_ai.genie.cache import CacheResult, LRUCacheService, SemanticCacheService
+from dao_ai.state import AgentState, Context, SessionState
 
 
 class GenieToolInput(BaseModel):
@@ -97,9 +96,6 @@ def create_genie_tool(
     logger.debug(f"truncate_results: {truncate_results}")
     logger.debug(f"name: {name}")
     logger.debug(f"description: {description}")
-    logger.debug(f"genie_room: {genie_room}")
-    logger.debug(f"persist_conversation: {persist_conversation}")
-    logger.debug(f"truncate_results: {truncate_results}")
     logger.debug(f"lru_cache_parameters: {lru_cache_parameters}")
     logger.debug(f"semantic_cache_parameters: {semantic_cache_parameters}")
 
@@ -156,7 +152,7 @@ GenieResponse: A response object containing the conversation ID and result from
         genie_service = SemanticCacheService(
             impl=genie_service,
             parameters=semantic_cache_parameters,
-
+            workspace_client=genie_room.workspace_client,  # Pass workspace client for conversation history
         ).initialize()  # Eagerly initialize to fail fast and create table
 
     # Wrap with LRU cache last (checked first - fast O(1) exact match)
@@ -172,38 +168,65 @@ GenieResponse: A response object containing the conversation ID and result from
     )
     def genie_tool(
         question: Annotated[str, "The question to ask Genie about your data"],
-
-        tool_call_id: Annotated[str, InjectedToolCallId],
+        runtime: ToolRuntime[Context, AgentState],
     ) -> Command:
-        """Process a natural language question through Databricks Genie.
-
-
-
+        """Process a natural language question through Databricks Genie.
+
+        Uses ToolRuntime to access state and context in a type-safe way.
+        """
+        # Access state through runtime
+        state: AgentState = runtime.state
+        tool_call_id: str = runtime.tool_call_id
+
+        # Ensure space_id is a string for state keys
+        space_id_str: str = str(space_id)
+
+        # Get session state (or create new one)
+        session: SessionState = state.get("session", SessionState())
+
+        # Get existing conversation ID from session
+        existing_conversation_id: str | None = session.genie.get_conversation_id(
+            space_id_str
+        )
         logger.debug(
-            f"Existing conversation ID for space {
+            f"Existing conversation ID for space {space_id_str}: {existing_conversation_id}"
         )
 
-
+        # Call ask_question which always returns CacheResult with cache metadata
+        cache_result: CacheResult = genie_service.ask_question(
             question, conversation_id=existing_conversation_id
         )
+        genie_response: GenieResponse = cache_result.response
+        cache_hit: bool = cache_result.cache_hit
+        cache_key: str | None = cache_result.served_by
 
-        current_conversation_id: str =
+        current_conversation_id: str = genie_response.conversation_id
        logger.debug(
-            f"Current conversation ID for space {
+            f"Current conversation ID for space {space_id_str}: {current_conversation_id}, "
+            f"cache_hit: {cache_hit}, cache_key: {cache_key}"
        )
 
-        # Update
-
+        # Update session state with cache information
+        if persist_conversation:
+            session.genie.update_space(
+                space_id=space_id_str,
+                conversation_id=current_conversation_id,
+                cache_hit=cache_hit,
+                cache_key=cache_key,
+                last_query=question,
+            )
+
+        # Build update dict with response and session
         update: dict[str, Any] = {
             "messages": [
-                ToolMessage(
+                ToolMessage(
+                    _response_to_json(genie_response), tool_call_id=tool_call_id
+                )
             ],
         }
 
         if persist_conversation:
-
-            updated_conversation_ids[space_id] = current_conversation_id
-            update["genie_conversation_ids"] = updated_conversation_ids
+            update["session"] = session
 
         return Command(update=update)
 
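The change above swaps the InjectedState/InjectedToolCallId annotations for LangChain 1.x's ToolRuntime, which bundles state and the tool call id into a single injected parameter. A minimal sketch of that pattern, with an illustrative tool name and an echo body standing in for the real Genie call:

from typing import Annotated

from langchain.tools import ToolRuntime, tool
from langchain_core.messages import ToolMessage
from langgraph.types import Command


@tool("ask_genie", description="Ask Genie a question about your data")  # illustrative name
def ask_genie(
    question: Annotated[str, "The question to ask Genie about your data"],
    runtime: ToolRuntime,  # injected at execution time; never exposed to the model
) -> Command:
    state = runtime.state                # replaces Annotated[AgentState, InjectedState]
    tool_call_id = runtime.tool_call_id  # replaces Annotated[str, InjectedToolCallId]
    answer = f"(echo) {question}"        # stand-in for genie_service.ask_question(...)
    return Command(
        update={"messages": [ToolMessage(answer, tool_call_id=tool_call_id)]}
    )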
dao_ai/tools/mcp.py
CHANGED
@@ -14,7 +14,6 @@ from dao_ai.config import (
     McpFunctionModel,
     TransportType,
 )
-from dao_ai.tools.human_in_the_loop import as_human_in_the_loop
 
 
 def create_mcp_tools(
@@ -95,7 +94,8 @@ def create_mcp_tools(
             logger.error(f"MCP tool {mcp_tool.name} failed: {e}")
             raise
 
-
+        # HITL is now handled at middleware level via HumanInTheLoopMiddleware
+        return tool_wrapper
 
     return [_create_tool_wrapper_with_connection(tool) for tool in mcp_tools]
 
@@ -190,6 +190,7 @@ def create_mcp_tools(
             logger.error(f"MCP tool {mcp_tool.name} failed: {e}")
             raise
 
-
+        # HITL is now handled at middleware level via HumanInTheLoopMiddleware
+        return tool_wrapper
 
     return [_create_tool_wrapper(tool) for tool in mcp_tools]
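With the as_human_in_the_loop wrapper removed, approval gates move out of individual tools and into agent middleware. A hedged sketch of that wiring using stock LangChain 1.x middleware (dao_ai ships its own variant in dao_ai/middleware/human_in_the_loop.py, whose exact API this diff doesn't show; the model id and tool name below are placeholders):

from langchain.agents import create_agent
from langchain.agents.middleware import HumanInTheLoopMiddleware

tools = []  # e.g. the list returned by create_mcp_tools(...)
agent = create_agent(
    model="databricks:databricks-claude-sonnet-4",  # placeholder endpoint
    tools=tools,
    middleware=[
        # Pause for human approval before any call to this (placeholder) tool
        HumanInTheLoopMiddleware(interrupt_on={"send_slack_message": True}),
    ],
)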
dao_ai/tools/memory.py
ADDED
@@ -0,0 +1,50 @@
+"""Memory tools for DAO AI."""
+
+from typing import Any
+
+from langchain_core.tools import BaseTool, StructuredTool
+from langmem import create_search_memory_tool as langmem_create_search_memory_tool
+from pydantic import BaseModel, Field
+
+
+def create_search_memory_tool(namespace: tuple[str, ...]) -> BaseTool:
+    """
+    Create a Databricks-compatible search_memory tool.
+
+    The langmem search_memory tool has a 'filter' field with additionalProperties: true
+    in its schema, which Databricks LLM endpoints reject. This function creates a
+    wrapper tool that omits the problematic filter field.
+
+    Args:
+        namespace: The memory namespace tuple
+
+    Returns:
+        A StructuredTool compatible with Databricks
+    """
+    # Get the original tool
+    original_tool = langmem_create_search_memory_tool(namespace=namespace)
+
+    # Create a schema without the problematic filter field
+    class SearchMemoryInput(BaseModel):
+        """Input for search_memory tool."""
+
+        query: str = Field(..., description="The search query")
+        limit: int = Field(default=10, description="Maximum number of results")
+        offset: int = Field(default=0, description="Offset for pagination")
+
+    # Create a wrapper function
+    async def search_memory_wrapper(
+        query: str, limit: int = 10, offset: int = 0
+    ) -> Any:
+        """Search your long-term memories for information relevant to your current context."""
+        return await original_tool.ainvoke(
+            {"query": query, "limit": limit, "offset": offset}
+        )
+
+    # Create the new tool
+    return StructuredTool.from_function(
+        coroutine=search_memory_wrapper,
+        name="search_memory",
+        description="Search your long-term memories for information relevant to your current context.",
+        args_schema=SearchMemoryInput,
+    )
dao_ai/tools/python.py
CHANGED
@@ -7,7 +7,6 @@ from dao_ai.config import (
     FactoryFunctionModel,
     PythonFunctionModel,
 )
-from dao_ai.tools.human_in_the_loop import as_human_in_the_loop
 from dao_ai.utils import load_function
 
 
@@ -25,11 +24,8 @@ def create_factory_tool(
     logger.debug(f"create_factory_tool: {function}")
 
     factory: Callable[..., Any] = load_function(function_name=function.full_name)
-    tool:
-
-        tool=tool,
-        function=function,
-    )
+    tool: RunnableLike = factory(**function.args)
+    # HITL is now handled at middleware level via HumanInTheLoopMiddleware
     return tool
 
 
@@ -51,10 +47,6 @@ def create_python_tool(
         function = function.full_name
 
     # Load the Python function dynamically
-    tool:
-
-    tool = as_human_in_the_loop(
-        tool=tool,
-        function=function,
-    )
+    tool: RunnableLike = load_function(function_name=function)
+    # HITL is now handled at middleware level via HumanInTheLoopMiddleware
     return tool
dao_ai/tools/search.py
ADDED
@@ -0,0 +1,14 @@
+from langchain_community.tools import DuckDuckGoSearchRun
+from langchain_core.runnables.base import RunnableLike
+from loguru import logger
+
+
+def create_search_tool() -> RunnableLike:
+    """
+    Create a DuckDuckGo search tool.
+
+    Returns:
+        RunnableLike: A DuckDuckGo search tool that returns results as a list
+    """
+    logger.debug("Creating DuckDuckGo search tool")
+    return DuckDuckGoSearchRun(output_format="list")
dao_ai/tools/slack.py
CHANGED
@@ -71,7 +71,7 @@ def create_send_slack_message_tool(
     channel_name: Optional[str] = None,
     name: Optional[str] = None,
     description: Optional[str] = None,
-) -> Callable[[str],
+) -> Callable[[str], str]:
     """
     Create a tool that sends a message to a Slack channel.
 
dao_ai/tools/unity_catalog.py
CHANGED
@@ -6,6 +6,7 @@ from databricks_langchain import DatabricksFunctionClient, UCFunctionToolkit
 from langchain_core.runnables.base import RunnableLike
 from langchain_core.tools import StructuredTool
 from loguru import logger
+from unitycatalog.ai.core.base import FunctionExecutionResult
 
 from dao_ai.config import (
     AnyVariable,
@@ -14,7 +15,6 @@ from dao_ai.config import (
     UnityCatalogFunctionModel,
     value_of,
 )
-from dao_ai.tools.human_in_the_loop import as_human_in_the_loop
 from dao_ai.utils import normalize_host
 
 
@@ -65,8 +65,8 @@ def create_uc_tools(
     tools = toolkit.tools or []
     logger.debug(f"Retrieved tools: {tools}")
 
-    #
-    return
+    # HITL is now handled at middleware level via HumanInTheLoopMiddleware
+    return list(tools)
 
 
 def _execute_uc_function(
@@ -87,14 +87,16 @@ def _execute_uc_function(
         f"Calling UC function {function_name} with parameters: {list(all_params.keys())}"
     )
 
-    result = client.execute_function(
+    result: FunctionExecutionResult = client.execute_function(
+        function_name=function_name, parameters=all_params
+    )
 
     # Handle errors and extract result
-    if
+    if result.error:
         logger.error(f"Unity Catalog function error: {result.error}")
         raise RuntimeError(f"Function execution failed: {result.error}")
 
-    result_value: str = result.value if
+    result_value: str = result.value if result.value is not None else str(result)
     logger.debug(f"UC function result: {result_value}")
     return result_value
 
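A sketch of the result handling above, using the same FunctionExecutionResult fields this diff relies on (the UC function name and parameters are placeholders):

from databricks_langchain import DatabricksFunctionClient
from unitycatalog.ai.core.base import FunctionExecutionResult

client = DatabricksFunctionClient()
result: FunctionExecutionResult = client.execute_function(
    function_name="main.default.add_numbers",  # placeholder UC function
    parameters={"a": 1, "b": 2},
)
if result.error:
    raise RuntimeError(f"Function execution failed: {result.error}")
# Fall back to the stringified result when no value is present
value: str = result.value if result.value is not None else str(result)
print(value)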
dao_ai/tools/vector_search.py
CHANGED
@@ -1,5 +1,12 @@
+"""
+Vector search tool for retrieving documents from Databricks Vector Search.
+
+This module provides a tool factory for creating semantic search tools
+using ToolRuntime[Context, AgentState] for type-safe runtime access.
+"""
+
 import os
-from typing import
+from typing import Any, Callable, List, Optional, Sequence
 
 import mlflow
 from databricks.vector_search.reranker import DatabricksReranker
@@ -9,9 +16,8 @@ from databricks_ai_bridge.vector_search_retriever_tool import (
 )
 from databricks_langchain.vectorstores import DatabricksVectorSearch
 from flashrank import Ranker, RerankRequest
+from langchain.tools import ToolRuntime, tool
 from langchain_core.documents import Document
-from langchain_core.tools import InjectedToolCallId, tool
-from langgraph.prebuilt import InjectedState
 from loguru import logger
 from mlflow.entities import SpanType
 
@@ -20,6 +26,7 @@ from dao_ai.config import (
     RetrieverModel,
     VectorStoreModel,
 )
+from dao_ai.state import AgentState, Context
 from dao_ai.utils import normalize_host
 
 
@@ -27,7 +34,7 @@ def create_vector_search_tool(
     retriever: RetrieverModel | dict[str, Any],
     name: Optional[str] = None,
     description: Optional[str] = None,
-) -> Callable:
+) -> Callable[..., list[dict[str, Any]]]:
     """
     Create a Vector Search tool for retrieving documents from a Databricks Vector Search index.
 
@@ -254,8 +261,7 @@ def create_vector_search_tool(
         return reranked_docs
 
     # Create the main vector search tool using @tool decorator
-    #
-    # so Annotated is only needed for injected LangGraph parameters
+    # Uses ToolRuntime[Context, AgentState] for type-safe runtime access
     @tool(
         name_or_callable=name or index_name,
         description=description or "Search for documents using vector similarity",
@@ -264,8 +270,7 @@ def create_vector_search_tool(
     def vector_search_tool(
         query: str,
         filters: Optional[List[FilterItem]] = None,
-
-        tool_call_id: Annotated[str, InjectedToolCallId] = None,
+        runtime: ToolRuntime[Context, AgentState] = None,
     ) -> list[dict[str, Any]]:
         """
         Search for documents using vector similarity with optional reranking.
 
@@ -276,8 +281,10 @@ def create_vector_search_tool(
 
         Both stages are traced in MLflow for observability.
 
+        Uses ToolRuntime[Context, AgentState] for type-safe runtime access.
+
         Returns:
-
+            List of serialized documents with page_content and metadata
         """
         logger.debug(
             f"Vector search tool called: query='{query[:50]}...', reranking={reranker_config is not None}"
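Unlike genie_tool, vector_search_tool declares its runtime with a None default, presumably so the tool can also be invoked without an agent runtime. A minimal sketch of that shape (names are illustrative):

from typing import Any

from langchain.tools import ToolRuntime, tool


@tool("demo_search", description="Look up documents")  # placeholder name
def demo_search(
    query: str,
    runtime: ToolRuntime = None,  # injected when run by an agent; hidden from the model's schema
) -> list[dict[str, Any]]:
    requester = getattr(runtime, "context", None) if runtime else None
    return [{"page_content": f"results for {query}", "metadata": {"requester": str(requester)}}]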
dao_ai/utils.py
CHANGED
@@ -7,6 +7,7 @@ from importlib.metadata import PackageNotFoundError, version
 from pathlib import Path
 from typing import Any, Callable, Sequence
 
+from langchain_core.tools import BaseTool
 from loguru import logger
 
 import dao_ai
@@ -19,7 +20,7 @@ def is_lib_provided(lib_name: str, pip_requirements: Sequence[str]) -> bool:
     )
 
 
-def is_installed():
+def is_installed() -> bool:
     current_file = os.path.abspath(dao_ai.__file__)
     site_packages = [os.path.abspath(path) for path in site.getsitepackages()]
     if site.getusersitepackages():
@@ -157,9 +158,6 @@ def get_installed_packages() -> dict[str, str]:
         f"langchain-tavily=={version('langchain-tavily')}",
         f"langgraph=={version('langgraph')}",
         f"langgraph-checkpoint-postgres=={version('langgraph-checkpoint-postgres')}",
-        f"langgraph-prebuilt=={version('langgraph-prebuilt')}",
-        f"langgraph-supervisor=={version('langgraph-supervisor')}",
-        f"langgraph-swarm=={version('langgraph-swarm')}",
         f"langmem=={version('langmem')}",
         f"loguru=={version('loguru')}",
         f"mcp=={version('mcp')}",
@@ -212,13 +210,13 @@ def load_function(function_name: str) -> Callable[..., Any]:
         module = importlib.import_module(module_path)
 
         # Get the function from the module
-        func = getattr(module, func_name)
+        func: Any = getattr(module, func_name)
 
-        # Verify that the resolved object is callable or is a
+        # Verify that the resolved object is callable or is a LangChain tool
         # In langchain 1.x, StructuredTool objects are not directly callable
         # but have an invoke() method
-        is_callable = callable(func)
-        is_langchain_tool =
+        is_callable: bool = callable(func)
+        is_langchain_tool: bool = isinstance(func, BaseTool)
 
         if not is_callable and not is_langchain_tool:
             raise TypeError(f"Function {func_name} is not callable or invocable.")
@@ -229,6 +227,72 @@ def load_function(function_name: str) -> Callable[..., Any]:
         raise ImportError(f"Failed to import {function_name}: {e}")
 
 
+def type_from_fqn(type_name: str) -> type:
+    """
+    Load a type from a fully qualified name (FQN).
+
+    Dynamically imports and returns a type (class) from a module using its
+    fully qualified name. Useful for loading Pydantic models, dataclasses,
+    or any Python type specified as a string in configuration files.
+
+    Args:
+        type_name: Fully qualified type name in format "module.path.ClassName"
+
+    Returns:
+        The imported type/class
+
+    Raises:
+        ValueError: If the FQN format is invalid
+        ImportError: If the module cannot be imported
+        AttributeError: If the type doesn't exist in the module
+        TypeError: If the resolved object is not a type
+
+    Example:
+        >>> ProductModel = type_from_fqn("my_models.ProductInfo")
+        >>> instance = ProductModel(name="Widget", price=9.99)
+    """
+    logger.debug(f"Loading type: {type_name}")
+
+    try:
+        # Split the FQN into module path and class name
+        parts = type_name.rsplit(".", 1)
+        if len(parts) != 2:
+            raise ValueError(
+                f"Invalid type name '{type_name}'. "
+                "Expected format: 'module.path.ClassName'"
+            )
+
+        module_path, class_name = parts
+
+        # Dynamically import the module
+        try:
+            module = importlib.import_module(module_path)
+        except ModuleNotFoundError as e:
+            raise ImportError(
+                f"Could not import module '{module_path}' for type '{type_name}': {e}"
+            ) from e
+
+        # Get the class from the module
+        if not hasattr(module, class_name):
+            raise AttributeError(
+                f"Module '{module_path}' does not have attribute '{class_name}'"
+            )
+
+        resolved_type = getattr(module, class_name)
+
+        # Verify it's actually a type
+        if not isinstance(resolved_type, type):
+            raise TypeError(
+                f"'{type_name}' resolved to {resolved_type}, which is not a type"
+            )
+
+        return resolved_type
+
+    except (ValueError, ImportError, AttributeError, TypeError) as e:
+        # Provide a detailed error message that includes the original exception
+        raise type(e)(f"Failed to load type '{type_name}': {e}") from e
+
+
 def is_in_model_serving() -> bool:
     """Check if running in Databricks Model Serving environment.
 
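The new type_from_fqn helper resolves a class from a dotted path and re-raises any failure with the FQN in the message. A quick sketch against the standard library:

from dao_ai.utils import type_from_fqn

# Resolve a class by its fully qualified name.
PathType = type_from_fqn("pathlib.Path")
print(PathType("/tmp"))  # /tmp

# Failures are chained and carry the offending FQN:
try:
    type_from_fqn("not_a_real_module.Thing")
except ImportError as err:
    print(err)  # Failed to load type 'not_a_real_module.Thing': ...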