dao-ai 0.0.20__py3-none-any.whl → 0.0.21__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dao_ai/config.py +21 -6
- dao_ai/memory/postgres.py +31 -3
- dao_ai/models.py +137 -2
- dao_ai/providers/databricks.py +2 -0
- dao_ai/state.py +3 -0
- dao_ai/tools/genie.py +346 -34
- {dao_ai-0.0.20.dist-info → dao_ai-0.0.21.dist-info}/METADATA +1 -1
- {dao_ai-0.0.20.dist-info → dao_ai-0.0.21.dist-info}/RECORD +11 -11
- {dao_ai-0.0.20.dist-info → dao_ai-0.0.21.dist-info}/WHEEL +0 -0
- {dao_ai-0.0.20.dist-info → dao_ai-0.0.21.dist-info}/entry_points.txt +0 -0
- {dao_ai-0.0.20.dist-info → dao_ai-0.0.21.dist-info}/licenses/LICENSE +0 -0
dao_ai/config.py
CHANGED
@@ -427,7 +427,8 @@ class GenieRoomModel(BaseModel, IsDatabricksResource):
     def as_resources(self) -> Sequence[DatabricksResource]:
         return [
             DatabricksGenieSpace(
-                genie_space_id=self.space_id,
+                genie_space_id=value_of(self.space_id),
+                on_behalf_of_user=self.on_behalf_of_user,
             )
         ]
 
@@ -437,7 +438,7 @@ class GenieRoomModel(BaseModel, IsDatabricksResource):
         return self
 
 
-class VolumeModel(BaseModel, HasFullName):
+class VolumeModel(BaseModel, HasFullName, IsDatabricksResource):
     model_config = ConfigDict(use_enum_values=True, extra="forbid")
     schema_model: Optional[SchemaModel] = Field(default=None, alias="schema")
     name: str
@@ -455,6 +456,13 @@ class VolumeModel(BaseModel, HasFullName):
         provider: ServiceProvider = DatabricksProvider(w=w)
         provider.create_volume(self)
 
+    @property
+    def api_scopes(self) -> Sequence[str]:
+        return ["files.files", "catalog.volumes"]
+
+    def as_resources(self) -> Sequence[DatabricksResource]:
+        return []
+
 
 class VolumePathModel(BaseModel, HasFullName):
     model_config = ConfigDict(use_enum_values=True, extra="forbid")
@@ -683,7 +691,8 @@ class WarehouseModel(BaseModel, IsDatabricksResource):
     def as_resources(self) -> Sequence[DatabricksResource]:
         return [
             DatabricksSQLWarehouse(
-                warehouse_id=self.warehouse_id,
+                warehouse_id=value_of(self.warehouse_id),
+                on_behalf_of_user=self.on_behalf_of_user,
             )
         ]
 
@@ -867,7 +876,7 @@ class PythonFunctionModel(BaseFunctionModel, HasFullName):
 
 class FactoryFunctionModel(BaseFunctionModel, HasFullName):
     model_config = ConfigDict(use_enum_values=True, extra="forbid")
-    args: Optional[dict[str,
+    args: Optional[dict[str, Any]] = Field(default_factory=dict)
     type: Literal[FunctionType.FACTORY] = FunctionType.FACTORY
 
     @property
@@ -879,6 +888,12 @@ class FactoryFunctionModel(BaseFunctionModel, HasFullName):
 
         return [create_factory_tool(self, **kwargs)]
 
+    @model_validator(mode="after")
+    def update_args(self):
+        for key, value in self.args.items():
+            self.args[key] = value_of(value)
+        return self
+
 
 class TransportType(str, Enum):
     STREAMABLE_HTTP = "streamable_http"
@@ -1262,12 +1277,12 @@ class AppModel(BaseModel):
         if len(self.agents) > 1:
             default_agent: AgentModel = self.agents[0]
             self.orchestration = OrchestrationModel(
-
+                supervisor=SupervisorModel(model=default_agent.model)
             )
         elif len(self.agents) == 1:
             default_agent: AgentModel = self.agents[0]
             self.orchestration = OrchestrationModel(
-
+                swarm=SwarmModel(
                     model=default_agent.model, default_agent=default_agent
                 )
            )
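
The recurring change in this file is that resource IDs (`space_id`, `warehouse_id`) and factory args are now routed through `value_of(...)`, so each may be either a plain literal or a variable reference resolved at validation time, and the Databricks resource entries now carry `on_behalf_of_user`. A minimal sketch of the resolution contract this relies on (illustrative only; the real `value_of` and variable models live in `dao_ai/config.py` and may differ in detail):

```python
# Illustrative stand-ins, not dao_ai's actual classes.
from dataclasses import dataclass
from typing import Any


@dataclass
class VariableModel:
    value: str


def value_of(value: Any) -> Any:
    """Collapse a variable reference to its concrete value; pass literals through."""
    return value.value if isinstance(value, VariableModel) else value


assert value_of("01ef-genie-space") == "01ef-genie-space"                 # literal
assert value_of(VariableModel("01ef-genie-space")) == "01ef-genie-space"  # reference
```
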
dao_ai/memory/postgres.py
CHANGED
@@ -74,8 +74,17 @@ class AsyncPostgresPoolManager:
         async with cls._lock:
             for connection_key, pool in cls._pools.items():
                 try:
-
+                    # Use a short timeout to avoid blocking on pool closure
+                    await asyncio.wait_for(pool.close(), timeout=2.0)
                     logger.debug(f"Closed PostgreSQL pool: {connection_key}")
+                except asyncio.TimeoutError:
+                    logger.warning(
+                        f"Timeout closing pool {connection_key}, forcing closure"
+                    )
+                except asyncio.CancelledError:
+                    logger.warning(
+                        f"Pool closure cancelled for {connection_key} (shutdown in progress)"
+                    )
                 except Exception as e:
                     logger.error(f"Error closing pool {connection_key}: {e}")
             cls._pools.clear()
@@ -369,8 +378,27 @@ def _shutdown_pools():
 
 def _shutdown_async_pools():
     try:
-
-
+        # Try to get the current event loop first
+        try:
+            loop = asyncio.get_running_loop()
+            # If we're already in an event loop, create a task
+            loop.create_task(AsyncPostgresPoolManager.close_all_pools())
+            logger.debug("Scheduled async pool closure in running event loop")
+        except RuntimeError:
+            # No running loop, try to get or create one
+            try:
+                loop = asyncio.get_event_loop()
+                if loop.is_closed():
+                    # Loop is closed, create a new one
+                    loop = asyncio.new_event_loop()
+                    asyncio.set_event_loop(loop)
+                loop.run_until_complete(AsyncPostgresPoolManager.close_all_pools())
+                logger.debug("Successfully closed all asynchronous PostgreSQL pools")
+            except Exception as inner_e:
+                # If all else fails, just log the error
+                logger.warning(
+                    f"Could not close async pools cleanly during shutdown: {inner_e}"
+                )
     except Exception as e:
         logger.error(
             f"Error closing asynchronous PostgreSQL pools during shutdown: {e}"
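
The shutdown hook now distinguishes between being called from inside a running event loop (where it can only schedule a task) and from a plain synchronous context (where it must drive a loop itself). A self-contained sketch of that same pattern, with a placeholder coroutine standing in for `AsyncPostgresPoolManager.close_all_pools()`:

```python
import asyncio


async def close_all_pools() -> None:
    """Placeholder for AsyncPostgresPoolManager.close_all_pools()."""
    await asyncio.sleep(0)


def shutdown() -> None:
    try:
        # Inside a running loop we cannot block, so schedule the closure as a task.
        loop = asyncio.get_running_loop()
        loop.create_task(close_all_pools())
    except RuntimeError:
        # No running loop: create one and drive the closure to completion.
        loop = asyncio.new_event_loop()
        try:
            loop.run_until_complete(close_all_pools())
        finally:
            loop.close()


if __name__ == "__main__":
    shutdown()
```
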
dao_ai/models.py
CHANGED
@@ -5,6 +5,7 @@ from typing import Any, Generator, Optional, Sequence, Union
 
 from langchain_core.messages import AIMessage, AIMessageChunk, BaseMessage
 from langgraph.graph.state import CompiledStateGraph
+from langgraph.types import StateSnapshot
 from loguru import logger
 from mlflow import MlflowClient
 from mlflow.pyfunc import ChatAgent, ChatModel, ResponsesAgent
@@ -59,6 +60,113 @@ def get_latest_model_version(model_name: str) -> int:
     return latest_version
 
 
+async def get_state_snapshot_async(
+    graph: CompiledStateGraph, thread_id: str
+) -> Optional[StateSnapshot]:
+    """
+    Retrieve the state snapshot from the graph for a given thread_id asynchronously.
+
+    This utility function accesses the graph's checkpointer to retrieve the current
+    state snapshot, which contains the full state values and metadata.
+
+    Args:
+        graph: The compiled LangGraph state machine
+        thread_id: The thread/conversation ID to retrieve state for
+
+    Returns:
+        StateSnapshot if found, None otherwise
+    """
+    logger.debug(f"Retrieving state snapshot for thread_id: {thread_id}")
+    try:
+        # Check if graph has a checkpointer
+        if graph.checkpointer is None:
+            logger.debug("No checkpointer available in graph")
+            return None
+
+        # Get the current state from the checkpointer (use async version)
+        config: dict[str, Any] = {"configurable": {"thread_id": thread_id}}
+        state_snapshot: Optional[StateSnapshot] = await graph.aget_state(config)
+
+        if state_snapshot is None:
+            logger.debug(f"No state found for thread_id: {thread_id}")
+            return None
+
+        return state_snapshot
+
+    except Exception as e:
+        logger.warning(f"Error retrieving state snapshot for thread {thread_id}: {e}")
+        return None
+
+
+def get_state_snapshot(
+    graph: CompiledStateGraph, thread_id: str
+) -> Optional[StateSnapshot]:
+    """
+    Retrieve the state snapshot from the graph for a given thread_id.
+
+    This is a synchronous wrapper around get_state_snapshot_async.
+    Use this for backward compatibility in synchronous contexts.
+
+    Args:
+        graph: The compiled LangGraph state machine
+        thread_id: The thread/conversation ID to retrieve state for
+
+    Returns:
+        StateSnapshot if found, None otherwise
+    """
+    import asyncio
+
+    try:
+        loop = asyncio.get_event_loop()
+    except RuntimeError:
+        loop = asyncio.new_event_loop()
+        asyncio.set_event_loop(loop)
+
+    try:
+        return loop.run_until_complete(get_state_snapshot_async(graph, thread_id))
+    except Exception as e:
+        logger.warning(f"Error in synchronous state snapshot retrieval: {e}")
+        return None
+
+
+def get_genie_conversation_ids_from_state(
+    state_snapshot: Optional[StateSnapshot],
+) -> dict[str, str]:
+    """
+    Extract genie_conversation_ids from a state snapshot.
+
+    This function extracts the genie_conversation_ids dictionary from the state
+    snapshot values if present.
+
+    Args:
+        state_snapshot: The state snapshot to extract conversation IDs from
+
+    Returns:
+        A dictionary mapping genie space_id to conversation_id, or empty dict if not found
+    """
+    if state_snapshot is None:
+        return {}
+
+    try:
+        # Extract state values - these contain the actual state data
+        state_values: dict[str, Any] = state_snapshot.values
+
+        # Extract genie_conversation_ids from state values
+        genie_conversation_ids: dict[str, str] = state_values.get(
+            "genie_conversation_ids", {}
+        )
+
+        if genie_conversation_ids:
+            logger.debug(f"Retrieved genie_conversation_ids: {genie_conversation_ids}")
+            return genie_conversation_ids
+
+        return {}
+
+    except Exception as e:
+        logger.warning(f"Error extracting genie_conversation_ids from state: {e}")
+        return {}
+
+
 class LanggraphChatModel(ChatModel):
     """
     ChatModel that delegates requests to a LangGraph CompiledStateGraph.
@@ -257,7 +365,19 @@ class LanggraphResponsesAgent(ResponsesAgent):
                 text=last_message.content, id=f"msg_{uuid.uuid4().hex[:8]}"
             )
 
-
+        # Retrieve genie_conversation_ids from state if available
+        custom_outputs: dict[str, Any] = custom_inputs.copy()
+        thread_id: Optional[str] = context.thread_id
+        if thread_id:
+            state_snapshot: Optional[StateSnapshot] = loop.run_until_complete(
+                get_state_snapshot_async(self.graph, thread_id)
+            )
+            genie_conversation_ids: dict[str, str] = (
+                get_genie_conversation_ids_from_state(state_snapshot)
+            )
+            if genie_conversation_ids:
+                custom_outputs["genie_conversation_ids"] = genie_conversation_ids
+
         return ResponsesAgentResponse(
             output=[output_item], custom_outputs=custom_outputs
         )
@@ -318,7 +438,22 @@ class LanggraphResponsesAgent(ResponsesAgent):
                     **self.create_text_delta(delta=content, item_id=item_id)
                 )
 
-
+        # Retrieve genie_conversation_ids from state if available
+        custom_outputs: dict[str, Any] = custom_inputs.copy()
+        thread_id: Optional[str] = context.thread_id
+
+        if thread_id:
+            state_snapshot: Optional[
+                StateSnapshot
+            ] = await get_state_snapshot_async(self.graph, thread_id)
+            genie_conversation_ids: dict[str, str] = (
+                get_genie_conversation_ids_from_state(state_snapshot)
+            )
+            if genie_conversation_ids:
+                custom_outputs["genie_conversation_ids"] = (
+                    genie_conversation_ids
+                )
+
         # Yield final output item
         yield ResponsesAgentStreamEvent(
             type="response.output_item.done",
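
Taken together, the new helpers let callers recover the Genie conversation map from any checkpointed thread. A hedged usage sketch, with a throwaway one-node graph and in-memory checkpointer standing in for a real agent graph (the graph construction here is illustrative, not a dao_ai API):

```python
from langgraph.checkpoint.memory import MemorySaver
from langgraph.graph import END, START, MessagesState, StateGraph

from dao_ai.models import get_genie_conversation_ids_from_state, get_state_snapshot

# Minimal graph with a checkpointer so state snapshots exist per thread.
builder = StateGraph(MessagesState)
builder.add_node("noop", lambda state: {})
builder.add_edge(START, "noop")
builder.add_edge("noop", END)
graph = builder.compile(checkpointer=MemorySaver())

graph.invoke({"messages": []}, config={"configurable": {"thread_id": "thread-123"}})
snapshot = get_state_snapshot(graph, thread_id="thread-123")
conversation_ids = get_genie_conversation_ids_from_state(snapshot)
# Maps genie space_id -> conversation_id; {} here since no Genie tool has run.
```
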
dao_ai/providers/databricks.py
CHANGED
@@ -226,6 +226,7 @@ class DatabricksProvider(ServiceProvider):
             config.resources.connections.values()
         )
         databases: Sequence[DatabaseModel] = list(config.resources.databases.values())
+        volumes: Sequence[VolumeModel] = list(config.resources.volumes.values())
 
         resources: Sequence[IsDatabricksResource] = (
             llms
@@ -236,6 +237,7 @@ class DatabricksProvider(ServiceProvider):
             + tables
             + connections
             + databases
+            + volumes
         )
 
         # Flatten all resources from all models into a single list
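
Volumes now take part in resource collection alongside the other model types; since `VolumeModel.as_resources()` returns an empty list, volumes contribute API scopes rather than `DatabricksResource` entries. A minimal sketch of the flatten step named in the trailing comment, using stand-in models (not dao_ai's classes):

```python
from typing import Sequence


class WarehouseStub:
    def as_resources(self) -> Sequence[str]:
        return ["sql-warehouse-resource"]  # stands in for DatabricksSQLWarehouse(...)


class VolumeStub:
    def as_resources(self) -> Sequence[str]:
        return []  # volumes contribute api_scopes, not resource entries


models = [WarehouseStub(), VolumeStub()]
flattened = [resource for model in models for resource in model.as_resources()]
print(flattened)  # ['sql-warehouse-resource']
```
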
dao_ai/state.py
CHANGED
@@ -31,6 +31,9 @@ class SharedState(MessagesState):
     is_valid: bool  # message validation node
     message_error: str
 
+    # A mapping of genie space_id to conversation_id
+    genie_conversation_ids: dict[str, str]  # Genie
+
 
 class Context(BaseModel):
     user_id: str | None = None
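
Because `SharedState` extends `MessagesState` (a TypedDict), any node can read or update the new channel through normal state mechanics. A hedged sketch of a node reading it (the node itself is illustrative, not part of dao_ai):

```python
from typing import Any

from dao_ai.state import SharedState


def inspect_genie_conversations(state: SharedState) -> dict[str, Any]:
    # TypedDict-style access; the key may be absent before the first Genie call.
    conversation_ids: dict[str, str] = state.get("genie_conversation_ids", {})
    for space_id, conversation_id in conversation_ids.items():
        print(f"space {space_id} -> conversation {conversation_id}")
    return {}  # no state update
```
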
dao_ai/tools/genie.py
CHANGED
@@ -1,20 +1,284 @@
+import bisect
+import json
+import logging
 import os
+import time
+from dataclasses import asdict, dataclass
+from datetime import datetime
 from textwrap import dedent
-from typing import Any, Callable, Optional
+from typing import Annotated, Any, Callable, Optional, Union
 
-
-
-from
+import mlflow
+import pandas as pd
+from databricks.sdk import WorkspaceClient
+from langchain_core.messages import ToolMessage
+from langchain_core.tools import InjectedToolCallId, tool
+from langgraph.prebuilt import InjectedState
+from langgraph.types import Command
+from loguru import logger
+from pydantic import BaseModel, Field
 
-from dao_ai.config import
-
-
+from dao_ai.config import AnyVariable, CompositeVariableModel, GenieRoomModel, value_of
+
+MAX_TOKENS_OF_DATA: int = 20000
+MAX_ITERATIONS: int = 50
+DEFAULT_POLLING_INTERVAL_SECS: int = 2
+
+
+def _count_tokens(text):
+    import tiktoken
+
+    encoding = tiktoken.encoding_for_model("gpt-4o")
+    return len(encoding.encode(text))
+
+
+@dataclass
+class GenieResponse:
+    conversation_id: str
+    result: Union[str, pd.DataFrame]
+    query: Optional[str] = ""
+    description: Optional[str] = ""
+
+    def to_json(self):
+        return json.dumps(asdict(self))
+
+
+class GenieToolInput(BaseModel):
+    """Input schema for the Genie tool."""
+
+    question: str = Field(
+        description="The question to ask Genie about your data. Ask simple, clear questions about your tabular data. For complex analysis, ask multiple simple questions rather than one complex question."
+    )
+
+
+def _truncate_result(dataframe: pd.DataFrame) -> str:
+    query_result = dataframe.to_markdown()
+    tokens_used = _count_tokens(query_result)
+
+    # If the full result fits, return it
+    if tokens_used <= MAX_TOKENS_OF_DATA:
+        return query_result.strip()
+
+    def is_too_big(n):
+        return _count_tokens(dataframe.iloc[:n].to_markdown()) > MAX_TOKENS_OF_DATA
+
+    # Use bisect_left to find the cutoff point of rows within the max token data limit in a O(log n) complexity
+    # Passing True, as this is the target value we are looking for when _is_too_big returns
+    cutoff = bisect.bisect_left(range(len(dataframe) + 1), True, key=is_too_big)
+
+    # Slice to the found limit
+    truncated_df = dataframe.iloc[:cutoff]
+
+    # Edge case: Cannot return any rows because of tokens so return an empty string
+    if len(truncated_df) == 0:
+        return ""
+
+    truncated_result = truncated_df.to_markdown()
+
+    # Double-check edge case if we overshot by one
+    if _count_tokens(truncated_result) > MAX_TOKENS_OF_DATA:
+        truncated_result = truncated_df.iloc[:-1].to_markdown()
+    return truncated_result
+
+
+@mlflow.trace(span_type="PARSER")
+def _parse_query_result(resp, truncate_results) -> Union[str, pd.DataFrame]:
+    output = resp["result"]
+    if not output:
+        return "EMPTY"
+
+    columns = resp["manifest"]["schema"]["columns"]
+    header = [str(col["name"]) for col in columns]
+    rows = []
+
+    for item in output["data_array"]:
+        row = []
+        for column, value in zip(columns, item):
+            type_name = column["type_name"]
+            if value is None:
+                row.append(None)
+                continue
+
+            if type_name in ["INT", "LONG", "SHORT", "BYTE"]:
+                row.append(int(value))
+            elif type_name in ["FLOAT", "DOUBLE", "DECIMAL"]:
+                row.append(float(value))
+            elif type_name == "BOOLEAN":
+                row.append(value.lower() == "true")
+            elif type_name == "DATE" or type_name == "TIMESTAMP":
+                row.append(datetime.strptime(value[:10], "%Y-%m-%d").date())
+            elif type_name == "BINARY":
+                row.append(bytes(value, "utf-8"))
+            else:
+                row.append(value)
+
+        rows.append(row)
+
+    dataframe = pd.DataFrame(rows, columns=header)
+
+    if truncate_results:
+        query_result = _truncate_result(dataframe)
+    else:
+        query_result = dataframe.to_markdown()
+
+    return query_result.strip()
+
+
+class Genie:
+    def __init__(
+        self,
+        space_id,
+        client: WorkspaceClient | None = None,
+        truncate_results: bool = False,
+        polling_interval: int = DEFAULT_POLLING_INTERVAL_SECS,
+    ):
+        self.space_id = space_id
+        workspace_client = client or WorkspaceClient()
+        self.genie = workspace_client.genie
+        self.description = self.genie.get_space(space_id).description
+        self.headers = {
+            "Accept": "application/json",
+            "Content-Type": "application/json",
+        }
+        self.truncate_results = truncate_results
+        if polling_interval < 1 or polling_interval > 30:
+            raise ValueError("poll_interval must be between 1 and 30 seconds")
+        self.poll_interval = polling_interval
+
+    @mlflow.trace()
+    def start_conversation(self, content):
+        resp = self.genie._api.do(
+            "POST",
+            f"/api/2.0/genie/spaces/{self.space_id}/start-conversation",
+            body={"content": content},
+            headers=self.headers,
+        )
+        return resp
+
+    @mlflow.trace()
+    def create_message(self, conversation_id, content):
+        resp = self.genie._api.do(
+            "POST",
+            f"/api/2.0/genie/spaces/{self.space_id}/conversations/{conversation_id}/messages",
+            body={"content": content},
+            headers=self.headers,
+        )
+        return resp
+
+    @mlflow.trace()
+    def poll_for_result(self, conversation_id, message_id):
+        @mlflow.trace()
+        def poll_query_results(attachment_id, query_str, description):
+            iteration_count = 0
+            while iteration_count < MAX_ITERATIONS:
+                iteration_count += 1
+                resp = self.genie._api.do(
+                    "GET",
+                    f"/api/2.0/genie/spaces/{self.space_id}/conversations/{conversation_id}/messages/{message_id}/attachments/{attachment_id}/query-result",
+                    headers=self.headers,
+                )["statement_response"]
+                state = resp["status"]["state"]
+                if state == "SUCCEEDED":
+                    result = _parse_query_result(resp, self.truncate_results)
+                    return GenieResponse(
+                        conversation_id, result, query_str, description
+                    )
+                elif state in ["RUNNING", "PENDING"]:
+                    logging.debug("Waiting for query result...")
+                    time.sleep(self.poll_interval)
+                else:
+                    return GenieResponse(
+                        conversation_id,
+                        f"No query result: {resp['state']}",
+                        query_str,
+                        description,
+                    )
+            return GenieResponse(
+                conversation_id,
+                f"Genie query for result timed out after {MAX_ITERATIONS} iterations of {self.poll_interval} seconds",
+                query_str,
+                description,
+            )
+
+        @mlflow.trace()
+        def poll_result():
+            iteration_count = 0
+            while iteration_count < MAX_ITERATIONS:
+                iteration_count += 1
+                resp = self.genie._api.do(
+                    "GET",
+                    f"/api/2.0/genie/spaces/{self.space_id}/conversations/{conversation_id}/messages/{message_id}",
+                    headers=self.headers,
+                )
+                if resp["status"] == "COMPLETED":
+                    # Check if attachments key exists in response
+                    attachments = resp.get("attachments", [])
+                    if not attachments:
+                        # Handle case where response has no attachments
+                        return GenieResponse(
+                            conversation_id,
+                            result=f"Genie query completed but no attachments found. Response: {resp}",
+                        )
+
+                    attachment = next((r for r in attachments if "query" in r), None)
+                    if attachment:
+                        query_obj = attachment["query"]
+                        description = query_obj.get("description", "")
+                        query_str = query_obj.get("query", "")
+                        attachment_id = attachment["attachment_id"]
+                        return poll_query_results(attachment_id, query_str, description)
+                    if resp["status"] == "COMPLETED":
+                        text_content = next(
+                            (r for r in attachments if "text" in r), None
+                        )
+                        if text_content:
+                            return GenieResponse(
+                                conversation_id, result=text_content["text"]["content"]
+                            )
+                        return GenieResponse(
+                            conversation_id,
+                            result="Genie query completed but no text content found in attachments.",
+                        )
+                elif resp["status"] in {"CANCELLED", "QUERY_RESULT_EXPIRED"}:
+                    return GenieResponse(
+                        conversation_id, result=f"Genie query {resp['status'].lower()}."
+                    )
+                elif resp["status"] == "FAILED":
+                    return GenieResponse(
+                        conversation_id,
+                        result=f"Genie query failed with error: {resp.get('error', 'Unknown error')}",
+                    )
+                # includes EXECUTING_QUERY, Genie can retry after this status
+                else:
+                    logging.debug(f"Waiting...: {resp['status']}")
+                    time.sleep(self.poll_interval)
+            return GenieResponse(
+                conversation_id,
+                f"Genie query timed out after {MAX_ITERATIONS} iterations of {self.poll_interval} seconds",
+            )
+
+        return poll_result()
+
+    @mlflow.trace()
+    def ask_question(self, question: str, conversation_id: str | None = None):
+        logger.debug(
+            f"ask_question called with question: {question}, conversation_id: {conversation_id}"
+        )
+        if conversation_id:
+            resp = self.create_message(conversation_id, question)
+        else:
+            resp = self.start_conversation(question)
+        logger.debug(f"ask_question response: {resp}")
+        return self.poll_for_result(resp["conversation_id"], resp["message_id"])
 
 
 def create_genie_tool(
     genie_room: GenieRoomModel | dict[str, Any],
     name: Optional[str] = None,
     description: Optional[str] = None,
+    persist_conversation: bool = True,
+    truncate_results: bool = False,
+    poll_interval: int = DEFAULT_POLLING_INTERVAL_SECS,
 ) -> Callable[[str], GenieResponse]:
     """
     Create a tool for interacting with Databricks Genie for natural language queries to databases.
@@ -24,22 +288,33 @@ def create_genie_tool(
     answering questions about inventory, sales, and other structured retail data.
 
     Args:
-
-
+        genie_room: GenieRoomModel or dict containing Genie configuration
+        name: Optional custom name for the tool. If None, uses default "genie_tool"
+        description: Optional custom description for the tool. If None, uses default description
 
     Returns:
-        A
+        A LangGraph tool that processes natural language queries through Genie
     """
 
     if isinstance(genie_room, dict):
         genie_room = GenieRoomModel(**genie_room)
 
-    space_id:
-
-
-
-
+    space_id: AnyVariable = genie_room.space_id or os.environ.get(
+        "DATABRICKS_GENIE_SPACE_ID"
+    )
+    space_id: AnyVariable = genie_room.space_id or os.environ.get(
+        "DATABRICKS_GENIE_SPACE_ID"
     )
+    if isinstance(space_id, dict):
+        space_id = CompositeVariableModel(**space_id)
+    space_id = value_of(space_id)
+
+    # genie: Genie = Genie(
+    #     space_id=space_id,
+    #     client=genie_room.workspace_client,
+    #     truncate_results=truncate_results,
+    #     polling_interval=poll_interval,
+    # )
 
     default_description: str = dedent("""
         This tool lets you have a conversation and chat with tabular data about <topic>. You should ask
@@ -49,29 +324,66 @@ def create_genie_tool(
     Prefer to call this tool multiple times rather than asking a complex question.
     """)
 
-
-    description
+    tool_description: str = (
+        description if description is not None else default_description
+    )
+    tool_name: str = name if name is not None else "genie_tool"
 
-
-    Args:
-        question (str): The question to ask to ask Genie
+    function_docs = """
 
-
-
-    """)
+    Args:
+        question (str): The question to ask to ask Genie about your data. Ask simple, clear questions about your tabular data. For complex analysis, ask multiple simple questions rather than one complex question.
 
-
+    Returns:
+        GenieResponse: A response object containing the conversation ID and result from Genie."""
+    tool_description = tool_description + function_docs
 
-
-
-
-
-
+    @tool(
+        name_or_callable=tool_name,
+        description=tool_description,
+    )
+    def genie_tool(
+        question: Annotated[str, "The question to ask Genie about your data"],
+        state: Annotated[dict, InjectedState],
+        tool_call_id: Annotated[str, InjectedToolCallId],
+    ) -> Command:
+        genie: Genie = Genie(
+            space_id=space_id,
+            client=genie_room.workspace_client,
+            truncate_results=truncate_results,
+            polling_interval=poll_interval,
+        )
 
-
+        """Process a natural language question through Databricks Genie."""
+        # Get existing conversation mapping and retrieve conversation ID for this space
+        conversation_ids: dict[str, str] = state.get("genie_conversation_ids", {})
+        existing_conversation_id: str | None = conversation_ids.get(space_id)
+        logger.debug(
+            f"Existing conversation ID for space {space_id}: {existing_conversation_id}"
+        )
 
-
-
-
+        response: GenieResponse = genie.ask_question(
+            question, conversation_id=existing_conversation_id
+        )
+
+        current_conversation_id: str = response.conversation_id
+        logger.debug(
+            f"Current conversation ID for space {space_id}: {current_conversation_id}"
+        )
+
+        # Update the conversation mapping with the new conversation ID for this space
+
+        update: dict[str, Any] = {
+            "messages": [ToolMessage(response.to_json(), tool_call_id=tool_call_id)],
+        }
+
+        if persist_conversation:
+            updated_conversation_ids: dict[str, str] = conversation_ids.copy()
+            updated_conversation_ids[space_id] = current_conversation_id
+            update["genie_conversation_ids"] = updated_conversation_ids
+
+        logger.debug(f"State update: {update}")
+
+        return Command(update=update)
 
-    return
+    return genie_tool
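
The rewritten module embeds a vendored `Genie` client that polls the Genie REST API under MLflow tracing, and `create_genie_tool` now returns a LangGraph tool that threads `conversation_id` through graph state via `Command` updates. A hedged wiring sketch (the space id is hypothetical):

```python
from dao_ai.tools.genie import create_genie_tool

# The returned tool must run inside a graph whose state schema includes
# `genie_conversation_ids` (e.g. dao_ai's SharedState), since its Command
# update writes back to that key.
genie_tool = create_genie_tool(
    genie_room={"space_id": "01ef0123456789abcdef0123456789ab"},  # hypothetical id
    persist_conversation=True,   # reuse the space's conversation_id across calls
    truncate_results=True,       # cap tabular output near MAX_TOKENS_OF_DATA tokens
    poll_interval=2,
)
```
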
{dao_ai-0.0.20.dist-info → dao_ai-0.0.21.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dao-ai
-Version: 0.0.20
+Version: 0.0.21
 Summary: DAO AI: A modular, multi-agent orchestration framework for complex AI workflows. Supports agent handoff, tool integration, and dynamic configuration via YAML.
 Project-URL: Homepage, https://github.com/natefleming/dao-ai
 Project-URL: Documentation, https://natefleming.github.io/dao-ai
{dao_ai-0.0.20.dist-info → dao_ai-0.0.21.dist-info}/RECORD
CHANGED

@@ -3,14 +3,14 @@ dao_ai/agent_as_code.py,sha256=kPSeDz2-1jRaed1TMs4LA3VECoyqe9_Ed2beRLB9gXQ,472
 dao_ai/catalog.py,sha256=sPZpHTD3lPx4EZUtIWeQV7VQM89WJ6YH__wluk1v2lE,4947
 dao_ai/chat_models.py,sha256=uhwwOTeLyHWqoTTgHrs4n5iSyTwe4EQcLKnh3jRxPWI,8626
 dao_ai/cli.py,sha256=Aez2TQW3Q8Ho1IaIkRggt0NevDxAAVPjXkePC5GPJF0,20429
-dao_ai/config.py,sha256=
+dao_ai/config.py,sha256=GeaM00wNlYecwe3HhqeG88Hprt0SvGg4HtC7g_m-v98,52386
 dao_ai/graph.py,sha256=gmD9mxODfXuvn9xWeBfewm1FiuVAWMLEdnZz7DNmSH0,7859
 dao_ai/guardrails.py,sha256=4TKArDONRy8RwHzOT1plZ1rhy3x9GF_aeGpPCRl6wYA,4016
 dao_ai/messages.py,sha256=xl_3-WcFqZKCFCiov8sZOPljTdM3gX3fCHhxq-xFg2U,7005
-dao_ai/models.py,sha256=
+dao_ai/models.py,sha256=8r8GIG3EGxtVyWsRNI56lVaBjiNrPkzh4HdwMZRq8iw,31689
 dao_ai/nodes.py,sha256=SSuFNTXOdFaKg_aX-yUkQO7fM9wvNGu14lPXKDapU1U,8461
 dao_ai/prompts.py,sha256=vpmIbWs_szXUgNNDs5Gh2LcxKZti5pHDKSfoClUcgX0,1289
-dao_ai/state.py,sha256=
+dao_ai/state.py,sha256=_lF9krAYYjvFDMUwZzVKOn0ZnXKcOrbjWKdre0C5B54,1137
 dao_ai/types.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 dao_ai/utils.py,sha256=dkZTXNN6q0xwkrvSWdNq8937W2xGuLCRWRb6hRQM6kA,4217
 dao_ai/vector_search.py,sha256=jlaFS_iizJ55wblgzZmswMM3UOL-qOp2BGJc0JqXYSg,2839
@@ -19,22 +19,22 @@ dao_ai/hooks/core.py,sha256=ZShHctUSoauhBgdf1cecy9-D7J6-sGn-pKjuRMumW5U,6663
 dao_ai/memory/__init__.py,sha256=1kHx_p9abKYFQ6EYD05nuc1GS5HXVEpufmjBGw_7Uho,260
 dao_ai/memory/base.py,sha256=99nfr2UZJ4jmfTL_KrqUlRSCoRxzkZyWyx5WqeUoMdQ,338
 dao_ai/memory/core.py,sha256=g7chjBgVgx3iKjR2hghl0QL1j3802uIM_e7mgszur9M,4151
-dao_ai/memory/postgres.py,sha256=
+dao_ai/memory/postgres.py,sha256=aWHRLhPm-9ywjlQe2B4XSdLbeaiuVV88p4PiQJFNEWo,13924
 dao_ai/providers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 dao_ai/providers/base.py,sha256=-fjKypCOk28h6vioPfMj9YZSw_3Kcbi2nMuAyY7vX9k,1383
-dao_ai/providers/databricks.py,sha256=
+dao_ai/providers/databricks.py,sha256=PX5mBvZaIxSJIAHWVnPXsho1XvxcoR3Qs3I9UavFRsY,28306
 dao_ai/tools/__init__.py,sha256=ye6MHaJY7tUnJ8336YJiLxuZr55zDPNdOw6gm7j5jlc,1103
 dao_ai/tools/agent.py,sha256=WbQnyziiT12TLMrA7xK0VuOU029tdmUBXbUl-R1VZ0Q,1886
 dao_ai/tools/core.py,sha256=Kei33S8vrmvPOAyrFNekaWmV2jqZ-IPS1QDSvU7RZF0,1984
-dao_ai/tools/genie.py,sha256=
+dao_ai/tools/genie.py,sha256=1CbLViNQ3KnmDtHXuwqCPug7rEhCGvuHP1NgsY-AJZ0,15050
 dao_ai/tools/human_in_the_loop.py,sha256=yk35MO9eNETnYFH-sqlgR-G24TrEgXpJlnZUustsLkI,3681
 dao_ai/tools/mcp.py,sha256=auEt_dwv4J26fr5AgLmwmnAsI894-cyuvkvjItzAUxs,4419
 dao_ai/tools/python.py,sha256=XcQiTMshZyLUTVR5peB3vqsoUoAAy8gol9_pcrhddfI,1831
 dao_ai/tools/time.py,sha256=Y-23qdnNHzwjvnfkWvYsE7PoWS1hfeKy44tA7sCnNac,8759
 dao_ai/tools/unity_catalog.py,sha256=uX_h52BuBAr4c9UeqSMI7DNz3BPRLeai5tBVW4sJqRI,13113
 dao_ai/tools/vector_search.py,sha256=EDYQs51zIPaAP0ma1D81wJT77GQ-v-cjb2XrFVWfWdg,2621
-dao_ai-0.0.20.dist-info/METADATA,sha256=
-dao_ai-0.0.20.dist-info/WHEEL,sha256=
-dao_ai-0.0.20.dist-info/entry_points.txt,sha256=
-dao_ai-0.0.20.dist-info/licenses/LICENSE,sha256=
-dao_ai-0.0.20.dist-info/RECORD,,
+dao_ai-0.0.21.dist-info/METADATA,sha256=PG-eOltuUpaJf4lYEw-DoVy5BFT9LbMCfe8GanIV7zQ,41380
+dao_ai-0.0.21.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+dao_ai-0.0.21.dist-info/entry_points.txt,sha256=Xa-UFyc6gWGwMqMJOt06ZOog2vAfygV_DSwg1AiP46g,43
+dao_ai-0.0.21.dist-info/licenses/LICENSE,sha256=YZt3W32LtPYruuvHE9lGk2bw6ZPMMJD8yLrjgHybyz4,1069
+dao_ai-0.0.21.dist-info/RECORD,,

{dao_ai-0.0.20.dist-info → dao_ai-0.0.21.dist-info}/WHEEL
File without changes

{dao_ai-0.0.20.dist-info → dao_ai-0.0.21.dist-info}/entry_points.txt
File without changes

{dao_ai-0.0.20.dist-info → dao_ai-0.0.21.dist-info}/licenses/LICENSE
File without changes